bahuvrihi-hansen_lab 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +19 -0
- data/README +0 -0
- data/lib/hansen_lab/tasks/align_columns.rb +105 -0
- data/lib/hansen_lab/tasks/align_mod.rb +97 -0
- data/lib/hansen_lab/tasks/flag_row.rb +69 -0
- data/tap.yml +1 -0
- data/test/tap_test_suite.rb +5 -0
- metadata +76 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2008, YOUR_NAME_HERE
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
4
|
+
software and associated documentation files (the "Software"), to deal in the Software
|
5
|
+
without restriction, including without limitation the rights to use, copy, modify, merge,
|
6
|
+
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
7
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
8
|
+
|
9
|
+
The above copyright notice and this permission notice shall be included in all copies or
|
10
|
+
substantial portions of the Software.
|
11
|
+
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
13
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
14
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
15
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
16
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
17
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
18
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
19
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
File without changes
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'tap/tasks/table_task'
|
2
|
+
|
3
|
+
module HansenLab
|
4
|
+
module Tasks
|
5
|
+
|
6
|
+
# :startdoc::manifest align table data across multiple files
|
7
|
+
# Aligns the table data across multiple files. Takes a list of tab-delimited
|
8
|
+
# input files.
|
9
|
+
#
|
10
|
+
class AlignColumns < Tap::Tasks::TableTask

  config :sort_column, 0, &c.yaml(String, Integer) # specifies the sort column, index or name
  config :default_value, "", &c.string # a default string value for empty cells

  # Formats one output line. Each element of data is the already-joined
  # cell text of one input table; the elements are separated by a doubled
  # col_sep and the line is terminated with row_sep.
  def format_row(data)
    data.join(col_sep * 2) + row_sep
  end

  # Reads each file in filepaths as a table, lines the rows up by the value
  # in sort_column, and writes the tables side by side to target.
  #
  # target::    path of the file to write
  # filepaths:: paths of the input tables
  #
  # Rows are emitted in sorted key order; rows with a blank key are
  # omitted. When a key occurs fewer times in one table than in another,
  # the shorter table is padded with blank rows so every table contributes
  # the same number of lines for that key.
  #
  # Raises a RuntimeError if the sort column cannot be resolved for a
  # table. Returns target.
  def process(target, *filepaths)

    # load the table data
    tables = filepaths.collect do |filepath|
      log_basename :align, filepath
      parse_table(File.read(filepath))
    end

    # for each table, hash the row indexes by their sorting key
    sorting_keys = []
    key_hashes = tables.collect do |table|

      keys = case sort_column
      when Integer then table.column(sort_column)
      when String then table.column_by_header(sort_column)
      end

      # fail with a clear message rather than a NoMethodError on nil below
      if keys.nil?
        raise "could not identify column: #{sort_column}"
      end

      hash = {}
      keys.each_with_index do |key, i|
        (hash[key] ||= []) << i
      end
      sorting_keys.concat(keys)

      hash
    end

    # omit empty keys, then sort; blanks are removed first (and via to_s)
    # so that a nil key cannot break the sort or the blank check
    sorting_keys = sorting_keys.reject do |key|
      key.to_s.strip.empty?
    end.uniq.sort

    # normalize the number of rows for each key, cross table
    sorting_keys.each do |key|
      row_indexes = key_hashes.collect {|hash| hash[key] ||= [] }
      longest = row_indexes.collect {|row_index| row_index.length }.max

      # a nil entry marks a padding row that is printed as a blank row
      row_indexes.each do |row_index|
        row_index.concat Array.new(longest - row_index.length, nil)
      end
    end

    # prepare the target file and dump the results
    prepare(target)
    File.open(target, "wb") do |file|

      # print file header: a "# basename" label in the first cell of each
      # table; tables reporting zero columns get no label
      header = []
      filepaths.each_with_index do |filepath, i|
        table = tables[i]
        next if table.n_columns == 0

        array = table.blank_row
        array[0] = "# #{File.basename(filepath)}"
        header << array.join(col_sep)
      end
      file.print format_row(header)

      # print header if applicable
      if header_row
        row = tables.collect {|table| table.headers.join(col_sep) }
        file.print format_row(row)
      end

      sorting_keys.each do |key|
        # key_rows[t] holds the joined row strings of table t for this key
        key_rows = []
        tables.each_with_index do |table, index|
          row_index = key_hashes[index][key]
          rows = row_index.collect do |i|
            (i.nil? ? table.blank_row : table.data[i]).join(col_sep)
          end

          key_rows << rows
        end

        # transpose so each output line takes one row from every table
        key_rows.transpose.each do |row|
          file.print format_row(row)
        end
      end
    end

    target
  end
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'tap/tasks/table_task'
|
2
|
+
|
3
|
+
module HansenLab
|
4
|
+
module Tasks
|
5
|
+
|
6
|
+
# :startdoc::manifest align mascot peptide ids at a modification
|
7
|
+
# Aligns Mascot peptide identifications along a modification boundary.
|
8
|
+
#
|
9
|
+
# For example:
|
10
|
+
# [input.txt]
|
11
|
+
# AAAQAAA 0.0004000.0 first
|
12
|
+
# QBBBBBQ 0.4000004.0 second
|
13
|
+
#
|
14
|
+
# Becomes:
|
15
|
+
# [input.align]
|
16
|
+
# |AAA|Q|AAA|first|
|
17
|
+
# |.|Q|BBBBBQ|second (1)|
|
18
|
+
# |QBBBBB|Q|.|second (2)|
|
19
|
+
#
|
20
|
+
# Extra fields can be present in the input file; they will be carried forward
|
21
|
+
# to the result file. If a sequence has multiple modifications, each will be
|
22
|
+
# listed in the result, as above.
|
23
|
+
#
|
24
|
+
# Note that while the results don't seem terribly well aligned here, they can
|
25
|
+
# be turned into a table by using RedCloth (for example by posting the result
|
26
|
+
# as a message to Basecamp), or you can modify the output with the configs.
|
27
|
+
#
|
28
|
+
class AlignMod < Tap::Tasks::TableTask

  config :mod_numbers, [1], :arg_name => '[1, 2, 3]', &c.array # the alignment modification number

  config :output_empty_cell, "", &c.string # the content for empty output cells
  config :output_line_format, "%s", &c.string # the format string for the output lines
  config :output_col_sep, "\t", &c.string # the output column delimiter
  config :default_value, "", &c.string # a default string value for empty cells

  # Joins the cells in data with output_col_sep and substitutes the result
  # into output_line_format.
  def format_row(data)
    output_line_format % data.join(output_col_sep)
  end

  # Reads the table in source and writes one line to target for every
  # position whose locator digit is listed in mod_numbers. Each line splits
  # the sequence into the text before the position, the character at the
  # position, and the text after it.
  #
  # Each input row is read as: sequence, locator, then any further
  # identifier cells. The locator must look like "d.ddd...d.d", with
  # exactly one digit per sequence character between the dots.
  #
  # The identifier cells are appended to the first line written for a row.
  # When a row yields several lines, each line's fourth cell is the lead
  # identifier followed by a running number, e.g. "second (2)". A position
  # already written for the same sequence is not written again.
  #
  # Raises a RuntimeError if a row does not have the expected layout.
  # Returns target.
  def process(target, source)

    log_basename :align, source
    table = parse_table( File.read(source) )

    prepare(target)
    File.open(target, "wb") do |file|

      # handle the header row. Note that the headers need to be
      # moved around a little to conform to the output format.
      # (fewer than two headers yields nil from the slice, hence || [])
      if header_row
        file.puts format_row(["", "", ""] + (table.headers[2..-1] || []))
      end

      # positions already written, keyed by sequence
      sequence_locations = {}
      table.data.each do |line|
        seq, locator, *identifiers = line

        # checks; \A and \z anchor the whole cell, and to_s lets a missing
        # cell fall through to the descriptive error below
        match = /\A\d\.(\d+)\.\d\z/.match(locator.to_s)
        unless match && seq.to_s.length == match[1].length
          raise "could not split line correctly: #{line}"
        end

        # keep only the per-character digits of the locator
        locator = match[1]
        locations = sequence_locations[seq] ||= []
        split_locations = []
        0.upto(locator.length - 1) do |index|
          next unless mod_numbers.include?(locator[index, 1].to_i)
          next if locations.include?(index)

          split_locations << index
          locations << index
        end

        split_locations.each_with_index do |location, index|
          data = [
            seq[0...location],
            seq[location, 1],
            seq[location+1..-1],
          ]
          data += identifiers if index == 0

          # modify the lead identifier to note duplicates
          data[3] = "#{identifiers[0]} (#{index + 1})" if split_locations.length > 1

          # nil cells are treated like empty ones
          data.collect! {|str| str.to_s.empty? ? output_empty_cell : str }

          file.puts format_row(data)
        end
      end
    end

    target
  end
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'tap/tasks/table_task'
|
2
|
+
|
3
|
+
module HansenLab
|
4
|
+
module Tasks
|
5
|
+
|
6
|
+
# :startdoc::manifest flag rows in a table
|
7
|
+
# Flags rows in a table that have a column matching a specified
|
8
|
+
# regexp pattern. You can specify the column to check by name
|
9
|
+
# (in which case the first row is assumed to be headers) or by
|
10
|
+
# number (starting with column 0). Patterns are matched without
|
11
|
+
# regard to case.
|
12
|
+
#
|
13
|
+
# Examples:
|
14
|
+
# # match the 'accession' column with entries like 'mouse'
|
15
|
+
# % tap run -- flag_row FILE --column accession --pattern mouse
|
16
|
+
#
|
17
|
+
# # match the first (ie index == 0) column with entries
|
18
|
+
# # like 'a mouse' or 'a human'
|
19
|
+
# % tap run -- flag_row FILE --column 0 --pattern "a mouse|a human"
|
20
|
+
#
|
21
|
+
class FlagRow < Tap::Tasks::TableTask

  config :check_column, 0, &c.yaml(String, Integer) # the column to check
  config :pattern, /./, &c.regexp # the matching pattern
  config :flag, 1 # the flag value
  config :no_flag, 0 # the not-flagged value

  # Appends a flag cell to every data row of table: flag when the cell in
  # check_column matches pattern, no_flag otherwise.
  #
  # A String check_column is looked up in table.headers; an Integer is
  # used directly as the column index. A negative index counts from the
  # end of the row, as with Array#[].
  #
  # Raises a RuntimeError if the column cannot be found or the index lies
  # outside the table. Returns the same table, modified in place.
  #
  # NOTE(review): pattern is applied exactly as configured; nothing in
  # this method adds case-insensitivity, although the class comment says
  # matching ignores case -- confirm where that is meant to happen.
  def flag_rows(table)
    column_index = case check_column
    when String then table.headers.index(check_column)
    when Integer then check_column
    else nil
    end

    # reject indexes past either end of the row; an index below
    # -n_columns would otherwise read nil and mark every row no_flag
    n_columns = table.n_columns
    if column_index.nil? || column_index >= n_columns || column_index < -n_columns
      raise "could not identify column: #{check_column}"
    end

    # iterate over the rows and add a flag signaling
    # a match to the pattern
    table.data.each do |row|
      row << (row[column_index] =~ pattern ? flag : no_flag)
    end

    table
  end

  # process defines what the task does; use the
  # same number of inputs to enqueue the task
  # as specified here
  #
  # Reads the table in source, flags its rows with flag_rows, and writes
  # the result to target. Returns target.
  def process(target, source)

    log_basename :process, source

    # load the data and split into rows
    table = parse_table(File.read(source))
    flag_rows(table)

    # prepare the target file and dump the results
    prepare(target)
    File.open(target, "wb") do |file|
      file << table.join(row_sep, col_sep, header_row)
    end

    target
  end
end
|
68
|
+
end
|
69
|
+
end
|
data/tap.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
gems: [data_explorer, xcalibur]
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bahuvrihi-hansen_lab
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Simon Chiang
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-07-08 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: tap
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.10.0
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: sample_tasks
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ~>
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 0.10.0
|
32
|
+
version:
|
33
|
+
description:
|
34
|
+
email: simon.a.chiang@gmail.com
|
35
|
+
executables: []
|
36
|
+
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files:
|
40
|
+
- README
|
41
|
+
- MIT-LICENSE
|
42
|
+
files:
|
43
|
+
- lib/hansen_lab/tasks/align_columns.rb
|
44
|
+
- lib/hansen_lab/tasks/align_mod.rb
|
45
|
+
- lib/hansen_lab/tasks/flag_row.rb
|
46
|
+
- tap.yml
|
47
|
+
- README
|
48
|
+
- MIT-LICENSE
|
49
|
+
has_rdoc: true
|
50
|
+
homepage: http://github.com/bahuvrihi/hansen_lab/wikis
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.2.0
|
72
|
+
signing_key:
|
73
|
+
specification_version: 2
|
74
|
+
summary: Code developed for use in the Hansen Lab
|
75
|
+
test_files:
|
76
|
+
- test/tap_test_suite.rb
|