bahuvrihi-hansen_lab 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +19 -0
- data/README +0 -0
- data/lib/hansen_lab/tasks/align_columns.rb +105 -0
- data/lib/hansen_lab/tasks/align_mod.rb +97 -0
- data/lib/hansen_lab/tasks/flag_row.rb +69 -0
- data/tap.yml +1 -0
- data/test/tap_test_suite.rb +5 -0
- metadata +76 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2008, YOUR_NAME_HERE
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
4
|
+
software and associated documentation files (the "Software"), to deal in the Software
|
5
|
+
without restriction, including without limitation the rights to use, copy, modify, merge,
|
6
|
+
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
7
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
8
|
+
|
9
|
+
The above copyright notice and this permission notice shall be included in all copies or
|
10
|
+
substantial portions of the Software.
|
11
|
+
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
13
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
14
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
15
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
16
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
17
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
18
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
19
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
File without changes
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'tap/tasks/table_task'
|
2
|
+
|
3
|
+
module HansenLab
|
4
|
+
module Tasks
|
5
|
+
|
6
|
+
# :startdoc::manifest align table data across multiple files
|
7
|
+
# Aligns the table data across multiple files. Takes a list of tab-delimited
|
8
|
+
# input files.
|
9
|
+
#
|
10
|
+
class AlignColumns < Tap::Tasks::TableTask

  config :sort_column, 0, &c.yaml(String, Integer) # specifies the sort column, index or name
  config :default_value, "", &c.string # a default string value for empty cells

  # Joins the cells of one output line with a doubled column separator
  # and terminates the line with the row separator.
  def format_row(data)
    wide_sep = col_sep * 2
    data.join(wide_sep) + row_sep
  end

  # Reads every file in filepaths as a table, lines the rows up by the
  # value found in the sort column, and writes the tables side by side
  # to target.  Returns target.
  #
  # NOTE(review): log_basename, parse_table, prepare, col_sep, row_sep and
  # header_row are not defined in this class; presumably they are
  # inherited from Tap::Tasks::TableTask -- confirm against that class.
  def process(target, *filepaths)
    # parse each input file, logging its basename as we go
    tables = filepaths.collect do |path|
      log_basename :align, path
      parse_table(File.read(path))
    end

    # for every table, map each sort-column value to the indices of the
    # rows carrying it; pool the values of all tables along the way
    pooled_keys = []
    key_hashes = tables.collect do |table|
      column = case sort_column
               when Integer then table.column(sort_column)
               when String then table.column_by_header(sort_column)
               end

      rows_by_key = {}
      column.each_with_index do |value, row_number|
        rows_by_key[value] ||= []
        rows_by_key[value] << row_number
      end
      pooled_keys.concat(column)

      rows_by_key
    end

    # unique keys in sorted order, minus the blank ones
    sorting_keys = pooled_keys.uniq.sort
    sorting_keys.delete_if { |value| value.strip.empty? }

    # pad each table's index list so that every key spans the same number
    # of rows in all tables; nil marks a filler row
    sorting_keys.each do |value|
      lengths = key_hashes.collect { |rows_by_key| (rows_by_key[value] ||= []).length }
      tallest = lengths.max || 0

      key_hashes.each do |rows_by_key|
        indices = rows_by_key[value]
        (tallest - indices.length).times { indices << nil }
      end
    end

    # prepare the target file and dump the results
    prepare(target)
    File.open(target, "wb") do |file|
      # first line: one "# basename" banner per table that has columns
      banners = []
      filepaths.zip(tables).each do |path, table|
        next if table.n_columns == 0

        cells = table.blank_row
        cells[0] = "# #{File.basename(path)}"
        banners << cells.join(col_sep)
      end
      file.print format_row(banners)

      # second line: the headers of every table, when header_row is set
      if header_row
        file.print format_row(tables.collect { |table| table.headers.join(col_sep) })
      end

      sorting_keys.each do |value|
        # one list of joined rows per table, all of the same length
        columns = tables.zip(key_hashes).collect do |table, rows_by_key|
          rows_by_key[value].collect do |row_number|
            cells = row_number.nil? ? table.blank_row : table.data[row_number]
            cells.join(col_sep)
          end
        end

        # emit the lists side by side, one output line per row
        columns.transpose.each { |line| file.print format_row(line) }
      end
    end

    target
  end
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'tap/tasks/table_task'
|
2
|
+
|
3
|
+
module HansenLab
|
4
|
+
module Tasks
|
5
|
+
|
6
|
+
# :startdoc::manifest align mascot peptide ids at a modification
|
7
|
+
# Aligns Mascot peptide identifications along a modification boundary.
|
8
|
+
#
|
9
|
+
# For example:
|
10
|
+
# [input.txt]
|
11
|
+
# AAAQAAA 0.0004000.0 first
|
12
|
+
# QBBBBBQ 0.4000004.0 second
|
13
|
+
#
|
14
|
+
# Becomes:
|
15
|
+
# [input.align]
|
16
|
+
# |AAA|Q|AAA|first|
|
17
|
+
# |.|Q|BBBBBQ|second (1)|
|
18
|
+
# |QBBBBB|Q|.|second (2)|
|
19
|
+
#
|
20
|
+
# Extra fields can be present in the input file, they will be carried forward
|
21
|
+
# to the result file. If a sequence has multiple modifications, each will be
|
22
|
+
# listed in the result, as above.
|
23
|
+
#
|
24
|
+
# Note that while the results don't seem terribly well aligned here, they can
|
25
|
+
# be turned into a table by using RedCloth (for example by posting the result
|
26
|
+
# as a message to Basecamp), or you can modify the output with the configs.
|
27
|
+
#
|
28
|
+
class AlignMod < Tap::Tasks::TableTask

  config :mod_numbers, [1], :arg_name => '[1, 2, 3]', &c.array # the alignment modification number

  config :output_empty_cell, "", &c.string # the content for empty output cells
  config :output_line_format, "%s", &c.string # the format string for the output lines
  config :output_col_sep, "\t", &c.string # the output column delimiter
  config :default_value, "", &c.string # a default string value for empty cells

  # Joins the cells with output_col_sep and substitutes the joined text
  # into output_line_format.
  def format_row(data)
    joined = data.join(output_col_sep)
    output_line_format % joined
  end

  # Parses source as a table whose rows are (sequence, locator,
  # *identifiers) and writes one line to target for each new position
  # whose locator digit is listed in mod_numbers.  Returns target.
  #
  # Raises a RuntimeError when a locator does not have the form
  # digit.digits.digit, or when its middle part is not exactly as long
  # as the sequence.
  #
  # NOTE(review): parse_table, prepare and header_row are not defined in
  # this class; presumably they are inherited from Tap::Tasks::TableTask
  # -- confirm against that class.
  def process(target, source)
    table = parse_table(File.read(source))

    prepare(target)
    File.open(target, "wb") do |file|
      # The sequence is written as three cells, so the header line gets
      # three blank cells followed by every header after the first two.
      if header_row
        leading = ["", "", ""]
        file.puts format_row(leading + table.headers[2..-1])
      end

      # positions already written, remembered per sequence
      seen = {}
      table.data.each do |line|
        seq, locator, *identifiers = line

        # the middle part of the locator must carry one digit per
        # character of the sequence
        well_formed = (locator =~ /^\d\.(\d+)\.\d$/) && seq.length == $1.length
        raise "could not split line correctly: #{line}" unless well_formed

        marks = $1
        known = (seen[seq] ||= [])
        fresh = []
        marks.length.times do |position|
          next unless mod_numbers.include?(marks[position, 1].to_i)
          next if known.include?(position)

          fresh << position
          known << position
        end

        fresh.each_with_index do |position, nth|
          # text before the position, the character at it, text after it
          cells = [seq[0...position], seq[position, 1], seq[(position + 1)..-1]]

          # only the first line of a row carries all the identifiers
          cells.concat(identifiers) if nth == 0

          # several positions: number the lead identifier on each line
          cells[3] = "#{identifiers[0]} (#{nth + 1})" if fresh.length > 1
          cells = cells.collect { |text| text.empty? ? output_empty_cell : text }

          file.puts format_row(cells)
        end
      end
    end

    target
  end
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'tap/tasks/table_task'
|
2
|
+
|
3
|
+
module HansenLab
|
4
|
+
module Tasks
|
5
|
+
|
6
|
+
# :startdoc::manifest flag rows in a table
|
7
|
+
# Flags rows in a table that have a column matching a specified
|
8
|
+
# regexp pattern. You can specify the column to check by name
|
9
|
+
# (in which case the first row is assumed to be headers) or by
|
10
|
+
# number (starting with column 0). Patterns are matched without
|
11
|
+
# regard to case.
|
12
|
+
#
|
13
|
+
# Examples:
|
14
|
+
# # match the 'accession' column with entries like 'mouse'
|
15
|
+
# % tap run -- flag_row FILE --column accession --pattern mouse
|
16
|
+
#
|
17
|
+
# # match the first (ie index == 0) column with entries
|
18
|
+
# # like 'a mouse' or 'a human'
|
19
|
+
# % tap run -- flag_row FILE --column 0 --pattern "a mouse|a human"
|
20
|
+
#
|
21
|
+
class FlagRow < Tap::Tasks::TableTask

  config :check_column, 0, &c.yaml(String, Integer) # the column to check
  config :pattern, /./, &c.regexp # the matching pattern
  config :flag, 1 # the flag value
  config :no_flag, 0 # the not-flagged value

  # Appends a flag cell to every data row of table: flag when the cell in
  # the checked column matches pattern, no_flag otherwise.  The rows are
  # modified in place and table is returned.
  #
  # Raises a RuntimeError when check_column cannot be resolved to a column
  # index below table.n_columns.
  def flag_rows(table)
    # a String names a header, an Integer is used as the index directly
    column_index =
      if check_column.kind_of?(String)
        table.headers.index(check_column)
      elsif check_column.kind_of?(Integer)
        check_column
      end

    if column_index.nil? || column_index >= table.n_columns
      raise "could not identify column: #{check_column}"
    end

    # the match is evaluated before the flag cell is appended to the row
    table.data.collect! do |cells|
      marker = (cells[column_index] =~ pattern) ? flag : no_flag
      cells << marker
    end

    table
  end

  # process defines what the task does; use the same number of inputs
  # to enqueue the task as specified here.
  #
  # Reads source, flags its rows, and writes the joined table to target.
  # Returns target.
  #
  # NOTE(review): log_basename, parse_table, prepare, row_sep, col_sep and
  # header_row are not defined in this class; presumably they are
  # inherited from Tap::Tasks::TableTask -- confirm against that class.
  def process(target, source)
    log_basename :process, source

    # load the data and flag the rows
    flagged = flag_rows(parse_table(File.read(source)))

    # prepare the target file and dump the results
    prepare(target)
    File.open(target, "wb") do |file|
      file.write(flagged.join(row_sep, col_sep, header_row))
    end

    target
  end
end
|
68
|
+
end
|
69
|
+
end
|
data/tap.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
gems: [data_explorer, xcalibur]
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bahuvrihi-hansen_lab
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Simon Chiang
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-07-08 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: tap
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.10.0
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: sample_tasks
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ~>
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 0.10.0
|
32
|
+
version:
|
33
|
+
description:
|
34
|
+
email: simon.a.chiang@gmail.com
|
35
|
+
executables: []
|
36
|
+
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files:
|
40
|
+
- README
|
41
|
+
- MIT-LICENSE
|
42
|
+
files:
|
43
|
+
- lib/hansen_lab/tasks/align_columns.rb
|
44
|
+
- lib/hansen_lab/tasks/align_mod.rb
|
45
|
+
- lib/hansen_lab/tasks/flag_row.rb
|
46
|
+
- tap.yml
|
47
|
+
- README
|
48
|
+
- MIT-LICENSE
|
49
|
+
has_rdoc: true
|
50
|
+
homepage: http://github.com/bahuvrihi/hansen_lab/wikis
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.2.0
|
72
|
+
signing_key:
|
73
|
+
specification_version: 2
|
74
|
+
summary: Code developed for use in the Hansen Lab
|
75
|
+
test_files:
|
76
|
+
- test/tap_test_suite.rb
|