bahuvrihi-hansen_lab 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2008, Simon Chiang
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
4
+ software and associated documentation files (the "Software"), to deal in the Software
5
+ without restriction, including without limitation the rights to use, copy, modify, merge,
6
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7
+ to whom the Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or
10
+ substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
16
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
File without changes
@@ -0,0 +1,105 @@
1
+ require 'tap/tasks/table_task'
2
+
3
+ module HansenLab
4
+ module Tasks
5
+
6
+ # :startdoc::manifest align table data across multiple files
7
+ # Aligns the table data across multiple files. Takes a list of tab-delimited
8
+ # input files.
9
+ #
10
+ class AlignColumns < Tap::Tasks::TableTask
11
+
12
+ config :sort_column, 0, &c.yaml(String, Integer) # specifies the sort column, index or name
13
+ config :default_value, "", &c.string # a default string value for empty cells
14
+
15
+ def format_row(data)
16
+ data.join(col_sep * 2) + row_sep
17
+ end
18
+
19
+ def process(target, *filepaths)
20
+
21
+ # load the table data
22
+ tables = filepaths.collect do |filepath|
23
+ log_basename :align, filepath
24
+ parse_table(File.read(filepath))
25
+ end
26
+
27
+ # hash rows by the sorting keys
28
+ sorting_keys = []
29
+ key_hashes = tables.collect do |table|
30
+
31
+ keys = case sort_column
32
+ when Integer then table.column(sort_column)
33
+ when String then table.column_by_header(sort_column)
34
+ end
35
+
36
+ hash = {}
37
+ keys.each_with_index do |key, i|
38
+ (hash[key] ||= []) << i
39
+ end
40
+ sorting_keys.concat(keys)
41
+
42
+ hash
43
+ end
44
+
45
+ # sort, omitting empty keys
46
+ sorting_keys = sorting_keys.uniq.sort.delete_if do |key|
47
+ key.strip.empty?
48
+ end
49
+
50
+ # normalize the number of rows for each key, cross table
51
+ sorting_keys.each do |key|
52
+ max = key_hashes.inject(0) do |max, hash|
53
+ row_index = (hash[key] ||= [])
54
+ row_index.length > max ? row_index.length : max
55
+ end
56
+
57
+ key_hashes.each do |hash|
58
+ row_index = hash[key]
59
+ row_index.concat Array.new(max - row_index.length, nil)
60
+ end
61
+ end
62
+
63
+ # prepare the target file and dump the results
64
+ prepare(target)
65
+ File.open(target, "wb") do |file|
66
+ # print file header
67
+ header = []
68
+ filepaths.each_with_index do |filepath, i|
69
+ table = tables[i]
70
+ unless table.n_columns == 0
71
+ array = table.blank_row
72
+ array[0] = "# #{File.basename(filepath)}"
73
+ header << array.join(col_sep)
74
+ end
75
+ end
76
+ file.print format_row(header)
77
+
78
+ # print header if applicable
79
+ if header_row
80
+ row = tables.collect {|table| table.headers.join(col_sep)}
81
+ file.print format_row(row)
82
+ end
83
+
84
+ sorting_keys.each do |key|
85
+ key_rows = []
86
+ tables.each_with_index do |table, index|
87
+ row_index = key_hashes[index][key]
88
+ rows = row_index.collect do |i|
89
+ (i == nil ? table.blank_row : table.data[i]).join(col_sep)
90
+ end
91
+
92
+ key_rows << rows
93
+ end
94
+
95
+ key_rows.transpose.collect do |row|
96
+ file.print format_row(row)
97
+ end
98
+ end
99
+ end
100
+
101
+ target
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,97 @@
1
+ require 'tap/tasks/table_task'
2
+
3
+ module HansenLab
4
+ module Tasks
5
+
6
+ # :startdoc::manifest align mascot peptide ids at a modification
7
+ # Aligns Mascot peptide identifications along a modification boundary.
8
+ #
9
+ # For example:
10
+ # [input.txt]
11
+ # AAAQAAA 0.0004000.0 first
12
+ # QBBBBBQ 0.4000004.0 second
13
+ #
14
+ # Becomes:
15
+ # [input.align]
16
+ # |AAA|Q|AAA|first|
17
+ # |.|Q|BBBBBQ|second (1)|
18
+ # |QBBBBB|Q|.|second (2)|
19
+ #
20
+ # Extra fields can be present in the input file; they will be carried forward
21
+ # to the result file. If a sequence has multiple modifications, each will be
22
+ # listed in the result, as above.
23
+ #
24
+ # Note that while the results don't seem terribly well aligned here, they can
25
+ # be turned into a table by using RedCloth (for example by posting the result
26
+ # as a message to Basecamp), or you can modify the output with the configs.
27
+ #
28
+ class AlignMod < Tap::Tasks::TableTask
29
+
30
+ config :mod_numbers, [1], :arg_name => '[1, 2, 3]', &c.array # the alignment modification number
31
+
32
+ config :output_empty_cell, "", &c.string # the content for empty output cells
33
+ config :output_line_format, "%s", &c.string # the format string for the output lines
34
+ config :output_col_sep, "\t", &c.string # the output column delimiter
35
+ config :default_value, "", &c.string # a default string value for empty cells
36
+
37
+ def format_row(data)
38
+ output_line_format % data.join(output_col_sep)
39
+ end
40
+
41
+ def process(target, source)
42
+
43
+ table = parse_table( File.read(source) )
44
+
45
+ prepare(target)
46
+ File.open(target, "wb") do |file|
47
+
48
+ # handle the header row. Note that the headers need to be
49
+ # moved around a little to conform to the output format.
50
+ if header_row
51
+ file.puts format_row(["", "", ""] + table.headers[2..-1])
52
+ end
53
+
54
+ sequence_locations = {}
55
+ table.data.each do |line|
56
+ seq, locator, *identifiers = line
57
+
58
+ # checks
59
+ unless locator =~ /^\d\.(\d+)\.\d$/ && seq.length == $1.length
60
+ raise "could not split line correctly: #{line}"
61
+ end
62
+
63
+ locator = $1
64
+ locations = sequence_locations[seq] ||= []
65
+ split_locations = []
66
+ 0.upto(locator.length-1) do |index|
67
+ if mod_numbers.include?(locator[index, 1].to_i)
68
+ unless locations.include?(index)
69
+ split_locations << index
70
+ locations << index
71
+ end
72
+ end
73
+ end
74
+
75
+ split_locations.each_with_index do |location, index|
76
+ data = [
77
+ seq[0...location],
78
+ seq[location, 1],
79
+ seq[location+1..-1],
80
+ ]
81
+ data += identifiers if index == 0
82
+
83
+ # modify the lead identifier to note duplicates
84
+ data[3] = "#{identifiers[0]} (#{index + 1})" if split_locations.length > 1
85
+ data.collect! {|str| str.empty? ? output_empty_cell : str }
86
+
87
+ file.puts format_row(data)
88
+ end
89
+
90
+ end
91
+ end
92
+
93
+ target
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,69 @@
1
+ require 'tap/tasks/table_task'
2
+
3
+ module HansenLab
4
+ module Tasks
5
+
6
+ # :startdoc::manifest flag rows in a table
7
+ # Flags rows in a table that have a column matching a specified
8
+ # regexp pattern. You can specify the column to check by name
9
+ # (in which case the first row is assumed to be headers) or by
10
+ # number (starting with column 0). Patterns are matched without
11
+ # regard to case.
12
+ #
13
+ # Examples:
14
+ # # match the 'accession' column with entries like 'mouse'
15
+ # % tap run -- flag_row FILE --column accession --pattern mouse
16
+ #
17
+ # # match the first (ie index == 0) column with entries
18
+ # # like 'a mouse' or 'a human'
19
+ # % tap run -- flag_row FILE --column 0 --pattern "a mouse|a human"
20
+ #
21
+ class FlagRow < Tap::Tasks::TableTask
22
+
23
+ config :check_column, 0, &c.yaml(String, Integer) # the column to check
24
+ config :pattern, /./, &c.regexp # the matching pattern
25
+ config :flag, 1 # the flag value
26
+ config :no_flag, 0 # the not-flagged value
27
+
28
+ def flag_rows(table)
29
+ column_index = case check_column
30
+ when String then table.headers.index(check_column)
31
+ when Integer then check_column
32
+ else nil
33
+ end
34
+
35
+ if column_index == nil || table.n_columns <= column_index
36
+ raise "could not identify column: #{check_column}"
37
+ end
38
+
39
+ # iterate over the rows and add a flag signaling
40
+ # a match to the pattern
41
+ table.data.collect! do |row|
42
+ row << (row[column_index] =~ pattern ? flag : no_flag)
43
+ end
44
+
45
+ table
46
+ end
47
+
48
+ # process defines what the task does; use the
49
+ # same number of inputs to enqueue the task
50
+ # as specified here
51
+ def process(target, source)
52
+
53
+ log_basename :process, source
54
+
55
+ # load the data and split into rows
56
+ table = parse_table(File.read(source))
57
+ flag_rows(table)
58
+
59
+ # prepare the target file and dump the results
60
+ prepare(target)
61
+ File.open(target, "wb") do |file|
62
+ file << table.join(row_sep, col_sep, header_row)
63
+ end
64
+
65
+ target
66
+ end
67
+ end
68
+ end
69
+ end
data/tap.yml ADDED
@@ -0,0 +1 @@
1
+ gems: [data_explorer, xcalibur]
@@ -0,0 +1,5 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '../lib')
2
+
3
+ # runs all subsets (see Tap::Test::SubsetMethods)
4
+ ENV["ALL"] = "true" # ENV values must be Strings; assigning the boolean true raises TypeError
5
+ Dir.glob("./**/*_test.rb").each {|test| require test}
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bahuvrihi-hansen_lab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-08 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 0.10.0
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: sample_tasks
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ~>
30
+ - !ruby/object:Gem::Version
31
+ version: 0.10.0
32
+ version:
33
+ description:
34
+ email: simon.a.chiang@gmail.com
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - MIT-LICENSE
42
+ files:
43
+ - lib/hansen_lab/tasks/align_columns.rb
44
+ - lib/hansen_lab/tasks/align_mod.rb
45
+ - lib/hansen_lab/tasks/flag_row.rb
46
+ - tap.yml
47
+ - README
48
+ - MIT-LICENSE
49
+ has_rdoc: true
50
+ homepage: http://github.com/bahuvrihi/hansen_lab/wikis
51
+ post_install_message:
52
+ rdoc_options: []
53
+
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.2.0
72
+ signing_key:
73
+ specification_version: 2
74
+ summary: Code developed for use in the Hansen Lab
75
+ test_files:
76
+ - test/tap_test_suite.rb