bahuvrihi-hansen_lab 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2008, Simon Chiang
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
4
+ software and associated documentation files (the "Software"), to deal in the Software
5
+ without restriction, including without limitation the rights to use, copy, modify, merge,
6
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7
+ to whom the Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or
10
+ substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
16
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
File without changes
@@ -0,0 +1,105 @@
1
+ require 'tap/tasks/table_task'
2
+
3
+ module HansenLab
4
+ module Tasks
5
+
6
+ # :startdoc::manifest align table data across multiple files
7
+ # Aligns the table data across multiple files. Takes a list of tab-delimited
8
+ # input files.
9
+ #
10
+ class AlignColumns < Tap::Tasks::TableTask
11
+
12
+ config :sort_column, 0, &c.yaml(String, Integer) # specifies the sort column, index or name
13
+ config :default_value, "", &c.string # a default string value for empty cells
14
+
15
+ def format_row(data)
16
+ data.join(col_sep * 2) + row_sep
17
+ end
18
+
19
+ def process(target, *filepaths)
20
+
21
+ # load the table data
22
+ tables = filepaths.collect do |filepath|
23
+ log_basename :align, filepath
24
+ parse_table(File.read(filepath))
25
+ end
26
+
27
+ # hash rows by the sorting keys
28
+ sorting_keys = []
29
+ key_hashes = tables.collect do |table|
30
+
31
+ keys = case sort_column
32
+ when Integer then table.column(sort_column)
33
+ when String then table.column_by_header(sort_column)
34
+ end
35
+
36
+ hash = {}
37
+ keys.each_with_index do |key, i|
38
+ (hash[key] ||= []) << i
39
+ end
40
+ sorting_keys.concat(keys)
41
+
42
+ hash
43
+ end
44
+
45
+ # sort, omitting empty keys
46
+ sorting_keys = sorting_keys.uniq.sort.delete_if do |key|
47
+ key.strip.empty?
48
+ end
49
+
50
+ # normalize the number of rows for each key, cross table
51
+ sorting_keys.each do |key|
52
+ max = key_hashes.inject(0) do |max, hash|
53
+ row_index = (hash[key] ||= [])
54
+ row_index.length > max ? row_index.length : max
55
+ end
56
+
57
+ key_hashes.each do |hash|
58
+ row_index = hash[key]
59
+ row_index.concat Array.new(max - row_index.length, nil)
60
+ end
61
+ end
62
+
63
+ # prepare the target file and dump the results
64
+ prepare(target)
65
+ File.open(target, "wb") do |file|
66
+ # print file header
67
+ header = []
68
+ filepaths.each_with_index do |filepath, i|
69
+ table = tables[i]
70
+ unless table.n_columns == 0
71
+ array = table.blank_row
72
+ array[0] = "# #{File.basename(filepath)}"
73
+ header << array.join(col_sep)
74
+ end
75
+ end
76
+ file.print format_row(header)
77
+
78
+ # print header if applicable
79
+ if header_row
80
+ row = tables.collect {|table| table.headers.join(col_sep)}
81
+ file.print format_row(row)
82
+ end
83
+
84
+ sorting_keys.each do |key|
85
+ key_rows = []
86
+ tables.each_with_index do |table, index|
87
+ row_index = key_hashes[index][key]
88
+ rows = row_index.collect do |i|
89
+ (i == nil ? table.blank_row : table.data[i]).join(col_sep)
90
+ end
91
+
92
+ key_rows << rows
93
+ end
94
+
95
+ key_rows.transpose.collect do |row|
96
+ file.print format_row(row)
97
+ end
98
+ end
99
+ end
100
+
101
+ target
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,97 @@
1
+ require 'tap/tasks/table_task'
2
+
3
+ module HansenLab
4
+ module Tasks
5
+
6
+ # :startdoc::manifest align mascot peptide ids at a modification
7
+ # Aligns Mascot peptide identifications along a modification boundary.
8
+ #
9
+ # For example (with mod_numbers set to [4]):
10
+ # [input.txt]
11
+ # AAAQAAA 0.0004000.0 first
12
+ # QBBBBBQ 0.4000004.0 second
13
+ #
14
+ # Becomes:
15
+ # [input.align]
16
+ # |AAA|Q|AAA|first|
17
+ # |.|Q|BBBBBQ|second (1)|
18
+ # |QBBBBB|Q|.|second (2)|
19
+ #
20
+ # Extra fields can be present in the input file; they will be carried forward
21
+ # to the result file. If a sequence has multiple modifications, each will be
22
+ # listed in the result, as above.
23
+ #
24
+ # Note that while the results don't seem terribly well aligned here, they can
25
+ # be turned into a table by using RedCloth (for example by posting the result
26
+ # as a message to Basecamp), or you can modify the output with the configs.
27
+ #
28
+ class AlignMod < Tap::Tasks::TableTask
29
+
30
+ config :mod_numbers, [1], :arg_name => '[1, 2, 3]', &c.array # the alignment modification number
31
+
32
+ config :output_empty_cell, "", &c.string # the content for empty output cells
33
+ config :output_line_format, "%s", &c.string # the format string for the output lines
34
+ config :output_col_sep, "\t", &c.string # the output column delimiter
35
+ config :default_value, "", &c.string # a default string value for empty cells
36
+
37
+ def format_row(data)
38
+ output_line_format % data.join(output_col_sep)
39
+ end
40
+
41
+ def process(target, source)
42
+
43
+ table = parse_table( File.read(source) )
44
+
45
+ prepare(target)
46
+ File.open(target, "wb") do |file|
47
+
48
+ # handle the header row. Note that the headers need to be
49
+ # moved around a little to conform to the output format.
50
+ if header_row
51
+ file.puts format_row(["", "", ""] + table.headers[2..-1])
52
+ end
53
+
54
+ sequence_locations = {}
55
+ table.data.each do |line|
56
+ seq, locator, *identifiers = line
57
+
58
+ # checks
59
+ unless locator =~ /^\d\.(\d+)\.\d$/ && seq.length == $1.length
60
+ raise "could not split line correctly: #{line}"
61
+ end
62
+
63
+ locator = $1
64
+ locations = sequence_locations[seq] ||= []
65
+ split_locations = []
66
+ 0.upto(locator.length-1) do |index|
67
+ if mod_numbers.include?(locator[index, 1].to_i)
68
+ unless locations.include?(index)
69
+ split_locations << index
70
+ locations << index
71
+ end
72
+ end
73
+ end
74
+
75
+ split_locations.each_with_index do |location, index|
76
+ data = [
77
+ seq[0...location],
78
+ seq[location, 1],
79
+ seq[location+1..-1],
80
+ ]
81
+ data += identifiers if index == 0
82
+
83
+ # modify the lead identifier to note duplicates
84
+ data[3] = "#{identifiers[0]} (#{index + 1})" if split_locations.length > 1
85
+ data.collect! {|str| str.empty? ? output_empty_cell : str }
86
+
87
+ file.puts format_row(data)
88
+ end
89
+
90
+ end
91
+ end
92
+
93
+ target
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,69 @@
1
+ require 'tap/tasks/table_task'
2
+
3
+ module HansenLab
4
+ module Tasks
5
+
6
+ # :startdoc::manifest flag rows in a table
7
+ # Flags rows in a table that have a column matching a specified
8
+ # regexp pattern. You can specify the column to check by name
9
+ # (in which case the first row is assumed to be headers) or by
10
+ # number (starting with column 0). Patterns are matched without
11
+ # regard to case.
12
+ #
13
+ # Examples:
14
+ # # match the 'accession' column with entries like 'mouse'
15
+ # % tap run -- flag_row FILE --column accession --pattern mouse
16
+ #
17
+ # # match the first (ie index == 0) column with entries
18
+ # # like 'a mouse' or 'a human'
19
+ # % tap run -- flag_row FILE --column 0 --pattern "a mouse|a human"
20
+ #
21
+ class FlagRow < Tap::Tasks::TableTask
22
+
23
+ config :check_column, 0, &c.yaml(String, Integer) # the column to check
24
+ config :pattern, /./, &c.regexp # the matching pattern
25
+ config :flag, 1 # the flag value
26
+ config :no_flag, 0 # the not-flagged value
27
+
28
+ def flag_rows(table)
29
+ column_index = case check_column
30
+ when String then table.headers.index(check_column)
31
+ when Integer then check_column
32
+ else nil
33
+ end
34
+
35
+ if column_index == nil || table.n_columns <= column_index
36
+ raise "could not identify column: #{check_column}"
37
+ end
38
+
39
+ # iterate over the rows and add a flag signaling
40
+ # a match to the pattern
41
+ table.data.collect! do |row|
42
+ row << (row[column_index] =~ pattern ? flag : no_flag)
43
+ end
44
+
45
+ table
46
+ end
47
+
48
+ # process defines what the task does; use the
49
+ # same number of inputs to enque the task
50
+ # as specified here
51
+ def process(target, source)
52
+
53
+ log_basename :process, source
54
+
55
+ # load the data and split into rows
56
+ table = parse_table(File.read(source))
57
+ flag_rows(table)
58
+
59
+ # prepare the target file and dump the results
60
+ prepare(target)
61
+ File.open(target, "wb") do |file|
62
+ file << table.join(row_sep, col_sep, header_row)
63
+ end
64
+
65
+ target
66
+ end
67
+ end
68
+ end
69
+ end
data/tap.yml ADDED
@@ -0,0 +1 @@
1
+ gems: [data_explorer, xcalibur]
@@ -0,0 +1,5 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '../lib')
2
+
3
+ # runs all subsets (see Tap::Test::SubsetMethods); ENV only accepts String values
4
+ ENV["ALL"] = "true"
5
+ Dir.glob("./**/*_test.rb").each {|test| require test}
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bahuvrihi-hansen_lab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-08 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 0.10.0
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: sample_tasks
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ~>
30
+ - !ruby/object:Gem::Version
31
+ version: 0.10.0
32
+ version:
33
+ description:
34
+ email: simon.a.chiang@gmail.com
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - MIT-LICENSE
42
+ files:
43
+ - lib/hansen_lab/tasks/align_columns.rb
44
+ - lib/hansen_lab/tasks/align_mod.rb
45
+ - lib/hansen_lab/tasks/flag_row.rb
46
+ - tap.yml
47
+ - README
48
+ - MIT-LICENSE
49
+ has_rdoc: true
50
+ homepage: http://github.com/bahuvrihi/hansen_lab/wikis
51
+ post_install_message:
52
+ rdoc_options: []
53
+
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.2.0
72
+ signing_key:
73
+ specification_version: 2
74
+ summary: Code developed for use in the Hansen Lab
75
+ test_files:
76
+ - test/tap_test_suite.rb