genfrag 0.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. data/.bnsignore +16 -0
  2. data/History.txt +4 -0
  3. data/LICENSE.txt +58 -0
  4. data/README.rdoc +40 -0
  5. data/Rakefile +53 -0
  6. data/bin/genfrag +8 -0
  7. data/lib/genfrag.rb +129 -0
  8. data/lib/genfrag/app.rb +105 -0
  9. data/lib/genfrag/app/command.rb +145 -0
  10. data/lib/genfrag/app/index_command.rb +227 -0
  11. data/lib/genfrag/app/index_command/db.rb +105 -0
  12. data/lib/genfrag/app/search_command.rb +298 -0
  13. data/lib/genfrag/app/search_command/match.rb +165 -0
  14. data/lib/genfrag/app/search_command/process_file.rb +125 -0
  15. data/lib/genfrag/app/search_command/trim.rb +121 -0
  16. data/lib/genfrag/debug.rb +0 -0
  17. data/spec/data/index_command/in/a.fasta +109 -0
  18. data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
  19. data/spec/data/index_command/out/2-a_lookup.db +0 -0
  20. data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
  21. data/spec/data/index_command/out/4-a_lookup.db +0 -0
  22. data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
  23. data/spec/data/index_command/out/6-a_lookup.db +0 -0
  24. data/spec/data/index_command/out/a.fasta.db +0 -0
  25. data/spec/data/index_command/out/a.fasta.tdf +6 -0
  26. data/spec/genfrag/app/command_spec.rb +55 -0
  27. data/spec/genfrag/app/index_command_spec.rb +258 -0
  28. data/spec/genfrag/app/search_command/match_spec.rb +77 -0
  29. data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
  30. data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
  31. data/spec/genfrag/app/search_command_spec.rb +260 -0
  32. data/spec/genfrag/app_spec.rb +77 -0
  33. data/spec/genfrag_spec.rb +87 -0
  34. data/spec/spec_helper.rb +56 -0
  35. data/tasks/ann.rake +80 -0
  36. data/tasks/bones.rake +20 -0
  37. data/tasks/gem.rake +201 -0
  38. data/tasks/git.rake +40 -0
  39. data/tasks/notes.rake +27 -0
  40. data/tasks/post_load.rake +34 -0
  41. data/tasks/rdoc.rake +50 -0
  42. data/tasks/rubyforge.rake +55 -0
  43. data/tasks/setup.rb +300 -0
  44. data/tasks/spec.rake +54 -0
  45. data/tasks/svn.rake +47 -0
  46. data/tasks/test.rake +40 -0
  47. metadata +136 -0
@@ -0,0 +1,227 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class IndexCommand < Command
6
+
7
+ attr_reader :sizes
8
+
9
# Run the index command from the command line.
#
# +args+ is handed to #parse first; whatever is left in +args+ afterwards is
# used as the list of input FASTA filenames. The options are then validated
# and #run is invoked with cli=true, wrapped in Genfrag.tracktime when the
# :tracktime option is set.
#
# args:: Array of command-line words for this sub-command
#
# Returns whatever #run (or Genfrag.tracktime) returns.
def cli_run( args )
  parse args

  # Take the filenames from the array we were given instead of the
  # process-global ARGV: when the caller passes ARGV itself this is the same
  # object, and when it passes any other array (specs, embedding code) ARGV
  # would hold unrelated words.
  @input_filenames = args

  validate_options(options)

  if options[:tracktime]
    Genfrag.tracktime { run(options, @input_filenames, true) }
  else
    run(options, @input_filenames, true)
  end
end
27
+
28
# Main routine for creating the index - accepts multiple input files. Either
# an SQLite database or a flat file index (extension .tdf) is written through
# IndexCommand::DB. This output is used by the Search routine later.
#
# ops::             OpenStruct (duplicated) or Hash (wrapped in an OpenStruct)
#                   of options; anything else raises ArgumentError
# input_filenames:: Array of FASTA filenames, joined onto ops.indir; when
#                   empty, [ops.filefasta] is used instead
# cli::             handed to cli_p together with every message
#
# For every distinct sequence the right-most RE5 site is located, the text
# from that site onwards is cut at the first RE3 site, and the resulting
# fragment is recorded in @sizes as
#   { fragment_size => [[position_of_re5_site, normalized_fasta_id], ...] }
#
# Returns nil.
def run(ops=@ops, input_filenames=[], cli=false)
  case ops
  when OpenStruct then @ops = ops.dup
  when Hash       then @ops = OpenStruct.new(ops)
  else raise ArgumentError, 'ops must be an OpenStruct or a Hash'
  end

  # Set defaults
  @ops.verbose    ||= false
  @ops.quiet      ||= false
  @ops.sqlite     ||= false
  @ops.filelookup ||= nil
  @ops.filefasta  ||= nil
  @ops.re5        ||= nil
  @ops.re3        ||= nil
  @ops.indir      ||= '.'
  @ops.outdir     ||= '.'

  @input_filenames = input_filenames.empty? ? [@ops.filefasta] : input_filenames
  @sizes = {}
  db = IndexCommand::DB.new(@ops, @input_filenames)
  @re5_ds, @re3_ds = [@ops.re5, @ops.re3].map {|x| Bio::RestrictionEnzyme::DoubleStranded.new(x)}
  db.write_headers

  # From here on the output handles are open; make sure they are released
  # even when reading or matching raises.
  begin
    if @ops.verbose
      # The heredoc text is flush-left on purpose: <<- keeps leading whitespace.
      cli_p(cli, <<-END
RE5: #{@ops.re5}
#{@re5_ds.aligned_strands_with_cuts.primary}
#{@re5_ds.aligned_strands_with_cuts.complement}

RE3: #{@ops.re3}
#{@re3_ds.aligned_strands_with_cuts.primary}
#{@re3_ds.aligned_strands_with_cuts.complement}
      END
      )
    end

    # unit test with aasi, aari, and ppii
    re5_regexp, re3_regexp = [@re5_ds, @re3_ds].map {|ds| Bio::Sequence::NA.new( ds.aligned_strands.primary ).to_re }

    # Account for exact duplicate sequences: one entry per distinct
    # (downcased) sequence, carrying every definition line that used it.
    entries = {}
    @input_filenames.each do |input_filename|
      Bio::FlatFile.auto(File.join(@ops.indir, input_filename)).each_entry do |e|
        # Tabs are stripped because the .tdf output is tab-delimited.
        definition = e.definition.delete("\t")
        (entries[e.seq.to_s.downcase] ||= []) << definition
      end
    end

    a_re = /(.*)(#{re5_regexp})/   # greedy prefix => right-most RE5 site
    b_re = /(.*?)(#{re3_regexp})/  # lazy prefix   => first RE3 site

    normalized_fasta_id = 0
    entries.each do |seq, definitions|
      normalized_fasta_id += 1
      db.write_entry_to_fasta(normalized_fasta_id, seq, definitions)

      # NOTE the index command is slow because of the match functions, compare with ruby 1.9
      m1 = a_re.match(seq)
      next unless m1

      # 'frag1' runs from the right-most RE5 site to the end of the sequence
      frag1 = m1[2] + m1.post_match
      position = m1[1].size

      # Now cut frag1 with re3_regexp resulting in frag2
      m2 = b_re.match(frag1)
      next unless m2

      frag2 = m2[1] + m2[2]
      if @ops.verbose
        cli_p(cli, <<-END
---
#{definitions.join("\n")}
#{frag2}
        END
        )
      end
      (@sizes[frag2.size] ||= []) << [position, normalized_fasta_id]
    end

    i = 0
    @sizes.each do |size, info|
      i += 1
      db.write_entry_to_freq(i, size, info.map {|x| x.join(' ')}.join(', ') )
    end

    if @ops.verbose
      # A plain block parameter: instance variables are not legal block
      # parameters on Ruby 1.9 and later.
      @sizes.each { |entry| cli_p(cli, entry.inspect) }
    else
      cli_p(cli, "Cut sites found: #{@sizes.values.flatten.size / 2}")
    end
  ensure
    db.close
  end

  nil
end
138
+
139
+
140
+ ############
141
+ # Command-line
142
+ ############
143
+
144
+
145
# Build the OptionParser for `genfrag index`.
#
# The switches themselves come from #standard_options (looked up by key);
# this method only adds the banner, the descriptive text, the --help handler
# and the usage examples. --help prints the parser to @out and exits with
# status 1.
#
# Returns the configured OptionParser.
def opt_parser
  parser = OptionParser.new
  parser.banner = 'Usage: genfrag index [options]'

  description = [
    '',
    " Create a database of sequence fragments that match the last 5' fragment",
    " cut by two restricting enzymes RE3 and RE5.",
    " The Fasta file defined by the --fasta option is taken as input.",
    " Two files are created for the search function - a lookup file, and",
    " the contents of the Fasta file rewritten in a special format. You can",
    " specify the name of the lookup file with the --lookup option.",
    ''
  ]
  description.each { |text| parser.separator text }

  available = standard_options
  switches = [:verbose, :quiet, :tracktime, :indir, :outdir, :sqlite, :re5, :re3,
              :filelookup, :filefasta]
  switches.each { |key| parser.on(*available[key]) }

  parser.separator ''
  parser.separator ' Common Options:'
  parser.on( '-h', '--help', 'show this message' ) do
    @out.puts parser
    exit 1
  end

  examples = [
    ' Examples:',
    ' genfrag index -f example.fasta --re5 BstYI --re3 MseI',
    ' genfrag index --out /tmp --in . -f example.fasta --re5 BstYI --re3 MseI'
  ]
  examples.each { |text| parser.separator text }

  parser
end
176
+
177
# Parse the command-line words for `genfrag index`.
#
# With no words at all the usage text is written to @out and the process
# exits with status 1. Otherwise the words are parsed destructively
# (OptionParser#parse!), so recognised switches are removed from +args+.
#
# Returns the result of OptionParser#parse!.
def parse( args )
  parser = opt_parser

  # parse the command line arguments
  return parser.parse!(args) unless args.empty?

  @out.puts parser
  exit 1
end
190
+
191
# Validate options passed from the command-line.
#
# Checks, in this order: that :filefasta, :re5 and :re3 are present, then
# that :re3 and :re5 can each be turned into a
# Bio::RestrictionEnzyme::DoubleStranded. The first failing check reports
# its message through clierr_p and exits with status 1.
#
# o:: options container indexed by Symbol
def validate_options(o)
  required = [
    [:filefasta, "missing option: must supply fasta filename"],
    [:re5,       "missing option: re5"],
    [:re3,       "missing option: re3"]
  ]
  required.each do |key, complaint|
    next unless o[key].nil?
    clierr_p complaint
    exit 1
  end

  # re3 is tried before re5, matching the order of the reported errors.
  enzymes = [:re3, :re5].map do |key|
    begin
      Bio::RestrictionEnzyme::DoubleStranded.new(o[key])
    rescue
      clierr_p "#{key} is not an enzyme name"
      exit 1
    end
  end
  enzymes.last
end
222
+
223
+ end # class IndexCommand
224
+ end # class App
225
+ end # module Genfrag
226
+
227
+ # EOF
@@ -0,0 +1,105 @@
1
+ module Genfrag
2
+ class App
3
+
4
+ class IndexCommand < Command
5
+
6
# Writer for the two index artefacts: the normalized FASTA table and the
# fragment-size lookup table. Output goes either to tab-delimited .tdf files
# or to SQLite .db files, selected by ops.sqlite. Every public operation
# dispatches to its "_csv" or "_sqlite" variant.
class DB
  attr_accessor :ops              # an OpenStruct of the options
  attr_accessor :input_filenames  # filenames used to derive the output names
  attr_accessor :normalized_fasta # output handle, set by write_headers
  attr_accessor :freq_lookup      # output handle, set by write_headers

  # ops::             options; sqlite, outdir, filefasta, filelookup, re5
  #                   and re3 are read by this class
  # input_filenames:: passed on to the Genfrag naming helpers
  def initialize( ops, input_filenames )
    @normalized_fasta = nil
    @freq_lookup = nil
    @ops = ops
    @input_filenames = input_filenames
  end

  # Suffix of the backend-specific methods: 'sqlite' or 'csv'.
  def sc
    @ops.sqlite ? 'sqlite' : 'csv'
  end

  # Open both outputs and write their headers / create their tables.
  def write_headers
    send("write_headers_#{sc}")
  end

  # Open the two SQLite databases and (re)create their tables.
  def write_headers_sqlite
    @normalized_fasta = SQLite3::Database.new( File.join(@ops.outdir, Genfrag.name_normalized_fasta(@input_filenames,@ops.filefasta) + '.db') )
    sql = <<-SQL
      drop table if exists db_normalized_fasta;
      create table db_normalized_fasta (
        id integer,
        definitions text,
        sequence text
      );
      create unique index db_normalized_fasta_idx on db_normalized_fasta(id);
    SQL
    @normalized_fasta.execute_batch( sql )
    @freq_lookup = SQLite3::Database.new( File.join(@ops.outdir, Genfrag.name_freq_lookup(@input_filenames,@ops.filefasta,@ops.filelookup,@ops.re5,@ops.re3) + '.db') )
    sql = <<-SQL
      drop table if exists db_freq_lookup;
      create table db_freq_lookup (
        id integer,
        size integer,
        positions text
      );
      create unique index db_freq_lookup_idx on db_freq_lookup(id);
    SQL
    @freq_lookup.execute_batch( sql )
  end

  # Open the two .tdf files for writing and emit their header rows.
  def write_headers_csv
    @normalized_fasta = File.new(File.join(@ops.outdir,Genfrag.name_normalized_fasta(@input_filenames,@ops.filefasta) + '.tdf'), 'w')
    @normalized_fasta.puts %w(id Definitions Sequence).join("\t")
    @freq_lookup = File.new( File.join(@ops.outdir,Genfrag.name_freq_lookup(@input_filenames,@ops.filefasta,@ops.filelookup,@ops.re5,@ops.re3) + '.tdf'), 'w')
    @freq_lookup.puts %w(id Size Positions).join("\t")
  end

  # Record one distinct sequence together with all of its definition lines.
  def write_entry_to_fasta(normalized_fasta_id, seq, definitions)
    send("write_entry_to_fasta_#{sc}", normalized_fasta_id, seq, definitions)
  end

  def write_entry_to_fasta_sqlite(normalized_fasta_id, seq, definitions)
    @normalized_fasta.execute( "insert into db_normalized_fasta values ( ?, ?, ? )", normalized_fasta_id, definitions_field(definitions), seq )
  end

  def write_entry_to_fasta_csv(normalized_fasta_id, seq, definitions)
    @normalized_fasta.puts [normalized_fasta_id, definitions_field(definitions), seq].join("\t")
  end

  # Record one fragment size with its pre-formatted position list.
  def write_entry_to_freq(i, size, str)
    send("write_entry_to_freq_#{sc}", i, size, str)
  end

  def write_entry_to_freq_sqlite(i, size, str)
    @freq_lookup.execute( "insert into db_freq_lookup values ( ?, ?, ? )", i, size, str )
  end

  def write_entry_to_freq_csv(i, size, str)
    @freq_lookup.puts [i,size,str].join("\t")
  end

  # Release both output handles. Returns nil.
  def close
    send("close_#{sc}")
  end

  # Close both SQLite databases so the handles are not left open until
  # garbage collection.
  def close_sqlite
    close_handles
  end

  # Close both .tdf files, which also flushes buffered rows to disk.
  def close_csv
    close_handles
  end

  private

  # The definitions encoded as a single CSV field list. CSV.generate_line
  # appends a row separator on Ruby 1.9+; it is removed so the value stays
  # on one line inside the tab-delimited record.
  def definitions_field(definitions)
    CSV.generate_line(definitions).chomp
  end

  # Close whichever handles have been opened; tolerate write_headers never
  # having run (handles still nil).
  def close_handles
    [@normalized_fasta, @freq_lookup].each { |handle| handle.close if handle }
    nil
  end
end
100
+
101
+ end # class IndexCommand
102
+ end # class App
103
+ end # module Genfrag
104
+
105
+ # EOF
@@ -0,0 +1,298 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
# Run the search command from the command line.
#
# +args+ is handed to #parse; what remains in it afterwards is taken as the
# input filenames that, together with the options, determine the names of
# the index files produced by `genfrag index`. Those files (SQLite .db or
# tab-delimited .tdf, depending on options[:sqlite]) and the optional
# adapters file are loaded through SearchCommand::ProcessFile and passed to
# #run with cli=true.
#
# NOTE(review): the index files are opened relative to the current
# directory; options[:indir] / options[:outdir] are not consulted here -
# confirm that this is intended.
#
# args:: Array of command-line words for this sub-command
#
# Returns whatever #run returns.
def cli_run( args )
  parse args

  # Use the array we were given rather than the process-global ARGV; the two
  # are the same object only when the caller happened to pass ARGV itself.
  @input_filenames = args
  input_filenames = [@input_filenames].flatten
  processed_adapters = nil

  validate_options(options)

  fasta_name  = Genfrag.name_normalized_fasta(input_filenames, options[:filefasta])
  lookup_name = Genfrag.name_freq_lookup(input_filenames, options[:filefasta], options[:filelookup], options[:re5], options[:re3])

  if options[:sqlite]
    processed_fasta_file  = SearchCommand::ProcessFile.process_db_fasta_file( SQLite3::Database.new( fasta_name + '.db' ) )
    processed_freq_lookup = SearchCommand::ProcessFile.process_db_freq_lookup( SQLite3::Database.new( lookup_name + '.db' ) )
  else
    processed_fasta_file  = SearchCommand::ProcessFile.process_tdf_fasta_file( IO.readlines( fasta_name + '.tdf' ) )
    processed_freq_lookup = SearchCommand::ProcessFile.process_tdf_freq_lookup( IO.readlines( lookup_name + '.tdf' ) )
  end

  if options[:fileadapters]
    processed_adapters = SearchCommand::ProcessFile.process_tdf_adapters( IO.readlines( Genfrag.name_adapters(options[:fileadapters]) + '.tdf' ), options[:named_adapter5], options[:named_adapter3] )
  end

  run(options, processed_fasta_file, processed_freq_lookup, processed_adapters, true)
end
31
+
32
# Build the OptionParser for `genfrag search`.
#
# The switches come from #standard_options (looked up by key); this method
# adds the banner, the descriptive text, the --help handler and the usage
# examples. --help prints the parser to @out and exits.
#
# Returns the configured OptionParser.
def opt_parser
  parser = OptionParser.new
  parser.banner = 'Usage: genfrag search [options]'

  [
    '',
    " Search a database of sequence fragments that match the last 5'",
    " fragment cut by two restricting enzymes RE3 and RE5, as created by the",
    " index function. Next, adapters are applied to search a subset of",
    " fragments, as is used in some protocols.",
    ''
  ].each { |text| parser.separator text }

  available = standard_options
  switches = [:verbose, :quiet, :tracktime, :indir, :outdir, :sqlite, :re5, :re3,
              :filelookup, :filefasta, :fileadapters, :adapter5_sequence, :adapter3_sequence,
              :adapter5_size, :adapter3_size, :named_adapter5, :named_adapter3,
              :adapter5, :adapter3]
  switches.each { |key| parser.on(*available[key]) }

  parser.separator ''
  parser.separator ' Common Options:'
  parser.on( '-h', '--help', 'show this message' ) do
    @out.puts parser
    exit
  end

  [
    ' Examples:',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 tt',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --add 26 --adapter5 ct --adapter3 aa --size 190,215',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-size 11 --adapter5 tt --adapter3-size 15 --size 168',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-sequence GACTGCGTAGTGATC --adapter5 tt --adapter3-size 15 --size 168',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-size 11 --adapter5 ct --adapter3-size 15 --adapter3 aa --size 190,215',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --add 26 --named-adapter5 BstYI-T4 --named-adapter3 MseI-21 --size 190,215'
  ].each { |text| parser.separator text }

  parser
end
65
+
66
# Parse the command-line words for `genfrag search`.
#
# An empty +args+ prints the usage text to @out and exits with status 1;
# otherwise the words are parsed destructively with OptionParser#parse!,
# which strips the recognised switches out of +args+.
#
# Returns the result of OptionParser#parse!.
def parse( args )
  parser = opt_parser

  if args.empty?
    @out.puts parser
    exit 1
  else
    # parse the command line arguments
    parser.parse!(args)
  end
end
78
+
79
# Validate the options for `genfrag search`.
#
# :filefasta, :re5 and :re3 must be present (checked in that order), and
# :re3 followed by :re5 must each be accepted by
# Bio::RestrictionEnzyme::DoubleStranded.new. The first problem found is
# reported through clierr_p and the process exits with status 1.
#
# o:: options container indexed by Symbol
def validate_options(o)
  # Report +message+ and terminate with a failure status.
  abort_with = lambda do |message|
    clierr_p message
    exit 1
  end

  abort_with.call("missing option: must supply fasta filename") if o[:filefasta].nil?
  abort_with.call("missing option: re5") if o[:re5].nil?
  abort_with.call("missing option: re3") if o[:re3].nil?

  # Constructing the enzyme is the validity check; any StandardError means
  # the name was not recognised.
  check_enzyme = lambda do |key|
    begin
      Bio::RestrictionEnzyme::DoubleStranded.new(o[key])
    rescue
      abort_with.call("#{key} is not an enzyme name")
    end
  end

  check_enzyme.call(:re3)
  check_enzyme.call(:re5)
end
109
+
110
# Search the index for fragments and apply the requested adapters.
#
# ops::                   OpenStruct (duplicated) or Hash (wrapped) of
#                         options; anything else raises ArgumentError
# processed_fasta_file::  indexed by fasta id; each element is read with
#                         [:sequence]
# processed_freq_lookup:: stored in @sizes and handed to
#                         find_matching_fragments
# processed_adapters::    adapter definitions for adapter_setup_1, or nil
#                         to take the adapters from +ops+ (adapter_setup_2)
# cli::                   handed to cli_p together with every message
#
# Raises ArgumentError when an adapter is given both directly and by name,
# when a named adapter is requested without +processed_adapters+, or when an
# explicit adapter size disagrees with the size derived from its sequence.
#
# Returns an Array of Hashes, one per matching fragment, with the keys
# :raw_frag, :primary_frag, :primary_frag_with_adapters, :complement_frag,
# :complement_frag_with_adapters, :entry and :seq.
def run(ops=OpenStruct.new, processed_fasta_file=nil, processed_freq_lookup=nil, processed_adapters=nil, cli=false)
  @ops = case ops
         when OpenStruct then ops.dup
         when Hash       then OpenStruct.new(ops)
         else raise ArgumentError
         end

  # Set defaults
  @ops.verbose       ||= false
  @ops.quiet         ||= false
  @ops.sqlite        ||= false
  @ops.re5           ||= nil
  @ops.re3           ||= nil
  @ops.size          ||= [0]
  @ops.adapter5_size ||= nil
  @ops.adapter3_size ||= nil
  @ops.adapter5      ||= nil
  @ops.adapter3      ||= nil

  @sizes = processed_freq_lookup
  @sequences = processed_fasta_file
  @adapters = {}
  @re5_ds, @re3_ds = [@ops.re5, @ops.re3].map { |name| Bio::RestrictionEnzyme::DoubleStranded.new(name) }

  if @ops.verbose
    # The heredoc text is flush-left on purpose: <<- keeps leading whitespace.
    cli_p(cli, <<-END
RE5: #{@ops.re5}
#{@re5_ds.aligned_strands_with_cuts.primary}
#{@re5_ds.aligned_strands_with_cuts.complement}

RE3: #{@ops.re3}
#{@re3_ds.aligned_strands_with_cuts.primary}
#{@re3_ds.aligned_strands_with_cuts.complement}

adapter5: #{@ops.adapter5}
adapter3: #{@ops.adapter3}
    END
    )
  end

  # An adapter may be given directly or by name, never both.
  raise ArgumentError, "Cannot have both 'adapter5' and 'named_adapter5'" if @ops.named_adapter5 && @ops.adapter5
  raise ArgumentError, "Cannot have both 'adapter3' and 'named_adapter3'" if @ops.named_adapter3 && @ops.adapter3

  if (@ops.named_adapter5 || @ops.named_adapter3) && !processed_adapters
    raise ArgumentError, "Must specify --fileadapters when using a named_adapter"
  end

  if processed_adapters
    adapter_setup_1(processed_adapters)
  else
    adapter_setup_2
  end

  # translated adapter 3' if given in reverse orientation - e.g. _tt is
  # translated to aa (reversed) and _tct returns the primary strand
  # ending in specific 'tct'
  if @adapters[:adapter3_specificity] =~ /^_/
    reversed = Bio::Sequence::NA.new(@adapters[:adapter3_specificity][1..-1]).downcase
    @adapters[:adapter3_specificity] = reversed.complement.to_s
  end

  # An explicit size is only tolerated next to a sequence when both agree.
  if @ops.adapter5_size && @ops.adapter5_sequence && (@ops.adapter5_size != @adapters[:adapter5_size])
    raise ArgumentError, "--adapter5-sequence and --adapter5-size both supplied"
  end
  if @ops.adapter3_size && @ops.adapter3_sequence && (@ops.adapter3_size != @adapters[:adapter3_size])
    raise ArgumentError, "--adapter3-sequence and --adapter3-size both supplied"
  end

  @trim = calculate_trim_for_nucleotides(@re5_ds, @re3_ds)

  # ------
  # Start calculations
  #
  left_trim, right_trim = calculate_left_and_right_trims(@trim)

  matching_fragments = find_matching_fragments(@sizes, left_trim, right_trim)
  results = []

  matching_fragments.each do |hit|
    hit.each do |entry|
      seq = @sequences[entry[:fasta_id]][:sequence]
      first = entry[:offset]
      last = entry[:offset] + entry[:raw_size] - 1
      raw_frag = seq[first..last]

      primary_frag, complement_frag = trim_sequences(raw_frag, Bio::Sequence::NA.new(raw_frag).forward_complement, left_trim, right_trim, @trim)

      primary = primary_frag.dup
      complement = complement_frag.dup

      # note the next two if-statements at this level chain together through
      # 'primary' and 'complement'
      if @adapters[:adapter5_specificity]
        primary, complement = matches_adapter(5, primary, complement, raw_frag, @trim)
        next unless primary # next if returned false -- no match
      end

      if @adapters[:adapter3_specificity]
        primary, complement = matches_adapter(3, primary, complement, raw_frag, @trim)
        next unless primary # next if returned false -- no match
      end

      # FIXME
      results << {
        :raw_frag => raw_frag,
        :primary_frag => primary_frag,
        :primary_frag_with_adapters => primary,
        :complement_frag => complement_frag,
        :complement_frag_with_adapters => complement,
        :entry => entry,
        :seq => seq
      }
    end
  end

  cli_p(cli,"Nothing found") if results.size == 0 && @ops.verbose

  # One report entry per source sequence, ordered by sequence text; a second
  # result for the same sequence is treated as an internal error.
  sorted_results = {}
  results.sort { |left, right| left[:seq] <=> right[:seq] }.each do |r|
    raise "shouldn't happen" unless sorted_results[r[:seq]].nil?
    sorted_results[r[:seq]] = {
      'sequence size' => r[:seq].size,
      'fragment - primary strand' => r[:primary_frag],
      'fragment - complement strand' => r[:complement_frag],
      'fragment with adapters - primary strand' => r[:primary_frag_with_adapters],
      'fragment with adapters - complement strand' => r[:complement_frag_with_adapters]
    }
  end

  adapter_fields = ['fragment with adapters - primary strand', 'fragment with adapters - complement strand']
  fields = if @ops.verbose
             ['sequence size', 'fragment - primary strand', 'fragment - complement strand'] + adapter_fields
           else
             adapter_fields
           end

  sorted_results.each do |sequence, report|
    cli_p(cli, '---')
    if @ops.verbose
      cli_p(cli, '- sequence')
      cli_p(cli, " #{sequence}")
    end

    fields.each do |field|
      cli_p(cli, "- #{field}")
      cli_p(cli, " #{report[field]}")
    end
  end

  results
end
257
+
258
# Fill @adapters for the 5' and then the 3' adapter when an adapters table
# is available.
#
# For each end, an adapter given directly in @ops wins: its specificity is
# copied and its size is either derived from the supplied sequence (with a
# trailing "|N..." marker removed) plus the specificity length, or taken
# from @ops.adapterN_size. Otherwise, if +hsh+ carries a specificity for that
# end, specificity and sequence are copied from +hsh+ and the size is the sum
# of their lengths. When neither applies, nothing is set for that end.
#
# hsh:: Hash with :adapterN_specificity / :adapterN_sequence entries
def adapter_setup_1(hsh)
  outcomes = [5, 3].map do |i|
    specificity_key = "adapter#{i}_specificity".to_sym
    sequence_key    = "adapter#{i}_sequence".to_sym
    size_key        = "adapter#{i}_size".to_sym

    given_specificity = @ops.send("adapter#{i}")
    if given_specificity
      @adapters[specificity_key] = given_specificity
      given_sequence = @ops.send("adapter#{i}_sequence")
      if given_sequence
        @adapters[sequence_key] = given_sequence.gsub(/\|N*$/i,'')
        @adapters[size_key] = @adapters[sequence_key].size + @adapters[specificity_key].size
      else
        @adapters[size_key] = @ops.send("adapter#{i}_size")
      end
    elsif hsh[specificity_key]
      @adapters[specificity_key] = hsh[specificity_key]
      @adapters[sequence_key] = hsh[sequence_key]
      @adapters[size_key] = hsh[sequence_key].size + hsh[specificity_key].size
    end
  end
  # the 3' outcome, processed last
  outcomes.last
end
278
+
279
# Fill @adapters for the 5' and then the 3' adapter from @ops alone (no
# adapters table available).
#
# For each end the specificity is copied from @ops.adapterN (possibly nil).
# When @ops.adapterN_sequence is given, it is stored with a trailing
# "|N..." marker removed and the size becomes the sequence length plus the
# specificity length; otherwise the size is @ops.adapterN_size.
#
# A sequence supplied without a specificity used to fail with NoMethodError
# on nil; a missing specificity now contributes length 0.
def adapter_setup_2
  outcomes = [5, 3].map do |i|
    specificity = @ops.send("adapter#{i}")
    sequence    = @ops.send("adapter#{i}_sequence")

    @adapters["adapter#{i}_specificity".to_sym] = specificity
    if sequence
      trimmed = sequence.gsub(/\|N*$/i,'')
      @adapters["adapter#{i}_sequence".to_sym] = trimmed
      # to_s makes a nil specificity count as an empty string
      @adapters["adapter#{i}_size".to_sym] = trimmed.size + specificity.to_s.size
    else
      @adapters["adapter#{i}_size".to_sym] = @ops.send("adapter#{i}_size")
    end
  end
  # the 3' outcome, processed last
  outcomes.last
end
292
+
293
+
294
+ end # class SearchCommand
295
+ end # class App
296
+ end # module Genfrag
297
+
298
+ # EOF