genfrag 0.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.bnsignore +16 -0
  2. data/History.txt +4 -0
  3. data/LICENSE.txt +58 -0
  4. data/README.rdoc +40 -0
  5. data/Rakefile +53 -0
  6. data/bin/genfrag +8 -0
  7. data/lib/genfrag.rb +129 -0
  8. data/lib/genfrag/app.rb +105 -0
  9. data/lib/genfrag/app/command.rb +145 -0
  10. data/lib/genfrag/app/index_command.rb +227 -0
  11. data/lib/genfrag/app/index_command/db.rb +105 -0
  12. data/lib/genfrag/app/search_command.rb +298 -0
  13. data/lib/genfrag/app/search_command/match.rb +165 -0
  14. data/lib/genfrag/app/search_command/process_file.rb +125 -0
  15. data/lib/genfrag/app/search_command/trim.rb +121 -0
  16. data/lib/genfrag/debug.rb +0 -0
  17. data/spec/data/index_command/in/a.fasta +109 -0
  18. data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
  19. data/spec/data/index_command/out/2-a_lookup.db +0 -0
  20. data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
  21. data/spec/data/index_command/out/4-a_lookup.db +0 -0
  22. data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
  23. data/spec/data/index_command/out/6-a_lookup.db +0 -0
  24. data/spec/data/index_command/out/a.fasta.db +0 -0
  25. data/spec/data/index_command/out/a.fasta.tdf +6 -0
  26. data/spec/genfrag/app/command_spec.rb +55 -0
  27. data/spec/genfrag/app/index_command_spec.rb +258 -0
  28. data/spec/genfrag/app/search_command/match_spec.rb +77 -0
  29. data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
  30. data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
  31. data/spec/genfrag/app/search_command_spec.rb +260 -0
  32. data/spec/genfrag/app_spec.rb +77 -0
  33. data/spec/genfrag_spec.rb +87 -0
  34. data/spec/spec_helper.rb +56 -0
  35. data/tasks/ann.rake +80 -0
  36. data/tasks/bones.rake +20 -0
  37. data/tasks/gem.rake +201 -0
  38. data/tasks/git.rake +40 -0
  39. data/tasks/notes.rake +27 -0
  40. data/tasks/post_load.rake +34 -0
  41. data/tasks/rdoc.rake +50 -0
  42. data/tasks/rubyforge.rake +55 -0
  43. data/tasks/setup.rb +300 -0
  44. data/tasks/spec.rake +54 -0
  45. data/tasks/svn.rake +47 -0
  46. data/tasks/test.rake +40 -0
  47. metadata +136 -0
@@ -0,0 +1,227 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class IndexCommand < Command
6
+
7
+ attr_reader :sizes
8
+
9
# Run from command-line.
#
# Parses +args+, validates the resulting options and then builds the index,
# wrapped in Genfrag.tracktime when the :tracktime option is set.
#
# args:: the command-line arguments. +parse+ removes the option switches
#        from this array in place, so what is left afterwards is taken to
#        be the list of input filenames.
#
# Returns whatever +run+ (or Genfrag.tracktime) returns.
def cli_run( args )
  parse args

  # Use the arguments left over after option parsing instead of the global
  # ARGV: the two only coincide when the caller happened to pass ARGV itself.
  @input_filenames = args

  validate_options(options)

  if options[:tracktime]
    Genfrag.tracktime {
      run(options, @input_filenames, true)
    }
  else
    run(options, @input_filenames, true)
  end
end
27
+
28
# Main routine for creating the index - accepts multiple input files. Either an SQLite database or
# a flat file index is created (extension .tdf) which is unique for the input file combination.
# This file is used by the Search routine later.
#
# ops::             an OpenStruct or a Hash of options; anything else raises ArgumentError
# input_filenames:: sequence files, looked up relative to ops.indir; when empty,
#                   ops.filefasta is used instead
# cli::             handed through unchanged to cli_p with every message
#
# Fills @sizes with { fragment size => [[position, normalized fasta id], ...] }.
# Returns nil.
def run(ops=@ops, input_filenames=[], cli=false)
  if ops.kind_of? OpenStruct
    @ops = ops.dup
  elsif ops.kind_of? Hash
    @ops = OpenStruct.new(ops)
  else
    raise ArgumentError
  end

  # Set defaults
  @ops.verbose    ||= false
  @ops.quiet      ||= false
  @ops.sqlite     ||= false
  @ops.filelookup ||= nil
  @ops.filefasta  ||= nil
  @ops.re5        ||= nil
  @ops.re3        ||= nil
  @ops.indir      ||= '.'
  @ops.outdir     ||= '.'

  @input_filenames = input_filenames.empty? ? [@ops.filefasta] : input_filenames
  @sizes = {}
  db = IndexCommand::DB.new(@ops, @input_filenames)
  @re5_ds, @re3_ds = [@ops.re5, @ops.re3].map {|x| Bio::RestrictionEnzyme::DoubleStranded.new(x)}
  db.write_headers

  # From here on the output handles are open, so make sure they are closed
  # again even when reading or matching a sequence raises.
  begin
    if @ops.verbose
      cli_p(cli, <<-END
RE5: #{@ops.re5}
#{@re5_ds.aligned_strands_with_cuts.primary}
#{@re5_ds.aligned_strands_with_cuts.complement}

RE3: #{@ops.re3}
#{@re3_ds.aligned_strands_with_cuts.primary}
#{@re3_ds.aligned_strands_with_cuts.complement}
      END
      )
    end

    # unit test with aasi, aari, and ppii
    # (the enzyme objects built above are reused rather than constructed twice)
    re5_regexp, re3_regexp = [@re5_ds, @re3_ds].map {|ds| Bio::Sequence::NA.new( ds.aligned_strands.primary ).to_re }

    # Account for exact duplicate sequences: one entry per distinct
    # (downcased) sequence, collecting every definition that carries it.
    entries = {}
    @input_filenames.each do |input_filename|
      Bio::FlatFile.auto(File.join(@ops.indir, input_filename)).each_entry do |e|
        e.definition.tr!("\t",'') # tabs would break the tab-delimited output
        s = e.seq.to_s.downcase
        (entries[s] ||= []) << e.definition
      end
    end

    a_re = /(.*)(#{re5_regexp})/  # greedy prefix: picks the right-most re5 site
    b_re = /(.*?)(#{re3_regexp})/ # lazy prefix: picks the first re3 site after it

    normalized_fasta_id = 0
    entries.each do |seq, definitions|
      normalized_fasta_id += 1
      db.write_entry_to_fasta(normalized_fasta_id, seq, definitions)

      # NOTE the index command is slow because of the match functions, compare with ruby 1.9
      m1 = a_re.match(seq)
      next unless m1

      # Find the fragment 'frag1' cut most right in seq with re5_regexp
      frag1    = m1[2] + m1.post_match
      position = m1[1].size

      # Now cut frag1 with re3_regexp resulting in frag2
      m2 = b_re.match(frag1)
      next unless m2

      frag2 = m2[1] + m2[2]
      if @ops.verbose
        cli_p(cli, <<-END
---
#{definitions.join("\n")}
#{frag2}
        END
        )
      end
      (@sizes[frag2.size] ||= []) << [position, normalized_fasta_id]
    end

    @sizes.each_with_index do |(size, info), idx|
      db.write_entry_to_freq(idx + 1, size, info.map {|x| x.join(' ')}.join(', ') )
    end

    if @ops.verbose
      # A plain block parameter: '|@entry|' is a syntax error from Ruby 1.9 on.
      @sizes.each { |entry| cli_p(cli, entry.inspect) }
    else
      # every hit is a [position, id] pair, hence the division by two
      cli_p(cli, "Cut sites found: #{@sizes.values.flatten.size / 2}")
    end
  ensure
    db.close
  end

  nil
end
138
+
139
+
140
+ ############
141
+ # Command-line
142
+ ############
143
+
144
+
145
# Build the option parser for the 'index' sub-command.
#
# The switches themselves come from +standard_options+; this method only
# selects which of them apply and adds the banner, description, help switch
# and usage examples. Nothing is parsed here.
#
# Returns the configured OptionParser.
def opt_parser
  std_opts = standard_options

  parser = OptionParser.new
  parser.banner = 'Usage: genfrag index [options]'

  description = [
    '',
    " Create a database of sequence fragments that match the last 5' fragment",
    " cut by two restricting enzymes RE3 and RE5.",
    " The Fasta file defined by the --fasta option is taken as input.",
    " Two files are created for the search function - a lookup file, and",
    " the contents of the Fasta file rewritten in a special format. You can",
    " specify the name of the lookup file with the --lookup option.",
    ''
  ]
  description.each { |text| parser.separator text }

  switches = [:verbose, :quiet, :tracktime, :indir, :outdir, :sqlite, :re5, :re3,
              :filelookup, :filefasta]
  switches.each { |name| parser.on(*std_opts[name]) }

  parser.separator ''
  parser.separator ' Common Options:'
  # printing the help text terminates the program with status 1
  parser.on( '-h', '--help', 'show this message' ) { @out.puts parser; exit 1 }

  examples = [
    ' Examples:',
    ' genfrag index -f example.fasta --re5 BstYI --re3 MseI',
    ' genfrag index --out /tmp --in . -f example.fasta --re5 BstYI --re3 MseI'
  ]
  examples.each { |text| parser.separator text }

  parser
end
176
+
177
+ # Parse options passed from command-line
178
+ #
179
+ def parse( args )
180
+ opts = opt_parser
181
+
182
+ if args.empty?
183
+ @out.puts opts
184
+ exit 1
185
+ end
186
+
187
+ # parse the command line arguments
188
+ opts.parse! args
189
+ end
190
+
191
# Validate options passed from the command-line.
#
# o:: the options, indexed by symbol.
#
# Each failed check reports its message through clierr_p and exits with
# status 1. The presence checks run first (fasta file, re5, re3), then the
# two enzyme names are checked by constructing the enzyme, re3 before re5.
def validate_options(o)
  required = [
    [:filefasta, "missing option: must supply fasta filename"],
    [:re5,       "missing option: re5"],
    [:re3,       "missing option: re3"]
  ]
  required.each do |key, message|
    next unless o[key].nil?
    clierr_p message
    exit 1
  end

  enzymes = [
    [:re3, "re3 is not an enzyme name"],
    [:re5, "re5 is not an enzyme name"]
  ]
  checked = enzymes.map do |key, message|
    begin
      Bio::RestrictionEnzyme::DoubleStranded.new(o[key])
    rescue
      clierr_p message
      exit 1
    end
  end
  checked.last
end
222
+
223
+ end # class IndexCommand
224
+ end # class App
225
+ end # module Genfrag
226
+
227
+ # EOF
@@ -0,0 +1,105 @@
1
+ module Genfrag
2
+ class App
3
+
4
+ class IndexCommand < Command
5
+
6
# Writes the two artefacts produced by the index command - the normalized
# Fasta table and the fragment-size lookup table - either as tab-delimited
# files (.tdf) or as SQLite databases (.db), depending on ops.sqlite.
#
# Every public operation dispatches to a "<operation>_csv" or
# "<operation>_sqlite" method, selected by #sc.
class DB
  attr_accessor :ops # an OpenStruct of the options
  attr_accessor :input_filenames
  attr_accessor :normalized_fasta # File or SQLite3::Database, set by write_headers
  attr_accessor :freq_lookup      # File or SQLite3::Database, set by write_headers

  # ops::             the options (sqlite, outdir, filefasta, filelookup, re5 and re3 are read)
  # input_filenames:: the input file names, used to derive the output file names
  def initialize( ops, input_filenames )
    @normalized_fasta = nil
    @freq_lookup = nil
    @ops = ops
    @input_filenames = input_filenames
  end

  # Suffix of the backend-specific methods: 'sqlite' or 'csv'.
  def sc
    @ops.sqlite ? 'sqlite' : 'csv'
  end

  # Open both outputs and write their headers (csv) or (re)create their
  # tables (sqlite).
  def write_headers
    send("write_headers_#{sc}")
  end

  def write_headers_sqlite
    @normalized_fasta = SQLite3::Database.new( File.join(@ops.outdir, Genfrag.name_normalized_fasta(@input_filenames,@ops.filefasta) + '.db') )
    sql = <<-SQL
drop table if exists db_normalized_fasta;
create table db_normalized_fasta (
id integer,
definitions text,
sequence text
);
create unique index db_normalized_fasta_idx on db_normalized_fasta(id);
    SQL
    @normalized_fasta.execute_batch( sql )
    @freq_lookup = SQLite3::Database.new( File.join(@ops.outdir, Genfrag.name_freq_lookup(@input_filenames,@ops.filefasta,@ops.filelookup,@ops.re5,@ops.re3) + '.db') )
    sql = <<-SQL
drop table if exists db_freq_lookup;
create table db_freq_lookup (
id integer,
size integer,
positions text
);
create unique index db_freq_lookup_idx on db_freq_lookup(id);
    SQL
    @freq_lookup.execute_batch( sql )
  end

  def write_headers_csv
    @normalized_fasta = File.new(File.join(@ops.outdir,Genfrag.name_normalized_fasta(@input_filenames,@ops.filefasta) + '.tdf'), 'w')
    @normalized_fasta.puts %w(id Definitions Sequence).join("\t")
    @freq_lookup = File.new( File.join(@ops.outdir,Genfrag.name_freq_lookup(@input_filenames,@ops.filefasta,@ops.filelookup,@ops.re5,@ops.re3) + '.tdf'), 'w')
    @freq_lookup.puts %w(id Size Positions).join("\t")
  end

  # Store one distinct sequence together with all of its definitions.
  def write_entry_to_fasta(normalized_fasta_id, seq, definitions)
    send("write_entry_to_fasta_#{sc}", normalized_fasta_id, seq, definitions)
  end

  def write_entry_to_fasta_sqlite(normalized_fasta_id, seq, definitions)
    @normalized_fasta.execute( "insert into db_normalized_fasta values ( ?, ?, ? )", normalized_fasta_id, definitions_field(definitions), seq )
  end

  def write_entry_to_fasta_csv(normalized_fasta_id, seq, definitions)
    @normalized_fasta.puts [normalized_fasta_id,definitions_field(definitions),seq].join("\t")
  end

  # Store one row of the size lookup: row id, fragment size and the
  # already formatted positions string.
  def write_entry_to_freq(i, size, str)
    send("write_entry_to_freq_#{sc}", i, size, str)
  end

  def write_entry_to_freq_sqlite(i, size, str)
    @freq_lookup.execute( "insert into db_freq_lookup values ( ?, ?, ? )", i, size, str )
  end

  def write_entry_to_freq_csv(i, size, str)
    @freq_lookup.puts [i,size,str].join("\t")
  end

  # Release both outputs. Safe to call before write_headers and more than
  # once. Returns nil.
  def close
    send("close_#{sc}")
  end

  # Close the database handles; previously they were left open.
  def close_sqlite
    close_handles
  end

  def close_csv
    close_handles
  end

  private

  # The definitions as one CSV field. CSV.generate_line appends a row
  # separator on Ruby 1.9 and later; it has to go, otherwise every
  # tab-delimited record is split over two lines.
  # NOTE(review): assumes the reader expects one record per line - confirm
  # against SearchCommand::ProcessFile.
  def definitions_field(definitions)
    CSV.generate_line(definitions).chomp
  end

  # Close whichever of the two handles exist and are still open.
  def close_handles
    [@normalized_fasta, @freq_lookup].each do |handle|
      handle.close if handle && !handle.closed?
    end
    nil
  end
end
100
+
101
+ end # class IndexCommand
102
+ end # class App
103
+ end # module Genfrag
104
+
105
+ # EOF
@@ -0,0 +1,298 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
# Run the search from the command-line.
#
# Parses and validates +args+, loads the index written by the index command
# (SQLite .db files when the :sqlite option is set, tab-delimited .tdf files
# otherwise) plus the optional adapters file, and hands everything to +run+.
#
# args:: the command-line arguments. +parse+ removes the option switches
#        from this array in place, so what is left afterwards is taken to
#        be the list of input filenames.
#
# Returns whatever +run+ returns.
def cli_run( args )
  parse args

  # Use the arguments left over after option parsing instead of the global
  # ARGV: the two only coincide when the caller happened to pass ARGV itself.
  @input_filenames = args
  input_filenames = [@input_filenames].flatten
  processed_adapters = nil

  validate_options(options)

  # NOTE(review): the file names are used as returned by Genfrag.name_*, so
  # the index is read from the current directory - the :indir option is not
  # consulted here. Confirm whether that is intended.
  if options[:sqlite]
    processed_fasta_file = SearchCommand::ProcessFile.process_db_fasta_file( SQLite3::Database.new( Genfrag.name_normalized_fasta(input_filenames,options[:filefasta]) + '.db' ) )
    processed_freq_lookup = SearchCommand::ProcessFile.process_db_freq_lookup( SQLite3::Database.new( Genfrag.name_freq_lookup(input_filenames,options[:filefasta],options[:filelookup],options[:re5],options[:re3]) + '.db' ) )
  else
    processed_fasta_file = SearchCommand::ProcessFile.process_tdf_fasta_file( IO.readlines( Genfrag.name_normalized_fasta(input_filenames,options[:filefasta]) + '.tdf' ) )
    processed_freq_lookup = SearchCommand::ProcessFile.process_tdf_freq_lookup( IO.readlines( Genfrag.name_freq_lookup(input_filenames,options[:filefasta],options[:filelookup],options[:re5],options[:re3]) + '.tdf' ) )
  end

  if options[:fileadapters]
    processed_adapters = SearchCommand::ProcessFile.process_tdf_adapters( IO.readlines( Genfrag.name_adapters(options[:fileadapters]) + '.tdf' ), options[:named_adapter5], options[:named_adapter3] )
  end

  run(options, processed_fasta_file, processed_freq_lookup, processed_adapters, true)
end
31
+
32
# Build the option parser for the 'search' sub-command.
#
# The switches themselves come from +standard_options+; this method only
# selects which of them apply and adds the banner, description, help switch
# and usage examples. Nothing is parsed here.
#
# Returns the configured OptionParser.
def opt_parser
  std_opts = standard_options

  parser = OptionParser.new
  parser.banner = 'Usage: genfrag search [options]'

  description = [
    '',
    " Search a database of sequence fragments that match the last 5'",
    " fragment cut by two restricting enzymes RE3 and RE5, as created by the",
    " index function. Next, adapters are applied to search a subset of",
    " fragments, as is used in some protocols.",
    ''
  ]
  description.each { |text| parser.separator text }

  switches = [:verbose, :quiet, :tracktime, :indir, :outdir, :sqlite, :re5, :re3,
              :filelookup, :filefasta, :fileadapters, :adapter5_sequence, :adapter3_sequence,
              :adapter5_size, :adapter3_size, :named_adapter5, :named_adapter3,
              :adapter5, :adapter3]
  switches.each { |name| parser.on(*std_opts[name]) }

  parser.separator ''
  parser.separator ' Common Options:'
  # printing the help text terminates the program with the default exit status
  parser.on( '-h', '--help', 'show this message' ) { @out.puts parser; exit }

  examples = [
    ' Examples:',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 tt',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --add 26 --adapter5 ct --adapter3 aa --size 190,215',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-size 11 --adapter5 tt --adapter3-size 15 --size 168',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-sequence GACTGCGTAGTGATC --adapter5 tt --adapter3-size 15 --size 168',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5-size 11 --adapter5 ct --adapter3-size 15 --adapter3 aa --size 190,215',
    ' genfrag search -f example.fasta --re5 BstYI --re3 MseI --add 26 --named-adapter5 BstYI-T4 --named-adapter3 MseI-21 --size 190,215'
  ]
  examples.each { |text| parser.separator text }

  parser
end
65
+
66
+ def parse( args )
67
+ opts = opt_parser
68
+
69
+ if args.empty?
70
+ @out.puts opts
71
+ exit 1
72
+ end
73
+
74
+ # parse the command line arguments
75
+ opts.parse! args
76
+
77
+ end
78
+
79
# Validate options passed from the command-line.
#
# o:: the options, indexed by symbol.
#
# Each failed check reports its message through clierr_p and exits with
# status 1. The presence checks run first (fasta file, re5, re3), then the
# two enzyme names are checked by constructing the enzyme, re3 before re5.
def validate_options(o)
  required = [
    [:filefasta, "missing option: must supply fasta filename"],
    [:re5,       "missing option: re5"],
    [:re3,       "missing option: re3"]
  ]
  required.each do |key, message|
    next unless o[key].nil?
    clierr_p message
    exit 1
  end

  enzymes = [
    [:re3, "re3 is not an enzyme name"],
    [:re5, "re5 is not an enzyme name"]
  ]
  checked = enzymes.map do |key, message|
    begin
      Bio::RestrictionEnzyme::DoubleStranded.new(o[key])
    rescue
      clierr_p message
      exit 1
    end
  end
  checked.last
end
109
+
110
# Search the index for fragments and print them.
#
# ops::                   an OpenStruct or a Hash of options; anything else raises ArgumentError
# processed_fasta_file::  sequences, indexed by fasta id; each value is indexed with :sequence
# processed_freq_lookup:: the size lookup handed on to find_matching_fragments
# processed_adapters::    adapters read from an adapters file, or nil
# cli::                   handed through unchanged to cli_p with every message
#
# Raises ArgumentError when an adapter is given both directly and by name,
# when a named adapter is used without processed_adapters, or when an adapter
# size is supplied that disagrees with the one derived from its sequence.
#
# Returns an array with one hash per reported fragment (keys :raw_frag,
# :primary_frag, :primary_frag_with_adapters, :complement_frag,
# :complement_frag_with_adapters, :entry and :seq).
def run(ops=OpenStruct.new, processed_fasta_file=nil, processed_freq_lookup=nil, processed_adapters=nil, cli=false)
  @ops = case ops
         when OpenStruct then ops.dup
         when Hash       then OpenStruct.new(ops)
         else raise ArgumentError
         end

  # Set defaults
  @ops.verbose       ||= false
  @ops.quiet         ||= false
  @ops.sqlite        ||= false
  @ops.re5           ||= nil
  @ops.re3           ||= nil
  @ops.size          ||= [0]
  @ops.adapter5_size ||= nil
  @ops.adapter3_size ||= nil
  @ops.adapter5      ||= nil
  @ops.adapter3      ||= nil

  @sizes = processed_freq_lookup
  @sequences = processed_fasta_file
  @adapters = {}
  @re5_ds, @re3_ds = [@ops.re5, @ops.re3].map {|x| Bio::RestrictionEnzyme::DoubleStranded.new(x)}
  if @ops.verbose
    cli_p(cli, <<-END
RE5: #{@ops.re5}
#{@re5_ds.aligned_strands_with_cuts.primary}
#{@re5_ds.aligned_strands_with_cuts.complement}

RE3: #{@ops.re3}
#{@re3_ds.aligned_strands_with_cuts.primary}
#{@re3_ds.aligned_strands_with_cuts.complement}

adapter5: #{@ops.adapter5}
adapter3: #{@ops.adapter3}
    END
    )
  end

  # An adapter is given either directly or by name, never both.
  raise ArgumentError, "Cannot have both 'adapter5' and 'named_adapter5'" if @ops.named_adapter5 && @ops.adapter5
  raise ArgumentError, "Cannot have both 'adapter3' and 'named_adapter3'" if @ops.named_adapter3 && @ops.adapter3

  unless processed_adapters
    if @ops.named_adapter5 || @ops.named_adapter3
      raise ArgumentError, "Must specify --fileadapters when using a named_adapter"
    end
  end

  processed_adapters ? adapter_setup_1(processed_adapters) : adapter_setup_2

  # translated adapter 3' if given in reverse orientation - e.g. _tt is
  # translated to aa (reversed) and _tct returns the primary strand
  # ending in specific 'tct'
  if @adapters[:adapter3_specificity] =~ /^_/
    seq3 = Bio::Sequence::NA.new(@adapters[:adapter3_specificity][1..-1]).downcase
    @adapters[:adapter3_specificity] = seq3.complement.to_s
  end

  if @ops.adapter5_size && @ops.adapter5_sequence && @ops.adapter5_size != @adapters[:adapter5_size]
    raise ArgumentError, "--adapter5-sequence and --adapter5-size both supplied"
  end
  if @ops.adapter3_size && @ops.adapter3_sequence && @ops.adapter3_size != @adapters[:adapter3_size]
    raise ArgumentError, "--adapter3-sequence and --adapter3-size both supplied"
  end

  @trim = calculate_trim_for_nucleotides(@re5_ds, @re3_ds)

  # ------
  # Start calculations
  #
  left_trim, right_trim = calculate_left_and_right_trims(@trim)

  matching_fragments = find_matching_fragments(@sizes, left_trim, right_trim)
  results = []

  matching_fragments.each do |hit|
    hit.each do |entry|
      seq = @sequences[entry[:fasta_id]][:sequence]
      raw_frag = seq[entry[:offset]..(entry[:offset]+entry[:raw_size]-1)]

      primary_frag, complement_frag = trim_sequences(raw_frag, Bio::Sequence::NA.new(raw_frag).forward_complement, left_trim, right_trim, @trim)

      with_adapters_primary    = primary_frag.dup
      with_adapters_complement = complement_frag.dup

      # the 5' and the 3' step chain: the second works on the strands
      # returned by the first
      if @adapters[:adapter5_specificity]
        with_adapters_primary, with_adapters_complement = matches_adapter(5, with_adapters_primary, with_adapters_complement, raw_frag, @trim)
        next unless with_adapters_primary # returned false -- no match
      end

      if @adapters[:adapter3_specificity]
        with_adapters_primary, with_adapters_complement = matches_adapter(3, with_adapters_primary, with_adapters_complement, raw_frag, @trim)
        next unless with_adapters_primary # returned false -- no match
      end

      results << {
        :raw_frag                      => raw_frag,
        :primary_frag                  => primary_frag,
        :primary_frag_with_adapters    => with_adapters_primary,
        :complement_frag               => complement_frag,
        :complement_frag_with_adapters => with_adapters_complement,
        :entry                         => entry,
        :seq                           => seq
      } # FIXME
    end
  end

  cli_p(cli,"Nothing found") if results.size == 0 && @ops.verbose

  # One report per sequence, ordered by sequence; a sequence showing up
  # twice is treated as an internal error.
  sorted_results = {}
  results.sort_by { |r| r[:seq] }.each do |r|
    raise "shouldn't happen" unless sorted_results[r[:seq]].nil?
    sorted_results[r[:seq]] = {
      'sequence size'                              => r[:seq].size,
      'fragment - primary strand'                  => r[:primary_frag],
      'fragment - complement strand'               => r[:complement_frag],
      'fragment with adapters - primary strand'    => r[:primary_frag_with_adapters],
      'fragment with adapters - complement strand' => r[:complement_frag_with_adapters]
    }
  end

  with_adapters = ['fragment with adapters - primary strand', 'fragment with adapters - complement strand']
  details = ['sequence size', 'fragment - primary strand', 'fragment - complement strand']
  fields = @ops.verbose ? details + with_adapters : with_adapters

  sorted_results.each do |sequence, report|
    cli_p(cli, '---')
    if @ops.verbose
      cli_p(cli, '- sequence')
      cli_p(cli, " #{sequence}")
    end

    fields.each do |field|
      cli_p(cli, "- #{field}")
      cli_p(cli, " #{report[field]}")
    end
  end

  results
end
257
+
258
# Fill @adapters for the 5' and then the 3' end when an adapters file was read.
#
# hsh:: the adapters from the file, indexed by :adapterN_specificity and
#       :adapterN_sequence (N being 5 or 3).
#
# For each end, an adapter given directly in @ops wins: its specificity is
# taken from @ops, and its size is either derived from the given sequence
# (with a trailing "|NNN" marker removed) or taken from @ops as is.
# Otherwise the adapter from +hsh+ is used, if it has a specificity.
# An end with neither is left untouched.
def adapter_setup_1(hsh)
  [5, 3].map do |n|
    spec_key = :"adapter#{n}_specificity"
    seq_key  = :"adapter#{n}_sequence"
    size_key = :"adapter#{n}_size"

    if @ops.send("adapter#{n}")
      @adapters[spec_key] = @ops.send("adapter#{n}")
      if @ops.send("adapter#{n}_sequence")
        @adapters[seq_key] = @ops.send("adapter#{n}_sequence").gsub(/\|N*$/i,'')
        @adapters[size_key] = @adapters[seq_key].size + @adapters[spec_key].size
      else
        @adapters[size_key] = @ops.send("adapter#{n}_size")
      end
    elsif hsh[spec_key]
      @adapters[spec_key] = hsh[spec_key]
      @adapters[seq_key]  = hsh[seq_key]
      @adapters[size_key] = hsh[seq_key].size + hsh[spec_key].size
    end
  end.last # the value produced for the 3' end
end
278
+
279
# Fill @adapters for the 5' and then the 3' end from @ops alone (no adapters file).
#
# For each end the specificity is copied from @ops (it may be nil). When a
# sequence was given, it is stored with a trailing "|NNN" marker removed and
# the size becomes sequence length plus specificity length; otherwise the
# size is taken from @ops as is.
def adapter_setup_2
  [5, 3].map do |n|
    specificity = @ops.send("adapter#{n}")
    @adapters[:"adapter#{n}_specificity"] = specificity

    given_sequence = @ops.send("adapter#{n}_sequence")
    if given_sequence
      sequence = given_sequence.gsub(/\|N*$/i,'')
      @adapters[:"adapter#{n}_sequence"] = sequence
      @adapters[:"adapter#{n}_size"] = sequence.size + specificity.size
    else
      @adapters[:"adapter#{n}_size"] = @ops.send("adapter#{n}_size")
    end
  end.last # the value produced for the 3' end
end
292
+
293
+
294
+ end # class SearchCommand
295
+ end # class App
296
+ end # module Genfrag
297
+
298
+ # EOF