seqtrimnext 2.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
data/bin/seqtrimnext ADDED
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env ruby
2
+ # SeqTrimNext: Next generation sequencing preprocessor
3
+ # Copyright (C) <2011>
4
+ # Authors: Almudena Bocinos Rioboo, Diego Dario Guerrero Fernandez,
5
+ # Rocio Bautista Moreno, Juan Falgueras Cano & M. Gonzalo Claros
6
+ # email: soporte@scbi.uma.es - http://www.scbi.uma.es
7
+ #
8
+ # This program is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU Affero General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU Affero General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Affero General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+
21
+ #= SEQTRIM II
22
+ #
23
+ #== Running
24
+ #
25
+ # Seqtrim can be run locally or in a parallel/distributed environment.
26
+ #
27
+ #=== Running locally
28
+ #* list
29
+ #
30
+ #=== Running in a distributed environment
31
+ #
32
+ #== SEC 2
33
+ #
34
+ #=== SUB 2.1
35
+ #
36
+
37
+
38
+ # #finds the classes that were in the folder 'classes'
39
+ # ROOT_PATH=File.dirname(__FILE__)
40
+ # $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
41
+ #
42
+ # #finds the classes that were in the folder 'plugins'
43
+ # $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
44
+ #
45
+ #
46
+ # #finds the classes that were in the folder 'plugins'
47
+ # $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
48
+ #
49
+ # #finds the classes that were in the folder 'utils'
50
+ # $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
51
+ #
52
+ # $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
53
+
54
+ # to test scbi_drb gem locally
55
+ # $: << File.expand_path('~/progs/ruby/gems/scbi_drb/lib/')
56
+
57
+ # $: << File.expand_path(ROOT_PATH)
58
+
59
+ $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
60
+
61
+ require 'seqtrimnext'
62
+
63
+ ############ PATHS #######################
64
+ $SEQTRIM_PATH = ROOT_PATH
65
+
66
+ # if there is a BLASTDB environment var, then use it
67
+ if ENV['BLASTDB']# && Dir.exists?(ENV['BLASTDB'])
68
+ $FORMATTED_DB_PATH = ENV['BLASTDB']
69
+ $DB_PATH = File.dirname($FORMATTED_DB_PATH)
70
+ else # otherwise use ROOTPATH + DB
71
+ $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB",'formatted'))
72
+ $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
73
+ end
74
+
75
+ ENV['BLASTDB']=$FORMATTED_DB_PATH
76
+
77
+ OUTPUT_PATH='output_files'
78
+
79
+
80
+ # TODO - document all classes and methods so that their descriptions show up when rdoc is run in the terminal
81
+
82
+ #Checks install requirements
83
+ require 'install_requirements'
84
+
85
+ ins = InstallRequirements.new
86
+ if (!ins.check_install_requirements)
87
+ exit
88
+ end
89
+
90
+ require "logger"
91
+ require 'optparse'
92
+ require "global_match"
93
+ require "seqtrim"
94
+ require "params.rb"
95
+ require "plugin.rb"
96
+ require "sequence.rb"
97
+ require "plugin_manager.rb"
98
+ require "make_blast_db"
99
+ require 'hash_stats'
100
+ require 'list_db'
101
+ require 'install_database'
102
+ require 'socket'
103
+
104
+
105
+ def show_additional_help
106
+
107
+ puts "\n"*3
108
+ puts "E.g.: processing a fastq sequences file"
109
+ puts "#{$0} -t genomics_454.txt -Q sequences.fastq"
110
+ puts "\n"*2
111
+
112
+ puts "E.g.: processing a fasta file with qual"
113
+ puts "#{$0} -t genomics_454.txt -f sequences.fasta -q sequences.qual"
114
+
115
+ templates = Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
116
+
117
+ puts "\n\n ========================================================================================================"
118
+ puts " Available templates to use with -t option (you can also use your own template):"
119
+ puts " Templates at: #{File.join($SEQTRIM_PATH,'templates')}"
120
+ puts " ========================================================================================================\n\n"
121
+
122
+ templates.map{|e| puts " "+e}
123
+
124
+ puts "\n\n ========================================================================================================"
125
+ puts " Available databases to use in custom template files (you can also use your own database):"
126
+ puts " Databases at: #{$DB_PATH}"
127
+ puts " ========================================================================================================\n\n"
128
+
129
+ ListDb.list_databases($DB_PATH).map{|e| puts " "+e}
130
+ #
131
+ # ip_list = Socket.ip_address_list.select{|e| e.ipv4?}.map{|e| e.ip_address}
132
+ #
133
+ # puts ip_list
134
+
135
+
136
+ exit
137
+
138
+ end
139
+
140
+
141
+ # Reads the parameters from console. For this is used ARGV, that is an array.
142
+ options = {}
143
+
144
+ optparse = OptionParser.new do |opts|
145
+
146
+ # Set a banner, displayed at the top
147
+ # of the help screen.
148
+ opts.banner = "Usage: #{$0} -t template_file \{-Q fastaQ_file | -f fasta_file -q qual_file\} [options]"
149
+
150
+ # Define the options, and what they do
151
+ #options[:server_ip] = '127.0.0.1'
152
+ options[:server_ip] = '0.0.0.0'
153
+ opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
154
+
155
+ # get list of available ips
156
+ ip_list = Socket.ip_address_list.select{|e| e.ipv4?}.map{|e| e.ip_address}
157
+
158
+ ip=ip_list.select{|ip| ip.index(server_ip)==0}.first
159
+
160
+ if !ip
161
+ ip='0.0.0.0'
162
+ # $LOG.info("No available ip matching #{server_ip}")
163
+ end
164
+ # $ .info("Using ip #{ip}")
165
+ options[:server_ip] = ip
166
+ end
167
+
168
+ options[:port] = 0 #50000
169
+ opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
170
+ options[:port] = port.to_i
171
+ end
172
+
173
+ options[:workers] = 2
174
+ # -w takes either a worker count or the path of an existing file listing one machine name per line
175
+ opts.on( '-w', '--workers COUNT', 'Number of workers, or file containing machine names to launch workers with ssh' ) do |workers|
176
+ if File.exist?(workers)
177
+ # use workers file
178
+ options[:workers] = File.read(workers).split("\n").map{|w| w.chomp}
179
+ else
180
+ begin
181
+ options[:workers] = Integer(workers)
182
+ rescue ArgumentError
183
+ STDERR.puts "ERROR:Invalid workers parameter #{workers}"
184
+ exit
185
+ end
186
+
187
+ end
188
+
189
+ end
190
+
191
+
192
+
193
+ options[:only_workers] = false
194
+ opts.on( '-o', '--only_workers', 'Only launch workers' ) do
195
+ options[:only_workers] = true
196
+ end
197
+
198
+ options[:check_db] = false
199
+ opts.on( '-c', '--check_databases', 'Check Blast databases and reformat if necessary' ) do
200
+ options[:check_db] = true
201
+ end
202
+
203
+ options[:install_db] = nil
204
+ opts.on( '-i', '--install_databases TYPE', 'Install base databases and reformat them if necessary') do |db_type|
205
+ options[:install_db] = db_type
206
+ end
207
+
208
+ options[:logfile] = STDOUT
209
+ opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) do |file|
210
+ options[:logfile] = file
211
+ end
212
+
213
+ options[:fastq] = nil
214
+ opts.on( '-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>' ) do |file|
215
+ options[:fastq] = file
216
+ end
217
+
218
+ options[:fasta] = nil
219
+ opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
220
+ options[:fasta] = file
221
+ end
222
+
223
+ options[:qual] = nil
224
+ opts.on( '-q', '--qual FILE', 'Qual input file' ) do |file|
225
+ options[:qual] = file
226
+ end
227
+
228
+
229
+
230
+ options[:list_db] = nil
231
+ opts.on( '-L', '--list_db DB_NAME', 'List entries IDs in DB_NAME. Use "-L all" to view all available databases' ) do |value|
232
+ options[:list_db] = value
233
+ end
234
+
235
+ options[:gen_params] = false
236
+ opts.on( '-G', '--generate_template', 'Generates a sample template file with default parameters' ) do
237
+ options[:gen_params] = true
238
+ end
239
+
240
+ options[:template] = nil
241
+ opts.on( '-t', '--template TEMPLATE_FILE', 'Use TEMPLATE_FILE instead of default parameters' ) do |file|
242
+ options[:template] = file
243
+ end
244
+
245
+ options[:chunk_size] = 100
246
+ opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
247
+ options[:chunk_size] = cs.to_i
248
+ end
249
+
250
+
251
+ options[:json] = nil
252
+ opts.on( '-j', '--json', 'Save results in json file' ) do
253
+ options[:json] = true
254
+ end
255
+
256
+ # This displays the help screen, all programs are
257
+ # assumed to have this option.
258
+ opts.on_tail( '-h', '--help', 'Display this screen' ) do
259
+ puts opts
260
+ show_additional_help
261
+ exit
262
+ end
263
+ end
264
+
265
+ # parse options and remove from ARGV
266
+ optparse.parse!
267
+
268
+ if options[:list_db] then
269
+ # List database entries in a database
270
+ ListDb.new($DB_PATH,options[:list_db])
271
+ exit
272
+ end
273
+
274
+ if options[:gen_params] then
275
+ # Generates a sample params file in current directory
276
+ Params.generate_sample_params
277
+ exit
278
+ end
279
+
280
+ # make sure the logs directory exists before the logger is set up
281
+ # system('rm logs/*')
282
+ FileUtils.mkdir('logs') unless File.exist?('logs')
283
+
284
+ $LOG = Logger.new(options[:logfile])
285
+ $LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
286
+ #logger.level = Logger::INFO
287
+
288
+ #DEBUG < INFO < WARN < ERROR < FATAL < UNKNOWN
289
+
290
+
291
+ $LOG.info("Using BLASTDB: "+ $FORMATTED_DB_PATH)
292
+ $LOG.info("Using options: "+ options.to_json)
293
+
294
+ if options[:install_db] then
295
+ #install databases
296
+ InstallDatabase.new(options[:install_db],$DB_PATH)
297
+
298
+ # reformat databases
299
+ MakeBlastDb.new($DB_PATH)
300
+ exit
301
+ end
302
+
303
+ if !File.exists?($FORMATTED_DB_PATH)
304
+ STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
305
+ exit
306
+ end
307
+
308
+
309
+ if options[:check_db] then
310
+ # check and format blast databases
311
+ MakeBlastDb.new($DB_PATH)
312
+ exit
313
+ end
314
+
315
+ required_options = options[:template] && (options[:fastq] || (options[:fasta]))
316
+
317
+ # if ((ARGV.count != 2) && (ARGV.count != 3)) # con esto vemos si hay argumentos,
318
+ if (ARGV.count != 0) || (!required_options) # con esto vemos si hay argumentos,
319
+ puts "You must provide all required options"
320
+ puts ""
321
+ puts optparse.help
322
+ exit
323
+ end
324
+
325
+ # check for template
326
+ if (!File.exists?(options[:template]))
327
+ if File.exists?(File.join($SEQTRIM_PATH,'templates',options[:template]))
328
+ options[:template] = File.join($SEQTRIM_PATH,'templates',options[:template])
329
+ else
330
+ $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
331
+ puts Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
332
+ exit
333
+ end
334
+ end
335
+ $LOG.info "Using params file: #{options[:template]}"
336
+
337
+ # fastq file
338
+ if (!options[:fastq].nil? && options[:fastq]!='-' && !File.exists?(options[:fastq]))
339
+ $LOG.error "Input file: #{options[:fasta]} doesn't exists"
340
+ exit
341
+ end
342
+
343
+
344
+
345
+ # fasta file
346
+ if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
347
+ $LOG.error "Input file: #{options[:fasta]} doesn't exists"
348
+ exit
349
+ end
350
+
351
+ # qual file
352
+ if ((!options[:qual].nil?)&&(!File.exists?(options[:qual])))
353
+ $LOG.error "Input file: #{options[:qual]} doesn't exists"
354
+ exit
355
+ end
356
+
357
+ s = Seqtrim.new(options)
358
+
359
+ #generate report
360
+
361
+
362
+ if system("which generate_report.rb > /dev/null ")
363
+ cmd="generate_report.rb output_files 2> report_generation_errors.log"
364
+ $LOG.info "Generating report #{cmd}"
365
+ `#{cmd}`
366
+ else
367
+ $LOG.info "If you want a detailed report in PDF format, install the optional seqtrimnext_report gem (gem install seqtrimnext_report)"
368
+ end
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fastq'
4
+
5
+
6
+ if ARGV.count < 3
7
+ puts "#{$0} FASTQ OUTPUT_NAME SPLIT_BY"
8
+ exit
9
+ end
10
+
11
+
12
+
13
+ fastq = ARGV.shift
14
+ output_name = ARGV.shift
15
+ split_by = ARGV.shift.to_i
16
+
17
+
18
+ file_index=1
19
+ out=FastqFile.new("#{output_name}#{file_index}.fastq",'w')
20
+
21
+ fqr=FastqFile.new(fastq)
22
+
23
+ count = 0
24
+
25
+ fqr.each do |seq_name,seq_fasta,seq_qual,comments|
26
+
27
+ out.write_seq(seq_name,seq_fasta,seq_qual,comments)
28
+
29
+ count +=1
30
+
31
+ if (count % split_by) == 0
32
+
33
+ file_index +=1
34
+ out.close
35
+ out=FastqFile.new("#{output_name}#{file_index}.fastq",'w')
36
+
37
+ end
38
+ end
39
+
40
+ out.close
41
+ fqr.close
42
+
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Splits a FastQ file with Illumina paired data into two separate files.
4
+
5
+ require 'scbi_fastq'
6
+
7
+ VERBOSE=false
8
+
9
+ unless [2, 4].include?(ARGV.count)
10
+ puts "Usage: #{$0} paired.fastq output_name [paired1_tag paired2_tag]"
11
+ exit
12
+ end
13
+
14
+ p1_path=ARGV[0]
15
+ output_base_name=ARGV[1]
16
+ # default name suffixes identifying each member of a pair
17
+ paired1_tag='/1'
18
+ paired2_tag='/2'
19
+
20
+ if ARGV.count==4
21
+ paired1_tag=ARGV[2]
22
+ paired2_tag=ARGV[3]
23
+ end
24
+ # tags are quoted so they match literally at the end of the sequence name
25
+ PAIRED1_TAG_RE=/#{Regexp.quote(paired1_tag)}$/
26
+ PAIRED2_TAG_RE=/#{Regexp.quote(paired2_tag)}$/
27
+
28
+
29
+ unless File.exist?(p1_path)
30
+ puts "File #{p1_path} doesn't exists"
31
+ exit
32
+ end
33
+
34
+ paired1_out = FastqFile.new(output_base_name+'_paired1.fastq','w',:sanger, true)
35
+ paired2_out = FastqFile.new(output_base_name+'_paired2.fastq','w',:sanger, true)
36
+
37
+
38
+ f_file = FastqFile.new(p1_path,'r',:sanger, true)
39
+
40
+ f_file.each do |n,f,q,c|
41
+ # route each read by the tag at the end of its name
42
+ if n=~ PAIRED1_TAG_RE
43
+ paired1_out.write_seq(n,f,q,c)
44
+ elsif n=~ PAIRED2_TAG_RE
45
+ paired2_out.write_seq(n,f,q,c)
46
+ else
47
+ STDERR.puts "Aborting due to ERROR in file: #{n} doens't match neither left (#{paired1_tag}) nor right (#{paired2_tag}) tags"
48
+ exit 1
49
+ end
50
+ # progress report every 10000 sequences
51
+ if (f_file.num_seqs%10000) == 0
52
+ puts "Count: #{f_file.num_seqs}"
53
+ end
54
+
55
+
56
+ end
57
+
58
+ f_file.close
59
+
60
+ paired1_out.close
61
+ paired2_out.close
62
+
63
+
64
+
65
+
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ require 'scbi_fasta'
5
+
6
+ if ARGV.count!=3
7
+ puts "Usage: #{$0} fasta qual output_base_name"
8
+ exit
9
+ end
10
+
11
+ fasta_path = ARGV[0]
12
+ qual_path = ARGV[1]
13
+ name = ARGV[2]
14
+
15
+
16
+ out_fasta = name+'.fasta'
17
+ out_qual = name+'.fasta.qual'
18
+
19
+ puts "Opening #{fasta_path}, #{qual_path}"
20
+
21
+ fqr=FastaQualFile.new(fasta_path,qual_path,true)
22
+
23
+ out_f=File.new(out_fasta,'w+')
24
+ out_q=File.new(out_qual,'w+')
25
+
26
+ c=0
27
+
28
+ linker = 'TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG'
29
+
30
+ fqr.each do |n,f,q|
31
+ l_start= 0
32
+ l_end=f.index(linker)
33
+
34
+ if l_end
35
+ r_start=l_end+linker.length
36
+ r_end =f.length
37
+
38
+ forward=f[l_start..l_end-1]
39
+ reverse=f[r_start..r_end]
40
+
41
+ forward_q = q[l_start..l_end-1]
42
+ reverse_q = q[r_start..r_end]
43
+
44
+ if (forward.length!=forward_q.length) || (reverse.length!=reverse_q.length)
45
+ puts [forward.length,forward_q.length,reverse.length,reverse_q.length].join(',')
46
+ end
47
+
48
+ out_f.puts ">#{n}F template=#{n} dir=F library=unadeellas"
49
+ out_f.puts forward
50
+ out_f.puts ">#{n}R template=#{n} dir=R library=unadeellas"
51
+ out_f.puts reverse
52
+
53
+ out_q.puts ">#{n}F template=#{n} dir=F library=unadeellas"
54
+ out_q.puts forward_q.join(' ')
55
+ out_q.puts ">#{n}R template=#{n} dir=R library=unadeellas"
56
+ out_q.puts reverse_q.join(' ')
57
+
58
+
59
+ end
60
+
61
+ c=c+1
62
+ end
63
+
64
+ puts c
65
+
66
+ fqr.close
67
+
68
+ out_f.close
69
+ out_q.close
70
+
@@ -0,0 +1,32 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # SeqtrimAction subclass for AB adapter regions: created with @cut = true and decorated in underlined magenta
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionAbAdapter < SeqtrimAction
11
+ # Passes the range on to SeqtrimAction and sets @cut to true (presumably marks the span for trimming - confirm in SeqtrimAction)
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true
15
+
16
+ end
17
+ #
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class} . BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+ # Returns char after calling its magenta and underline methods
25
+ def apply_decoration(char)
26
+ return char.magenta.underline
27
+
28
+ # return char.magenta.negative
29
+ end
30
+
31
+
32
+ end
@@ -0,0 +1,32 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # SeqtrimAction subclass for AB far adapter regions: created with @cut = true and decorated in negative magenta
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionAbFarAdapter < SeqtrimAction
11
+ # Passes the range on to SeqtrimAction and sets @cut to true (presumably marks the span for trimming - confirm in SeqtrimAction)
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true
15
+
16
+ end
17
+ #
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class} . BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+ # Returns char after calling its magenta and negative methods
25
+ def apply_decoration(char)
26
+ return char.magenta.negative
27
+
28
+ # return char.magenta.negative
29
+ end
30
+
31
+
32
+ end
@@ -0,0 +1,32 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # SeqtrimAction subclass for AB left adapter regions: created with @cut = true and decorated in negative magenta
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionAbLeftAdapter < SeqtrimAction
11
+ # Passes the range on to SeqtrimAction and sets @cut to true (presumably marks the span for trimming - confirm in SeqtrimAction)
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true
15
+
16
+ end
17
+ #
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class} . BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+ # Returns char after calling its magenta and negative methods
25
+ def apply_decoration(char)
26
+ return char.magenta.negative
27
+
28
+ # return char.magenta.negative
29
+ end
30
+
31
+
32
+ end
@@ -0,0 +1,22 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # SeqtrimAction subclass for empty inserts: created with @cut = false and @informative = true, adds no decoration
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionEmptyInsert < SeqtrimAction
11
+ # Passes the range on to SeqtrimAction, then sets @cut to false and @informative to true
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =false
15
+ @informative = true
16
+ end
17
+ # Returns char unchanged
18
+ def apply_decoration(char)
19
+ return char
20
+ end
21
+
22
+ end
@@ -0,0 +1,24 @@
1
+ require "seqtrim_action"
2
+
3
+
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # SeqtrimAction subclass for ignored repeated sequences: created with @cut = false, adds no decoration
8
+ # Inherit: SeqtrimAction
9
+ ########################################################
10
+
11
+ class ActionIgnoreRepeated < SeqtrimAction
12
+ # Passes the range on to SeqtrimAction and sets @cut to false
13
+ def initialize(start_pos,end_pos)
14
+ super(start_pos,end_pos)
15
+ @cut =false
16
+ end
17
+
18
+ # Returns char unchanged
19
+ def apply_decoration(char)
20
+ return char
21
+ # return char.magenta.negative
22
+ end
23
+
24
+ end