seqtrimnext 2.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
data/bin/seqtrimnext ADDED
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env ruby
2
+ # SeqTrimNext: Next generation sequencing preprocessor
3
+ # Copyright (C) <2011>
4
+ # Authors: Almudena Bocinos Rioboo, Diego Dario Guerrero Fernandez,
5
+ # Rocio Bautista Moreno, Juan Falgueras Cano & M. Gonzalo Claros
6
+ # email: soporte@scbi.uma.es - http://www.scbi.uma.es
7
+ #
8
+ # This program is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU Affero General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU Affero General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Affero General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+
21
+ #= SEQTRIM II
22
+ #
23
+ #== Running
24
+ #
25
+ # Seqtrim can be run locally or in a parallel/distributed environment.
26
+ #
27
+ #=== Running locally
28
+ #* list
29
+ #
30
+ #=== Running in a distributed environment
31
+ #
32
+ #== SEC 2
33
+ #
34
+ #=== SUB 2.1
35
+ #
36
+
37
+
38
+ # #finds the classes that were in the folder 'classes'
39
+ # ROOT_PATH=File.dirname(__FILE__)
40
+ # $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
41
+ #
42
+ # #finds the classes that were in the folder 'plugins'
43
+ # $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
44
+ #
45
+ #
46
+ # #finds the classes that were in the folder 'plugins'
47
+ # $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
48
+ #
49
+ # #finds the classes that were in the folder 'utils'
50
+ # $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
51
+ #
52
+ # $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
53
+
54
+ # to test scbi_drb gem locally
55
+ # $: << File.expand_path('~/progs/ruby/gems/scbi_drb/lib/')
56
+
57
+ # $: << File.expand_path(ROOT_PATH)
58
+
59
+ $: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
60
+
61
+ require 'seqtrimnext'
62
+
63
+ ############ PATHS #######################
64
+ $SEQTRIM_PATH = ROOT_PATH
65
+
66
+ # if there is a BLASTDB environment var, then use it
67
+ if ENV['BLASTDB']# && Dir.exists?(ENV['BLASTDB'])
68
+ $FORMATTED_DB_PATH = ENV['BLASTDB']
69
+ $DB_PATH = File.dirname($FORMATTED_DB_PATH)
70
+ else # otherwise use ROOTPATH + DB
71
+ $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB",'formatted'))
72
+ $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
73
+ end
74
+
75
+ ENV['BLASTDB']=$FORMATTED_DB_PATH
76
+
77
+ OUTPUT_PATH='output_files'
78
+
79
+
80
+ # TODO - Document all classes and methods so that their descriptions show up when running rdoc in the terminal
81
+
82
+ #Checks install requirements
83
+ require 'install_requirements'
84
+
85
+ ins = InstallRequirements.new
86
+ if (!ins.check_install_requirements)
87
+ exit
88
+ end
89
+
90
+ require "logger"
91
+ require 'optparse'
92
+ require "global_match"
93
+ require "seqtrim"
94
+ require "params.rb"
95
+ require "plugin.rb"
96
+ require "sequence.rb"
97
+ require "plugin_manager.rb"
98
+ require "make_blast_db"
99
+ require 'hash_stats'
100
+ require 'list_db'
101
+ require 'install_database'
102
+ require 'socket'
103
+
104
+
105
+ def show_additional_help
106
+
107
+ puts "\n"*3
108
+ puts "E.g.: processing a fastq sequences file"
109
+ puts "#{$0} -t genomics_454.txt -Q sequences.fastq"
110
+ puts "\n"*2
111
+
112
+ puts "E.g.: processing a fasta file with qual"
113
+ puts "#{$0} -t genomics_454.txt -f sequences.fasta -q sequences.qual"
114
+
115
+ templates = Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
116
+
117
+ puts "\n\n ========================================================================================================"
118
+ puts " Available templates to use with -t option (you can also use your own template):"
119
+ puts " Templates at: #{File.join($SEQTRIM_PATH,'templates')}"
120
+ puts " ========================================================================================================\n\n"
121
+
122
+ templates.map{|e| puts " "+e}
123
+
124
+ puts "\n\n ========================================================================================================"
125
+ puts " Available databases to use in custom template files (you can also use your own database):"
126
+ puts " Databases at: #{$DB_PATH}"
127
+ puts " ========================================================================================================\n\n"
128
+
129
+ ListDb.list_databases($DB_PATH).map{|e| puts " "+e}
130
+ #
131
+ # ip_list = Socket.ip_address_list.select{|e| e.ipv4?}.map{|e| e.ip_address}
132
+ #
133
+ # puts ip_list
134
+
135
+
136
+ exit
137
+
138
+ end
139
+
140
+
141
+ # Reads the parameters from the command line. ARGV, which is an array, is used for this.
142
+ options = {}
143
+
144
+ optparse = OptionParser.new do |opts|
145
+
146
+ # Set a banner, displayed at the top
147
+ # of the help screen.
148
+ opts.banner = "Usage: #{$0} -t template_file \{-Q fastaQ_file | -f fasta_file -q qual_file\} [options]"
149
+
150
+ # Define the options, and what they do
151
+ #options[:server_ip] = '127.0.0.1'
152
+ options[:server_ip] = '0.0.0.0'
153
+ opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
154
+
155
+ # get list of available ips
156
+ ip_list = Socket.ip_address_list.select{|e| e.ipv4?}.map{|e| e.ip_address}
157
+
158
+ ip=ip_list.select{|ip| ip.index(server_ip)==0}.first
159
+
160
+ if !ip
161
+ ip='0.0.0.0'
162
+ # $LOG.info("No available ip matching #{server_ip}")
163
+ end
164
+ # $ .info("Using ip #{ip}")
165
+ options[:server_ip] = ip
166
+ end
167
+
168
+ options[:port] = 0 #50000
169
+ opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
170
+ options[:port] = port.to_i
171
+ end
172
+
173
+ options[:workers] = 2
174
+
175
+ opts.on( '-w', '--workers COUNT', 'Number of workers, or file containing machine names to launch workers with ssh' ) do |workers|
176
+ if File.exist?(workers) # File.exists? was removed in Ruby 3.2; File.exist? works on every version
177
+ # use workers file: each line of the file becomes one entry of options[:workers]
178
+ options[:workers] = File.read(workers).split("\n").map{|w| w.chomp}
179
+ else
180
+ begin
181
+ options[:workers] = Integer(workers)
182
+ rescue # Integer() raises on non-numeric input; options[:workers] still holds the default here
183
+ STDERR.puts "ERROR:Invalid workers parameter #{workers}" # report the rejected argument itself
184
+ exit
185
+ end
186
+
187
+ end
188
+
189
+ end
190
+
191
+
192
+
193
+ options[:only_workers] = false
194
+ opts.on( '-o', '--only_workers', 'Only launch workers' ) do
195
+ options[:only_workers] = true
196
+ end
197
+
198
+ options[:check_db] = false
199
+ opts.on( '-c', '--check_databases', 'Check Blast databases and reformat if necessary' ) do
200
+ options[:check_db] = true
201
+ end
202
+
203
+ options[:install_db] = nil
204
+ opts.on( '-i', '--install_databases TYPE', 'Install base databases and reformat them if necessary') do |db_type|
205
+ options[:install_db] = db_type
206
+ end
207
+
208
+ options[:logfile] = STDOUT
209
+ opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) do |file|
210
+ options[:logfile] = file
211
+ end
212
+
213
+ options[:fastq] = nil
214
+ opts.on( '-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>' ) do |file|
215
+ options[:fastq] = file
216
+ end
217
+
218
+ options[:fasta] = nil
219
+ opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
220
+ options[:fasta] = file
221
+ end
222
+
223
+ options[:qual] = nil
224
+ opts.on( '-q', '--qual FILE', 'Qual input file' ) do |file|
225
+ options[:qual] = file
226
+ end
227
+
228
+
229
+
230
+ options[:list_db] = nil
231
+ opts.on( '-L', '--list_db DB_NAME', 'List entries IDs in DB_NAME. Use "-L all" to view all available databases' ) do |value|
232
+ options[:list_db] = value
233
+ end
234
+
235
+ options[:gen_params] = false
236
+ opts.on( '-G', '--generate_template', 'Generates a sample template file with default parameters' ) do
237
+ options[:gen_params] = true
238
+ end
239
+
240
+ options[:template] = nil
241
+ opts.on( '-t', '--template TEMPLATE_FILE', 'Use TEMPLATE_FILE instead of default parameters' ) do |file|
242
+ options[:template] = file
243
+ end
244
+
245
+ options[:chunk_size] = 100
246
+ opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
247
+ options[:chunk_size] = cs.to_i
248
+ end
249
+
250
+
251
+ options[:json] = nil
252
+ opts.on( '-j', '--json', 'Save results in json file' ) do
253
+ options[:json] = true
254
+ end
255
+
256
+ # This displays the help screen, all programs are
257
+ # assumed to have this option.
258
+ opts.on_tail( '-h', '--help', 'Display this screen' ) do
259
+ puts opts
260
+ show_additional_help
261
+ exit
262
+ end
263
+ end
264
+
265
+ # parse options and remove from ARGV
266
+ optparse.parse!
267
+
268
+ if options[:list_db] then
269
+ # List database entries in a database
270
+ ListDb.new($DB_PATH,options[:list_db])
271
+ exit
272
+ end
273
+
274
+ if options[:gen_params] then
275
+ # Generates a sample params file in current directory
276
+ Params.generate_sample_params
277
+ exit
278
+ end
279
+
280
+ #set logger
281
+ # system('rm logs/*')
282
+ FileUtils.mkdir('logs') if !File.exists?('logs')
283
+
284
+ $LOG = Logger.new(options[:logfile])
285
+ $LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
286
+ #logger.level = Logger::INFO
287
+
288
+ #DEBUG < INFO < WARN < ERROR < FATAL < UNKNOWN
289
+
290
+
291
+ $LOG.info("Using BLASTDB: "+ $FORMATTED_DB_PATH)
292
+ $LOG.info("Using options: "+ options.to_json)
293
+
294
+ if options[:install_db] then
295
+ #install databases
296
+ InstallDatabase.new(options[:install_db],$DB_PATH)
297
+
298
+ # reformat databases
299
+ MakeBlastDb.new($DB_PATH)
300
+ exit
301
+ end
302
+
303
+ if !File.exists?($FORMATTED_DB_PATH)
304
+ STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
305
+ exit
306
+ end
307
+
308
+
309
+ if options[:check_db] then
310
+ # check and format blast databases
311
+ MakeBlastDb.new($DB_PATH)
312
+ exit
313
+ end
314
+
315
+ required_options = options[:template] && (options[:fastq] || (options[:fasta]))
316
+
317
+ # if ((ARGV.count != 2) && (ARGV.count != 3)) # con esto vemos si hay argumentos,
318
+ if (ARGV.count != 0) || (!required_options) # con esto vemos si hay argumentos,
319
+ puts "You must provide all required options"
320
+ puts ""
321
+ puts optparse.help
322
+ exit
323
+ end
324
+
325
+ # check for template
326
+ if (!File.exists?(options[:template]))
327
+ if File.exists?(File.join($SEQTRIM_PATH,'templates',options[:template]))
328
+ options[:template] = File.join($SEQTRIM_PATH,'templates',options[:template])
329
+ else
330
+ $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
331
+ puts Dir.glob(File.join($SEQTRIM_PATH,'templates','*.txt')).map{|t| File.basename(t)}
332
+ exit
333
+ end
334
+ end
335
+ $LOG.info "Using params file: #{options[:template]}"
336
+
337
+ # fastq file
338
+ if (!options[:fastq].nil? && options[:fastq]!='-' && !File.exists?(options[:fastq]))
339
+ $LOG.error "Input file: #{options[:fasta]} doesn't exists"
340
+ exit
341
+ end
342
+
343
+
344
+
345
+ # fasta file
346
+ if (!options[:fasta].nil? && !File.exists?(options[:fasta]))
347
+ $LOG.error "Input file: #{options[:fasta]} doesn't exists"
348
+ exit
349
+ end
350
+
351
+ # qual file
352
+ if ((!options[:qual].nil?)&&(!File.exists?(options[:qual])))
353
+ $LOG.error "Input file: #{options[:qual]} doesn't exists"
354
+ exit
355
+ end
356
+
357
+ s = Seqtrim.new(options)
358
+
359
+ #generate report
360
+
361
+
362
+ if system("which generate_report.rb > /dev/null ")
363
+ cmd="generate_report.rb output_files 2> report_generation_errors.log"
364
+ $LOG.info "Generating report #{cmd}"
365
+ `#{cmd}`
366
+ else
367
+ $LOG.info "If you want a detailed report in PDF format, install the optional seqtrimnext_report gem (gem install seqtrimnext_report)"
368
+ end
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fastq'
4
+
5
+
6
+ if ARGV.count < 3
7
+ puts "#{$0} FASTQ OUTPUT_NAME SPLIT_BY"
8
+ exit
9
+ end
10
+
11
+
12
+
13
+ fastq = ARGV.shift
14
+ output_name = ARGV.shift
15
+ split_by = ARGV.shift.to_i
16
+
17
+
18
+ file_index=1
19
+ out=FastqFile.new("#{output_name}#{file_index}.fastq",'w')
20
+
21
+ fqr=FastqFile.new(fastq)
22
+
23
+ count = 0
24
+
25
+ fqr.each do |seq_name,seq_fasta,seq_qual,comments|
26
+
27
+ out.write_seq(seq_name,seq_fasta,seq_qual,comments)
28
+
29
+ count +=1
30
+
31
+ if (count % split_by) == 0
32
+
33
+ file_index +=1
34
+ out.close
35
+ out=FastqFile.new("#{output_name}#{file_index}.fastq",'w')
36
+
37
+ end
38
+ end
39
+
40
+ out.close
41
+ fqr.close
42
+
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Splits a FastQ file with Illumina paired data into two separate files.
4
+
5
+ require 'scbi_fastq'
6
+
7
+ VERBOSE=false
8
+
9
+ if !(ARGV.count==2 or ARGV.count==4)
10
+ puts "Usage: #{$0} paired.fastq output_name [pared1_tag paired2_tag]"
11
+ exit
12
+ end
13
+
14
+ p1_path=ARGV[0]
15
+ output_base_name=ARGV[1]
16
+
17
+ paired1_tag='/1'
18
+ paired2_tag='/2'
19
+
20
+ if (ARGV.count==4)
21
+ paired1_tag=ARGV[2]
22
+ paired2_tag=ARGV[3]
23
+ end
24
+
25
+ PAIRED1_TAG_RE=/#{Regexp.quote(paired1_tag)}$/
26
+ PAIRED2_TAG_RE=/#{Regexp.quote(paired2_tag)}$/
27
+
28
+
29
+ if !File.exists?(p1_path)
30
+ puts "File #{p1_path} doesn't exists"
31
+ exit
32
+ end
33
+
34
+ paired1_out = FastqFile.new(output_base_name+'_paired1.fastq','w',:sanger, true)
35
+ paired2_out = FastqFile.new(output_base_name+'_paired2.fastq','w',:sanger, true)
36
+
37
+
38
+ f_file = FastqFile.new(p1_path,'r',:sanger, true)
39
+
40
+ f_file.each do |n,f,q,c|
41
+
42
+ if n=~ PAIRED1_TAG_RE
43
+ paired1_out.write_seq(n,f,q,c)
44
+ elsif n=~ PAIRED2_TAG_RE
45
+ paired2_out.write_seq(n,f,q,c)
46
+ else
47
+ STDERR.puts "Aborting due to ERROR in file: #{n} doens't match neither left (#{paired1_tag}) nor right (#{paired2_tag}) tags"
48
+ exit
49
+ end
50
+
51
+ if ((f_file.num_seqs%10000) == 0)
52
+ puts "Count: #{f_file.num_seqs}"
53
+ end
54
+
55
+
56
+ end
57
+
58
+ f_file.close
59
+
60
+ paired1_out.close
61
+ paired2_out.close
62
+
63
+
64
+
65
+
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ require 'scbi_fasta'
5
+
6
+ if ARGV.count!=3
7
+ puts "Usage: #{$0} fasta qual output_base_name"
8
+ exit
9
+ end
10
+
11
+ fasta_path = ARGV[0]
12
+ qual_path = ARGV[1]
13
+ name = ARGV[2]
14
+
15
+
16
+ out_fasta = name+'.fasta'
17
+ out_qual = name+'.fasta.qual'
18
+
19
+ puts "Opening #{fasta_path}, #{qual_path}"
20
+
21
+ fqr=FastaQualFile.new(fasta_path,qual_path,true)
22
+
23
+ out_f=File.new(out_fasta,'w+')
24
+ out_q=File.new(out_qual,'w+')
25
+
26
+ c=0
27
+
28
+ linker = 'TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG'
29
+
30
+ fqr.each do |n,f,q|
31
+ l_start= 0
32
+ l_end=f.index(linker)
33
+
34
+ if l_end
35
+ r_start=l_end+linker.length
36
+ r_end =f.length
37
+
38
+ forward=f[l_start..l_end-1]
39
+ reverse=f[r_start..r_end]
40
+
41
+ forward_q = q[l_start..l_end-1]
42
+ reverse_q = q[r_start..r_end]
43
+
44
+ if (forward.length!=forward_q.length) || (reverse.length!=reverse_q.length)
45
+ puts [forward.length,forward_q.length,reverse.length,reverse_q.length].join(',')
46
+ end
47
+
48
+ out_f.puts ">#{n}F template=#{n} dir=F library=unadeellas"
49
+ out_f.puts forward
50
+ out_f.puts ">#{n}R template=#{n} dir=R library=unadeellas"
51
+ out_f.puts reverse
52
+
53
+ out_q.puts ">#{n}F template=#{n} dir=F library=unadeellas"
54
+ out_q.puts forward_q.join(' ')
55
+ out_q.puts ">#{n}R template=#{n} dir=R library=unadeellas"
56
+ out_q.puts reverse_q.join(' ')
57
+
58
+
59
+ end
60
+
61
+ c=c+1
62
+ end
63
+
64
+ puts c
65
+
66
+ fqr.close
67
+
68
+ out_f.close
69
+ out_q.close
70
+
@@ -0,0 +1,32 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines ActionAbAdapter: sets @cut to true and decorates chars as magenta + underline
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionAbAdapter < SeqtrimAction
11
+
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true # NOTE(review): presumably read by SeqtrimAction to trim this region - confirm in seqtrim_action.rb
15
+
16
+ end
17
+ #
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class} . BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+
25
+ def apply_decoration(char)
26
+ return char.magenta.underline # magenta/underline are not core String methods; presumably a colour extension loaded elsewhere - confirm
27
+
28
+ # return char.magenta.negative
29
+ end
30
+
31
+
32
+ end
@@ -0,0 +1,32 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines ActionAbFarAdapter: sets @cut to true and decorates chars as magenta + negative
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionAbFarAdapter < SeqtrimAction
11
+
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true # NOTE(review): presumably read by SeqtrimAction to trim this region - confirm in seqtrim_action.rb
15
+
16
+ end
17
+ #
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class} . BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+
25
+ def apply_decoration(char)
26
+ return char.magenta.negative # magenta/negative are not core String methods; presumably a colour extension loaded elsewhere - confirm
27
+
28
+ # return char.magenta.negative
29
+ end
30
+
31
+
32
+ end
@@ -0,0 +1,32 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines ActionAbLeftAdapter: sets @cut to true and decorates chars as magenta + negative
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionAbLeftAdapter < SeqtrimAction
11
+
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =true # NOTE(review): presumably read by SeqtrimAction to trim this region - confirm in seqtrim_action.rb
15
+
16
+ end
17
+ #
18
+ # def apply_to(seq)
19
+ #
20
+ # # seq.seq_fasta = seq.seq_fasta.slice(start_pos,end_pos)
21
+ # $LOG.debug " Applying #{self.class} . BEGIN: #{@start_pos} END: #{@end_pos} "
22
+ #
23
+ # end
24
+
25
+ def apply_decoration(char)
26
+ return char.magenta.negative # magenta/negative are not core String methods; presumably a colour extension loaded elsewhere - confirm
27
+
28
+ # return char.magenta.negative
29
+ end
30
+
31
+
32
+ end
@@ -0,0 +1,22 @@
1
+ require "seqtrim_action"
2
+
3
+ ########################################################
4
+ # Author: Almudena Bocinos Rioboo
5
+ #
6
+ # Defines ActionEmptyInsert: sets @cut to false and @informative to true; chars are left undecorated
7
+ # Inherit: SeqtrimAction
8
+ ########################################################
9
+
10
+ class ActionEmptyInsert < SeqtrimAction
11
+
12
+ def initialize(start_pos,end_pos)
13
+ super(start_pos,end_pos)
14
+ @cut =false # NOTE(review): presumably means the region is not trimmed - confirm in seqtrim_action.rb
15
+ @informative = true # NOTE(review): meaning is defined by SeqtrimAction, not visible here - confirm
16
+ end
17
+
18
+ def apply_decoration(char)
19
+ return char # returned unchanged: no decoration is applied for this action
20
+ end
21
+
22
+ end
@@ -0,0 +1,24 @@
1
+ require "seqtrim_action"
2
+
3
+
4
+ ########################################################
5
+ # Author: Almudena Bocinos Rioboo
6
+ #
7
+ # Defines ActionIgnoreRepeated: sets @cut to false; chars are left undecorated
8
+ # Inherit: SeqtrimAction
9
+ ########################################################
10
+
11
+ class ActionIgnoreRepeated < SeqtrimAction
12
+
13
+ def initialize(start_pos,end_pos)
14
+ super(start_pos,end_pos)
15
+ @cut =false # NOTE(review): presumably means the region is not trimmed - confirm in seqtrim_action.rb
16
+ end
17
+
18
+
19
+ def apply_decoration(char)
20
+ return char # returned unchanged: no decoration is applied for this action
21
+ # return char.magenta.negative
22
+ end
23
+
24
+ end