seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
data/bin/seqtrimnext
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# SeqTrimNext: Next generation sequencing preprocessor
|
|
3
|
+
# Copyright (C) <2011>
|
|
4
|
+
# Authors: Almudena Bocinos Rioboo, Diego Dario Guerrero Fernandez,
|
|
5
|
+
# Rocio Bautista Moreno, Juan Falgueras Cano & M. Gonzalo Claros
|
|
6
|
+
# email: soporte@scbi.uma.es - http://www.scbi.uma.es
|
|
7
|
+
#
|
|
8
|
+
# This program is free software: you can redistribute it and/or modify
|
|
9
|
+
# it under the terms of the GNU Affero General Public License as published by
|
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
11
|
+
# (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
# GNU Affero General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
20
|
+
|
|
21
|
+
#= SEQTRIM II
|
|
22
|
+
#
|
|
23
|
+
#== Running
|
|
24
|
+
#
|
|
25
|
+
# Seqtrim can be run locally or in a parallel/distributed environment.
|
|
26
|
+
#
|
|
27
|
+
#=== Running locally
|
|
28
|
+
#* list
|
|
29
|
+
#
|
|
30
|
+
#=== Running in a distributed environment
|
|
31
|
+
#
|
|
32
|
+
#== SEC 2
|
|
33
|
+
#
|
|
34
|
+
#=== SUB 2.1
|
|
35
|
+
#
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# #finds the classes that were in the folder 'classes'
|
|
39
|
+
# ROOT_PATH=File.dirname(__FILE__)
|
|
40
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
|
|
41
|
+
#
|
|
42
|
+
# #finds the classes that were in the folder 'plugins'
|
|
43
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
|
|
44
|
+
#
|
|
45
|
+
#
|
|
46
|
+
# #finds the classes that were in the folder 'plugins'
|
|
47
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
|
|
48
|
+
#
|
|
49
|
+
# #finds the classes that were in the folder 'utils'
|
|
50
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
|
|
51
|
+
#
|
|
52
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
|
|
53
|
+
|
|
54
|
+
# to test scbi_drb gem locally
|
|
55
|
+
# $: << File.expand_path('~/progs/ruby/gems/scbi_drb/lib/')
|
|
56
|
+
|
|
57
|
+
# $: << File.expand_path(ROOT_PATH)
|
|
58
|
+
|
|
59
|
+
$: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
|
|
60
|
+
|
|
61
|
+
require 'seqtrimnext'
|
|
62
|
+
|
|
63
|
+
############ PATHS #######################
# Installation root used to locate the bundled templates.
# NOTE(review): ROOT_PATH is not defined in this script; presumably it is set
# by the 'seqtrimnext' library required above - confirm.
$SEQTRIM_PATH = ROOT_PATH

# A BLASTDB environment variable, when present, wins over the bundled DB
# folder; its parent directory is then taken as the raw database folder.
blastdb_from_env = ENV['BLASTDB']

if blastdb_from_env
  $FORMATTED_DB_PATH = blastdb_from_env
  $DB_PATH = File.dirname($FORMATTED_DB_PATH)
else
  # No BLASTDB set: fall back to ROOT_PATH/DB and ROOT_PATH/DB/formatted.
  $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB", 'formatted'))
  $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
end

# Export the chosen location so that child processes see the same BLASTDB.
ENV['BLASTDB'] = $FORMATTED_DB_PATH

OUTPUT_PATH = 'output_files'
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# TODO - Document every class and method so that their descriptions show up when running rdoc from the terminal
|
|
81
|
+
|
|
82
|
+
# Checks install requirements
|
|
83
|
+
require 'install_requirements'
|
|
84
|
+
|
|
85
|
+
# Stop before doing anything else when the requirement check reports failure.
ins = InstallRequirements.new
exit unless ins.check_install_requirements
|
|
89
|
+
|
|
90
|
+
require "logger"
|
|
91
|
+
require 'optparse'
|
|
92
|
+
require "global_match"
|
|
93
|
+
require "seqtrim"
|
|
94
|
+
require "params.rb"
|
|
95
|
+
require "plugin.rb"
|
|
96
|
+
require "sequence.rb"
|
|
97
|
+
require "plugin_manager.rb"
|
|
98
|
+
require "make_blast_db"
|
|
99
|
+
require 'hash_stats'
|
|
100
|
+
require 'list_db'
|
|
101
|
+
require 'install_database'
|
|
102
|
+
require 'socket'
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Prints two usage examples, the template files found under
# $SEQTRIM_PATH/templates and the databases reported by ListDb for $DB_PATH,
# then terminates the program (this method never returns).
def show_additional_help
  puts "\n"*3
  puts "E.g.: processing a fastq sequences file"
  puts "#{$0} -t genomics_454.txt -Q sequences.fastq"
  puts "\n"*2

  puts "E.g.: processing a fasta file with qual"
  puts "#{$0} -t genomics_454.txt -f sequences.fasta -q sequences.qual"

  # Only the file names are listed, not their full paths.
  template_names = Dir.glob(File.join($SEQTRIM_PATH, 'templates', '*.txt')).map do |template_path|
    File.basename(template_path)
  end

  puts "\n\n ========================================================================================================"
  puts " Available templates to use with -t option (you can also use your own template):"
  puts " Templates at: #{File.join($SEQTRIM_PATH,'templates')}"
  puts " ========================================================================================================\n\n"

  template_names.each { |template_name| puts " "+template_name }

  puts "\n\n ========================================================================================================"
  puts " Available databases to use in custom template files (you can also use your own database):"
  puts " Databases at: #{$DB_PATH}"
  puts " ========================================================================================================\n\n"

  ListDb.list_databases($DB_PATH).each { |db_name| puts " "+db_name }

  exit
end
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# Reads the parameters from console. For this is used ARGV, that is an array.
|
|
142
|
+
# Values gathered from the command line. Each key is seeded with its default
# right before the matching switch is declared, so the hash is fully populated
# even when the program is started without any switch.
options = {}

optparse = OptionParser.new do |opts|
  # Banner displayed at the top of the help screen.
  opts.banner = "Usage: #{$0} -t template_file \{-Q fastaQ_file | -f fasta_file -q qual_file\} [options]"

  # '0.0.0.0' is both the default and the fallback when -s matches nothing.
  options[:server_ip] = '0.0.0.0'
  opts.on('-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface') do |server_ip|
    # IPv4 addresses of the local interfaces.
    ip_list = Socket.ip_address_list.select { |e| e.ipv4? }.map { |e| e.ip_address }

    # Pick the first local address that begins with the (possibly partial) ip.
    matching_ip = ip_list.find { |candidate| candidate.start_with?(server_ip) }
    options[:server_ip] = matching_ip || '0.0.0.0'
  end

  options[:port] = 0 # 50000
  opts.on('-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
    options[:port] = port.to_i
  end

  options[:workers] = 2
  opts.on('-w', '--workers COUNT', 'Number of workers, or file containing machine names to launch workers with ssh') do |workers|
    if File.exist?(workers)
      # The argument names an existing file: one machine name per line.
      options[:workers] = File.read(workers).split("\n").map(&:chomp)
    else
      begin
        options[:workers] = Integer(workers)
      rescue ArgumentError, TypeError
        # Report the rejected argument itself, not the value currently stored
        # in options[:workers].
        STDERR.puts "ERROR:Invalid workers parameter #{workers}"
        exit
      end
    end
  end

  options[:only_workers] = false
  opts.on('-o', '--only_workers', 'Only launch workers') do
    options[:only_workers] = true
  end

  options[:check_db] = false
  opts.on('-c', '--check_databases', 'Check Blast databases and reformat if necessary') do
    options[:check_db] = true
  end

  options[:install_db] = nil
  opts.on('-i', '--install_databases TYPE', 'Install base databases and reformat them if necessary') do |db_type|
    options[:install_db] = db_type
  end

  options[:logfile] = STDOUT
  opts.on('-l', '--logfile FILE', 'Write log to FILE') do |file|
    options[:logfile] = file
  end

  options[:fastq] = nil
  opts.on('-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>') do |file|
    options[:fastq] = file
  end

  options[:fasta] = nil
  opts.on('-f', '--fasta FILE', 'Fasta input file') do |file|
    options[:fasta] = file
  end

  options[:qual] = nil
  opts.on('-q', '--qual FILE', 'Qual input file') do |file|
    options[:qual] = file
  end

  options[:list_db] = nil
  opts.on('-L', '--list_db DB_NAME', 'List entries IDs in DB_NAME. Use "-L all" to view all available databases') do |value|
    options[:list_db] = value
  end

  options[:gen_params] = false
  opts.on('-G', '--generate_template', 'Generates a sample template file with default parameters') do
    options[:gen_params] = true
  end

  options[:template] = nil
  opts.on('-t', '--template TEMPLATE_FILE', 'Use TEMPLATE_FILE instead of default parameters') do |file|
    options[:template] = file
  end

  options[:chunk_size] = 100
  opts.on('-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>') do |cs|
    options[:chunk_size] = cs.to_i
  end

  options[:json] = nil
  opts.on('-j', '--json', 'Save results in json file') do
    options[:json] = true
  end

  # Help screen: the generated option summary followed by the extended help.
  opts.on_tail('-h', '--help', 'Display this screen') do
    puts opts
    show_additional_help
    exit
  end
end

# Parse the switches and remove them from ARGV.
optparse.parse!
|
|
267
|
+
|
|
268
|
+
# -L: list the entries of the requested database, then stop.
requested_db = options[:list_db]
if requested_db
  ListDb.new($DB_PATH, requested_db)
  exit
end

# -G: generate a sample params file, then stop.
if options[:gen_params]
  Params.generate_sample_params
  exit
end
|
|
279
|
+
|
|
280
|
+
# Set up the global logger.
# system('rm logs/*')
# A 'logs' entry is created in the current directory when none exists.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
FileUtils.mkdir('logs') unless File.exist?('logs')

# options[:logfile] is STDOUT by default, or the path given with -l.
$LOG = Logger.new(options[:logfile])
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
# logger.level = Logger::INFO
# Severity order: DEBUG < INFO < WARN < ERROR < FATAL < UNKNOWN

$LOG.info("Using BLASTDB: "+ $FORMATTED_DB_PATH)
$LOG.info("Using options: "+ options.to_json)
|
|
293
|
+
|
|
294
|
+
# -i: install the requested databases, reformat them and stop.
if options[:install_db]
  InstallDatabase.new(options[:install_db], $DB_PATH)

  # reformat databases
  MakeBlastDb.new($DB_PATH)
  exit
end

# Nothing below can work without the formatted database folder.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist?($FORMATTED_DB_PATH)
  STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
  exit
end

# -c: check and format the blast databases, then stop.
if options[:check_db]
  MakeBlastDb.new($DB_PATH)
  exit
end
|
|
314
|
+
|
|
315
|
+
# A template plus one sequence source (fastq, or fasta) is mandatory.
required_options = options[:template] && (options[:fastq] || options[:fasta])

# Leftover positional arguments or missing mandatory options: print the help
# and stop.
if (ARGV.count != 0) || !required_options
  puts "You must provide all required options"
  puts ""
  puts optparse.help
  exit
end

# Check for the template: a path that does not exist as given is looked up
# among the templates under $SEQTRIM_PATH/templates.
# File.exist? is used throughout because File.exists? was removed in Ruby 3.2.
unless File.exist?(options[:template])
  bundled_template = File.join($SEQTRIM_PATH, 'templates', options[:template])

  if File.exist?(bundled_template)
    options[:template] = bundled_template
  else
    $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
    puts Dir.glob(File.join($SEQTRIM_PATH, 'templates', '*.txt')).map { |t| File.basename(t) }
    exit
  end
end
$LOG.info "Using params file: #{options[:template]}"

# fastq file ('-' means <STDIN>, so there is no file to look for)
if !options[:fastq].nil? && options[:fastq] != '-' && !File.exist?(options[:fastq])
  # Name the fastq path that was actually checked (this used to print the
  # fasta option, which is normally empty when -Q is used).
  $LOG.error "Input file: #{options[:fastq]} doesn't exists"
  exit
end

# fasta file
if !options[:fasta].nil? && !File.exist?(options[:fasta])
  $LOG.error "Input file: #{options[:fasta]} doesn't exists"
  exit
end

# qual file
if !options[:qual].nil? && !File.exist?(options[:qual])
  $LOG.error "Input file: #{options[:qual]} doesn't exists"
  exit
end
|
|
356
|
+
|
|
357
|
+
# Hand the parsed options to Seqtrim.
# NOTE(review): presumably the whole processing happens inside the
# constructor, since the instance is not used afterwards - confirm.
s = Seqtrim.new(options)

# Generate the report, but only when generate_report.rb is found in the PATH.
report_tool_found = system("which generate_report.rb > /dev/null ")

if report_tool_found
  cmd = "generate_report.rb output_files 2> report_generation_errors.log"
  $LOG.info "Generating report #{cmd}"
  %x(#{cmd})
else
  $LOG.info "If you want a detailed report in PDF format, install the optional seqtrimnext_report gem (gem install seqtrimnext_report)"
end
|
data/bin/split_fastq.rb
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fastq'
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Copies the sequences of FASTQ into numbered files
# OUTPUT_NAME1.fastq, OUTPUT_NAME2.fastq, ... A new file is started after
# every SPLIT_BY sequences.
if ARGV.count < 3
  puts "#{$0} FASTQ OUTPUT_NAME SPLIT_BY"
  exit
end

fastq = ARGV.shift
output_name = ARGV.shift
split_by = ARGV.shift.to_i

# to_i turns a non-numeric argument into 0, and 0 would raise
# ZeroDivisionError in the modulo test below; refuse it (and negative sizes)
# with an explicit message before any output file is created.
if split_by <= 0
  STDERR.puts "ERROR: SPLIT_BY must be a positive integer"
  exit 1
end

file_index = 1
out = FastqFile.new("#{output_name}#{file_index}.fastq", 'w')

fqr = FastqFile.new(fastq)

count = 0

fqr.each do |seq_name, seq_fasta, seq_qual, comments|
  out.write_seq(seq_name, seq_fasta, seq_qual, comments)
  count += 1

  # Chunk complete: close it and open the next numbered file.
  # NOTE(review): when the number of sequences is an exact multiple of
  # SPLIT_BY the last file opened here receives no sequences.
  next unless (count % split_by) == 0

  file_index += 1
  out.close
  out = FastqFile.new("#{output_name}#{file_index}.fastq", 'w')
end

out.close
fqr.close
|
|
42
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# Splits a FastQ file with ilumina paired data into two separate files.
|
|
4
|
+
|
|
5
|
+
require 'scbi_fastq'
|
|
6
|
+
|
|
7
|
+
VERBOSE=false

# Accepted forms: "paired.fastq output_name" or
# "paired.fastq output_name tag1 tag2".
unless [2, 4].include?(ARGV.count)
  puts "Usage: #{$0} paired.fastq output_name [pared1_tag paired2_tag]"
  exit
end

p1_path = ARGV[0]
output_base_name = ARGV[1]

# Default name suffixes that identify each member of a pair.
paired1_tag = '/1'
paired2_tag = '/2'

if ARGV.count == 4
  paired1_tag = ARGV[2]
  paired2_tag = ARGV[3]
end

# The tags are matched literally (quoted) at the end of the sequence name.
PAIRED1_TAG_RE=/#{Regexp.quote(paired1_tag)}$/
PAIRED2_TAG_RE=/#{Regexp.quote(paired2_tag)}$/

# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist?(p1_path)
  puts "File #{p1_path} doesn't exists"
  exit
end

paired1_out = FastqFile.new(output_base_name+'_paired1.fastq', 'w', :sanger, true)
paired2_out = FastqFile.new(output_base_name+'_paired2.fastq', 'w', :sanger, true)

f_file = FastqFile.new(p1_path, 'r', :sanger, true)

f_file.each do |n, f, q, c|
  # Route every sequence by the tag its name ends with; a name carrying
  # neither tag aborts the run.
  if n =~ PAIRED1_TAG_RE
    paired1_out.write_seq(n, f, q, c)
  elsif n =~ PAIRED2_TAG_RE
    paired2_out.write_seq(n, f, q, c)
  else
    STDERR.puts "Aborting due to ERROR in file: #{n} doens't match neither left (#{paired1_tag}) nor right (#{paired2_tag}) tags"
    exit
  end

  # Progress message every 10000 sequences.
  puts "Count: #{f_file.num_seqs}" if (f_file.num_seqs % 10000) == 0
end

f_file.close

paired1_out.close
paired2_out.close
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
data/bin/split_paired.rb
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
require 'scbi_fasta'
|
|
5
|
+
|
|
6
|
+
# Splits every read that contains the linker into a forward part (before the
# linker) and a reverse part (after it), writing both to
# <output_base_name>.fasta and their qualities to <output_base_name>.fasta.qual.
# Reads without the linker are counted but not written.
if ARGV.count != 3
  puts "Usage: #{$0} fasta qual output_base_name"
  exit
end

fasta_path = ARGV[0]
qual_path = ARGV[1]
name = ARGV[2]

out_fasta = name+'.fasta'
out_qual = name+'.fasta.qual'

puts "Opening #{fasta_path}, #{qual_path}"

fqr = FastaQualFile.new(fasta_path, qual_path, true)

out_f = File.new(out_fasta, 'w+')
out_q = File.new(out_qual, 'w+')

# Number of reads seen, with or without linker.
c = 0

linker = 'TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG'

fqr.each do |n, f, q|
  # Position of the first occurrence of the linker, nil when absent.
  l_end = f.index(linker)

  if l_end
    r_start = l_end + linker.length
    r_end = f.length

    # Exclusive ranges: with the linker at position 0 the old inclusive form
    # f[0..l_end-1] became f[0..-1] and returned the whole read instead of an
    # empty forward part.
    forward = f[0...l_end]
    reverse = f[r_start...r_end]

    forward_q = q[0...l_end]
    reverse_q = q[r_start...r_end]

    # Diagnostic output when sequence and quality lengths disagree.
    if (forward.length != forward_q.length) || (reverse.length != reverse_q.length)
      puts [forward.length, forward_q.length, reverse.length, reverse_q.length].join(',')
    end

    forward_header = ">#{n}F template=#{n} dir=F library=unadeellas"
    reverse_header = ">#{n}R template=#{n} dir=R library=unadeellas"

    out_f.puts forward_header
    out_f.puts forward
    out_f.puts reverse_header
    out_f.puts reverse

    out_q.puts forward_header
    out_q.puts forward_q.join(' ')
    out_q.puts reverse_header
    out_q.puts reverse_q.join(' ')
  end

  c = c + 1
end

puts c

fqr.close

out_f.close
out_q.close
|
|
70
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require "seqtrim_action"
|
|
2
|
+
|
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# Action covering the region start_pos..end_pos of a sequence.
# Judging by the class name it marks an AB adapter - confirm with the
# plugin that creates it.
# Inherit: SeqtrimAction
########################################################

class ActionAbAdapter < SeqtrimAction
  # Forwards both positions to SeqtrimAction and flags the action with
  # @cut = true.
  # NOTE(review): presumably @cut tells SeqtrimAction consumers that the
  # region must be removed - confirm in seqtrim_action.rb.
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns char decorated as magenta + underline.
  # NOTE(review): relies on String#magenta / #underline being provided
  # elsewhere in the project.
  def apply_decoration(char)
    char.magenta.underline
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require "seqtrim_action"
|
|
2
|
+
|
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# Action covering the region start_pos..end_pos of a sequence.
# Judging by the class name it marks a far AB adapter - confirm with the
# plugin that creates it.
# Inherit: SeqtrimAction
########################################################

class ActionAbFarAdapter < SeqtrimAction
  # Forwards both positions to SeqtrimAction and flags the action with
  # @cut = true.
  # NOTE(review): presumably @cut tells SeqtrimAction consumers that the
  # region must be removed - confirm in seqtrim_action.rb.
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns char decorated as magenta + negative.
  # NOTE(review): relies on String#magenta / #negative being provided
  # elsewhere in the project.
  def apply_decoration(char)
    char.magenta.negative
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require "seqtrim_action"
|
|
2
|
+
|
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# Action covering the region start_pos..end_pos of a sequence.
# Judging by the class name it marks a left AB adapter - confirm with the
# plugin that creates it.
# Inherit: SeqtrimAction
########################################################

class ActionAbLeftAdapter < SeqtrimAction
  # Forwards both positions to SeqtrimAction and flags the action with
  # @cut = true.
  # NOTE(review): presumably @cut tells SeqtrimAction consumers that the
  # region must be removed - confirm in seqtrim_action.rb.
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns char decorated as magenta + negative.
  # NOTE(review): relies on String#magenta / #negative being provided
  # elsewhere in the project.
  def apply_decoration(char)
    char.magenta.negative
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require "seqtrim_action"
|
|
2
|
+
|
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# Action covering the region start_pos..end_pos of a sequence.
# It is flagged as informative and not as a cut; judging by the class name
# it records an empty insert - confirm with the plugin that creates it.
# Inherit: SeqtrimAction
########################################################

class ActionEmptyInsert < SeqtrimAction
  # Forwards both positions to SeqtrimAction, then sets @cut = false and
  # @informative = true.
  def initialize(start_pos, end_pos)
    super
    @cut = false
    @informative = true
  end

  # No decoration: the character is returned unchanged.
  def apply_decoration(char)
    char
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require "seqtrim_action"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# Action covering the region start_pos..end_pos of a sequence.
# It is not flagged as a cut; judging by the class name it marks a repeated
# sequence that is ignored - confirm with the plugin that creates it.
# Inherit: SeqtrimAction
########################################################

class ActionIgnoreRepeated < SeqtrimAction
  # Forwards both positions to SeqtrimAction and sets @cut = false.
  def initialize(start_pos, end_pos)
    super
    @cut = false
  end

  # No decoration: the character is returned unchanged.
  def apply_decoration(char)
    char
  end
end
|