seqtrimnext 2.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
5
|
+
#
|
6
|
+
# Defines the vectors action: it sets the cut flag and decorates its characters with on_green
|
7
|
+
# Inherit: SeqtrimAction
|
8
|
+
########################################################
|
9
|
+
|
10
|
+
# Action subclass used for vectors.
#
# It differs from the base SeqtrimAction in two points only: the +cut+ flag
# is switched on at creation time, and the characters it decorates are
# rendered with +on_green+. Everything else (including +apply_to+) is
# inherited unchanged.
class ActionVectors < SeqtrimAction

  # start_pos:: first position of the action
  # end_pos::   last position of the action
  def initialize(start_pos, end_pos)
    # bare super forwards both positions to SeqtrimAction#initialize
    super
    @cut = true
  end

  # Returns +char+ decorated through its +on_green+ method.
  def apply_decoration(char)
    char.on_green
  end

end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
######################################
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
3
|
+
# This class creates the structures that store the values needed by an action
|
4
|
+
######################################
|
5
|
+
|
6
|
+
|
7
|
+
require 'term/ansicolor'
|
8
|
+
include Term::ANSIColor
|
9
|
+
|
10
|
+
# Base class of every action. An action is a range (start_pos..end_pos,
# both inclusive) plus a handful of flags and a message.
class SeqtrimAction

  attr_accessor :start_pos, :end_pos, :message, :cut, :reversed,
                :left_action, :right_action, :found_definition, :tag_id, :informative

  # Creates an action covering start_pos..end_pos.
  #
  # start_pos:: first position; converted with to_i
  # end_pos::   last position; converted with to_i
  #
  # All flags start as false, message and tag_id as empty strings.
  def initialize(start_pos, end_pos)
    @start_pos = start_pos.to_i
    @end_pos = end_pos.to_i

    @left_action = false
    @right_action = false
    @message = ''
    # array where contaminant or vector definitions are saved, each separately
    @found_definition = []
    @cut = false
    @informative = false
    @reversed = false
    @tag_id = ''
  end

  # Logs (debug level, through the global $LOG) that this action is being
  # applied to +seq+. +seq+ must respond to seq_name.
  #
  # Nothing is done when $LOG has not been set, instead of failing with
  # NoMethodError on nil.
  def apply_to(seq)
    return if $LOG.nil?

    $LOG.debug " Applying #{self.class} to seq #{seq.seq_name} . BEGIN: #{@start_pos} END: #{@end_pos} "
  end

  # One-line text with the class name and both positions.
  def description
    "Action Type: #{self.class} Begin: #{start_pos} End: #{end_pos}"
  end

  # Same text as description.
  def inspect
    description
  end

  # True when +pos+ lies inside the action, both limits included.
  def contains?(pos)
    pos >= @start_pos && pos <= @end_pos
  end

  # True when the range start_pos..end_pos fits inside this action once
  # this action has been widened by +margin+ positions on each side.
  def contains_action?(start_pos, end_pos, margin = 10)
    starts_inside = start_pos >= @start_pos - margin
    ends_inside = end_pos <= @end_pos + margin
    starts_inside && ends_inside
  end

  # Hook for subclasses: returns the decorated version of +char+.
  # The base class returns it untouched.
  def apply_decoration(char)
    char
  end

  # Returns +char+ decorated when +pos+ is inside the action, and +char+
  # itself otherwise.
  def decorate(char, pos)
    contains?(pos) ? apply_decoration(char) : char
  end

  # Class name as a string.
  def type
    self.class.to_s
  end

  # 'INFO' for informative actions; otherwise 'RIGHT' when the right_action
  # flag is set and 'LEFT' when it is not.
  def action_type
    return 'INFO' if @informative

    right_action ? 'RIGHT' : 'LEFT'
  end

  # Converts a CamelCase name into lower-case words separated by spaces,
  # with the first letter capitalized ('LowQuality' -> 'Low quality').
  # '::' becomes '/' and '-' becomes a space.
  def to_human(camel_cased_word)
    word = camel_cased_word.to_s.dup
    word.gsub!(/::/, '/')
    word.gsub!(/([A-Z]+)([A-Z][a-z])/, '\1 \2')
    word.gsub!(/([a-z\d])([A-Z])/, '\1 \2')
    word.tr!("-", " ")
    word.downcase!
    word.capitalize!
    word
  end

  # Human readable name: the class name without 'Action', passed through
  # to_human.
  def title
    to_human(type.gsub('Action', ''))
  end

  # True when the action starts closer to position 0 than it ends to
  # +seq_fasta_size+.
  def near_left?(seq_fasta_size)
    @start_pos < (seq_fasta_size - @end_pos)
  end

  # True when the left_action flag is set, or when the right_action flag is
  # not set and the action is nearer to the left end.
  # The flags themselves are not modified.
  def left_action?(seq_fasta_size)
    @left_action || (!@right_action && near_left?(seq_fasta_size))
  end

  # True when the right_action flag is set, or when left_action? is false.
  # The flags themselves are not modified.
  def right_action?(seq_fasta_size)
    @right_action || !left_action?(seq_fasta_size)
  end

  # Hash with the :type, :start_pos, :end_pos and :message of the action.
  def to_hash
    {
      :type => type,
      :start_pos => @start_pos,
      :end_pos => @end_pos,
      :message => @message
    }
  end

end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#########################################
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
3
|
+
# This class provides the methods to load the action classes and to create actions
|
4
|
+
#########################################
|
5
|
+
require 'seqtrim_action'
|
6
|
+
# Loads the action classes stored in the 'actions' folder and creates action
# objects from the name of their class.
class ActionManager

  # Loads all the action files as soon as the manager is created.
  def initialize
    load_actions_from_files
  end

  # Creates an action of the class named +action_type+.
  #
  # start_pos::   passed as is to the constructor of the action
  # end_pos::     passed as is to the constructor of the action
  # action_type:: name of the action class (for example 'ActionVectors')
  #
  # Returns the new action. When +action_type+ does not name an existing
  # class that is SeqtrimAction or one of its descendants, an error line is
  # printed to standard output and nil is returned.
  def self.new_action(start_pos, end_pos, action_type)
    # const_get raises NameError for unknown (or malformed) constant names and
    # TypeError for names that are neither a String nor a Symbol. Both are
    # turned into nil so that the error branch below is really reachable.
    action_class = begin
      Object.const_get(action_type)
    rescue NameError, TypeError
      nil
    end

    # is_a?(Class) discards constants holding something else (a String, a
    # Module...), which do not respond to the class comparison.
    if action_class.is_a?(Class) && action_class <= SeqtrimAction
      action_class.new(start_pos, end_pos)
    else
      #$LOG.error ' Error. The action does not exist: ' + action_type.to_s
      puts ' Error. The action : ' + action_type.to_s + ' does not exists'
      nil
    end
  end

  # Requires every Ruby file found in the 'actions' folder, which is the
  # sibling folder '../actions' relative to this file. 'seqtrim_action.rb'
  # is skipped.
  #
  # Files are required by their bare file name, so the 'actions' folder has to
  # be in the load path.
  #
  # Raises RuntimeError when the folder does not exist.
  def load_actions_from_files
    ignored = ['seqtrim_action.rb']
    actions_path = File.expand_path(File.join(File.dirname(__FILE__), '..', 'actions'))

    raise "Action folder does not exists" unless File.directory?(actions_path)

    # Dir.entries leaves no directory handle open. Only '.rb' entries are
    # taken, which also leaves out '.' and '..' and any stray non-Ruby file.
    # Sorting makes the load order the same on every file system.
    Dir.entries(actions_path).sort.each do |entry|
      next if ignored.include?(entry) || File.extname(entry) != '.rb'

      require entry
    end
  end

end
|
@@ -0,0 +1,335 @@
|
|
1
|
+
require 'scbi_fasta'
|
2
|
+
require 'scbi_fastq'
|
3
|
+
# require 'work_manager'
|
4
|
+
require 'graph_stats'
|
5
|
+
require 'sequence_with_action'
|
6
|
+
require 'sequence_group'
|
7
|
+
|
8
|
+
# OUTPUT_PATH='output_files'
|
9
|
+
# Path of the JSON file where the accumulated stats are saved, inside OUTPUT_PATH.
# OUTPUT_PATH is not defined in this file: it has to exist before this file is loaded.
STATS_PATH=File.join(OUTPUT_PATH,'stats.json')
|
10
|
+
# TODO - Pass sequences in groups, and run blast on groups of sequences.
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
# Work manager that reads sequences from a reader, hands them out as works
# and stores what comes back (stats, screen output and output files).
#
# All the state lives in class variables that are created by
# init_work_manager and released by end_work_manager.
#
# NOTE(review): @@full_stats is a Hash that receives add_stats calls. add_stats
# is not a core Hash method, so it is presumably added elsewhere in the
# project - confirm.
class SeqtrimWorkManager < ScbiMapreduce::WorkManager

  # Sets up the shared state.
  #
  # sequence_reader:: object used to read the input; it receives with_qual?,
  #                   rewind and next_seq
  # params::          parameters object; it receives set_param and
  #                   repeated_seq?, and is what worker_initial_config returns
  # chunk_size::      number of sequences skipped by each call to
  #                   trash_checkpointed_work
  # use_json::        when true 'results.json' is opened too
  def self.init_work_manager(sequence_reader, params, chunk_size = 100, use_json = false)
    @@full_stats = {}
    @@params = params
    @@exit = false
    @@chunk_size = chunk_size

    # puts "CHECKPOINT: #{self.checkpoint}\n"*20

    # When the checkpoint file exists, the output files are appended instead
    # of truncated, and the stats saved so far are loaded back.
    # File.exist? is used because File.exists? is gone since Ruby 3.2.
    @@open_mode = 'w'
    if File.exist?('scbi_drb_checkpoint')
      @@open_mode = 'a'
      @@full_stats = JSON.parse(File.read(STATS_PATH)) if File.exist?(STATS_PATH)
    end

    # input
    @@fqr = sequence_reader

    @@params.set_param('use_qual', @@fqr.with_qual?)
    @@params.set_param('use_json', use_json)
    @@use_json = use_json

    @@fqr.rewind

    # output
    Dir.mkdir(OUTPUT_PATH) unless Dir.exist?(OUTPUT_PATH)

    # files opened on demand by get_file, indexed by file name
    @@files = {}

    # @@rejected_output_file=File.open(File.join(OUTPUT_PATH,'rejected.txt'),@@open_mode)

    # seqs_with_errors
    @@errors_file = File.open('errors.txt', @@open_mode)

    @@json_output = File.open('results.json', @@open_mode) if @@use_json

    @@json_separator = ''

    # The following hashes are created here but are not filled anywhere in
    # this class.
    @@paired_output_files = {}
    @@sequences_output_files = {}
    @@low_complexity_output_files = {}
    @@sffinfo_files = {}
    @@low_sffinfo_files = {}
  end

  # Prints and saves the final stats, builds the graphs, closes every open
  # file and removes the checkpoint file.
  def self.end_work_manager
    pretty_stats = JSON.pretty_generate(@@full_stats)

    puts "FULL STATS:\n" + pretty_stats

    # block form: the file is closed even if the write fails
    File.open(STATS_PATH, 'w') { |stats_file| stats_file.puts pretty_stats }

    # GraphStats receives the stats as they are read back from the file, not
    # the in-memory hash.
    GraphStats.new(JSON.parse(File.read(STATS_PATH)))
    #gs=GraphStats.new(@@full_stats)

    # close all files
    # @@fqr.close
    @@json_output.close if @@use_json
    @@errors_file.close
    # @@rejected_output_file.close

    # Only @@files holds files opened by this class; the loops that closed
    # @@paired_output_files, @@paired_qual_output_files,
    # @@sequences_output_files, @@low_complexity_output_files,
    # @@sffinfo_files, @@low_sffinfo_files and @@qual_output_files were
    # already disabled.
    @@files.each_value(&:close)

    # more than one MID found
    # if @@full_stats['mid_id'] && @@full_stats['mid_id'].count>1
    #
    # end

    File.delete('scbi_drb_checkpoint') if File.exist?('scbi_drb_checkpoint')
  end

  # Appends to errors.txt the object that failed, the message of the original
  # exception and its backtrace, followed by a separator line.
  def error_received(worker_error, obj)
    original = worker_error.original_exception
    # Array() protects against exceptions that carry no backtrace (nil)
    @@errors_file.puts "Error while processing object #{obj.inspect}\n" + original.message + ":\n" + Array(original.backtrace).join("\n")
    @@errors_file.puts "=" * 60
  end

  # Logs the number of errors through the global $LOG.
  # NOTE(review): @@error_count and @@count are not assigned in this class;
  # presumably they come from ScbiMapreduce::WorkManager - confirm.
  def too_many_errors_received
    $LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
  end

  # Configuration handed to the workers: the params object received by
  # init_work_manager.
  def worker_initial_config
    @@params
  end

  # Delegates to the parent class with the same +checkpoint+ argument.
  def load_user_checkpoint(checkpoint)

    # reset count stats since they are repeated by checkpointing

    # if @@full_stats['sequences'] && @@full_stats['repeated']
    #   @@full_stats['sequences']['count']['repeated']=0
    # end
    #
    # if @@full_stats['sequences'] && @@full_stats['processed']
    #   @@full_stats['sequences']['processed']['count']=0
    # end
    #
    # if @@full_stats['sequences'] && @@full_stats['total']
    #   @@full_stats['sequences']['total']['count']=0
    # end

    super
    # return checkpoint
  end

  # Saves the stats collected so far to STATS_PATH, replacing its content.
  def save_user_checkpoint
    File.open(STATS_PATH, 'w') { |stats_file| stats_file.puts JSON.pretty_generate(@@full_stats) }
  end

  # read a work that will not be processed, only to skip until checkpoint
  #
  # Reads up to @@chunk_size sequences that are not repeated (repeated ones
  # are read past without counting) and discards them. Stops earlier when the
  # reader has no more sequences.
  def trash_checkpointed_work
    @@chunk_size.times do
      name = nil

      loop do
        name, _fasta, _qual, _comments = @@fqr.next_seq
        break if name.nil? || !@@params.repeated_seq?(name)
      end

      break if name.nil?
    end
  end

  # Returns the next work: a SequenceWithAction built from the next sequence
  # that is not repeated, with its fasta upcased. Returns nil when @@exit is
  # set or when there are no more sequences.
  #
  # Every sequence read adds 1 to the 'input_count' stat. Repeated ones also
  # add 1 to the 'rejected' count and to the 'repeated' rejection stat, and
  # are written to 'rejected.txt' inside OUTPUT_PATH.
  def next_work
    return nil if @@exit

    name = fasta = qual = comments = nil

    loop do
      name, fasta, qual, comments = @@fqr.next_seq
      break if name.nil?

      # asked once per sequence; the answer drives both the stats and the loop
      repeated = @@params.repeated_seq?(name)

      if repeated
        @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
        @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})

        get_file(File.join(OUTPUT_PATH, 'rejected.txt')).puts('>' + name + ' repeated')
      end

      @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})

      break unless repeated
    end

    return nil if name.nil?

    SequenceWithAction.new(name, fasta.upcase, qual, comments)
  end

  # Disabled variant of next_work that returns a SequenceGroup with up to
  # @@chunk_size sequences instead of a single sequence:
  #
  # def next_work
  #
  #   if @@exit
  #     return nil
  #   end
  #   group = SequenceGroup.new
  #
  #   @@chunk_size.times do
  #     begin
  #
  #       n,f,q,c = @@fqr.next_seq
  #
  #       if !n.nil? && @@params.repeated_seq?(n)
  #         @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
  #         @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
  #
  #         get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
  #
  #       end
  #       if !n.nil?
  #         @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
  #       end
  #     end while (!n.nil? && @@params.repeated_seq?(n))
  #
  #     if !n.nil?
  #       # @@full_stats.add_stats({'sequences' => {'count' => {'processed' => 1}}})
  #       group.push SequenceWithAction.new(n,f.upcase,q,c)
  #     else
  #       break
  #     end
  #   end
  #
  #   # puts "Processing #{group.inspect}"
  #
  #   if group.empty?
  #     return nil
  #   else
  #     return group
  #   end
  #
  # end

  # Handles a finished work. +obj+ must respond to stats, output_text and
  # output_files.
  def work_received(obj)
    # collect stats
    @@full_stats.add_stats(obj.stats)

    # print output in screen
    puts obj.output_text

    # save results to files
    save_files(obj)
  end

  # Writes every entry of obj.output_files (file name => content) to its file.
  def save_files(obj)
    obj.output_files.each do |file_name, content|
      get_file(file_name).puts content
    end
  end

  # Returns the open file for +file_name+. The first time a name is asked
  # for, its folder is created if needed, the file is opened with the current
  # open mode and it is remembered in @@files for the next calls.
  def get_file(file_name)
    @@files[file_name] ||= begin
      dir = File.dirname(file_name)
      FileUtils.mkdir_p(dir) unless File.exist?(dir)

      File.open(file_name, @@open_mode)
    end
  end

end
|