seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require "seqtrim_action"
|
|
2
|
+
|
|
3
|
+
########################################################
|
|
4
|
+
# Author: Almudena Bocinos Rioboo
|
|
5
|
+
#
|
|
6
|
+
# Defines the action attached to vector regions: the region is flagged to be cut and is decorated in green
|
|
7
|
+
# Inherit: SeqtrimAction
|
|
8
|
+
########################################################
|
|
9
|
+
|
|
10
|
+
# Action for a vector region of a sequence.
#
# Only two things differ from SeqtrimAction: the action is created with
# @cut set to true, and decorated characters are returned through
# String#on_green. apply_to is inherited unchanged from SeqtrimAction.
class ActionVectors < SeqtrimAction
  # start_pos, end_pos:: limits of the region, forwarded as-is to SeqtrimAction
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns +char+ decorated with on_green.
  def apply_decoration(char)
    char.on_green
  end
end
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
######################################
|
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
|
3
|
+
# This class defines the structure that stores the values describing an action
|
|
4
|
+
######################################
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'term/ansicolor'
|
|
8
|
+
include Term::ANSIColor
|
|
9
|
+
|
|
10
|
+
# Base class of every action. An action describes a region of a sequence
# (start_pos..end_pos, both inclusive for contains?) together with a set
# of flags and a free-text message.
class SeqtrimAction
  attr_accessor :start_pos, :end_pos, :message, :cut, :reversed,
                :left_action, :right_action, :found_definition,
                :tag_id, :informative

  # Builds an action for the given region. Both positions are converted
  # with to_i; every flag starts as false, message and tag_id start empty.
  def initialize(start_pos, end_pos)
    @start_pos = start_pos.to_i
    @end_pos = end_pos.to_i

    @left_action = false
    @right_action = false
    @message = ''
    # array where contaminant or vector definitions are saved, each separately
    @found_definition = []
    @cut = false
    @informative = false
    @reversed = false
    @tag_id = ''
  end

  # Base implementation: only writes a debug line to the global $LOG.
  def apply_to(seq)
    $LOG.debug(" Applying #{self.class} to seq #{seq.seq_name} . BEGIN: #{@start_pos} END: #{@end_pos} ")
  end

  # One-line summary with the class name and both limits.
  def description
    "Action Type: #{self.class} Begin: #{start_pos} End: #{end_pos}"
  end

  # inspect delegates to description (so subclasses overriding
  # description change inspect too).
  def inspect
    description
  end

  # True when pos lies inside the region, limits included.
  def contains?(pos)
    pos >= @start_pos && pos <= @end_pos
  end

  # True when the region start_pos..end_pos fits inside this action once
  # this action is widened by +margin+ positions on each side.
  def contains_action?(start_pos, end_pos, margin = 10)
    lower_limit = @start_pos - margin
    upper_limit = @end_pos + margin
    start_pos >= lower_limit && end_pos <= upper_limit
  end

  # Hook for subclasses; the base class returns the char untouched.
  def apply_decoration(char)
    char
  end

  # Decorates char only when pos is inside the region.
  def decorate(char, pos)
    contains?(pos) ? apply_decoration(char) : char
  end

  # Class name as a string.
  def type
    self.class.to_s
  end

  # 'INFO' for informative actions; otherwise 'RIGHT' when right_action
  # is set and 'LEFT' when it is not.
  def action_type
    return 'INFO' if @informative

    right_action ? 'RIGHT' : 'LEFT'
  end

  # Converts a CamelCase name into a lower-case, space separated phrase
  # with the first letter capitalized ('::' becomes '/', '-' becomes ' ').
  def to_human(camel_cased_word)
    camel_cased_word.to_s
                    .gsub(/::/, '/')
                    .gsub(/([A-Z]+)([A-Z][a-z])/, '\1 \2')
                    .gsub(/([a-z\d])([A-Z])/, '\1 \2')
                    .tr("-", " ")
                    .downcase
                    .capitalize
  end

  # Human readable name of the action: the class name without 'Action'.
  def title
    to_human(type.gsub('Action', ''))
  end

  # True when the distance from the region to the beginning of the
  # sequence is smaller than the distance to its end.
  def near_left?(seq_fasta_size)
    @start_pos < seq_fasta_size - @end_pos
  end

  # Left when explicitly flagged, or when not flagged as right and the
  # region is nearer to the left end. Does not modify @left_action.
  def left_action?(seq_fasta_size)
    @left_action || (!@right_action && near_left?(seq_fasta_size))
  end

  # Right when explicitly flagged, or when left_action? is false.
  # Does not modify @right_action.
  def right_action?(seq_fasta_size)
    @right_action || !left_action?(seq_fasta_size)
  end

  # Hash with the type, both limits and the message of the action.
  def to_hash
    {
      :type => type,
      :start_pos => @start_pos,
      :end_pos => @end_pos,
      :message => @message
    }
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#########################################
|
|
2
|
+
# Author:: Almudena Bocinos Rioboo
|
|
3
|
+
# This class provides the methods to load the action files and to create actions by name
|
|
4
|
+
#########################################
|
|
5
|
+
require 'seqtrim_action'
|
|
6
|
+
# Loads every action definition found in the 'actions' folder and builds
# action objects from the name of their class.
class ActionManager
  # Requires all the action files as soon as the manager is created.
  def initialize
    load_actions_from_files
  end

  # Creates an action of class +action_type+ for the region
  # start_pos..end_pos.
  #
  # action_type:: name of a class that inherits from SeqtrimAction
  #
  # Returns the new action, or nil (after printing an error) when the name
  # does not resolve to a constant or resolves to something that is not a
  # SeqtrimAction subclass.
  def self.new_action(start_pos, end_pos, action_type)
    # const_get raises NameError for an unknown name, it never returns
    # nil, so the lookup is rescued to reach the error message below
    action_class = begin
      Object.const_get(action_type)
    rescue NameError
      nil
    end

    if action_class.is_a?(Class) && action_class <= SeqtrimAction
      return action_class.new(start_pos, end_pos)
    end

    puts ' Error. The action : ' + (action_class.nil? ? action_type : action_class).to_s + ' does not exists'
    nil
  end

  # Iterates over the files of the 'actions' folder (a sibling of the
  # folder that holds this file) and requires each of them, except
  # seqtrim_action.rb.
  #
  # Entries are required by bare file name, so the actions folder has to
  # be reachable through $LOAD_PATH.
  #
  # Raises RuntimeError when the folder does not exist.
  def load_actions_from_files
    ignore = ['.', '..', 'seqtrim_action.rb']
    actions_path = File.expand_path(File.join(File.dirname(__FILE__), "..", "actions"))

    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
    raise "Action folder does not exists" unless File.exist?(actions_path)

    # Dir.entries does not leave an open directory handle behind
    Dir.entries(actions_path).each do |action|
      require action unless ignore.include?(action)
    end
  end
end
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
require 'scbi_fasta'
|
|
2
|
+
require 'scbi_fastq'
|
|
3
|
+
# require 'work_manager'
|
|
4
|
+
require 'graph_stats'
|
|
5
|
+
require 'sequence_with_action'
|
|
6
|
+
require 'sequence_group'
|
|
7
|
+
|
|
8
|
+
# OUTPUT_PATH='output_files'
|
|
9
|
+
STATS_PATH=File.join(OUTPUT_PATH,'stats.json')
|
|
10
|
+
# TODO - Pass sequences in groups, and run blast on groups of sequences.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SeqtrimWorkManager < ScbiMapreduce::WorkManager
|
|
15
|
+
|
|
16
|
+
# Prepares the class-level state shared by all work manager instances.
#
# sequence_reader:: reader object; it must answer with_qual?, rewind and next_seq
# params::          parameters object; receives 'use_qual' and 'use_json' via set_param
# chunk_size::      stored in @@chunk_size (used by trash_checkpointed_work)
# use_json::        when true, results.json is opened for output
#
# When a 'scbi_drb_checkpoint' file is present, output files are opened in
# append mode and previously saved stats are reloaded from STATS_PATH.
def self.init_work_manager(sequence_reader, params, chunk_size = 100, use_json=false)
  @@full_stats = {}
  @@params = params
  @@exit = false
  @@chunk_size = chunk_size

  # File.exists? / Dir.exists? were removed in Ruby 3.2, use the exist? forms
  checkpoint_exists = File.exist?('scbi_drb_checkpoint')

  @@open_mode = 'w'
  if checkpoint_exists
    @@open_mode = 'a'
    # load and decode the stats saved by save_user_checkpoint
    @@full_stats = JSON.parse(File.read(STATS_PATH)) if File.exist?(STATS_PATH)
  end

  # input reader
  @@fqr = sequence_reader

  @@params.set_param('use_qual', @@fqr.with_qual?)
  @@params.set_param('use_json', use_json)
  @@use_json = use_json

  @@fqr.rewind

  # output folder and output files
  Dir.mkdir(OUTPUT_PATH) unless Dir.exist?(OUTPUT_PATH)

  # file handles opened on demand by get_file, indexed by file name
  @@files = {}

  # sequences that raised errors while being processed
  @@errors_file = File.open('errors.txt', @@open_mode)

  @@json_output = File.open('results.json', @@open_mode) if @@use_json
  @@json_separator = ''

  @@paired_output_files = {}
  @@sequences_output_files = {}
  @@low_complexity_output_files = {}
  @@sffinfo_files = {}
  @@low_sffinfo_files = {}
end
|
|
87
|
+
|
|
88
|
+
# Final step of a run: prints and saves the accumulated stats, builds a
# GraphStats from them, closes the output files and removes the
# checkpoint file.
def self.end_work_manager
  puts "FULL STATS:\n" +JSON.pretty_generate(@@full_stats)

  # block form closes the file even when writing raises
  File.open(STATS_PATH, 'w') do |f|
    f.puts JSON.pretty_generate(@@full_stats)
  end

  # NOTE(review): GraphStats receives the stats re-read from disk instead
  # of @@full_stats; presumably to work on the JSON-decoded form - confirm
  stats = JSON::parse(File.read(STATS_PATH))
  GraphStats.new(stats)

  # close all files
  @@json_output.close if @@use_json
  @@errors_file.close

  # every handle opened through get_file lives in @@files; the other
  # per-type file hashes are not closed here
  @@files.each do |k, file|
    file.close
  end

  # File.exists? was removed in Ruby 3.2
  File.delete('scbi_drb_checkpoint') if File.exist?('scbi_drb_checkpoint')
end
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Appends a report to the errors file: the inspected object, the message
# and backtrace of worker_error.original_exception, and a separator line.
def error_received(worker_error, obj)
  cause = worker_error.original_exception
  report = "Error while processing object #{obj.inspect}\n" + cause.message + ":\n" + cause.backtrace.join("\n")

  @@errors_file.puts(report)
  @@errors_file.puts("=" * 60)
end
|
|
163
|
+
|
|
164
|
+
# Logs an error line with the error and sequence counters.
# NOTE(review): @@error_count and @@count are not assigned in this file;
# presumably they are maintained by the parent class - confirm.
def too_many_errors_received
  $LOG.error("Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing")
end
|
|
167
|
+
|
|
168
|
+
# Returns the params object stored by init_work_manager.
def worker_initial_config
  @@params
end
|
|
171
|
+
|
|
172
|
+
# Delegates to the parent implementation with the same checkpoint.
# The reset of the count stats that used to happen here before calling
# super is disabled, so nothing else is done.
def load_user_checkpoint(checkpoint)
  super(checkpoint)
end
|
|
191
|
+
|
|
192
|
+
# Writes the accumulated stats, as pretty-printed JSON, to STATS_PATH
# (overwriting the previous content). Returns nil.
def save_user_checkpoint
  # block form guarantees the handle is closed even if writing raises
  File.open(STATS_PATH, 'w') do |f|
    f.puts JSON.pretty_generate(@@full_stats)
  end
end
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# read a work that will not be processed, only to skip until checkpoint
|
|
202
|
+
def trash_checkpointed_work
  # Consumes up to @@chunk_size sequences from the reader and throws
  # them away. Names for which repeated_seq? is true are read past
  # without counting; a nil name stops the whole loop early.
  @@chunk_size.times do
    name = nil

    loop do
      name, = @@fqr.next_seq
      break if name.nil? || !@@params.repeated_seq?(name)
    end

    break if name.nil?
  end
end
|
|
215
|
+
|
|
216
|
+
# Returns the next SequenceWithAction to be processed, or nil when @@exit
# is set or the reader gives a nil name.
#
# Sequences whose name is reported by repeated_seq? are skipped: they are
# counted as rejected/repeated in the stats and their name is written to
# rejected.txt inside OUTPUT_PATH. Every sequence read, skipped or not,
# adds one to the input_count stat.
def next_work
  return nil if @@exit

  name = fasta = qual = comments = nil

  loop do
    name, fasta, qual, comments = @@fqr.next_seq

    unless name.nil?
      if @@params.repeated_seq?(name)
        @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
        @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})

        get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+name+ ' repeated')
      end

      @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
    end

    # keep reading only while the last sequence read was a repeated one
    break if name.nil? || !@@params.repeated_seq?(name)
  end

  return nil if name.nil?

  SequenceWithAction.new(name, fasta.upcase, qual, comments)
end
|
|
247
|
+
|
|
248
|
+
# def next_work
|
|
249
|
+
#
|
|
250
|
+
# if @@exit
|
|
251
|
+
# return nil
|
|
252
|
+
# end
|
|
253
|
+
# group = SequenceGroup.new
|
|
254
|
+
#
|
|
255
|
+
# @@chunk_size.times do
|
|
256
|
+
# begin
|
|
257
|
+
#
|
|
258
|
+
# n,f,q,c = @@fqr.next_seq
|
|
259
|
+
#
|
|
260
|
+
# if !n.nil? && @@params.repeated_seq?(n)
|
|
261
|
+
# @@full_stats.add_stats({'sequences' => {'count' => {'rejected' => 1}}})
|
|
262
|
+
# @@full_stats.add_stats({'sequences' => {'rejected' => {'repeated' => 1}}})
|
|
263
|
+
#
|
|
264
|
+
# get_file(File.join(OUTPUT_PATH,'rejected.txt')).puts('>'+n+ ' repeated')
|
|
265
|
+
#
|
|
266
|
+
# end
|
|
267
|
+
# if !n.nil?
|
|
268
|
+
# @@full_stats.add_stats({'sequences' => {'count' => {'input_count' => 1}}})
|
|
269
|
+
# end
|
|
270
|
+
# end while (!n.nil? && @@params.repeated_seq?(n))
|
|
271
|
+
#
|
|
272
|
+
# if !n.nil?
|
|
273
|
+
# # @@full_stats.add_stats({'sequences' => {'count' => {'processed' => 1}}})
|
|
274
|
+
# group.push SequenceWithAction.new(n,f.upcase,q,c)
|
|
275
|
+
# else
|
|
276
|
+
# break
|
|
277
|
+
# end
|
|
278
|
+
# end
|
|
279
|
+
#
|
|
280
|
+
# # puts "Processing #{group.inspect}"
|
|
281
|
+
#
|
|
282
|
+
# if group.empty?
|
|
283
|
+
# return nil
|
|
284
|
+
# else
|
|
285
|
+
# return group
|
|
286
|
+
# end
|
|
287
|
+
#
|
|
288
|
+
# end
|
|
289
|
+
|
|
290
|
+
# Handles a finished work object: passes obj.stats to
# @@full_stats.add_stats, prints obj.output_text on screen and saves the
# output files of the object through save_files.
def work_received(obj)
  # collect stats
  @@full_stats.add_stats(obj.stats)

  # print output in screen
  puts obj.output_text

  # save results to files
  save_files(obj)
end
|
|
304
|
+
|
|
305
|
+
# Writes every entry of obj.output_files (file name => content) with puts
# to the handle that get_file returns for that file name.
def save_files(obj)
  obj.output_files.each do |file_name, content|
    get_file(file_name).puts content
  end
end
|
|
312
|
+
|
|
313
|
+
# Returns the open file handle for file_name. On first use the folder of
# the file is created if needed, the file is opened with @@open_mode and
# the handle is stored in @@files so later calls reuse it.
def get_file(file_name)
  res_file = @@files[file_name]

  # if file is not already open, create it
  if res_file.nil?
    # create dir if necessary (File.exists? was removed in Ruby 3.2)
    dir = File.dirname(file_name)
    FileUtils.mkdir_p(dir) unless File.exist?(dir)

    # open file and save it in hash for next use
    res_file = File.open(file_name, @@open_mode)
    @@files[file_name] = res_file
  end

  res_file
end
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
end
|