seqtrimnext 2.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'scbi_fastq'
|
4
|
+
|
5
|
+
# When true, every pairing decision is traced on stdout.
VERBOSE = false

# Either the three mandatory arguments or all five (with both tags) are accepted.
unless [3, 5].include?(ARGV.count)
  puts "Usage: #{$0} paired1 paired2 output_base_name [paired1_tag paired2_tag]"
  exit
end

p1_path = ARGV[0]
p2_path = ARGV[1]
output_base_name = ARGV[2]

# Suffixes appended to read names to tell the two mates apart; the defaults
# can be overridden by the optional 4th and 5th arguments.
paired1_tag = ARGV.count == 5 ? ARGV[3] : '/1'
paired2_tag = ARGV.count == 5 ? ARGV[4] : '/2'

# Match the (regexp-escaped) tag only at the end of a read name.
PAIRED1_TAG_RE = /#{Regexp.quote(paired1_tag)}$/
PAIRED2_TAG_RE = /#{Regexp.quote(paired2_tag)}$/

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
[p1_path, p2_path].each do |path|
  next if File.exist?(path)

  puts "File #{path} doesn't exists"
  exit
end
|
39
|
+
|
40
|
+
# Loads every record of a FASTQ file into memory, indexed by read name.
#
# The file is opened with FastqFile.new(file, 'r', :sanger, true). Each record
# yielded by the reader as (n, f, q, c) -- presumably name, sequence,
# qualities and comments; confirm against scbi_fastq -- is stored as
# res[n without the PAIRED2_TAG_RE suffix] = [f, q, c].
# A "Loading: N" progress line is printed whenever the reader's num_seqs is a
# multiple of 10000.
#
# @param file [String] path of the FASTQ file to load
# @return [Hash] read name (tag stripped) => [f, q, c]
def read_to_file(file)
  res = {}
  f_file = FastqFile.new(file, 'r', :sanger, true)

  begin
    f_file.each do |n, f, q, c|
      res[n.gsub(PAIRED2_TAG_RE, '')] = [f, q, c]
      puts "Loading: #{f_file.num_seqs}" if (f_file.num_seqs % 10000) == 0
    end
  ensure
    # Release the reader even when iteration raises part-way through.
    f_file.close
  end

  res
end
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
# Mate-1 reads are streamed from disk; mate-2 reads are held in memory so each
# mate-1 name can be looked up directly.
p1 = FastqFile.new(p1_path, 'r', :sanger, true)
p2 = read_to_file(p2_path)

puts "Sequences from #{p2_path} loaded. Total: #{p2.count}"

# Output files:
#   _normal      reads whose mate was not found (from either input)
#   _all_paired  both mates of every pair, mate 1 followed by mate 2
#   _paired1 / _paired2  the two mates of every pair, one file each
normal_out  = FastqFile.new("#{output_base_name}_normal.fastq", 'w', :sanger, true)
paired_out  = FastqFile.new("#{output_base_name}_all_paired.fastq", 'w', :sanger, true)
paired1_out = FastqFile.new("#{output_base_name}_paired1.fastq", 'w', :sanger, true)
paired2_out = FastqFile.new("#{output_base_name}_paired2.fastq", 'w', :sanger, true)

p1.each do |n1, f1, q1, c1|
  # Strip the mate-1 tag so the name matches the keys of the p2 hash.
  n1.gsub!(PAIRED1_TAG_RE, '')
  puts "Find #{n1}" if VERBOSE

  # Removing the mate from p2 leaves only unpaired mate-2 reads behind.
  mate = p2.delete(n1)

  if mate
    f2, q2, c2 = mate
    puts " ===> PAIRED #{n1}" if VERBOSE

    tagged1 = n1 + paired1_tag
    tagged2 = n1 + paired2_tag

    paired_out.write_seq(tagged1, f1, q1, c1)
    paired1_out.write_seq(tagged1, f1, q1, c1)

    paired_out.write_seq(tagged2, f2, q2, c2)
    paired2_out.write_seq(tagged2, f2, q2, c2)
  else
    puts " ===> NOT PAIRED #{n1}" if VERBOSE
    normal_out.write_seq(n1 + paired1_tag, f1, q1, c1)
  end

  # Progress report every 10000 mate-1 reads.
  puts p1.num_seqs if (p1.num_seqs % 10000) == 0
end

# Whatever is still in p2 never found its mate: write it to the unpaired file.
p2.each do |n2, (f2, q2, c2)|
  normal_out.write_seq(n2 + paired2_tag, f2, q2, c2)
end

[p1, normal_out, paired_out, paired1_out, paired2_out].each(&:close)
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'scbi_fastq'
|
5
|
+
|
6
|
+
# Two arguments are required: the input file and the numeric threshold that
# is read further down with ARGV.shift. Stop here instead of continuing with
# missing values.
if ARGV.count != 2
  puts "Usage: #{$0} input_file min_repetitions"
  exit
end
|
10
|
+
|
11
|
+
|
12
|
+
# >Cluster 0
|
13
|
+
# 0 216aa, >E9LAHD006DQKVK... *
|
14
|
+
# >Cluster 1
|
15
|
+
# 0 203aa, >E9LAHD006DODWR... *
|
16
|
+
# >Cluster 2
|
17
|
+
# 0 198aa, >E9LAHD006DQCDS... *
|
18
|
+
# >Cluster 3
|
19
|
+
# 0 195aa, >E9LAHD006DQURO... *
|
20
|
+
# 1 172aa, >E9LAHD006DOSHR... at 93.02%
|
21
|
+
# 2 172aa, >E9LAHD006DSV4P... at 93.02%
|
22
|
+
# 3 172aa, >E9LAHD006DI00Q... at 93.02%
|
23
|
+
# 4 172aa, >E9LAHD006DR7MR... at 93.02%
|
24
|
+
# 5 175aa, >E9LAHD006DTDA7... at 90.86%
|
25
|
+
# 6 172aa, >E9LAHD006DVCR3... at 93.02%
|
26
|
+
# 7 172aa, >E9LAHD006DHY3H... at 93.02%
|
27
|
+
# 8 177aa, >E9LAHD006DI52X... at 90.96%
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
# Collects the member names of the small clusters found in a cluster report.
#
# A line starting with ">Cluster" opens a new cluster. Member lines such as
#   0 216aa, >E9LAHD006DQKVK... *
# contribute the text between ">" and "..." as the member name. The members
# of every cluster holding at most +min_repetitions+ sequences are appended
# to the result in file order.
#
# @param file_path [String] path of the cluster report
# @param min_repetitions [Integer] largest cluster size that is still collected
# @return [Array<String>] collected names; empty when the file does not exist
def load_repeated_seqs(file_path, min_repetitions)
  clusters = []
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return clusters unless File.exist?(file_path)

  current_cluster = []
  # Moves the cluster read so far into the result when it is small enough.
  # concat appends in place instead of copying the whole result each time.
  flush = lambda do
    clusters.concat(current_cluster) if current_cluster.count <= min_repetitions
  end

  # File.foreach closes the file when done, unlike File.open(...).each_line.
  File.foreach(file_path) do |line|
    if line =~ /^>Cluster/
      flush.call
      current_cluster = []
    elsif (member = line.match(/>([^\.]+)\.\.\.\s/))
      current_cluster << member[1]
    end
  end

  # The last cluster is not followed by another header, so flush it here.
  flush.call

  clusters
end
|
67
|
+
|
68
|
+
|
69
|
+
# Copies a FASTQ file, leaving out the reads whose name is listed in
# +singletons+. The copy is written next to the input as
# "<input_file_path>_without_singletons".
#
# @param input_file_path [String] path of the FASTQ file to filter
# @param singletons [Enumerable<String>] read names to leave out
def remove_singletons_from_file(input_file_path, singletons)
  # Index the names once: a hash lookup per read replaces a linear scan of
  # the whole list for every read.
  excluded = {}
  singletons.each { |name| excluded[name] = true }

  fqr = FastqFile.new(input_file_path)
  begin
    out = FastqFile.new(input_file_path + '_without_singletons', 'w+')
    begin
      fqr.each do |n, f, q, c|
        out.write_seq(n, f, q, c) unless excluded.key?(n)
      end
    ensure
      # Close the output even on failure so buffered records are not lost.
      out.close
    end
  ensure
    fqr.close
  end
end
|
85
|
+
|
86
|
+
input_file_path = ARGV.shift
min_repetitions = ARGV.shift.to_i

# Run cd-hit with an argument list (no shell involved), so paths containing
# spaces or shell metacharacters are passed through untouched. Its stdout is
# discarded, as it was with the previous backtick call.
clustered = system('cd-hit', '-i', input_file_path, '-o', 'clusters', out: File::NULL)
abort "cd-hit failed or is not installed" unless clustered

# cd-hit writes its cluster report to "<output>.clstr"; the previous name
# 'clusters.clrs' is not a file cd-hit produces, so nothing was ever loaded.
# NOTE(review): confirm the suffix against the installed cd-hit version.
singletons = load_repeated_seqs('clusters.clstr', min_repetitions)

remove_singletons_from_file(input_file_path, singletons)
|
94
|
+
|
95
|
+
# puts singletons.to_json
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'yajl'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# First argument: the JSON results file. Remaining arguments: the action
# names to count. Both are mandatory.
file = ARGV.shift
actions = ARGV

if file.nil? || actions.empty?
  # Interpolate $0 so the script name is shown instead of a literal "$0".
  puts "\nUsage: #{$0} results.json action1 [action] [action] [action] ...\n\n"
  exit(0)
end
|
17
|
+
|
18
|
+
puts "Counting sequences with these actions: #{actions.join(",")}"
puts ""

total = 0           # sequences seen in the results file
count = 0           # sequences carrying every requested action
all_actions = []    # every action type met while parsing

# An action repeated on the command line must not raise the number of
# distinct matches a sequence needs, otherwise `count` could never increase.
wanted = actions.uniq

# Per-action tally, one entry per requested action, starting at zero.
separate_count = {}
wanted.each { |a| separate_count[a] = 0 }

# The block form closes the input once parsing is finished (or fails).
File.open(file, 'r') do |json|
  Yajl::Parser.parse(json) do |seq|
    total += 1
    action_names = seq['actions'].map { |a| a['type'] }

    count += 1 if (action_names & wanted).count == wanted.count

    # Every occurrence counts, so an action applied twice to the same
    # sequence adds two to its tally.
    action_names.each do |a|
      separate_count[a] += 1 if separate_count.key?(a)
    end

    all_actions |= action_names
  end
end

puts "="*20 + "Separate count" + "="*20
separate_count.each do |k, v|
  puts "#{k} = #{v}"
end
puts "="*20 + "Summarized" + "="*20

puts "Number of sequences with all actions: #{count}"
puts "Total sequences: #{total}"

puts "\n"
puts "="*20 + "Other used actions" + "="*20
puts((all_actions - actions).join(','))
|
65
|
+
|
66
|
+
|
data/bin/parse_params.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
# Reads a whole file and decodes its text as JSON.
#
# @param file_path [String] path of the JSON document
# @return [Object] whatever JSON.parse builds from the file's text
# @raise [JSON::ParserError] when the text is not valid JSON
# @raise [SystemCallError] when the file cannot be read (e.g. Errno::ENOENT)
def get_json_data(file_path)
  # File.read opens, reads and closes in one call, so the handle cannot be
  # left open when reading fails.
  JSON.parse(File.read(file_path))
end
|
20
|
+
|
21
|
+
|
22
|
+
# extract params loading to external file in ingebiol
|
23
|
+
|
24
|
+
# Maps a key of the input JSON document to the parameter name written to the
# seqtrim params file.
params = {
  'vector_db_field'              => 'vectors_db',
  'contaminants_db_field'        => 'contaminants_db',
  'species_field'                => 'genus',
  'min_insert_size_field'        => 'min_insert_size_trimmed',
  'min_paired_insert_size_field' => 'min_insert_size_paired',
  'min_quality_value_field'      => 'min_quality'
}

if ARGV.count != 2
  puts "#{$0} ingebiol_params_file.json seqtrim_params_file"
  exit(-1)
end

input_file = ARGV[0]
params_file = ARGV[1]

# Both files must already exist. File.exist? replaces File.exists?, which
# was removed in Ruby 3.2.
[input_file, params_file].each do |path|
  next if File.exist?(path)

  puts "File #{path} doesn't exists"
  exit(-1)
end
|
51
|
+
|
52
|
+
data = get_json_data(input_file)

# Append one "name=value" line per recognised, non-blank JSON entry.
# The params file used to be opened read-only first and that handle was never
# used or closed; only the append handle is needed, and the block form closes
# it even if writing fails.
File.open(params_file, 'a+') do |sq_params|
  data.each do |k, v|
    sq_name = params[k]
    next unless sq_name

    # Skip null/false and empty strings or collections. Numbers and `true`
    # do not respond to empty? (calling it raised NoMethodError) and are
    # written as they are.
    next if !v || (v.respond_to?(:empty?) && v.empty?)

    sq_params.puts "#{sq_name}=#{v}"
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
if ARGV.count != 2
  puts "#{$0} cluster.fasta.clstr COUNT"
  exit
end

path = ARGV.shift
list_max = ARGV.shift.to_i

# Name taken from the last member line of a cluster => size of that cluster.
h = {}

# A member line looks like "8 177aa, >E9LAHD006DI52X... at 90.96%". Members
# are numbered from 0, so the index on a cluster's last line plus one is the
# cluster size. Lines that are not member lines are ignored.
record_cluster = lambda do |member_line|
  member = member_line.match(/^([\d]+)\s[^>]*>([^\s]*)\.\.\.\s/)
  h[member[2]] = member[1].to_i + 1 if member
end

last_line = ''

# File.foreach closes the file once every line has been read.
File.foreach(path) do |line|
  # A new header means the previous line was the last member of the
  # cluster before it.
  record_cluster.call(last_line) if line =~ />Cluster/
  last_line = line
end

# The final cluster has no header after it; without this call it was never
# counted.
record_cluster.call(last_line)

# Print the list_max largest clusters, biggest first. Ties keep the order in
# which the clusters appear in the file.
ranked = h.each_with_index.sort_by { |(_name, size), position| [-size, position] }
ranked.first([list_max, 0].max).each do |(name, size), _position|
  puts "#{name} => #{size}"
end
|
46
|
+
|
47
|
+
|
48
|
+
# puts h.sort.to_json
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'scbi_fasta'
|
4
|
+
|
5
|
+
unless ARGV.count == 3
  puts "Usage: #{$0} fasta qual output_base_name"
  exit
end

fasta_path, qual_path, name = ARGV

out_fasta = "#{name}.fasta"
out_qual  = "#{name}.fasta.qual"

puts "Opening #{fasta_path}, #{qual_path}"

fqr = FastaQualFile.new(fasta_path, qual_path, true)

out_f = File.new(out_fasta, 'w+')
out_q = File.new(out_qual, 'w+')

# Number of records copied to the output files.
c = 0

fqr.each do |n, f, q|
  header = ">#{n}"
  out_f.puts header
  out_q.puts header

  if n.include?('dir=F')
    # Records whose name carries "dir=F" are written reverse-complemented,
    # with the quality values reversed to stay aligned with the bases.
    bases = f.reverse.tr('actgACTG', 'tgacTGAC')
    quals = q.reverse
  else
    bases = f
    quals = q
  end

  out_f.puts bases
  out_q.puts quals.join(' ')

  c += 1
end

puts c

[fqr, out_f, out_q].each(&:close)
|
49
|
+
|