seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
# ======================================
|
|
2
|
+
# General parameters GENOMICS WITH POSSIBLE LINKER
|
|
3
|
+
# ======================================
|
|
4
|
+
|
|
5
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginContaminants,PluginLinker,PluginVectors,PluginLowQuality
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# ======================================
|
|
2
|
+
# General parameters
|
|
3
|
+
# ======================================
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
|
7
|
+
|
|
8
|
+
contaminants_db="contaminants.fasta cont_ribosome.fasta"
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# ======================================
|
|
2
|
+
# General parameters
|
|
3
|
+
# ======================================
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
plugin_list = PluginLowHighSize,PluginMids,PluginIndeterminations,PluginAbAdapters,PluginFindPolyAt,PluginContaminants,PluginVectors,PluginLowQuality,PluginLowComplexity
|
|
7
|
+
|
|
8
|
+
contaminants_db="contaminants.fasta cont_ribosome.fasta cont_mitochondrias.fasta cont_plastids.fasta"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require "fasta_reader.rb"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
######################################
|
|
5
|
+
# Author:: Almudena Bocinos Rioboo
|
|
6
|
+
# Extract random sequences until "num_seqs"
|
|
7
|
+
# Inherit:: FastaReader
|
|
8
|
+
######################################
|
|
9
|
+
|
|
10
|
+
# Writes a random sample of sub-sequences to "results/Sample.txt".
#
# Overrides the FastaReader processing hooks: roughly every second sequence
# is picked (random draw > 0.5) and a window of 300-349 bases starting at a
# random position is written, until @max samples have been produced.
# NOTE(review): the hooks are presumably invoked by FastaReader while the
# file is read - confirm against fasta_reader.rb.
class ExtractSamples < FastaReader
  # Number of samples written so far.
  attr_accessor :num_seqs

  def initialize(file_name)
    # The counter is set before calling super, as in the original code.
    @num_seqs = 0
    super(file_name)
  end

  # override begin processing: opens the output file and sets the sample cap
  def on_begin_process()
    $LOG.info " Begin Extract Samples"
    @fich = File.open("results/Sample.txt",'w')
    @max = 1000
  end

  # override processing sequence: maybe writes one random window of seq_fasta
  def on_process_sequence(seq_name,seq_fasta)
    ra_seq = Kernel.rand
    return unless @num_seqs < @max && ra_seq > 0.5

    # Window length of 300..349, never longer than the sequence itself.
    width = [(Kernel.rand * 50).to_i + 300, seq_fasta.length].min
    # Random start such that the whole window fits inside the sequence.
    start = (Kernel.rand * (seq_fasta.length - width + 1)).to_i
    # String#slice takes (start, length); the original passed the end
    # position as the length and therefore extracted too much.
    sub_seq_fasta = seq_fasta.slice(start, width)

    @fich.puts "#{seq_name} "
    @fich.puts "#{sub_seq_fasta} "
    @num_seqs += 1
  end

  #override end processing: closes the output file
  def on_end_process()
    $LOG.info "All Samples have been extracted"
    @fich.close
  end
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Usage banner printed whenever the command line is not acceptable.
command_info="
#================================================
# Author: Almudena Bocinos Rioboo
#
#
# Usage: fasta2xml.rb <fasta_file> [> <out_file.xml>]
#
# Converts a fasta file to xml format (used for cabog)
#
# Prints to stdout, can be redirected to file with >
#
#================================================
\n"

#require "utils/fasta_utils"
require File.dirname(__FILE__) + "/utils/fasta_utils"

# Exactly one argument (the fasta file) is accepted.
unless ARGV.length == 1
  puts command_info
  Process.exit(-1)
end

# The only argument is the input file name.
file_name = ARGV[0]

# Refuse to continue when the input file is missing.
unless File.exist?(file_name)
  puts "File #{file_name} not found.\n"
  puts command_info
  Process.exit(-1)
end
|
|
36
|
+
|
|
37
|
+
######################################
|
|
38
|
+
# Define a subclass to override events
|
|
39
|
+
######################################
|
|
40
|
+
# FastaReader subclass whose hooks print the file as a <trace_volume>
# XML document on stdout: a header, one <trace> element per sequence,
# and a closing tag.
class FastaProcessor < FastaUtils::FastaReader

  # Begin hook: prints the XML declaration and opens the root element.
  def on_begin_process
    header = "<?xml version=\"1.0\"?>\n<trace_volume>\n"
    puts header
  end

  # Sequence hook: prints one <trace> element whose right clip is the
  # sequence length.
  def on_process_sequence(seq_name, seq_fasta)
    trace = "<trace>\n\t<trace_name>#{seq_name}</trace_name>\n\t<clip_vector_left>1</clip_vector_left>\n\t<clip_vector_right>#{seq_fasta.length.to_s}</clip_vector_right>\n</trace>\n"
    puts trace
  end

  # End hook: closes the root element.
  def on_end_process
    footer = "</trace_volume>\n"
    puts footer
  end

end

# Create a new instance to process the file
f = FastaProcessor.new(file_name)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Value holder for one result of Regexp#global_match:
# offset - index in the input string at which the searched slice started
# match  - the MatchData obtained on that slice
class GMatch
  attr_accessor :offset, :match
end
|
|
8
|
+
|
|
9
|
+
class Regexp
  # Finds successive matches of this regexp in input_str.
  #
  # Each result is a GMatch whose +offset+ is the index in input_str where
  # the searched slice began and whose +match+ is the MatchData relative to
  # that slice (absolute position = offset + match.begin(n)).
  #
  # overlap_group_no:
  #   0      - the next search starts where the previous match ended
  #   n > 0  - the next search starts where group n of the previous match
  #            began, so overlapping matches can be found
  #
  # Returns an Array of GMatch (empty when nothing matches).
  def global_match(input_str, overlap_group_no = 0)
    res = []
    last_end = 0

    loop do
      str = input_str.slice(last_end, input_str.length - last_end)
      break if str.nil? || str.empty?

      m = match(str)
      break if m.nil?

      new_match = GMatch.new
      new_match.offset = last_end
      new_match.match = m
      res.push new_match

      advance = overlap_group_no == 0 ? m.end(0) : m.begin(overlap_group_no)
      # Always move past the start of the current match. Without this an
      # empty match, or a group that begins where the match begins, would
      # leave last_end unchanged and loop forever. A group that did not
      # take part in the match (nil) is handled the same way.
      advance = m.begin(0) + 1 if advance.nil? || advance <= m.begin(0)
      last_end += advance
    end

    res
  end

  # Earlier block-based implementation, kept for reference:
  #   def global_match(str, &proc)
  #     retval = nil
  #     loop do
  #       res = str.sub(self) do |m|
  #         proc.call($~) # pass MatchData obj
  #         ''
  #       end
  #       break retval if res == str
  #       str = res
  #       retval ||= true
  #     end
  #   end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
|
|
2
|
+
class Hash

  # Merges the counters of h_stats into this hash, creating missing levels.
  #
  # Both hashes have three levels:
  #   plugin_name -> property -> value -> count
  # Counts for the same plugin/property/value are added together.
  # Returns h_stats (the result of iterating over it).
  def add_stats(h_stats)
    h_stats.each do |plugin_name, properties|
      self[plugin_name] = {} if self[plugin_name].nil?

      properties.each do |property, value_counts|
        self[plugin_name][property] = {} if self[plugin_name][property].nil?
        counters = self[plugin_name][property]

        value_counts.each do |value, count|
          previous = counters[value] || 0
          counters[value] = previous + count
        end
      end
    end
  end

end
|
|
29
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#================================================
|
|
2
|
+
# SCBI - dariogf <soporte@scbi.uma.es>
|
|
3
|
+
#------------------------------------------------
|
|
4
|
+
#
|
|
5
|
+
# Version: 0.1 - 04/2009
|
|
6
|
+
#
|
|
7
|
+
# Usage: require "utils/json_utils"
|
|
8
|
+
#
|
|
9
|
+
# JSON utilities
|
|
10
|
+
#
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
#================================================
|
|
14
|
+
|
|
15
|
+
# JSON helpers: two mixin methods plus a loader for JSON files that may
# contain '#' comment lines.
module JsonUtils

  require 'json';

  # Returns the receiver rendered as indented JSON text.
  def to_pretty_json
    JSON.pretty_generate(self)
  end

  # Parses the receiver (JSON text) and returns the decoded data.
  def from_json
    JSON.parse(self)
  end

  #------------------------------------
  # get json data
  #------------------------------------
  # Reads file_path, drops every line whose first non-blank character is
  # '#', and returns the parsed JSON of what remains.
  # Raises the File.read error when the file cannot be read and
  # JSON::ParserError when the remaining text is not valid JSON.
  def self.get_json_data(file_path)
    text = File.read(file_path)

    # wipe text: String#grep is gone since Ruby 1.9, so filter the lines
    # explicitly. The old pattern /^\s*[^#]/ also let indented comment
    # lines through, because the leading blank satisfied [^#].
    json_text = text.lines.reject { |line| line =~ /\A\s*#/ }.join

    # decode json
    JSON.parse(json_text)
  end

end
|
|
49
|
+
|
|
50
|
+
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#================================================
|
|
2
|
+
# SCBI - dariogf <soporte@scbi.uma.es>
|
|
3
|
+
#------------------------------------------------
|
|
4
|
+
#
|
|
5
|
+
# Version: 0.1 - 04/2009
|
|
6
|
+
#
|
|
7
|
+
# Usage: require "utils/fasta_utils"
|
|
8
|
+
#
|
|
9
|
+
# Fasta utilities
|
|
10
|
+
#
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
#================================================
|
|
14
|
+
|
|
15
|
+
require File.dirname(__FILE__) +"/fasta_reader.rb"
|
|
16
|
+
|
|
17
|
+
######################################
|
|
18
|
+
# Define a subclass to override events
|
|
19
|
+
######################################
|
|
20
|
+
# FastaReader subclass that collects every sequence name it is handed.
# After processing, +names+ maps each name to true.
class LoadFastaNamesInHash < FastaReader

  # Hash of sequence name => true.
  attr_reader :names

  # Begin hook: starts with an empty name table.
  def on_begin_process
    @names = Hash.new
  end

  # Sequence hook: records the name; the sequence text is ignored.
  def on_process_sequence(seq_name, seq_fasta)
    @names.store(seq_name, true)
  end

  # End hook: nothing to do.
  def on_end_process; end

end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#================================================
|
|
2
|
+
# SCBI - dariogf <soporte@scbi.uma.es>
|
|
3
|
+
#------------------------------------------------
|
|
4
|
+
#
|
|
5
|
+
# Version: 0.1 - 04/2009
|
|
6
|
+
#
|
|
7
|
+
# Usage: require "utils/load_qual_in_hash"
|
|
8
|
+
#
|
|
9
|
+
# Qual utilities
|
|
10
|
+
#
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
#================================================
|
|
14
|
+
|
|
15
|
+
require File.dirname(__FILE__) +"/qual_reader.rb"
|
|
16
|
+
|
|
17
|
+
######################################
|
|
18
|
+
# Define a subclass to override events
|
|
19
|
+
######################################
|
|
20
|
+
# QualReader subclass that stores the quality data of every sequence.
# After processing, +quals+ maps each sequence name to the value passed
# for it.
class LoadQualInHash < QualReader

  # Hash of sequence name => quality data.
  attr_reader :quals

  # Begin hook: starts with an empty table.
  def on_begin_process
    @quals = Hash.new
  end

  # Sequence hook: stores the quality data under the sequence name.
  def on_process_sequence(seq_name, seq_qual)
    @quals.store(seq_name, seq_qual)
  end

  # End hook: nothing to do.
  def on_end_process; end

end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
module RecoverMid

  # Tries to extend a partial MID hit with the bases next to it.
  #
  # hit    - blast hit of the MID; q_beg/q_end index into seq and
  #          s_beg/s_end index into db_mid
  # db_mid - complete MID sequence from the database
  # seq    - sequence (fasta) in which the hit was found
  #
  # When the hit starts at the first base of the MID, the unmatched right
  # part of the MID is compared (String#lcs) with the bases following the
  # hit. Otherwise the unmatched left part is compared with the bases
  # before the hit. The common stretch is accepted only when it is at most
  # one base away from the hit.
  #
  # Returns [new_q_beg, new_q_end, recovered_size, recovered_mid].
  def recover_mid(hit, db_mid, seq)
    hit_size = hit.q_end - hit.q_beg + 1

    if hit.s_beg == 0 # look right parts
      mid_part = db_mid[hit.s_end + 1..db_mid.length]
      # one base more than the missing MID part, to tolerate a one-base gap
      seq_part = seq[hit.q_end + 1, mid_part.length + 1]

      common = mid_part.lcs(seq_part)
      in_seq_pos = seq_part.index(common)

      if in_seq_pos > 1
        # not valid: the common part starts too far from the hit
        in_seq_pos = 0
        common = ''
      end

      new_q_beg = hit.q_beg
      new_q_end = hit.q_end + in_seq_pos + common.length
    else # look left parts
      # NOTE: the original wrote "else hit.s_end == db_mid.length-1"; that
      # comparison was never used as a condition, so every hit that does
      # not start at the MID's first base is handled here.
      mid_part = db_mid[0..hit.s_beg - 1]
      # Clamp at 0: a negative start would wrap around and compare against
      # bases from the END of seq (e.g. when the hit is at q_beg == 0).
      left_start = [hit.q_beg - mid_part.length - 1, 0].max
      seq_part = seq[left_start...hit.q_beg]

      common = mid_part.lcs(seq_part)
      in_seq_pos = left_start + seq_part.index(common)

      if common.empty? || in_seq_pos + common.length < hit.q_beg - 1
        # not valid: nothing in common, or it ends too far from the hit
        in_seq_pos = hit.q_beg
        common = ''
      end

      new_q_beg = in_seq_pos
      new_q_end = hit.q_end
    end

    recovered_mid = seq[new_q_beg..new_q_end]
    recovered_size = hit_size + common.length

    [new_q_beg, new_q_end, recovered_size, recovered_mid]
  end

end
|
|
64
|
+
|
|
65
|
+
class String
  # Longest common substring of the receiver and s2.
  #
  # Returns the longest run of consecutive characters present in both
  # strings, taken from the receiver. When several runs share the maximum
  # length, the one ending first in the receiver wins. Returns "" when
  # there is nothing in common.
  def lcs(s2)
    other = s2.chars.to_a # split once instead of once per receiver character
    prev_row = Array.new(other.size, 0)
    best_len = 0
    best_end = 0

    chars.each_with_index do |c1, i|
      # row[j] = length of the common run ending at self[i] and s2[j]
      row = Array.new(other.size, 0)
      other.each_with_index do |c2, j|
        next unless c1 == c2

        # Explicit guard for the first column; the original indexed
        # num[i-1][j-1], which wraps around when i or j is 0.
        run = (j.zero? ? 0 : prev_row[j - 1]) + 1
        row[j] = run
        if run > best_len
          best_len = run
          best_end = i
        end
      end
      prev_row = row
    end

    best_len.zero? ? "" : self[best_end - best_len + 1, best_len]
  end
end
|
|
95
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
|
|
2
|
+
class String

  # True when Kernel#Integer accepts the whole string, false when it
  # raises.
  def integer?
    Integer(self)
    true
  rescue
    false
  end

  # Converts a CamelCase (or space separated) name into lower-case
  # snake_case by applying the substitutions below in order.
  def decamelize
    rules = [
      [/([A-Z\d]+)([A-Z][a-z])/, '\1_\2'],
      [/([a-z]+)([A-Z\d])/, '\1_\2'],
      [/([A-Z]{2,})(\d+)/i, '\1_\2'],
      [/(\d+)([a-z])/i, '\1_\2'],
      [/(.+?)\&(.+?)/, '\1_&_\2'],
      [/\s/, '_']
    ]

    converted = rules.inject(to_s) do |text, (pattern, replacement)|
      text.gsub(pattern, replacement)
    end
    converted.downcase
  end

end
|
|
28
|
+
|
|
29
|
+
class File

  # True when the file at file_path starts with the ZIP local-file
  # signature "PK\x03\x04". Any error while opening or reading the file
  # (missing file, no permission, ...) yields false.
  def self.is_zip?(file_path)
    # Block form closes the handle even when the read raises.
    head = File.open(file_path, 'rb') { |f| f.read(4) }
    head == "PK\x03\x04"
  rescue
    false
  end

  # Runs the external `unzip` command on file_path and returns the names
  # printed on its "inflating:" output lines (files, not folders).
  def self.unzip(file_path)
    # Array form runs unzip without a shell, so quotes or shell
    # metacharacters in file_path cannot break or inject into the command.
    unzipped = IO.popen(['unzip', file_path]) { |io| io.read }

    # select only the files, not folders
    unzipped.split("\n").
      select { |e| e =~ /inflating/ }.
      map { |e| e.gsub('inflating:', '').strip }
  end

end
|
data/lib/seqtrimnext.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Put this lib directory at the front of the load path unless it is
# already listed (as given or in expanded form).
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end

# setup REQUIRE_PATH
ROOT_PATH = File.join(lib_dir, 'seqtrimnext')

# Sub-folders of ROOT_PATH appended to the load path, in this order, so
# the files in 'classes', 'plugins', 'actions', 'utils',
# 'classes/em_classes' and 'latex/classes' can be required by name.
[
  %w[classes],
  %w[plugins],
  %w[actions],
  %w[utils],
  %w[classes em_classes],
  %w[latex classes]
].each do |parts|
  $: << File.expand_path(File.join(ROOT_PATH, *parts))
end
|
|
24
|
+
|
|
25
|
+
# puts $:
|
|
26
|
+
|
|
27
|
+
# Namespace of the gem; holds the version constants.
module Seqtrimnext

  # Former versioning scheme, no longer in use:
  # SEQTRIM_VERSION_REVISION=27
  # SEQTRIM_VERSION_STAGE = 'b'
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"

  # Version of the gem.
  VERSION = '2.0.29'

  # Same value under the older constant name.
  SEQTRIM_VERSION = VERSION

end
|
data/script/console
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
# Replaces this process with an irb session that has irb/completion and
# lib/seqtrimnext.rb preloaded.

# 'irb.bat' is used when RUBY_PLATFORM mentions mswin or mingw.
# The group was written "(:?"; "(?:" (non-capturing) is what was meant.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/seqtrimnext.rb'}"
puts "Loading seqtrimnext gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script with the command-line arguments.

# Parent directory of this script's folder.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, load rubygems first and retry.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments are handed over.
ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script with the command-line arguments.

# Parent directory of this script's folder.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, load rubygems first and retry.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments are handed over.
ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/test/test_helper.rb
ADDED