seqtrimnext 2.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
bin/create_graphs.rb
|
2
|
+
bin/extract_seqs.rb
|
3
|
+
bin/extract_seqs_from_fasta.rb
|
4
|
+
bin/extract_seqs_from_fastq.rb
|
5
|
+
bin/fasta2fastq.rb
|
6
|
+
bin/fastq2fasta.rb
|
7
|
+
bin/gen_qual.rb
|
8
|
+
bin/get_seq.rb
|
9
|
+
bin/group_by_range.rb
|
10
|
+
bin/join_ilumina_paired.rb
|
11
|
+
bin/parse_amplicons.rb
|
12
|
+
bin/parse_json_results.rb
|
13
|
+
bin/parse_params.rb
|
14
|
+
bin/resume_clusters.rb
|
15
|
+
bin/resume_rejected.sh
|
16
|
+
bin/reverse_paired.rb
|
17
|
+
bin/seqtrimnext
|
18
|
+
bin/split_fastq.rb
|
19
|
+
bin/split_ilumina_paired.rb
|
20
|
+
bin/split_paired.rb
|
21
|
+
lib/seqtrimnext/actions/action_ab_adapter.rb
|
22
|
+
lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
23
|
+
lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
24
|
+
lib/seqtrimnext/actions/action_empty_insert.rb
|
25
|
+
lib/seqtrimnext/actions/action_ignore_repeated.rb
|
26
|
+
lib/seqtrimnext/actions/action_indetermination.rb
|
27
|
+
lib/seqtrimnext/actions/action_induced_low_complexity.rb
|
28
|
+
lib/seqtrimnext/actions/action_insert.rb
|
29
|
+
lib/seqtrimnext/actions/action_is_contaminated.rb
|
30
|
+
lib/seqtrimnext/actions/action_key.rb
|
31
|
+
lib/seqtrimnext/actions/action_left_adapter.rb
|
32
|
+
lib/seqtrimnext/actions/action_left_primer.rb
|
33
|
+
lib/seqtrimnext/actions/action_linker.rb
|
34
|
+
lib/seqtrimnext/actions/action_low_complexity.rb
|
35
|
+
lib/seqtrimnext/actions/action_low_high_size.rb
|
36
|
+
lib/seqtrimnext/actions/action_low_quality.rb
|
37
|
+
lib/seqtrimnext/actions/action_mid.rb
|
38
|
+
lib/seqtrimnext/actions/action_multiple_linker.rb
|
39
|
+
lib/seqtrimnext/actions/action_paired_reads.rb
|
40
|
+
lib/seqtrimnext/actions/action_poly_a.rb
|
41
|
+
lib/seqtrimnext/actions/action_poly_t.rb
|
42
|
+
lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
|
43
|
+
lib/seqtrimnext/actions/action_right_adapter.rb
|
44
|
+
lib/seqtrimnext/actions/action_right_primer.rb
|
45
|
+
lib/seqtrimnext/actions/action_short_insert.rb
|
46
|
+
lib/seqtrimnext/actions/action_unexpected_poly_t.rb
|
47
|
+
lib/seqtrimnext/actions/action_unexpected_vector.rb
|
48
|
+
lib/seqtrimnext/actions/action_vectors.rb
|
49
|
+
lib/seqtrimnext/actions/seqtrim_action.rb
|
50
|
+
lib/seqtrimnext/classes/action_manager.rb
|
51
|
+
lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb
|
52
|
+
lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb
|
53
|
+
lib/seqtrimnext/classes/extract_stats.rb
|
54
|
+
lib/seqtrimnext/classes/gnu_plot_graph.rb
|
55
|
+
lib/seqtrimnext/classes/graph_stats.rb
|
56
|
+
lib/seqtrimnext/classes/install_database.rb
|
57
|
+
lib/seqtrimnext/classes/install_requirements.rb
|
58
|
+
lib/seqtrimnext/classes/list_db.rb
|
59
|
+
lib/seqtrimnext/classes/make_blast_db.rb
|
60
|
+
lib/seqtrimnext/classes/one_blast.rb
|
61
|
+
lib/seqtrimnext/classes/params.rb
|
62
|
+
lib/seqtrimnext/classes/piro.rb
|
63
|
+
lib/seqtrimnext/classes/plugin_manager.rb
|
64
|
+
lib/seqtrimnext/classes/scan_for_restr_site.rb
|
65
|
+
lib/seqtrimnext/classes/scbi_stats.rb
|
66
|
+
lib/seqtrimnext/classes/seqtrim.rb
|
67
|
+
lib/seqtrimnext/classes/sequence.rb
|
68
|
+
lib/seqtrimnext/classes/sequence_group.rb
|
69
|
+
lib/seqtrimnext/classes/sequence_with_action.rb
|
70
|
+
lib/seqtrimnext/plugins/plugin.rb
|
71
|
+
lib/seqtrimnext/plugins/plugin_ab_adapters.rb
|
72
|
+
lib/seqtrimnext/plugins/plugin_adapters.rb
|
73
|
+
lib/seqtrimnext/plugins/plugin_amplicons.rb
|
74
|
+
lib/seqtrimnext/plugins/plugin_contaminants.rb
|
75
|
+
lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
76
|
+
lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
77
|
+
lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
78
|
+
lib/seqtrimnext/plugins/plugin_indeterminations.rb
|
79
|
+
lib/seqtrimnext/plugins/plugin_key.rb
|
80
|
+
lib/seqtrimnext/plugins/plugin_linker.rb
|
81
|
+
lib/seqtrimnext/plugins/plugin_low_complexity.rb
|
82
|
+
lib/seqtrimnext/plugins/plugin_low_high_size.rb
|
83
|
+
lib/seqtrimnext/plugins/plugin_low_quality.rb
|
84
|
+
lib/seqtrimnext/plugins/plugin_mids.rb
|
85
|
+
lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
|
86
|
+
lib/seqtrimnext/plugins/plugin_short_insert.rb
|
87
|
+
lib/seqtrimnext/plugins/plugin_vectors.rb
|
88
|
+
lib/seqtrimnext/templates/amplicons.txt
|
89
|
+
lib/seqtrimnext/templates/genomics_454.txt
|
90
|
+
lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
91
|
+
lib/seqtrimnext/templates/low_quality.txt
|
92
|
+
lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
93
|
+
lib/seqtrimnext/templates/transcriptomics_454.txt
|
94
|
+
lib/seqtrimnext/templates/transcriptomics_plants.txt
|
95
|
+
lib/seqtrimnext/utils/extract_samples.rb
|
96
|
+
lib/seqtrimnext/utils/fasta2xml.rb
|
97
|
+
lib/seqtrimnext/utils/global_match.rb
|
98
|
+
lib/seqtrimnext/utils/hash_stats.rb
|
99
|
+
lib/seqtrimnext/utils/json_utils.rb
|
100
|
+
lib/seqtrimnext/utils/load_fasta_names_in_hash.rb
|
101
|
+
lib/seqtrimnext/utils/load_qual_in_hash.rb
|
102
|
+
lib/seqtrimnext/utils/recover_mid.rb
|
103
|
+
lib/seqtrimnext/utils/string_utils.rb
|
104
|
+
lib/seqtrimnext.rb
|
105
|
+
History.txt
|
106
|
+
Manifest.txt
|
107
|
+
PostInstall.txt
|
108
|
+
Rakefile
|
109
|
+
README.rdoc
|
110
|
+
script/console
|
111
|
+
script/destroy
|
112
|
+
script/generate
|
113
|
+
test/test_helper.rb
|
114
|
+
test/test_seqtrimnext.rb
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
= seqtrimnext
|
2
|
+
|
3
|
+
* http://www.scbi.uma.es/downloads
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of the scbi_mapreduce gem to be able to run in parallel and distributed environments. It is especially suited for Roche 454 (normal and paired-end) & Illumina datasets, although it could be easily adapted to any other situation.
|
8
|
+
|
9
|
+
== FEATURES:
|
10
|
+
|
11
|
+
* SeqtrimNEXT is very flexible since its architecture is based on plugins.
|
12
|
+
* You can add new plugins if needed.
|
13
|
+
* SeqtrimNEXT uses scbi_mapreduce and thus is able to exploit all the benefits of a cluster environment. It also works in multi-core machines and big shared-memory servers.
|
14
|
+
|
15
|
+
== Default templates for genomics & transcriptomics are provided
|
16
|
+
|
17
|
+
<b>genomics_454.txt</b>:: cleans genomics data from Roche 454 sequencer.
|
18
|
+
<b>genomics_454_with_paired.txt</b>:: cleans genomic data from a paired-end experiment sequenced with a Roche 454 sequencer.
|
19
|
+
<b>low_quality.txt</b>:: trims low quality.
|
20
|
+
<b>low_quality_and_low_complexity.txt</b>:: trims low quality and low complexity.
|
21
|
+
<b>transcriptomics_454.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer.
|
22
|
+
<b>transcriptomics_plants.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer with extra databases for plants.
|
23
|
+
<b>amplicons.txt</b>:: filters amplicons.
|
24
|
+
|
25
|
+
== You can define your own templates using a combination of available plugins:
|
26
|
+
|
27
|
+
<b>PluginKey</b>:: to remove sequencing keys from 454 input sequences.
|
28
|
+
<b>PluginMids</b>:: to remove MIDS (barcodes) from 454 sequences.
|
29
|
+
<b>PluginLinker</b>:: splits sequences into two inserts when a valid linker is found (paired-end experiments only)
|
30
|
+
<b>PluginAbAdapters</b>:: removes AB adapters from sequences using a predefined DB or one provided by the user.
|
31
|
+
<b>PluginFindPolyAt</b>:: removes polyA and polyT from sequences.
|
32
|
+
<b>PluginLowComplexity</b>:: filters sequences with low complexity regions
|
33
|
+
<b>PluginAdapters</b>:: removes Adapters from sequences using a predefined DB or one provided by the user.
|
34
|
+
<b>PluginLowHighSize</b>:: removes sequences too small or too big.
|
35
|
+
<b>PluginVectors</b>:: removes vectors from sequences using a predefined database or one provided by the user.
|
36
|
+
<b>PluginAmplicons</b>:: filters amplicons using user predefined primers.
|
37
|
+
<b>PluginIndeterminations</b>:: removes indeterminations (N) from the sequence.
|
38
|
+
<b>PluginLowQuality</b>:: eliminates low quality regions from sequences.
|
39
|
+
<b>PluginContaminants</b>:: removes contaminants from sequences or rejects contaminated ones. It uses a core database, but it can be expanded with user-provided ones.
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
== SYNOPSIS:
|
44
|
+
|
45
|
+
Once installed, SeqtrimNEXT is very easy to use:
|
46
|
+
|
47
|
+
To install core databases (it should be done at installation time):
|
48
|
+
|
49
|
+
$> seqtrimnext -i
|
50
|
+
|
51
|
+
To perform an analysis using a predefined template with a FASTQ file format using 4 cpus:
|
52
|
+
|
53
|
+
$> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w 4
|
54
|
+
|
55
|
+
To perform an analysis using a predefined template with FASTA and QUAL input files:
|
56
|
+
|
57
|
+
$> seqtrimnext -t genomics_454.txt -f input_file_in_FASTA -q input_file_in_QUAL
|
58
|
+
|
59
|
+
|
60
|
+
To get additional help and list available templates and databases:
|
61
|
+
|
62
|
+
$> seqtrimnext -h
|
63
|
+
|
64
|
+
|
65
|
+
== REQUIREMENTS:
|
66
|
+
|
67
|
+
* Ruby 1.9.2
|
68
|
+
* Blast plus 2.24 or greater (prior versions have bugs that produce bad results)
|
69
|
+
* [Optional] - GnuPlot version 4.4.2 or greater (prior versions may produce wrong graphs)
|
70
|
+
* [Optional] - pdflatex - Optional, to produce a detailed report with results
|
71
|
+
|
72
|
+
== INSTALL:
|
73
|
+
|
74
|
+
=== Installing Blast
|
75
|
+
|
76
|
+
* Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/release/LATEST/
|
77
|
+
* You can also use a precompiled version if you like
|
78
|
+
* To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
|
79
|
+
|
80
|
+
./configure
|
81
|
+
make
|
82
|
+
sudo make install
|
83
|
+
|
84
|
+
|
85
|
+
=== Installing Ruby 1.9
|
86
|
+
|
87
|
+
* You can use RVM to install ruby:
|
88
|
+
|
89
|
+
Install RVM:
|
90
|
+
|
91
|
+
$ bash < <(curl -s https://rvm.beginrescueend.com/install/rvm)
|
92
|
+
|
93
|
+
Setup environment:
|
94
|
+
|
95
|
+
$ echo '[[ -s "$HOME/.rvm/scripts/rvm" ]] && . "$HOME/.rvm/scripts/rvm" # Load RVM function' >> ~/.bash_profile
|
96
|
+
|
97
|
+
Install ruby 1.9.2 (this can take a while):
|
98
|
+
|
99
|
+
$ rvm install 1.9.2
|
100
|
+
|
101
|
+
Set it as the default:
|
102
|
+
|
103
|
+
$ rvm use 1.9.2 --default
|
104
|
+
|
105
|
+
=== Install SeqtrimNEXT
|
106
|
+
|
107
|
+
SeqtrimNEXT is very easy to install. It is distributed as a ruby gem:
|
108
|
+
|
109
|
+
gem install seqtrimnext
|
110
|
+
|
111
|
+
This will install seqtrimnext and all the required gems.
|
112
|
+
|
113
|
+
=== Install and rebuild SeqtrimNext's core databases
|
114
|
+
|
115
|
+
SeqtrimNEXT needs some core databases to work. To install them:
|
116
|
+
|
117
|
+
seqtrimnext -i core
|
118
|
+
|
119
|
+
=== Database modifications
|
120
|
+
|
121
|
+
Included databases will be useful for a lot of people, but if you prefer, you can modify them, or add more elements to be searched against your sequences.
|
122
|
+
|
123
|
+
You only need to drop new fasta files to each respective directory:
|
124
|
+
|
125
|
+
DB/vectors to add more vectors
|
126
|
+
DB/contaminants to add more contaminants
|
127
|
+
etc...
|
128
|
+
|
129
|
+
Once the databases have been modified, you will need to reformat them by issuing the following command:
|
130
|
+
|
131
|
+
seqtrimnext -c
|
132
|
+
|
133
|
+
Modified databases will be rebuilt.
|
134
|
+
|
135
|
+
|
136
|
+
== LICENSE:
|
137
|
+
|
138
|
+
(The MIT License)
|
139
|
+
|
140
|
+
Copyright (c) 2011 Almudena Bocinos & Dario Guerrero
|
141
|
+
|
142
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
143
|
+
a copy of this software and associated documentation files (the
|
144
|
+
'Software'), to deal in the Software without restriction, including
|
145
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
146
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
147
|
+
permit persons to whom the Software is furnished to do so, subject to
|
148
|
+
the following conditions:
|
149
|
+
|
150
|
+
The above copyright notice and this permission notice shall be
|
151
|
+
included in all copies or substantial portions of the Software.
|
152
|
+
|
153
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
154
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
155
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
156
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
157
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
158
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
159
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# Rake configuration for the seqtrimnext gem, built on Hoe with the newgem plugin.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/seqtrimnext'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'seqtrimnext' do
  self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = self.name # TODO this is default value

  # Runtime dependencies; each one is declared with the open-ended
  # version requirement '>=0'.
  self.extra_deps = %w[
    narray
    gnuplot
    term-ansicolor
    xml-simple
    scbi_blast
    scbi_mapreduce
    scbi_fasta
    scbi_fastq
    scbi_plot
    scbi_math
  ].map { |gem_name| [gem_name, '>=0'] }
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features, :redocs]
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Builds graphs from two JSON stats files by handing the parsed contents
# to GraphStats.
#
# Usage: <script> stats.json initial_stats.json
require 'stringio'
# require 'test/unit'
require 'json'
require 'gnuplot'

ROOT_PATH = File.dirname(File.dirname(__FILE__))

# Put the classes, plugins and utils directories on the load path so the
# bare-name requires below resolve.
# $: << File.expand_path(File.join(ROOT_PATH,'test'))
%w[classes plugins utils].each do |dir|
  $: << File.expand_path(File.join(ROOT_PATH, dir))
end

# Both ARGV[0] and ARGV[1] are read below, so a single argument is not
# enough; show the usage instead of failing later in File.read(nil).
if ARGV.length < 2
  puts "Usage: #{$0} stats.json initial_stats.json"
  exit
end

plugin_files = Dir.glob(File.expand_path(File.join(ROOT_PATH, 'plugins', '*.rb')))

# 'plugin' is required before the individual plugin files, as in the
# original ordering -- presumably the concrete plugins depend on it.
require 'plugin'

# require 'params'

plugin_files.each { |plugin| require plugin }

require 'graph_stats'

# Load both stats files.
stats = JSON.parse(File.read(ARGV[0]))
init_stats = JSON.parse(File.read(ARGV[1]))

# The constructor is called only for its side effects; the instance is not used.
GraphStats.new(stats, init_stats)

puts "Graphs generated"
|
46
|
+
|
data/bin/extract_seqs.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Copies the sequences named on the command line from a FASTA/QUAL pair
# into OUTPUT_NAME.fasta and OUTPUT_NAME.fasta.qual (both opened in append
# mode). Scanning stops once every requested name has been written.
#
# Usage: <script> FASTA QUAL OUTPUT_NAME SEQ_NAME [MORE_SEQ_NAMES]

require 'scbi_fastq'
# FastaQualFile is used below; the sibling scripts that use it load it
# through scbi_fasta, so require it here as well.
require 'scbi_fasta'

# Three positional arguments are shifted off below and at least one
# sequence name must remain, hence four arguments minimum.
if ARGV.length < 4
  puts "#{$0} FASTA QUAL OUTPUT_NAME SEQ_NAME [MORE_SEQ_NAMES]"
  exit
end

fasta = ARGV.shift
qual = ARGV.shift
output_name = ARGV.shift
seqs = ARGV # whatever is left are the sequence names to extract
puts seqs.join(';')

fqr = FastaQualFile.new(fasta, qual)

output_fasta = File.new(output_name + '.fasta', 'a')
output_qual = File.new(output_name + '.fasta.qual', 'a')

begin
  fqr.each do |seq_name, seq_fasta, seq_qual|
    next unless seqs.include?(seq_name)

    output_fasta.puts ">#{seq_name}"
    output_fasta.puts seq_fasta
    output_qual.puts ">#{seq_name}"
    output_qual.puts seq_qual

    # Drop the name so the scan can end early when nothing is pending.
    seqs.delete(seq_name)
    break if seqs.empty?
  end
ensure
  # Close everything even if reading or writing raised.
  output_qual.close
  output_fasta.close
  fqr.close
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Copies the sequences listed in SEQ_NAMES_FILE (one name per line) from a
# FASTA/QUAL pair into OUTPUT_NAME.fasta and OUTPUT_NAME.fasta.qual (both
# opened in append mode). Scanning stops once every listed name has been
# written.
#
# Usage: <script> FASTA QUAL OUTPUT_NAME SEQ_NAMES_FILE

require 'set'
require 'scbi_fasta'

if ARGV.length < 4
  puts "#{$0} FASTA QUAL OUTPUT_NAME SEQ_NAMES_FILE"
  exit
end

fasta = ARGV.shift
qual = ARGV.shift
output_name = ARGV.shift
seqs_file = ARGV.shift

# File.readlines closes the names file after reading it. A Set makes the
# per-sequence membership test constant-time instead of a linear search.
seqs = Set.new(File.readlines(seqs_file).map { |line| line.strip })

fqr = FastaQualFile.new(fasta, qual)

output_fasta = File.new(output_name + '.fasta', 'a')
output_qual = File.new(output_name + '.fasta.qual', 'a')

begin
  fqr.each do |seq_name, seq_fasta, seq_qual|
    next unless seqs.include?(seq_name)

    output_fasta.puts ">#{seq_name}"
    output_fasta.puts seq_fasta
    output_qual.puts ">#{seq_name}"
    output_qual.puts seq_qual

    # Drop the name so the scan can end early when nothing is pending.
    seqs.delete(seq_name)
    break if seqs.empty?
  end
ensure
  # Close everything even if reading or writing raised.
  output_qual.close
  output_fasta.close
  fqr.close
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Copies the sequences listed in SEQ_NAMES_FILE (one name per line) from a
# FASTQ file into OUTPUT_NAME.fastq. Scanning stops once every listed name
# has been written.
#
# Usage: <script> FASTQ OUTPUT_NAME SEQ_NAMES_FILE

require 'set'
require 'scbi_fastq'

if ARGV.length != 3
  puts "#{$0} FASTQ OUTPUT_NAME SEQ_NAMES_FILE"
  exit
end

fastq = ARGV.shift
output_name = ARGV.shift
seqs_file = ARGV.shift

# File.readlines closes the names file after reading it.
names = File.readlines(seqs_file).map { |line| line.strip }
puts names.join(';')

# A Set makes the per-sequence membership test constant-time instead of a
# linear search through the list of names.
seqs = Set.new(names)

fqr = FastqFile.new(fastq)
output_fastq = FastqFile.new(output_name + '.fastq', 'w')

begin
  fqr.each do |seq_name, seq_fasta, seq_qual|
    next unless seqs.include?(seq_name)

    output_fastq.write_seq(seq_name, seq_fasta, seq_qual)

    # Drop the name so the scan can end early when nothing is pending.
    seqs.delete(seq_name)
    break if seqs.empty?
  end
ensure
  # Close both files even if reading or writing raised.
  output_fastq.close
  fqr.close
end
|
data/bin/fasta2fastq.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Converts a FASTA file (plus an optional QUAL file) into OUTPUT_NAME.fastq.
# When the QUAL path does not exist, every base gets a quality of 40.
#
# Usage: <script> FASTA QUAL OUTPUT_NAME

require 'scbi_fasta'
require 'scbi_fastq'

if ARGV.count < 3
  puts "#{$0} FASTA QUAL OUTPUT_NAME"
  exit
end

fasta = ARGV.shift
qual = ARGV.shift
output_name = ARGV.shift
default_qual = nil

# File.exist? replaces the deprecated File.exists?, which newer Ruby
# versions no longer provide.
if File.exist?(qual)
  fqr = FastaQualFile.new(fasta, qual)
else
  fqr = FastaFile.new(fasta)
  puts "Quality file doesn't exists. Using default qual value = 40"
  default_qual = [40]
end

output = FastqFile.new(output_name + '.fastq', 'w')

fqr.each do |seq_name, seq_fasta, seq_qual|
  # Without a QUAL file, build one default quality value per base.
  seq_qual = default_qual * seq_fasta.length if default_qual
  output.write_seq(seq_name, seq_fasta, seq_qual)
end

output.close
fqr.close
|
38
|
+
|
data/bin/fastq2fasta.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Splits a FASTQ file into OUTPUT_NAME.fasta and OUTPUT_NAME.fasta.qual.
# Each record header is written as ">name comments" in both outputs.
#
# Usage: <script> FASTQ OUTPUT_NAME

require 'scbi_fastq'

unless ARGV.count >= 2
  puts "#{$0} FASTQ OUTPUT_NAME"
  exit
end

fastq, output_name = ARGV.shift(2)

# The output files are created before the FASTQ input is opened.
fasta = File.open("#{output_name}.fasta", 'w')
qual = File.open("#{output_name}.fasta.qual", 'w')

fqr = FastqFile.new(fastq)

fqr.each do |seq_name, seq_fasta, seq_qual, comments|
  header = ">#{seq_name} #{comments}"

  fasta.puts header
  fasta.puts seq_fasta

  qual.puts header
  # Quality values are written space-separated on one line.
  qual.puts seq_qual.join(' ')
end

fasta.close
qual.close
fqr.close
|
35
|
+
|
data/bin/gen_qual.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Writes a QUAL-style file for a FASTA input: for each sequence, every
# lowercase base (a-z) gets BAD_QUAL and every other character gets
# GOOD_QUAL.
#
# Usage: <script> INPUT_FASTA OUTPUT_QUAL

require 'scbi_fasta'

GOOD_QUAL = 50
BAD_QUAL = 10
DOWN_CASE = ('a'..'z')

# The script used to redefine Array#count as an alias of length, which
# overrides the core method (count(obj) / count { ... }) for everything
# loaded afterwards. ARGV.length is used instead.
if ARGV.length != 2
  puts "Programa ENTRADA SALIDA"
  exit
end

puts ARGV[0]
puts ARGV[1]

fqr = FastaQualFile.new(ARGV[0])

f = File.new(ARGV[1], 'w+')

fqr.each do |seq_name, seq_fasta, _seq_qual|
  f.puts ">#{seq_name}"

  # One quality value per character of the sequence.
  quals = seq_fasta.each_char.map do |c|
    DOWN_CASE.include?(c) ? BAD_QUAL : GOOD_QUAL
  end

  f.puts quals.join(' ')
end

f.close
fqr.close
|
data/bin/get_seq.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Prints one sequence from a FASTA/QUAL pair to standard output.
# The optional fourth argument selects what is printed: any value
# containing 'f' prints the FASTA record, any value containing 'q' prints
# the QUAL record. The default is 'fq' (both).
#
# Usage: <script> FASTA QUAL SEQ_NAME [f|q|fq]

require 'scbi_fasta'

# The script used to redefine Array#count as an alias of length and to
# declare quality constants it never used; both were removed.
if ARGV.length < 3
  puts "#{$0} FASTA QUAL SEQ_NAME [f|q|fq]"
  exit
end

fqr = FastaQualFile.new(ARGV[0], ARGV[1])
wanted_name = ARGV[2]

# The output selector is honoured only when exactly four arguments are given.
get_type = ARGV.length == 4 ? ARGV[3] : 'fq'

fqr.each do |seq_name, seq_fasta, seq_qual|
  next unless seq_name == wanted_name

  if get_type.index('f')
    puts ">#{seq_name}"
    puts seq_fasta
  end

  if get_type.index('q')
    puts ">#{seq_name}"
    puts seq_qual
  end

  # Only the first matching sequence is printed.
  break
end

fqr.close
|