seqtrimnext 2.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
bin/create_graphs.rb
|
|
2
|
+
bin/extract_seqs.rb
|
|
3
|
+
bin/extract_seqs_from_fasta.rb
|
|
4
|
+
bin/extract_seqs_from_fastq.rb
|
|
5
|
+
bin/fasta2fastq.rb
|
|
6
|
+
bin/fastq2fasta.rb
|
|
7
|
+
bin/gen_qual.rb
|
|
8
|
+
bin/get_seq.rb
|
|
9
|
+
bin/group_by_range.rb
|
|
10
|
+
bin/join_ilumina_paired.rb
|
|
11
|
+
bin/parse_amplicons.rb
|
|
12
|
+
bin/parse_json_results.rb
|
|
13
|
+
bin/parse_params.rb
|
|
14
|
+
bin/resume_clusters.rb
|
|
15
|
+
bin/resume_rejected.sh
|
|
16
|
+
bin/reverse_paired.rb
|
|
17
|
+
bin/seqtrimnext
|
|
18
|
+
bin/split_fastq.rb
|
|
19
|
+
bin/split_ilumina_paired.rb
|
|
20
|
+
bin/split_paired.rb
|
|
21
|
+
lib/seqtrimnext/actions/action_ab_adapter.rb
|
|
22
|
+
lib/seqtrimnext/actions/action_ab_far_adapter.rb
|
|
23
|
+
lib/seqtrimnext/actions/action_ab_left_adapter.rb
|
|
24
|
+
lib/seqtrimnext/actions/action_empty_insert.rb
|
|
25
|
+
lib/seqtrimnext/actions/action_ignore_repeated.rb
|
|
26
|
+
lib/seqtrimnext/actions/action_indetermination.rb
|
|
27
|
+
lib/seqtrimnext/actions/action_induced_low_complexity.rb
|
|
28
|
+
lib/seqtrimnext/actions/action_insert.rb
|
|
29
|
+
lib/seqtrimnext/actions/action_is_contaminated.rb
|
|
30
|
+
lib/seqtrimnext/actions/action_key.rb
|
|
31
|
+
lib/seqtrimnext/actions/action_left_adapter.rb
|
|
32
|
+
lib/seqtrimnext/actions/action_left_primer.rb
|
|
33
|
+
lib/seqtrimnext/actions/action_linker.rb
|
|
34
|
+
lib/seqtrimnext/actions/action_low_complexity.rb
|
|
35
|
+
lib/seqtrimnext/actions/action_low_high_size.rb
|
|
36
|
+
lib/seqtrimnext/actions/action_low_quality.rb
|
|
37
|
+
lib/seqtrimnext/actions/action_mid.rb
|
|
38
|
+
lib/seqtrimnext/actions/action_multiple_linker.rb
|
|
39
|
+
lib/seqtrimnext/actions/action_paired_reads.rb
|
|
40
|
+
lib/seqtrimnext/actions/action_poly_a.rb
|
|
41
|
+
lib/seqtrimnext/actions/action_poly_t.rb
|
|
42
|
+
lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
|
|
43
|
+
lib/seqtrimnext/actions/action_right_adapter.rb
|
|
44
|
+
lib/seqtrimnext/actions/action_right_primer.rb
|
|
45
|
+
lib/seqtrimnext/actions/action_short_insert.rb
|
|
46
|
+
lib/seqtrimnext/actions/action_unexpected_poly_t.rb
|
|
47
|
+
lib/seqtrimnext/actions/action_unexpected_vector.rb
|
|
48
|
+
lib/seqtrimnext/actions/action_vectors.rb
|
|
49
|
+
lib/seqtrimnext/actions/seqtrim_action.rb
|
|
50
|
+
lib/seqtrimnext/classes/action_manager.rb
|
|
51
|
+
lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb
|
|
52
|
+
lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb
|
|
53
|
+
lib/seqtrimnext/classes/extract_stats.rb
|
|
54
|
+
lib/seqtrimnext/classes/gnu_plot_graph.rb
|
|
55
|
+
lib/seqtrimnext/classes/graph_stats.rb
|
|
56
|
+
lib/seqtrimnext/classes/install_database.rb
|
|
57
|
+
lib/seqtrimnext/classes/install_requirements.rb
|
|
58
|
+
lib/seqtrimnext/classes/list_db.rb
|
|
59
|
+
lib/seqtrimnext/classes/make_blast_db.rb
|
|
60
|
+
lib/seqtrimnext/classes/one_blast.rb
|
|
61
|
+
lib/seqtrimnext/classes/params.rb
|
|
62
|
+
lib/seqtrimnext/classes/piro.rb
|
|
63
|
+
lib/seqtrimnext/classes/plugin_manager.rb
|
|
64
|
+
lib/seqtrimnext/classes/scan_for_restr_site.rb
|
|
65
|
+
lib/seqtrimnext/classes/scbi_stats.rb
|
|
66
|
+
lib/seqtrimnext/classes/seqtrim.rb
|
|
67
|
+
lib/seqtrimnext/classes/sequence.rb
|
|
68
|
+
lib/seqtrimnext/classes/sequence_group.rb
|
|
69
|
+
lib/seqtrimnext/classes/sequence_with_action.rb
|
|
70
|
+
lib/seqtrimnext/plugins/plugin.rb
|
|
71
|
+
lib/seqtrimnext/plugins/plugin_ab_adapters.rb
|
|
72
|
+
lib/seqtrimnext/plugins/plugin_adapters.rb
|
|
73
|
+
lib/seqtrimnext/plugins/plugin_amplicons.rb
|
|
74
|
+
lib/seqtrimnext/plugins/plugin_contaminants.rb
|
|
75
|
+
lib/seqtrimnext/plugins/plugin_extract_inserts.rb
|
|
76
|
+
lib/seqtrimnext/plugins/plugin_find_poly_at.rb
|
|
77
|
+
lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
|
|
78
|
+
lib/seqtrimnext/plugins/plugin_indeterminations.rb
|
|
79
|
+
lib/seqtrimnext/plugins/plugin_key.rb
|
|
80
|
+
lib/seqtrimnext/plugins/plugin_linker.rb
|
|
81
|
+
lib/seqtrimnext/plugins/plugin_low_complexity.rb
|
|
82
|
+
lib/seqtrimnext/plugins/plugin_low_high_size.rb
|
|
83
|
+
lib/seqtrimnext/plugins/plugin_low_quality.rb
|
|
84
|
+
lib/seqtrimnext/plugins/plugin_mids.rb
|
|
85
|
+
lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
|
|
86
|
+
lib/seqtrimnext/plugins/plugin_short_insert.rb
|
|
87
|
+
lib/seqtrimnext/plugins/plugin_vectors.rb
|
|
88
|
+
lib/seqtrimnext/templates/amplicons.txt
|
|
89
|
+
lib/seqtrimnext/templates/genomics_454.txt
|
|
90
|
+
lib/seqtrimnext/templates/genomics_454_with_paired.txt
|
|
91
|
+
lib/seqtrimnext/templates/low_quality.txt
|
|
92
|
+
lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
|
|
93
|
+
lib/seqtrimnext/templates/transcriptomics_454.txt
|
|
94
|
+
lib/seqtrimnext/templates/transcriptomics_plants.txt
|
|
95
|
+
lib/seqtrimnext/utils/extract_samples.rb
|
|
96
|
+
lib/seqtrimnext/utils/fasta2xml.rb
|
|
97
|
+
lib/seqtrimnext/utils/global_match.rb
|
|
98
|
+
lib/seqtrimnext/utils/hash_stats.rb
|
|
99
|
+
lib/seqtrimnext/utils/json_utils.rb
|
|
100
|
+
lib/seqtrimnext/utils/load_fasta_names_in_hash.rb
|
|
101
|
+
lib/seqtrimnext/utils/load_qual_in_hash.rb
|
|
102
|
+
lib/seqtrimnext/utils/recover_mid.rb
|
|
103
|
+
lib/seqtrimnext/utils/string_utils.rb
|
|
104
|
+
lib/seqtrimnext.rb
|
|
105
|
+
History.txt
|
|
106
|
+
Manifest.txt
|
|
107
|
+
PostInstall.txt
|
|
108
|
+
Rakefile
|
|
109
|
+
README.rdoc
|
|
110
|
+
script/console
|
|
111
|
+
script/destroy
|
|
112
|
+
script/generate
|
|
113
|
+
test/test_helper.rb
|
|
114
|
+
test/test_seqtrimnext.rb
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
= seqtrimnext
|
|
2
|
+
|
|
3
|
+
* http://www.scbi.uma.es/downloads
|
|
4
|
+
|
|
5
|
+
== DESCRIPTION:
|
|
6
|
+
|
|
7
|
+
SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of the scbi_mapreduce gem to be able to run in parallel and distributed environments. It is especially suited for Roche 454 (normal and paired-end) & Illumina datasets, although it could be easily adapted to any other situation.
|
|
8
|
+
|
|
9
|
+
== FEATURES:
|
|
10
|
+
|
|
11
|
+
* SeqtrimNEXT is very flexible since its architecture is based on plugins.
|
|
12
|
+
* You can add new plugins if needed.
|
|
13
|
+
* SeqtrimNEXT uses scbi_mapreduce and thus is able to exploit all the benefits of a cluster environment. It also works on multi-core machines and big shared-memory servers.
|
|
14
|
+
|
|
15
|
+
== Default templates for genomics & transcriptomics are provided
|
|
16
|
+
|
|
17
|
+
<b>genomics_454.txt</b>:: cleans genomics data from Roche 454 sequencer.
|
|
18
|
+
<b>genomics_454_with_paired.txt</b>:: cleans genomic data from a paired-end experiment sequenced with a Roche 454 sequencer.
|
|
19
|
+
<b>low_quality.txt</b>:: trims low quality.
|
|
20
|
+
<b>low_quality_and_low_complexity.txt</b>:: trims low quality and low complexity.
|
|
21
|
+
<b>transcriptomics_454.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer.
|
|
22
|
+
<b>transcriptomics_plants.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer with extra databases for plants.
|
|
23
|
+
<b>amplicons.txt</b>:: filters amplicons.
|
|
24
|
+
|
|
25
|
+
== You can define your own templates using a combination of available plugins:
|
|
26
|
+
|
|
27
|
+
<b>PluginKey</b>:: to remove sequencing keys from 454 input sequences.
|
|
28
|
+
<b>PluginMids</b>:: to remove MIDS (barcodes) from 454 sequences.
|
|
29
|
+
<b>PluginLinker</b>:: splits sequences into two inserts when a valid linker is found (paired-end experiments only)
|
|
30
|
+
<b>PluginAbAdapters</b>:: removes AB adapters from sequences using a predefined DB or one provided by the user.
|
|
31
|
+
<b>PluginFindPolyAt</b>:: removes polyA and polyT from sequences.
|
|
32
|
+
<b>PluginLowComplexity</b>:: filters sequences with low complexity regions
|
|
33
|
+
<b>PluginAdapters</b>:: removes Adapters from sequences using a predefined DB or one provided by the user.
|
|
34
|
+
<b>PluginLowHighSize</b>:: removes sequences too small or too big.
|
|
35
|
+
<b>PluginVectors</b>:: removes vectors from sequences using a predefined database or one provided by the user.
|
|
36
|
+
<b>PluginAmplicons</b>:: filters amplicons using user predefined primers.
|
|
37
|
+
<b>PluginIndeterminations</b>:: removes indeterminations (N) from the sequence.
|
|
38
|
+
<b>PluginLowQuality</b>:: eliminates low quality regions from sequences.
|
|
39
|
+
<b>PluginContaminants</b>:: removes contaminants from sequences or rejects contaminated ones. It uses a core database, but it can be expanded with user-provided ones.
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
== SYNOPSIS:
|
|
44
|
+
|
|
45
|
+
Once installed, SeqtrimNEXT is very easy to use:
|
|
46
|
+
|
|
47
|
+
To install core databases (it should be done at installation time):
|
|
48
|
+
|
|
49
|
+
$> seqtrimnext -i
|
|
50
|
+
|
|
51
|
+
To perform an analysis using a predefined template with a FASTQ file format using 4 CPUs:
|
|
52
|
+
|
|
53
|
+
$> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w 4
|
|
54
|
+
|
|
55
|
+
To perform an analysis using a predefined template with FASTA and QUAL input files:
|
|
56
|
+
|
|
57
|
+
$> seqtrimnext -t genomics_454.txt -f input_file_in_FASTA -q input_file_in_QUAL
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
To get additional help and list available templates and databases:
|
|
61
|
+
|
|
62
|
+
$> seqtrimnext -h
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
== REQUIREMENTS:
|
|
66
|
+
|
|
67
|
+
* Ruby 1.9.2
|
|
68
|
+
* Blast plus 2.24 or greater (prior versions have bugs that produce bad results)
|
|
69
|
+
* [Optional] - GnuPlot version 4.4.2 or greater (prior versions may produce wrong graphs)
|
|
70
|
+
* [Optional] - pdflatex - Optional, to produce a detailed report with results
|
|
71
|
+
|
|
72
|
+
== INSTALL:
|
|
73
|
+
|
|
74
|
+
=== Installing Blast
|
|
75
|
+
|
|
76
|
+
*Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/release/LATEST/
|
|
77
|
+
*You can also use a precompiled version if you like
|
|
78
|
+
*To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
|
|
79
|
+
|
|
80
|
+
./configure
|
|
81
|
+
make
|
|
82
|
+
sudo make install
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
=== Installing Ruby 1.9
|
|
86
|
+
|
|
87
|
+
*You can use RVM to install ruby:
|
|
88
|
+
|
|
89
|
+
Install RVM:
|
|
90
|
+
|
|
91
|
+
$ bash < <(curl -s https://rvm.beginrescueend.com/install/rvm)
|
|
92
|
+
|
|
93
|
+
Setup environment:
|
|
94
|
+
|
|
95
|
+
$ echo '[[ -s "$HOME/.rvm/scripts/rvm" ]] && . "$HOME/.rvm/scripts/rvm" # Load RVM function' >> ~/.bash_profile
|
|
96
|
+
|
|
97
|
+
Install ruby 1.9.2 (this can take a while):
|
|
98
|
+
|
|
99
|
+
$ rvm install 1.9.2
|
|
100
|
+
|
|
101
|
+
Set it as the default:
|
|
102
|
+
|
|
103
|
+
$ rvm use 1.9.2 --default
|
|
104
|
+
|
|
105
|
+
=== Install SeqtrimNEXT
|
|
106
|
+
|
|
107
|
+
SeqtrimNEXT is very easy to install. It is distributed as a ruby gem:
|
|
108
|
+
|
|
109
|
+
gem install seqtrimnext
|
|
110
|
+
|
|
111
|
+
This will install seqtrimnext and all the required gems.
|
|
112
|
+
|
|
113
|
+
=== Install and rebuild SeqtrimNext's core databases
|
|
114
|
+
|
|
115
|
+
SeqtrimNEXT needs some core databases to work. To install them:
|
|
116
|
+
|
|
117
|
+
seqtrimnext -i core
|
|
118
|
+
|
|
119
|
+
=== Database modifications
|
|
120
|
+
|
|
121
|
+
Included databases will be useful for a lot of people, but if you prefer, you can modify them, or add more elements to be searched against your sequences.
|
|
122
|
+
|
|
123
|
+
You only need to drop new fasta files to each respective directory:
|
|
124
|
+
|
|
125
|
+
DB/vectors to add more vectors
|
|
126
|
+
DB/contaminants to add more contaminants
|
|
127
|
+
etc...
|
|
128
|
+
|
|
129
|
+
Once the databases have been modified, you will need to reformat them by issuing the following command:
|
|
130
|
+
|
|
131
|
+
seqtrimnext -c
|
|
132
|
+
|
|
133
|
+
Modified databases will be rebuilt.
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
== LICENSE:
|
|
137
|
+
|
|
138
|
+
(The MIT License)
|
|
139
|
+
|
|
140
|
+
Copyright (c) 2011 Almudena Bocinos & Dario Guerrero
|
|
141
|
+
|
|
142
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
143
|
+
a copy of this software and associated documentation files (the
|
|
144
|
+
'Software'), to deal in the Software without restriction, including
|
|
145
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
146
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
147
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
148
|
+
the following conditions:
|
|
149
|
+
|
|
150
|
+
The above copyright notice and this permission notice shall be
|
|
151
|
+
included in all copies or substantial portions of the Software.
|
|
152
|
+
|
|
153
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
154
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
155
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
156
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
157
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
158
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
159
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
gem 'hoe', '>= 2.1.0'
|
|
3
|
+
require 'hoe'
|
|
4
|
+
require 'fileutils'
|
|
5
|
+
require './lib/seqtrimnext'
|
|
6
|
+
|
|
7
|
+
Hoe.plugin :newgem
|
|
8
|
+
# Hoe.plugin :website
|
|
9
|
+
# Hoe.plugin :cucumberfeatures
|
|
10
|
+
|
|
11
|
+
# Generate all the Rake tasks
|
|
12
|
+
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
|
13
|
+
$hoe = Hoe.spec 'seqtrimnext' do
|
|
14
|
+
self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
|
|
15
|
+
self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
|
|
16
|
+
self.rubyforge_name = self.name # TODO this is default value
|
|
17
|
+
# self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
|
|
18
|
+
|
|
19
|
+
self.extra_deps = []
|
|
20
|
+
self.extra_deps << ['narray','>=0']
|
|
21
|
+
self.extra_deps << ['gnuplot','>=0']
|
|
22
|
+
self.extra_deps << ['term-ansicolor','>=0']
|
|
23
|
+
self.extra_deps << ['xml-simple','>=0']
|
|
24
|
+
self.extra_deps << ['scbi_blast','>=0']
|
|
25
|
+
self.extra_deps << ['scbi_mapreduce','>=0']
|
|
26
|
+
self.extra_deps << ['scbi_fasta','>=0']
|
|
27
|
+
self.extra_deps << ['scbi_fastq','>=0']
|
|
28
|
+
self.extra_deps << ['scbi_plot','>=0']
|
|
29
|
+
self.extra_deps << ['scbi_math','>=0']
|
|
30
|
+
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
require 'newgem/tasks'
|
|
34
|
+
Dir['tasks/**/*.rake'].each { |t| load t }
|
|
35
|
+
|
|
36
|
+
# TODO - want other tests/tasks run by default? Add them to the list
|
|
37
|
+
# remove_task :default
|
|
38
|
+
# task :default => [:spec, :features, :redocs]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'stringio'
|
|
2
|
+
# require 'test/unit'
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'gnuplot'
|
|
5
|
+
|
|
6
|
+
ROOT_PATH=File.dirname(File.dirname(__FILE__))
|
|
7
|
+
|
|
8
|
+
# $: << File.expand_path(File.join(ROOT_PATH,'test'))
|
|
9
|
+
$: << File.expand_path(File.join(ROOT_PATH,'classes'))
|
|
10
|
+
$: << File.expand_path(File.join(ROOT_PATH,'plugins'))
|
|
11
|
+
$: << File.expand_path(File.join(ROOT_PATH,'utils'))
|
|
12
|
+
|
|
13
|
+
if ARGV.empty?
|
|
14
|
+
puts "Usage: #{$0} stats.json initial_stats.json"
|
|
15
|
+
exit
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
d=Dir.glob(File.expand_path(File.join(ROOT_PATH,'plugins','*.rb')))
|
|
19
|
+
|
|
20
|
+
# puts d.entries
|
|
21
|
+
# puts "="*20
|
|
22
|
+
|
|
23
|
+
require 'plugin'
|
|
24
|
+
|
|
25
|
+
# require 'params'
|
|
26
|
+
|
|
27
|
+
d.entries.each do |plugin|
|
|
28
|
+
require plugin
|
|
29
|
+
# puts "Requiring #{plugin}"
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
require 'graph_stats'
|
|
33
|
+
|
|
34
|
+
#load stats
|
|
35
|
+
|
|
36
|
+
r=File.read(ARGV[0])
|
|
37
|
+
stats=JSON::parse(r)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
r2=File.read(ARGV[1])
|
|
41
|
+
init_stats=JSON::parse(r2)
|
|
42
|
+
|
|
43
|
+
gs=GraphStats.new(stats,init_stats)
|
|
44
|
+
|
|
45
|
+
puts "Graphs generated"
|
|
46
|
+
|
data/bin/extract_seqs.rb
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fastq'
|
|
4
|
+
|
|
5
|
+
class Array
|
|
6
|
+
def count
|
|
7
|
+
self.length
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
if ARGV.count < 3
|
|
13
|
+
puts "#{$0} FASTA OUTPUT_NAME SEQ_NAME_FILE [MORE_SEQ_NAMES]"
|
|
14
|
+
exit
|
|
15
|
+
else
|
|
16
|
+
|
|
17
|
+
fasta = ARGV.shift
|
|
18
|
+
qual = ARGV.shift
|
|
19
|
+
output_name = ARGV.shift
|
|
20
|
+
seqs=ARGV
|
|
21
|
+
puts seqs.join(';')
|
|
22
|
+
|
|
23
|
+
fqr=FastaQualFile.new(fasta,qual)
|
|
24
|
+
|
|
25
|
+
output_fasta=File.new(output_name+'.fasta','a')
|
|
26
|
+
output_qual=File.new(output_name+'.fasta.qual','a')
|
|
27
|
+
|
|
28
|
+
fqr.each do |seq_name,seq_fasta,seq_qual|
|
|
29
|
+
if seqs.index(seq_name)
|
|
30
|
+
output_fasta.puts ">#{seq_name}"
|
|
31
|
+
output_fasta.puts seq_fasta
|
|
32
|
+
output_qual.puts ">#{seq_name}"
|
|
33
|
+
output_qual.puts seq_qual
|
|
34
|
+
seqs.delete(seq_name)
|
|
35
|
+
if seqs.empty?
|
|
36
|
+
break
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
output_qual.close
|
|
42
|
+
output_fasta.close
|
|
43
|
+
fqr.close
|
|
44
|
+
|
|
45
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fasta'
|
|
4
|
+
|
|
5
|
+
# GOOD_QUAL=50
|
|
6
|
+
# BAD_QUAL=10
|
|
7
|
+
# DOWN_CASE=('a'..'z')
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Array
|
|
11
|
+
def count
|
|
12
|
+
self.length
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
if ARGV.count < 4
|
|
18
|
+
puts "#{$0} FASTA QUAL OUTPUT_NAME SEQ_NAMES_FILE"
|
|
19
|
+
exit
|
|
20
|
+
else
|
|
21
|
+
|
|
22
|
+
fasta = ARGV.shift
|
|
23
|
+
qual = ARGV.shift
|
|
24
|
+
output_name = ARGV.shift
|
|
25
|
+
seqs_file=ARGV.shift
|
|
26
|
+
|
|
27
|
+
seqs=[]
|
|
28
|
+
|
|
29
|
+
f=File.open(seqs_file).each_line do |line|
|
|
30
|
+
seqs.push line.strip.chomp
|
|
31
|
+
end
|
|
32
|
+
# puts seqs.join(';')
|
|
33
|
+
|
|
34
|
+
fqr=FastaQualFile.new(fasta,qual)
|
|
35
|
+
|
|
36
|
+
output_fasta=File.new(output_name+'.fasta','a')
|
|
37
|
+
output_qual=File.new(output_name+'.fasta.qual','a')
|
|
38
|
+
|
|
39
|
+
fqr.each do |seq_name,seq_fasta,seq_qual|
|
|
40
|
+
if seqs.index(seq_name)
|
|
41
|
+
output_fasta.puts ">#{seq_name}"
|
|
42
|
+
output_fasta.puts seq_fasta
|
|
43
|
+
output_qual.puts ">#{seq_name}"
|
|
44
|
+
output_qual.puts seq_qual
|
|
45
|
+
seqs.delete(seq_name)
|
|
46
|
+
if seqs.empty?
|
|
47
|
+
break
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
output_qual.close
|
|
53
|
+
output_fasta.close
|
|
54
|
+
fqr.close
|
|
55
|
+
|
|
56
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fastq'
|
|
4
|
+
|
|
5
|
+
class Array
|
|
6
|
+
def count
|
|
7
|
+
self.length
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
if ARGV.count != 3
|
|
13
|
+
puts "#{$0} FASTQ OUTPUT_NAME SEQ_NAMES_FILE"
|
|
14
|
+
exit
|
|
15
|
+
else
|
|
16
|
+
|
|
17
|
+
fasta = ARGV.shift
|
|
18
|
+
output_name = ARGV.shift
|
|
19
|
+
seqs_file=ARGV.shift
|
|
20
|
+
|
|
21
|
+
seqs=[]
|
|
22
|
+
|
|
23
|
+
f=File.open(seqs_file).each_line do |line|
|
|
24
|
+
seqs.push line.strip.chomp
|
|
25
|
+
end
|
|
26
|
+
puts seqs.join(';')
|
|
27
|
+
|
|
28
|
+
fqr=FastqFile.new(fasta)
|
|
29
|
+
|
|
30
|
+
output_fastq=FastqFile.new(output_name+'.fastq','w')
|
|
31
|
+
|
|
32
|
+
fqr.each do |seq_name,seq_fasta,seq_qual|
|
|
33
|
+
if seqs.index(seq_name)
|
|
34
|
+
output_fastq.write_seq(seq_name,seq_fasta,seq_qual)
|
|
35
|
+
seqs.delete(seq_name)
|
|
36
|
+
if seqs.empty?
|
|
37
|
+
break
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
output_fastq.close
|
|
43
|
+
fqr.close
|
|
44
|
+
|
|
45
|
+
end
|
data/bin/fasta2fastq.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fasta'
|
|
4
|
+
require 'scbi_fastq'
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
if ARGV.count < 3
|
|
8
|
+
puts "#{$0} FASTA QUAL OUTPUT_NAME"
|
|
9
|
+
exit
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
fasta = ARGV.shift
|
|
15
|
+
qual = ARGV.shift
|
|
16
|
+
output_name = ARGV.shift
|
|
17
|
+
default_qual = nil
|
|
18
|
+
|
|
19
|
+
if !File.exists?(qual)
|
|
20
|
+
fqr=FastaFile.new(fasta)
|
|
21
|
+
puts "Quality file doesn't exists. Using default qual value = 40"
|
|
22
|
+
default_qual = [40]
|
|
23
|
+
else
|
|
24
|
+
fqr=FastaQualFile.new(fasta,qual)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
output=FastqFile.new(output_name+'.fastq','w')
|
|
28
|
+
|
|
29
|
+
fqr.each do |seq_name,seq_fasta,seq_qual|
|
|
30
|
+
if default_qual
|
|
31
|
+
seq_qual = default_qual * seq_fasta.length
|
|
32
|
+
end
|
|
33
|
+
output.write_seq(seq_name,seq_fasta,seq_qual)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
output.close
|
|
37
|
+
fqr.close
|
|
38
|
+
|
data/bin/fastq2fasta.rb
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fastq'
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
if ARGV.count < 2
|
|
7
|
+
puts "#{$0} FASTQ OUTPUT_NAME"
|
|
8
|
+
exit
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
fastq = ARGV.shift
|
|
14
|
+
output_name = ARGV.shift
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
fasta = File.open(output_name+'.fasta','w')
|
|
18
|
+
qual = File.open(output_name+'.fasta.qual','w')
|
|
19
|
+
|
|
20
|
+
fqr=FastqFile.new(fastq)
|
|
21
|
+
|
|
22
|
+
fqr.each do |seq_name,seq_fasta,seq_qual,comments|
|
|
23
|
+
|
|
24
|
+
fasta.puts ">#{seq_name} #{comments}"
|
|
25
|
+
fasta.puts seq_fasta
|
|
26
|
+
|
|
27
|
+
qual.puts ">#{seq_name} #{comments}"
|
|
28
|
+
qual.puts seq_qual.join(' ')
|
|
29
|
+
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
fasta.close
|
|
33
|
+
qual.close
|
|
34
|
+
fqr.close
|
|
35
|
+
|
data/bin/gen_qual.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fasta'
|
|
4
|
+
|
|
5
|
+
GOOD_QUAL=50
|
|
6
|
+
BAD_QUAL=10
|
|
7
|
+
DOWN_CASE=('a'..'z')
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Array
|
|
11
|
+
def count
|
|
12
|
+
self.length
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
if ARGV.count != 2
|
|
18
|
+
puts "Programa ENTRADA SALIDA"
|
|
19
|
+
exit
|
|
20
|
+
else
|
|
21
|
+
puts ARGV[0]
|
|
22
|
+
puts ARGV[1]
|
|
23
|
+
|
|
24
|
+
fqr=FastaQualFile.new(ARGV[0])
|
|
25
|
+
|
|
26
|
+
f = File.new(ARGV[1],'w+')
|
|
27
|
+
|
|
28
|
+
fqr.each do |seq_name,seq_fasta,seq_qual|
|
|
29
|
+
f.puts ">#{seq_name}"
|
|
30
|
+
res =[]
|
|
31
|
+
seq_fasta.each_char do |c|
|
|
32
|
+
if DOWN_CASE.include?(c)
|
|
33
|
+
res << BAD_QUAL
|
|
34
|
+
else
|
|
35
|
+
res << GOOD_QUAL
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
f.puts res.join(' ')
|
|
40
|
+
#f.puts "50 "*seq_fasta.length
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
f.close
|
|
44
|
+
fqr.close
|
|
45
|
+
|
|
46
|
+
end
|
data/bin/get_seq.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'scbi_fasta'
|
|
4
|
+
|
|
5
|
+
GOOD_QUAL=50
|
|
6
|
+
BAD_QUAL=10
|
|
7
|
+
DOWN_CASE=('a'..'z')
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Array
|
|
11
|
+
def count
|
|
12
|
+
self.length
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
if ARGV.count < 3
|
|
18
|
+
puts "#{$0} FASTA QUAL SEQ_NAME [f|q|fq]"
|
|
19
|
+
exit
|
|
20
|
+
else
|
|
21
|
+
|
|
22
|
+
fqr=FastaQualFile.new(ARGV[0],ARGV[1])
|
|
23
|
+
get_type = 'fq'
|
|
24
|
+
if ARGV.count == 4
|
|
25
|
+
get_type=ARGV[3]
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
fqr.each do |seq_name,seq_fasta,seq_qual|
|
|
29
|
+
if seq_name == ARGV[2]
|
|
30
|
+
if get_type.index('f')
|
|
31
|
+
puts ">#{seq_name}"
|
|
32
|
+
puts seq_fasta
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
if get_type.index('q')
|
|
36
|
+
puts ">#{seq_name}"
|
|
37
|
+
puts seq_qual
|
|
38
|
+
end
|
|
39
|
+
break
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
fqr.close
|
|
45
|
+
|
|
46
|
+
end
|