seqtrimnext 2.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ === 2.0.29 2011-06-06
2
+
3
+ * Initial public release on rubygems
data/Manifest.txt ADDED
@@ -0,0 +1,114 @@
1
+ bin/create_graphs.rb
2
+ bin/extract_seqs.rb
3
+ bin/extract_seqs_from_fasta.rb
4
+ bin/extract_seqs_from_fastq.rb
5
+ bin/fasta2fastq.rb
6
+ bin/fastq2fasta.rb
7
+ bin/gen_qual.rb
8
+ bin/get_seq.rb
9
+ bin/group_by_range.rb
10
+ bin/join_ilumina_paired.rb
11
+ bin/parse_amplicons.rb
12
+ bin/parse_json_results.rb
13
+ bin/parse_params.rb
14
+ bin/resume_clusters.rb
15
+ bin/resume_rejected.sh
16
+ bin/reverse_paired.rb
17
+ bin/seqtrimnext
18
+ bin/split_fastq.rb
19
+ bin/split_ilumina_paired.rb
20
+ bin/split_paired.rb
21
+ lib/seqtrimnext/actions/action_ab_adapter.rb
22
+ lib/seqtrimnext/actions/action_ab_far_adapter.rb
23
+ lib/seqtrimnext/actions/action_ab_left_adapter.rb
24
+ lib/seqtrimnext/actions/action_empty_insert.rb
25
+ lib/seqtrimnext/actions/action_ignore_repeated.rb
26
+ lib/seqtrimnext/actions/action_indetermination.rb
27
+ lib/seqtrimnext/actions/action_induced_low_complexity.rb
28
+ lib/seqtrimnext/actions/action_insert.rb
29
+ lib/seqtrimnext/actions/action_is_contaminated.rb
30
+ lib/seqtrimnext/actions/action_key.rb
31
+ lib/seqtrimnext/actions/action_left_adapter.rb
32
+ lib/seqtrimnext/actions/action_left_primer.rb
33
+ lib/seqtrimnext/actions/action_linker.rb
34
+ lib/seqtrimnext/actions/action_low_complexity.rb
35
+ lib/seqtrimnext/actions/action_low_high_size.rb
36
+ lib/seqtrimnext/actions/action_low_quality.rb
37
+ lib/seqtrimnext/actions/action_mid.rb
38
+ lib/seqtrimnext/actions/action_multiple_linker.rb
39
+ lib/seqtrimnext/actions/action_paired_reads.rb
40
+ lib/seqtrimnext/actions/action_poly_a.rb
41
+ lib/seqtrimnext/actions/action_poly_t.rb
42
+ lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
43
+ lib/seqtrimnext/actions/action_right_adapter.rb
44
+ lib/seqtrimnext/actions/action_right_primer.rb
45
+ lib/seqtrimnext/actions/action_short_insert.rb
46
+ lib/seqtrimnext/actions/action_unexpected_poly_t.rb
47
+ lib/seqtrimnext/actions/action_unexpected_vector.rb
48
+ lib/seqtrimnext/actions/action_vectors.rb
49
+ lib/seqtrimnext/actions/seqtrim_action.rb
50
+ lib/seqtrimnext/classes/action_manager.rb
51
+ lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb
52
+ lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb
53
+ lib/seqtrimnext/classes/extract_stats.rb
54
+ lib/seqtrimnext/classes/gnu_plot_graph.rb
55
+ lib/seqtrimnext/classes/graph_stats.rb
56
+ lib/seqtrimnext/classes/install_database.rb
57
+ lib/seqtrimnext/classes/install_requirements.rb
58
+ lib/seqtrimnext/classes/list_db.rb
59
+ lib/seqtrimnext/classes/make_blast_db.rb
60
+ lib/seqtrimnext/classes/one_blast.rb
61
+ lib/seqtrimnext/classes/params.rb
62
+ lib/seqtrimnext/classes/piro.rb
63
+ lib/seqtrimnext/classes/plugin_manager.rb
64
+ lib/seqtrimnext/classes/scan_for_restr_site.rb
65
+ lib/seqtrimnext/classes/scbi_stats.rb
66
+ lib/seqtrimnext/classes/seqtrim.rb
67
+ lib/seqtrimnext/classes/sequence.rb
68
+ lib/seqtrimnext/classes/sequence_group.rb
69
+ lib/seqtrimnext/classes/sequence_with_action.rb
70
+ lib/seqtrimnext/plugins/plugin.rb
71
+ lib/seqtrimnext/plugins/plugin_ab_adapters.rb
72
+ lib/seqtrimnext/plugins/plugin_adapters.rb
73
+ lib/seqtrimnext/plugins/plugin_amplicons.rb
74
+ lib/seqtrimnext/plugins/plugin_contaminants.rb
75
+ lib/seqtrimnext/plugins/plugin_extract_inserts.rb
76
+ lib/seqtrimnext/plugins/plugin_find_poly_at.rb
77
+ lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
78
+ lib/seqtrimnext/plugins/plugin_indeterminations.rb
79
+ lib/seqtrimnext/plugins/plugin_key.rb
80
+ lib/seqtrimnext/plugins/plugin_linker.rb
81
+ lib/seqtrimnext/plugins/plugin_low_complexity.rb
82
+ lib/seqtrimnext/plugins/plugin_low_high_size.rb
83
+ lib/seqtrimnext/plugins/plugin_low_quality.rb
84
+ lib/seqtrimnext/plugins/plugin_mids.rb
85
+ lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
86
+ lib/seqtrimnext/plugins/plugin_short_insert.rb
87
+ lib/seqtrimnext/plugins/plugin_vectors.rb
88
+ lib/seqtrimnext/templates/amplicons.txt
89
+ lib/seqtrimnext/templates/genomics_454.txt
90
+ lib/seqtrimnext/templates/genomics_454_with_paired.txt
91
+ lib/seqtrimnext/templates/low_quality.txt
92
+ lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
93
+ lib/seqtrimnext/templates/transcriptomics_454.txt
94
+ lib/seqtrimnext/templates/transcriptomics_plants.txt
95
+ lib/seqtrimnext/utils/extract_samples.rb
96
+ lib/seqtrimnext/utils/fasta2xml.rb
97
+ lib/seqtrimnext/utils/global_match.rb
98
+ lib/seqtrimnext/utils/hash_stats.rb
99
+ lib/seqtrimnext/utils/json_utils.rb
100
+ lib/seqtrimnext/utils/load_fasta_names_in_hash.rb
101
+ lib/seqtrimnext/utils/load_qual_in_hash.rb
102
+ lib/seqtrimnext/utils/recover_mid.rb
103
+ lib/seqtrimnext/utils/string_utils.rb
104
+ lib/seqtrimnext.rb
105
+ History.txt
106
+ Manifest.txt
107
+ PostInstall.txt
108
+ Rakefile
109
+ README.rdoc
110
+ script/console
111
+ script/destroy
112
+ script/generate
113
+ test/test_helper.rb
114
+ test/test_seqtrimnext.rb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+ For more information on seqtrimnext, see http://www.scbi.uma.es
2
+
3
+ Remember to install the following external prerequisites if you don't have them already installed.
4
+
5
+ If you need to install core databases, issue the following command:
6
+
7
+ seqtrimnext -i core
data/README.rdoc ADDED
@@ -0,0 +1,159 @@
1
+ = seqtrimnext
2
+
3
+ * http://www.scbi.uma.es/downloads
4
+
5
+ == DESCRIPTION:
6
+
7
+ SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of the scbi_mapreduce gem to be able to run in parallel and distributed environments. It is especially suited for Roche 454 (normal and paired-end) & Illumina datasets, although it could be easily adapted to any other situation.
8
+
9
+ == FEATURES:
10
+
11
+ * SeqtrimNEXT is very flexible since its architecture is based on plugins.
12
+ * You can add new plugins if needed.
13
+ * SeqtrimNEXT uses scbi_mapreduce and thus is able to exploit all the benefits of a cluster environment. It also works on multi-core machines and big shared-memory servers.
14
+
15
+ == Default templates for genomics & transcriptomics are provided
16
+
17
+ <b>genomics_454.txt</b>:: cleans genomics data from Roche 454 sequencer.
18
+ <b>genomics_454_with_paired.txt</b>:: cleans genomic data from a paired-end experiment sequenced with a Roche 454 sequencer.
19
+ <b>low_quality.txt</b>:: trims low quality.
20
+ <b>low_quality_and_low_complexity.txt</b>:: trims low quality and low complexity.
21
+ <b>transcriptomics_454.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer.
22
+ <b>transcriptomics_plants.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer with extra databases for plants.
23
+ <b>amplicons.txt</b>:: filters amplicons.
24
+
25
+ == You can define your own templates using a combination of available plugins:
26
+
27
+ <b>PluginKey</b>:: to remove sequencing keys from 454 input sequences.
28
+ <b>PluginMids</b>:: to remove MIDS (barcodes) from 454 sequences.
29
+ <b>PluginLinker</b>:: splits sequences into two inserts when a valid linker is found (paired-end experiments only)
30
+ <b>PluginAbAdapters</b>:: removes AB adapters from sequences using a predefined DB or one provided by the user.
31
+ <b>PluginFindPolyAt</b>:: removes polyA and polyT from sequences.
32
+ <b>PluginLowComplexity</b>:: filters sequences with low complexity regions
33
+ <b>PluginAdapters</b>:: removes Adapters from sequences using a predefined DB or one provided by the user.
34
+ <b>PluginLowHighSize</b>:: removes sequences too small or too big.
35
+ <b>PluginVectors</b>:: removes vectors from sequences using a predefined database or one provided by the user.
36
+ <b>PluginAmplicons</b>:: filters amplicons using user predefined primers.
37
+ <b>PluginIndeterminations</b>:: removes indeterminations (N) from the sequence.
38
+ <b>PluginLowQuality</b>:: eliminates low quality regions from sequences.
39
+ <b>PluginContaminants</b>:: removes contaminants from sequences or rejects contaminated ones. It uses a core database, but it can be expanded with user-provided ones.
40
+
41
+
42
+
43
+ == SYNOPSIS:
44
+
45
+ Once installed, SeqtrimNEXT is very easy to use:
46
+
47
+ To install core databases (it should be done at installation time):
48
+
49
+ $> seqtrimnext -i
50
+
51
+ To perform an analysis using a predefined template with an input file in FASTQ format using 4 CPUs:
52
+
53
+ $> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w 4
54
+
55
+ To perform an analysis using a predefined template with input files in FASTA and QUAL format:
56
+
57
+ $> seqtrimnext -t genomics_454.txt -f input_file_in_FASTA -q input_file_in_QUAL
58
+
59
+
60
+ To get additional help and list available templates and databases:
61
+
62
+ $> seqtrimnext -h
63
+
64
+
65
+ == REQUIREMENTS:
66
+
67
+ * Ruby 1.9.2
68
+ * Blast plus 2.2.24 or greater (prior versions have bugs that produce bad results)
69
+ * [Optional] - GnuPlot version 4.4.2 or greater (prior versions may produce wrong graphs)
70
+ * [Optional] - pdflatex - Optional, to produce a detailed report with results
71
+
72
+ == INSTALL:
73
+
74
+ === Installing Blast
75
+
76
+ * Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/release/LATEST/
77
+ * You can also use a precompiled version if you like
78
+ * To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
79
+
80
+ ./configure
81
+ make
82
+ sudo make install
83
+
84
+
85
+ === Installing Ruby 1.9
86
+
87
+ * You can use RVM to install ruby:
88
+
89
+ Install RVM:
90
+
91
+ $ bash < <(curl -s https://rvm.beginrescueend.com/install/rvm)
92
+
93
+ Setup environment:
94
+
95
+ $ echo '[[ -s "$HOME/.rvm/scripts/rvm" ]] && . "$HOME/.rvm/scripts/rvm" # Load RVM function' >> ~/.bash_profile
96
+
97
+ Install ruby 1.9.2 (this can take a while):
98
+
99
+ $ rvm install 1.9.2
100
+
101
+ Set it as the default:
102
+
103
+ $ rvm use 1.9.2 --default
104
+
105
+ === Install SeqtrimNEXT
106
+
107
+ SeqtrimNEXT is very easy to install. It is distributed as a ruby gem:
108
+
109
+ gem install seqtrimnext
110
+
111
+ This will install seqtrimnext and all the required gems.
112
+
113
+ === Install and rebuild SeqtrimNext's core databases
114
+
115
+ SeqtrimNEXT needs some core databases to work. To install them:
116
+
117
+ seqtrimnext -i core
118
+
119
+ === Database modifications
120
+
121
+ Included databases will be useful for a lot of people, but if you prefer, you can modify them, or add more elements to be searched against your sequences.
122
+
123
+ You only need to drop new fasta files to each respective directory:
124
+
125
+ DB/vectors to add more vectors
126
+ DB/contaminants to add more contaminants
127
+ etc...
128
+
129
+ Once the databases have been modified, you will need to reformat them by issuing the following command:
130
+
131
+ seqtrimnext -c
132
+
133
+ Modified databases will be rebuilt.
134
+
135
+
136
+ == LICENSE:
137
+
138
+ (The MIT License)
139
+
140
+ Copyright (c) 2011 Almudena Bocinos & Dario Guerrero
141
+
142
+ Permission is hereby granted, free of charge, to any person obtaining
143
+ a copy of this software and associated documentation files (the
144
+ 'Software'), to deal in the Software without restriction, including
145
+ without limitation the rights to use, copy, modify, merge, publish,
146
+ distribute, sublicense, and/or sell copies of the Software, and to
147
+ permit persons to whom the Software is furnished to do so, subject to
148
+ the following conditions:
149
+
150
+ The above copyright notice and this permission notice shall be
151
+ included in all copies or substantial portions of the Software.
152
+
153
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
154
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
155
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
156
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
157
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
158
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
159
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,38 @@
# Rakefile for the seqtrimnext gem: declares the gem specification through Hoe,
# then loads the newgem tasks and any project-local rake task files.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/seqtrimnext'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'seqtrimnext' do
  self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = self.name # TODO this is default value

  # Runtime gems required by seqtrimnext; every one is accepted at any version.
  runtime_gems = %w[
    narray gnuplot term-ansicolor xml-simple
    scbi_blast scbi_mapreduce scbi_fasta scbi_fastq scbi_plot scbi_math
  ]
  self.extra_deps = runtime_gems.map { |gem_name| [gem_name, '>=0'] }
end

require 'newgem/tasks'

# Load every project-local rake task definition.
Dir['tasks/**/*.rake'].each { |task_file| load task_file }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features, :redocs]
@@ -0,0 +1,46 @@
# Loads two JSON statistics files and hands them to GraphStats.
#
# Usage: create_graphs.rb stats.json initial_stats.json
require 'stringio'
# require 'test/unit'
require 'json'
require 'gnuplot'

ROOT_PATH=File.dirname(File.dirname(__FILE__))

# NOTE(review): these load-path entries point at ROOT_PATH/{classes,plugins,utils};
# in the gem layout those directories live under lib/seqtrimnext - confirm the
# location this script is expected to run from.
# $: << File.expand_path(File.join(ROOT_PATH,'test'))
$: << File.expand_path(File.join(ROOT_PATH,'classes'))
$: << File.expand_path(File.join(ROOT_PATH,'plugins'))
$: << File.expand_path(File.join(ROOT_PATH,'utils'))

# Both files are mandatory: with a single argument File.read(ARGV[1]) would be
# called with nil and raise, so show the usage line instead.
if ARGV.size < 2
  puts "Usage: #{$0} stats.json initial_stats.json"
  exit
end

plugin_files = Dir.glob(File.expand_path(File.join(ROOT_PATH,'plugins','*.rb')))

# The base plugin is required first, then every plugin file found above.
require 'plugin'

# require 'params'

plugin_files.each do |plugin_file|
  require plugin_file
end

require 'graph_stats'

# Load stats
stats = JSON.parse(File.read(ARGV[0]))
init_stats = JSON.parse(File.read(ARGV[1]))

# The instance is not used afterwards; presumably the constructor itself
# produces the graphs - verify against GraphStats.
GraphStats.new(stats,init_stats)

puts "Graphs generated"
@@ -0,0 +1,45 @@
#!/usr/bin/env ruby
# Copies the named sequences, with their qualities, from a FASTA/QUAL pair into
# OUTPUT_NAME.fasta and OUTPUT_NAME.fasta.qual (both opened in append mode).
#
# Usage: extract_seqs.rb FASTA QUAL OUTPUT_NAME SEQ_NAME [MORE_SEQ_NAMES]

# NOTE(review): FastaQualFile is loaded through 'scbi_fasta' in the sibling
# extract_seqs_from_fasta.rb script, so it is required here as well - confirm.
require 'scbi_fasta'
require 'scbi_fastq'

# Three positional arguments are consumed below, so at least one sequence name
# must follow them. ARGV.size is used instead of reopening Array to redefine
# #count, which would clobber the built-in Array#count for every loaded library.
if ARGV.size < 4
  puts "#{$0} FASTA QUAL OUTPUT_NAME SEQ_NAME [MORE_SEQ_NAMES]"
  exit
end

fasta = ARGV.shift
qual = ARGV.shift
output_name = ARGV.shift
seqs = ARGV # the remaining arguments are the sequence names still to be found
puts seqs.join(';')

fqr = FastaQualFile.new(fasta, qual)

begin
  # Block form guarantees both output files are closed even if reading fails.
  File.open(output_name + '.fasta', 'a') do |output_fasta|
    File.open(output_name + '.fasta.qual', 'a') do |output_qual|
      fqr.each do |seq_name, seq_fasta, seq_qual|
        # Array#delete returns nil when the name was not requested.
        next unless seqs.delete(seq_name)

        output_fasta.puts ">#{seq_name}"
        output_fasta.puts seq_fasta
        output_qual.puts ">#{seq_name}"
        output_qual.puts seq_qual

        # Stop scanning the input once every requested name has been written.
        break if seqs.empty?
      end
    end
  end
ensure
  fqr.close
end
@@ -0,0 +1,56 @@
#!/usr/bin/env ruby
# Copies the sequences listed in SEQ_NAMES_FILE (one name per line), with their
# qualities, from a FASTA/QUAL pair into OUTPUT_NAME.fasta and
# OUTPUT_NAME.fasta.qual (both opened in append mode).
#
# Usage: extract_seqs_from_fasta.rb FASTA QUAL OUTPUT_NAME SEQ_NAMES_FILE

require 'scbi_fasta'

# GOOD_QUAL=50
# BAD_QUAL=10
# DOWN_CASE=('a'..'z')

# ARGV.size is used instead of reopening Array to redefine #count, which would
# clobber the built-in Array#count for every loaded library.
if ARGV.size < 4
  puts "#{$0} FASTA QUAL OUTPUT_NAME SEQ_NAMES_FILE"
  exit
end

fasta = ARGV.shift
qual = ARGV.shift
output_name = ARGV.shift
seqs_file = ARGV.shift

# File.readlines closes the names file for us. Blank lines are dropped so that
# an empty name cannot keep the list from ever becoming empty (which would
# disable the early exit below).
seqs = File.readlines(seqs_file).map { |line| line.strip }.reject { |name| name.empty? }
# puts seqs.join(';')

fqr = FastaQualFile.new(fasta, qual)

begin
  # Block form guarantees both output files are closed even if reading fails.
  File.open(output_name + '.fasta', 'a') do |output_fasta|
    File.open(output_name + '.fasta.qual', 'a') do |output_qual|
      fqr.each do |seq_name, seq_fasta, seq_qual|
        # Array#delete returns nil when the name was not requested.
        next unless seqs.delete(seq_name)

        output_fasta.puts ">#{seq_name}"
        output_fasta.puts seq_fasta
        output_qual.puts ">#{seq_name}"
        output_qual.puts seq_qual

        # Stop scanning the input once every requested name has been written.
        break if seqs.empty?
      end
    end
  end
ensure
  fqr.close
end
@@ -0,0 +1,45 @@
#!/usr/bin/env ruby
# Copies the sequences listed in SEQ_NAMES_FILE (one name per line) from a
# FASTQ file into OUTPUT_NAME.fastq.
#
# Usage: extract_seqs_from_fastq.rb FASTQ OUTPUT_NAME SEQ_NAMES_FILE

require 'scbi_fastq'

# ARGV.size is used instead of reopening Array to redefine #count, which would
# clobber the built-in Array#count for every loaded library.
if ARGV.size != 3
  puts "#{$0} FASTQ OUTPUT_NAME SEQ_NAMES_FILE"
  exit
end

fastq = ARGV.shift
output_name = ARGV.shift
seqs_file = ARGV.shift

# File.readlines closes the names file for us. Blank lines are dropped so that
# an empty name cannot keep the list from ever becoming empty (which would
# disable the early exit below).
seqs = File.readlines(seqs_file).map { |line| line.strip }.reject { |name| name.empty? }
puts seqs.join(';')

fqr = FastqFile.new(fastq)
output_fastq = FastqFile.new(output_name + '.fastq', 'w')

begin
  fqr.each do |seq_name, seq_fasta, seq_qual|
    # Array#delete returns nil when the name was not requested.
    next unless seqs.delete(seq_name)

    output_fastq.write_seq(seq_name, seq_fasta, seq_qual)

    # Stop scanning the input once every requested name has been written.
    break if seqs.empty?
  end
ensure
  # Close both handles even when reading or writing raised.
  output_fastq.close
  fqr.close
end
@@ -0,0 +1,38 @@
#!/usr/bin/env ruby
# Converts a FASTA file (plus an optional QUAL file) into OUTPUT_NAME.fastq.
# When the QUAL file does not exist, every base gets a quality value of 40.
#
# Usage: fasta2fastq.rb FASTA QUAL OUTPUT_NAME

require 'scbi_fasta'
require 'scbi_fastq'

if ARGV.size < 3
  puts "#{$0} FASTA QUAL OUTPUT_NAME"
  exit
end

fasta = ARGV.shift
qual = ARGV.shift
output_name = ARGV.shift
default_qual = nil

# File.exist? replaces the deprecated File.exists?, which was removed in Ruby 3.2.
if File.exist?(qual)
  fqr = FastaQualFile.new(fasta, qual)
else
  fqr = FastaFile.new(fasta)
  puts "Quality file doesn't exists. Using default qual value = 40"
  default_qual = [40]
end

output = FastqFile.new(output_name + '.fastq', 'w')

begin
  fqr.each do |seq_name, seq_fasta, seq_qual|
    # Without a QUAL file, build one default value per base of the sequence.
    seq_qual = default_qual * seq_fasta.length if default_qual
    output.write_seq(seq_name, seq_fasta, seq_qual)
  end
ensure
  # Close both handles even when the conversion raised.
  output.close
  fqr.close
end
@@ -0,0 +1,35 @@
#!/usr/bin/env ruby
# Splits a FASTQ file into OUTPUT_NAME.fasta (sequences) and
# OUTPUT_NAME.fasta.qual (space-separated quality values).
#
# Usage: fastq2fasta.rb FASTQ OUTPUT_NAME

require 'scbi_fastq'

if ARGV.count < 2
  puts "#{$0} FASTQ OUTPUT_NAME"
  exit
end

input_path, prefix = ARGV.shift(2)

seq_out = File.open("#{prefix}.fasta", 'w')
qual_out = File.open("#{prefix}.fasta.qual", 'w')

reader = FastqFile.new(input_path)

reader.each do |name, bases, scores, comments|
  # Both output files share the same header line for each record.
  header = ">#{name} #{comments}"

  seq_out.puts header
  seq_out.puts bases

  qual_out.puts header
  qual_out.puts scores.join(' ')
end

seq_out.close
qual_out.close
reader.close
data/bin/gen_qual.rb ADDED
@@ -0,0 +1,46 @@
#!/usr/bin/env ruby
# Writes a synthetic quality file for a FASTA file: characters in a..z get
# BAD_QUAL and every other character gets GOOD_QUAL.
#
# Usage: gen_qual.rb INPUT OUTPUT

require 'scbi_fasta'

GOOD_QUAL=50
BAD_QUAL=10
DOWN_CASE=('a'..'z')

# ARGV.size is used instead of reopening Array to redefine #count, which would
# clobber the built-in Array#count for every loaded library.
if ARGV.size != 2
  puts "Programa ENTRADA SALIDA"
  exit
end

puts ARGV[0]
puts ARGV[1]

# NOTE(review): opened without a QUAL file argument; the quality value yielded
# by the reader is ignored below - confirm FastaQualFile supports this call.
fqr = FastaQualFile.new(ARGV[0])

begin
  # Block form guarantees the output file is closed even if reading fails.
  File.open(ARGV[1], 'w+') do |f|
    fqr.each do |seq_name, seq_fasta, _seq_qual|
      f.puts ">#{seq_name}"

      quals = seq_fasta.each_char.map do |c|
        DOWN_CASE.include?(c) ? BAD_QUAL : GOOD_QUAL
      end

      f.puts quals.join(' ')
      #f.puts "50 "*seq_fasta.length
    end
  end
ensure
  fqr.close
end
data/bin/get_seq.rb ADDED
@@ -0,0 +1,46 @@
#!/usr/bin/env ruby
# Prints one sequence from a FASTA/QUAL pair to standard output.
#
# Usage: get_seq.rb FASTA QUAL SEQ_NAME [f|q|fq]
#   f  - print only the sequence
#   q  - print only the qualities
#   fq - print both (default)

require 'scbi_fasta'

# The GOOD_QUAL, BAD_QUAL and DOWN_CASE constants were never referenced here and
# have been dropped. ARGV.size is used instead of reopening Array to redefine
# #count, which would clobber the built-in Array#count for every loaded library.
if ARGV.size < 3
  puts "#{$0} FASTA QUAL SEQ_NAME [f|q|fq]"
  exit
end

wanted_name = ARGV[2]

# The output selector is honoured only when exactly four arguments are given.
get_type = ARGV.size == 4 ? ARGV[3] : 'fq'

fqr = FastaQualFile.new(ARGV[0], ARGV[1])

begin
  fqr.each do |seq_name, seq_fasta, seq_qual|
    next unless seq_name == wanted_name

    if get_type.index('f')
      puts ">#{seq_name}"
      puts seq_fasta
    end

    if get_type.index('q')
      puts ">#{seq_name}"
      puts seq_qual
    end

    # Only the first matching record is printed.
    break
  end
ensure
  fqr.close
end