seqtrimnext 2.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ === 2.0.29 2011-06-06
2
+
3
+ * Initial public release on rubygems
data/Manifest.txt ADDED
@@ -0,0 +1,114 @@
1
+ bin/create_graphs.rb
2
+ bin/extract_seqs.rb
3
+ bin/extract_seqs_from_fasta.rb
4
+ bin/extract_seqs_from_fastq.rb
5
+ bin/fasta2fastq.rb
6
+ bin/fastq2fasta.rb
7
+ bin/gen_qual.rb
8
+ bin/get_seq.rb
9
+ bin/group_by_range.rb
10
+ bin/join_ilumina_paired.rb
11
+ bin/parse_amplicons.rb
12
+ bin/parse_json_results.rb
13
+ bin/parse_params.rb
14
+ bin/resume_clusters.rb
15
+ bin/resume_rejected.sh
16
+ bin/reverse_paired.rb
17
+ bin/seqtrimnext
18
+ bin/split_fastq.rb
19
+ bin/split_ilumina_paired.rb
20
+ bin/split_paired.rb
21
+ lib/seqtrimnext/actions/action_ab_adapter.rb
22
+ lib/seqtrimnext/actions/action_ab_far_adapter.rb
23
+ lib/seqtrimnext/actions/action_ab_left_adapter.rb
24
+ lib/seqtrimnext/actions/action_empty_insert.rb
25
+ lib/seqtrimnext/actions/action_ignore_repeated.rb
26
+ lib/seqtrimnext/actions/action_indetermination.rb
27
+ lib/seqtrimnext/actions/action_induced_low_complexity.rb
28
+ lib/seqtrimnext/actions/action_insert.rb
29
+ lib/seqtrimnext/actions/action_is_contaminated.rb
30
+ lib/seqtrimnext/actions/action_key.rb
31
+ lib/seqtrimnext/actions/action_left_adapter.rb
32
+ lib/seqtrimnext/actions/action_left_primer.rb
33
+ lib/seqtrimnext/actions/action_linker.rb
34
+ lib/seqtrimnext/actions/action_low_complexity.rb
35
+ lib/seqtrimnext/actions/action_low_high_size.rb
36
+ lib/seqtrimnext/actions/action_low_quality.rb
37
+ lib/seqtrimnext/actions/action_mid.rb
38
+ lib/seqtrimnext/actions/action_multiple_linker.rb
39
+ lib/seqtrimnext/actions/action_paired_reads.rb
40
+ lib/seqtrimnext/actions/action_poly_a.rb
41
+ lib/seqtrimnext/actions/action_poly_t.rb
42
+ lib/seqtrimnext/actions/action_rem_adit_artifacts.rb
43
+ lib/seqtrimnext/actions/action_right_adapter.rb
44
+ lib/seqtrimnext/actions/action_right_primer.rb
45
+ lib/seqtrimnext/actions/action_short_insert.rb
46
+ lib/seqtrimnext/actions/action_unexpected_poly_t.rb
47
+ lib/seqtrimnext/actions/action_unexpected_vector.rb
48
+ lib/seqtrimnext/actions/action_vectors.rb
49
+ lib/seqtrimnext/actions/seqtrim_action.rb
50
+ lib/seqtrimnext/classes/action_manager.rb
51
+ lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb
52
+ lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb
53
+ lib/seqtrimnext/classes/extract_stats.rb
54
+ lib/seqtrimnext/classes/gnu_plot_graph.rb
55
+ lib/seqtrimnext/classes/graph_stats.rb
56
+ lib/seqtrimnext/classes/install_database.rb
57
+ lib/seqtrimnext/classes/install_requirements.rb
58
+ lib/seqtrimnext/classes/list_db.rb
59
+ lib/seqtrimnext/classes/make_blast_db.rb
60
+ lib/seqtrimnext/classes/one_blast.rb
61
+ lib/seqtrimnext/classes/params.rb
62
+ lib/seqtrimnext/classes/piro.rb
63
+ lib/seqtrimnext/classes/plugin_manager.rb
64
+ lib/seqtrimnext/classes/scan_for_restr_site.rb
65
+ lib/seqtrimnext/classes/scbi_stats.rb
66
+ lib/seqtrimnext/classes/seqtrim.rb
67
+ lib/seqtrimnext/classes/sequence.rb
68
+ lib/seqtrimnext/classes/sequence_group.rb
69
+ lib/seqtrimnext/classes/sequence_with_action.rb
70
+ lib/seqtrimnext/plugins/plugin.rb
71
+ lib/seqtrimnext/plugins/plugin_ab_adapters.rb
72
+ lib/seqtrimnext/plugins/plugin_adapters.rb
73
+ lib/seqtrimnext/plugins/plugin_amplicons.rb
74
+ lib/seqtrimnext/plugins/plugin_contaminants.rb
75
+ lib/seqtrimnext/plugins/plugin_extract_inserts.rb
76
+ lib/seqtrimnext/plugins/plugin_find_poly_at.rb
77
+ lib/seqtrimnext/plugins/plugin_ignore_repeated.rb
78
+ lib/seqtrimnext/plugins/plugin_indeterminations.rb
79
+ lib/seqtrimnext/plugins/plugin_key.rb
80
+ lib/seqtrimnext/plugins/plugin_linker.rb
81
+ lib/seqtrimnext/plugins/plugin_low_complexity.rb
82
+ lib/seqtrimnext/plugins/plugin_low_high_size.rb
83
+ lib/seqtrimnext/plugins/plugin_low_quality.rb
84
+ lib/seqtrimnext/plugins/plugin_mids.rb
85
+ lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb
86
+ lib/seqtrimnext/plugins/plugin_short_insert.rb
87
+ lib/seqtrimnext/plugins/plugin_vectors.rb
88
+ lib/seqtrimnext/templates/amplicons.txt
89
+ lib/seqtrimnext/templates/genomics_454.txt
90
+ lib/seqtrimnext/templates/genomics_454_with_paired.txt
91
+ lib/seqtrimnext/templates/low_quality.txt
92
+ lib/seqtrimnext/templates/low_quality_and_low_complexity.txt
93
+ lib/seqtrimnext/templates/transcriptomics_454.txt
94
+ lib/seqtrimnext/templates/transcriptomics_plants.txt
95
+ lib/seqtrimnext/utils/extract_samples.rb
96
+ lib/seqtrimnext/utils/fasta2xml.rb
97
+ lib/seqtrimnext/utils/global_match.rb
98
+ lib/seqtrimnext/utils/hash_stats.rb
99
+ lib/seqtrimnext/utils/json_utils.rb
100
+ lib/seqtrimnext/utils/load_fasta_names_in_hash.rb
101
+ lib/seqtrimnext/utils/load_qual_in_hash.rb
102
+ lib/seqtrimnext/utils/recover_mid.rb
103
+ lib/seqtrimnext/utils/string_utils.rb
104
+ lib/seqtrimnext.rb
105
+ History.txt
106
+ Manifest.txt
107
+ PostInstall.txt
108
+ Rakefile
109
+ README.rdoc
110
+ script/console
111
+ script/destroy
112
+ script/generate
113
+ test/test_helper.rb
114
+ test/test_seqtrimnext.rb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+ For more information on seqtrimnext, see http://www.scbi.uma.es
2
+
3
+ Remember to install the external prerequisites (see the REQUIREMENTS section of README.rdoc) if you don't have them already installed.
4
+
5
+ If you need to install core databases, issue the following command:
6
+
7
+ seqtrimnext -i core
data/README.rdoc ADDED
@@ -0,0 +1,159 @@
1
+ = seqtrimnext
2
+
3
+ * http://www.scbi.uma.es/downloads
4
+
5
+ == DESCRIPTION:
6
+
7
+ SeqtrimNEXT is a customizable and distributed pre-processing software for NGS (Next Generation Sequencing) biological data. It makes use of the scbi_mapreduce gem to be able to run in parallel and distributed environments. It is especially suited for Roche 454 (normal and paired-end) & Illumina datasets, although it could be easily adapted to any other situation.
8
+
9
+ == FEATURES:
10
+
11
+ * SeqtrimNEXT is very flexible since its architecture is based on plugins.
12
+ * You can add new plugins if needed.
13
+ * SeqtrimNEXT uses scbi_mapreduce and thus is able to exploit all the benefits of a cluster environment. It also works on multi-core machines and big shared-memory servers.
14
+
15
+ == Default templates for genomics & transcriptomics are provided
16
+
17
+ <b>genomics_454.txt</b>:: cleans genomics data from Roche 454 sequencer.
18
+ <b>genomics_454_with_paired.txt</b>:: cleans genomic data from a paired-end experiment sequenced with a Roche 454 sequencer.
19
+ <b>low_quality.txt</b>:: trims low quality.
20
+ <b>low_quality_and_low_complexity.txt</b>:: trims low quality and low complexity.
21
+ <b>transcriptomics_454.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer.
22
+ <b>transcriptomics_plants.txt</b>:: cleans transcriptomics data from a Roche 454 sequencer with extra databases for plants.
23
+ <b>amplicons.txt</b>:: filters amplicons.
24
+
25
+ == You can define your own templates using a combination of available plugins:
26
+
27
+ <b>PluginKey</b>:: to remove sequencing keys from 454 input sequences.
28
+ <b>PluginMids</b>:: to remove MIDS (barcodes) from 454 sequences.
29
+ <b>PluginLinker</b>:: splits sequences into two inserts when a valid linker is found (paired-end experiments only)
30
+ <b>PluginAbAdapters</b>:: removes AB adapters from sequences using a predefined DB or one provided by the user.
31
+ <b>PluginFindPolyAt</b>:: removes polyA and polyT from sequences.
32
+ <b>PluginLowComplexity</b>:: filters sequences with low complexity regions
33
+ <b>PluginAdapters</b>:: removes Adapters from sequences using a predefined DB or one provided by the user.
34
+ <b>PluginLowHighSize</b>:: removes sequences too small or too big.
35
+ <b>PluginVectors</b>:: removes vectors from sequences using a predefined database or one provided by the user.
36
+ <b>PluginAmplicons</b>:: filters amplicons using user predefined primers.
37
+ <b>PluginIndeterminations</b>:: removes indeterminations (N) from the sequence.
38
+ <b>PluginLowQuality</b>:: eliminates low quality regions from sequences.
39
+ <b>PluginContaminants</b>:: removes contaminants from sequences or rejects contaminated ones. It uses a core database, but it can be expanded with user-provided ones.
40
+
41
+
42
+
43
+ == SYNOPSIS:
44
+
45
+ Once installed, SeqtrimNEXT is very easy to use:
46
+
47
+ To install core databases (it should be done at installation time):
48
+
49
+ $> seqtrimnext -i
50
+
51
+ To perform an analysis using a predefined template with a FASTQ file format using 4 cpus:
52
+
53
+ $> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w 4
54
+
55
+ To perform an analysis using a predefined template with FASTA and QUAL input files:
56
+
57
+ $> seqtrimnext -t genomics_454.txt -f input_file_in_FASTA -q input_file_in_QUAL
58
+
59
+
60
+ To get additional help and list available templates and databases:
61
+
62
+ $> seqtrimnext -h
63
+
64
+
65
+ == REQUIREMENTS:
66
+
67
+ * Ruby 1.9.2
68
+ * Blast plus 2.24 or greater (prior versions have bugs that produce bad results)
69
+ * [Optional] - GnuPlot version 4.4.2 or greater (prior versions may produce wrong graphs)
70
+ * [Optional] - pdflatex, to produce a detailed report with results
71
+
72
+ == INSTALL:
73
+
74
+ === Installing Blast
75
+
76
+ * Download the latest version of Blast+ from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/release/LATEST/
77
+ * You can also use a precompiled version if you like
78
+ * To install from source, decompress the downloaded file, cd to the decompressed folder, and issue the following commands:
79
+
80
+ ./configure
81
+ make
82
+ sudo make install
83
+
84
+
85
+ === Installing Ruby 1.9
86
+
87
+ * You can use RVM to install ruby:
88
+
89
+ Install RVM:
90
+
91
+ $ bash < <(curl -s https://rvm.beginrescueend.com/install/rvm)
92
+
93
+ Setup environment:
94
+
95
+ $ echo '[[ -s "$HOME/.rvm/scripts/rvm" ]] && . "$HOME/.rvm/scripts/rvm" # Load RVM function' >> ~/.bash_profile
96
+
97
+ Install ruby 1.9.2 (this can take a while):
98
+
99
+ $ rvm install 1.9.2
100
+
101
+ Set it as the default:
102
+
103
+ $ rvm use 1.9.2 --default
104
+
105
+ === Install SeqtrimNEXT
106
+
107
+ SeqtrimNEXT is very easy to install. It is distributed as a ruby gem:
108
+
109
+ gem install seqtrimnext
110
+
111
+ This will install seqtrimnext and all the required gems.
112
+
113
+ === Install and rebuild SeqtrimNext's core databases
114
+
115
+ SeqtrimNEXT needs some core databases to work. To install them:
116
+
117
+ seqtrimnext -i core
118
+
119
+ === Database modifications
120
+
121
+ Included databases will be useful for a lot of people, but if you prefer, you can modify them, or add more elements to be searched against your sequences.
122
+
123
+ You only need to drop new fasta files to each respective directory:
124
+
125
+ DB/vectors to add more vectors
126
+ DB/contaminants to add more contaminants
127
+ etc...
128
+
129
+ Once the databases have been modified, you will need to reformat them by issuing the following command:
130
+
131
+ seqtrimnext -c
132
+
133
+ Modified databases will be rebuilt.
134
+
135
+
136
+ == LICENSE:
137
+
138
+ (The MIT License)
139
+
140
+ Copyright (c) 2011 Almudena Bocinos & Dario Guerrero
141
+
142
+ Permission is hereby granted, free of charge, to any person obtaining
143
+ a copy of this software and associated documentation files (the
144
+ 'Software'), to deal in the Software without restriction, including
145
+ without limitation the rights to use, copy, modify, merge, publish,
146
+ distribute, sublicense, and/or sell copies of the Software, and to
147
+ permit persons to whom the Software is furnished to do so, subject to
148
+ the following conditions:
149
+
150
+ The above copyright notice and this permission notice shall be
151
+ included in all copies or substantial portions of the Software.
152
+
153
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
154
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
155
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
156
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
157
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
158
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
159
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,38 @@
1
+ require 'rubygems'
2
+ gem 'hoe', '>= 2.1.0'
3
+ require 'hoe'
4
+ require 'fileutils'
5
+ require './lib/seqtrimnext'
6
+
7
+ Hoe.plugin :newgem
8
+ # Hoe.plugin :website
9
+ # Hoe.plugin :cucumberfeatures
10
+
11
+ # Generate all the Rake tasks
12
+ # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
+ $hoe = Hoe.spec 'seqtrimnext' do
14
+ self.developer 'Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com & alkoke@gmail.com'
15
+ self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
+ self.rubyforge_name = self.name # TODO this is default value
17
+ # self.extra_deps = ['narray','gnuplot','term-ansicolor','xml-simple','scbi_blast','scbi_drb','scbi_fasta','scbi_fastq','scbi_plot','scbi_math']
18
+
19
+ self.extra_deps = []
20
+ self.extra_deps << ['narray','>=0']
21
+ self.extra_deps << ['gnuplot','>=0']
22
+ self.extra_deps << ['term-ansicolor','>=0']
23
+ self.extra_deps << ['xml-simple','>=0']
24
+ self.extra_deps << ['scbi_blast','>=0']
25
+ self.extra_deps << ['scbi_mapreduce','>=0']
26
+ self.extra_deps << ['scbi_fasta','>=0']
27
+ self.extra_deps << ['scbi_fastq','>=0']
28
+ self.extra_deps << ['scbi_plot','>=0']
29
+ self.extra_deps << ['scbi_math','>=0']
30
+
31
+ end
32
+
33
+ require 'newgem/tasks'
34
+ Dir['tasks/**/*.rake'].each { |t| load t }
35
+
36
+ # TODO - want other tests/tasks run by default? Add them to the list
37
+ # remove_task :default
38
+ # task :default => [:spec, :features, :redocs]
@@ -0,0 +1,46 @@
1
+ require 'stringio'
2
+ # require 'test/unit'
3
+ require 'json'
4
+ require 'gnuplot'
5
+
6
+ ROOT_PATH=File.dirname(File.dirname(__FILE__))
7
+
8
+ # $: << File.expand_path(File.join(ROOT_PATH,'test'))
9
+ $: << File.expand_path(File.join(ROOT_PATH,'classes'))
10
+ $: << File.expand_path(File.join(ROOT_PATH,'plugins'))
11
+ $: << File.expand_path(File.join(ROOT_PATH,'utils'))
12
+
13
+ if ARGV.empty?
14
+ puts "Usage: #{$0} stats.json initial_stats.json"
15
+ exit
16
+ end
17
+
18
+ d=Dir.glob(File.expand_path(File.join(ROOT_PATH,'plugins','*.rb')))
19
+
20
+ # puts d.entries
21
+ # puts "="*20
22
+
23
+ require 'plugin'
24
+
25
+ # require 'params'
26
+
27
+ d.entries.each do |plugin|
28
+ require plugin
29
+ # puts "Requiring #{plugin}"
30
+ end
31
+
32
+ require 'graph_stats'
33
+
34
+ #load stats
35
+
36
+ r=File.read(ARGV[0])
37
+ stats=JSON::parse(r)
38
+
39
+
40
+ r2=File.read(ARGV[1])
41
+ init_stats=JSON::parse(r2)
42
+
43
+ gs=GraphStats.new(stats,init_stats)
44
+
45
+ puts "Graphs generated"
46
+
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fastq'
4
+
5
+ class Array
6
+ def count
7
+ self.length
8
+ end
9
+
10
+ end
11
+
12
+ if ARGV.count < 3
13
+ puts "#{$0} FASTA OUTPUT_NAME SEQ_NAME_FILE [MORE_SEQ_NAMES]"
14
+ exit
15
+ else
16
+
17
+ fasta = ARGV.shift
18
+ qual = ARGV.shift
19
+ output_name = ARGV.shift
20
+ seqs=ARGV
21
+ puts seqs.join(';')
22
+
23
+ fqr=FastaQualFile.new(fasta,qual)
24
+
25
+ output_fasta=File.new(output_name+'.fasta','a')
26
+ output_qual=File.new(output_name+'.fasta.qual','a')
27
+
28
+ fqr.each do |seq_name,seq_fasta,seq_qual|
29
+ if seqs.index(seq_name)
30
+ output_fasta.puts ">#{seq_name}"
31
+ output_fasta.puts seq_fasta
32
+ output_qual.puts ">#{seq_name}"
33
+ output_qual.puts seq_qual
34
+ seqs.delete(seq_name)
35
+ if seqs.empty?
36
+ break
37
+ end
38
+ end
39
+ end
40
+
41
+ output_qual.close
42
+ output_fasta.close
43
+ fqr.close
44
+
45
+ end
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fasta'
4
+
5
+ # GOOD_QUAL=50
6
+ # BAD_QUAL=10
7
+ # DOWN_CASE=('a'..'z')
8
+
9
+
10
+ class Array
11
+ def count
12
+ self.length
13
+ end
14
+
15
+ end
16
+
17
+ if ARGV.count < 4
18
+ puts "#{$0} FASTA QUAL OUTPUT_NAME SEQ_NAMES_FILE"
19
+ exit
20
+ else
21
+
22
+ fasta = ARGV.shift
23
+ qual = ARGV.shift
24
+ output_name = ARGV.shift
25
+ seqs_file=ARGV.shift
26
+
27
+ seqs=[]
28
+
29
+ f=File.open(seqs_file).each_line do |line|
30
+ seqs.push line.strip.chomp
31
+ end
32
+ # puts seqs.join(';')
33
+
34
+ fqr=FastaQualFile.new(fasta,qual)
35
+
36
+ output_fasta=File.new(output_name+'.fasta','a')
37
+ output_qual=File.new(output_name+'.fasta.qual','a')
38
+
39
+ fqr.each do |seq_name,seq_fasta,seq_qual|
40
+ if seqs.index(seq_name)
41
+ output_fasta.puts ">#{seq_name}"
42
+ output_fasta.puts seq_fasta
43
+ output_qual.puts ">#{seq_name}"
44
+ output_qual.puts seq_qual
45
+ seqs.delete(seq_name)
46
+ if seqs.empty?
47
+ break
48
+ end
49
+ end
50
+ end
51
+
52
+ output_qual.close
53
+ output_fasta.close
54
+ fqr.close
55
+
56
+ end
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fastq'
4
+
5
+ class Array
6
+ def count
7
+ self.length
8
+ end
9
+
10
+ end
11
+
12
+ if ARGV.count != 3
13
+ puts "#{$0} FASTQ OUTPUT_NAME SEQ_NAMES_FILE"
14
+ exit
15
+ else
16
+
17
+ fasta = ARGV.shift
18
+ output_name = ARGV.shift
19
+ seqs_file=ARGV.shift
20
+
21
+ seqs=[]
22
+
23
+ f=File.open(seqs_file).each_line do |line|
24
+ seqs.push line.strip.chomp
25
+ end
26
+ puts seqs.join(';')
27
+
28
+ fqr=FastqFile.new(fasta)
29
+
30
+ output_fastq=FastqFile.new(output_name+'.fastq','w')
31
+
32
+ fqr.each do |seq_name,seq_fasta,seq_qual|
33
+ if seqs.index(seq_name)
34
+ output_fastq.write_seq(seq_name,seq_fasta,seq_qual)
35
+ seqs.delete(seq_name)
36
+ if seqs.empty?
37
+ break
38
+ end
39
+ end
40
+ end
41
+
42
+ output_fastq.close
43
+ fqr.close
44
+
45
+ end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fasta'
4
+ require 'scbi_fastq'
5
+
6
+
7
+ if ARGV.count < 3
8
+ puts "#{$0} FASTA QUAL OUTPUT_NAME"
9
+ exit
10
+ end
11
+
12
+
13
+
14
+ fasta = ARGV.shift
15
+ qual = ARGV.shift
16
+ output_name = ARGV.shift
17
+ default_qual = nil
18
+
19
+ if !File.exists?(qual)
20
+ fqr=FastaFile.new(fasta)
21
+ puts "Quality file doesn't exists. Using default qual value = 40"
22
+ default_qual = [40]
23
+ else
24
+ fqr=FastaQualFile.new(fasta,qual)
25
+ end
26
+
27
+ output=FastqFile.new(output_name+'.fastq','w')
28
+
29
+ fqr.each do |seq_name,seq_fasta,seq_qual|
30
+ if default_qual
31
+ seq_qual = default_qual * seq_fasta.length
32
+ end
33
+ output.write_seq(seq_name,seq_fasta,seq_qual)
34
+ end
35
+
36
+ output.close
37
+ fqr.close
38
+
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fastq'
4
+
5
+
6
+ if ARGV.count < 2
7
+ puts "#{$0} FASTQ OUTPUT_NAME"
8
+ exit
9
+ end
10
+
11
+
12
+
13
+ fastq = ARGV.shift
14
+ output_name = ARGV.shift
15
+
16
+
17
+ fasta = File.open(output_name+'.fasta','w')
18
+ qual = File.open(output_name+'.fasta.qual','w')
19
+
20
+ fqr=FastqFile.new(fastq)
21
+
22
+ fqr.each do |seq_name,seq_fasta,seq_qual,comments|
23
+
24
+ fasta.puts ">#{seq_name} #{comments}"
25
+ fasta.puts seq_fasta
26
+
27
+ qual.puts ">#{seq_name} #{comments}"
28
+ qual.puts seq_qual.join(' ')
29
+
30
+ end
31
+
32
+ fasta.close
33
+ qual.close
34
+ fqr.close
35
+
data/bin/gen_qual.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fasta'
4
+
5
+ GOOD_QUAL=50
6
+ BAD_QUAL=10
7
+ DOWN_CASE=('a'..'z')
8
+
9
+
10
+ class Array
11
+ def count
12
+ self.length
13
+ end
14
+
15
+ end
16
+
17
+ if ARGV.count != 2
18
+ puts "Programa ENTRADA SALIDA"
19
+ exit
20
+ else
21
+ puts ARGV[0]
22
+ puts ARGV[1]
23
+
24
+ fqr=FastaQualFile.new(ARGV[0])
25
+
26
+ f = File.new(ARGV[1],'w+')
27
+
28
+ fqr.each do |seq_name,seq_fasta,seq_qual|
29
+ f.puts ">#{seq_name}"
30
+ res =[]
31
+ seq_fasta.each_char do |c|
32
+ if DOWN_CASE.include?(c)
33
+ res << BAD_QUAL
34
+ else
35
+ res << GOOD_QUAL
36
+ end
37
+ end
38
+
39
+ f.puts res.join(' ')
40
+ #f.puts "50 "*seq_fasta.length
41
+ end
42
+
43
+ f.close
44
+ fqr.close
45
+
46
+ end
data/bin/get_seq.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scbi_fasta'
4
+
5
+ GOOD_QUAL=50
6
+ BAD_QUAL=10
7
+ DOWN_CASE=('a'..'z')
8
+
9
+
10
+ class Array
11
+ def count
12
+ self.length
13
+ end
14
+
15
+ end
16
+
17
+ if ARGV.count < 3
18
+ puts "#{$0} FASTA QUAL SEQ_NAME [f|q|fq]"
19
+ exit
20
+ else
21
+
22
+ fqr=FastaQualFile.new(ARGV[0],ARGV[1])
23
+ get_type = 'fq'
24
+ if ARGV.count == 4
25
+ get_type=ARGV[3]
26
+ end
27
+
28
+ fqr.each do |seq_name,seq_fasta,seq_qual|
29
+ if seq_name == ARGV[2]
30
+ if get_type.index('f')
31
+ puts ">#{seq_name}"
32
+ puts seq_fasta
33
+ end
34
+
35
+ if get_type.index('q')
36
+ puts ">#{seq_name}"
37
+ puts seq_qual
38
+ end
39
+ break
40
+ end
41
+
42
+ end
43
+
44
+ fqr.close
45
+
46
+ end