BioDSL 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 62ee1a33fd4240d69a9947883c1a2616c080c55b
4
+ data.tar.gz: bfceb11bc1375355e5b61eaf3ae31dd3459f6572
5
+ SHA512:
6
+ metadata.gz: f9c8bd7cae663da3110438b209b55ac92301c0de4fe5800dbf2c0d322c6089a17ebe4018a27a2ffaa00f615cb9ac6b89e15c4eaeba73bffd659c423b28ba5813
7
+ data.tar.gz: 5d1ffa6e7e16bc836774a42e79ebd0fcf9091264349a781106501178d1572c9d0176a892f8c07e27d1885d69a12831152f307147b9b098e45ed80179e0316588
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ *.swp
2
+ *.gem
3
+ mothur.*.logfile
4
+ coverage/
5
+ doc/
6
+ pkg/
7
+ .yardoc/
8
+ .tags*
9
+ tags
10
+ 8mer
data/BioDSL.gemspec ADDED
@@ -0,0 +1,64 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
3
+ # #
4
+ # This program is free software; you can redistribute it and/or #
5
+ # modify it under the terms of the GNU General Public License #
6
+ # as published by the Free Software Foundation; either version 2 #
7
+ # of the License, or (at your option) any later version. #
8
+ # #
9
+ # This program is distributed in the hope that it will be useful, #
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
12
+ # GNU General Public License for more details. #
13
+ # #
14
+ # You should have received a copy of the GNU General Public License #
15
+ # along with this program; if not, write to the Free Software #
16
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
17
+ # USA. #
18
+ # #
19
+ # http://www.gnu.org/copyleft/gpl.html #
20
+ # #
21
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
22
+ # #
23
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+
27
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
28
+
29
+ require 'BioDSL/version'
30
+
31
+ Gem::Specification.new do |s|
32
+ s.name = 'BioDSL'
33
+ s.version = BioDSL::VERSION
34
+ s.platform = Gem::Platform::RUBY
35
+ s.date = Time.now.strftime('%F')
36
+ s.summary = 'BioDSL'
37
+ s.description = 'BioDSL is a Bioinformatics Domain Specific Language.'
38
+ s.authors = ['Martin A. Hansen']
39
+ s.email = 'mail@maasha.dk'
40
+ s.rubyforge_project = 'BioDSL'
41
+ s.homepage = 'http://www.github.com/maasha/BioDSL'
42
+ s.license = 'GPL2'
43
+ s.rubygems_version = '2.0.0'
44
+ s.files = `git ls-files`.split("\n")
45
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
46
+ s.executables = `git ls-files -- bin/*`.split("\n").
47
+ map { |f| File.basename(f) }
48
+ s.extra_rdoc_files = Dir['wiki/*.rdoc']
49
+ s.require_paths = ['lib']
50
+
51
+ s.add_dependency('haml', '>= 4.0.5')
52
+ s.add_dependency('RubyInline', '>= 3.12.2')
53
+ s.add_dependency('narray', '>= 0.6.0')
54
+ s.add_dependency('mail', '>= 2.5.4')
55
+ s.add_dependency('msgpack', '>= 0.5.8')
56
+ s.add_dependency('gnuplotter', '>= 1.0.2')
57
+ s.add_dependency('parallel', '>= 1.0.0')
58
+ s.add_dependency('pqueue', '>= 2.0.2')
59
+ s.add_dependency('terminal-table', '>= 1.4.5')
60
+ s.add_dependency('tilt', '>= 2.0.1')
61
+ s.add_development_dependency('bundler', '>= 1.7.4')
62
+ s.add_development_dependency('simplecov', '>= 0.9.2')
63
+ s.add_development_dependency('mocha', '>= 1.0.0')
64
+ end
data/LICENSE ADDED
@@ -0,0 +1,339 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ {description}
294
+ Copyright (C) {year} {fullname}
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ {signature of Ty Coon}, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.
data/README.md ADDED
@@ -0,0 +1,205 @@
1
+ BioDSL
2
+ =========
3
+
4
+ Installation
5
+ ------------
6
+
7
+ `gem install BioDSL`
8
+
9
+ Getting started
10
+ ---------------
11
+
12
+ A test script:
13
+
14
+ #!/usr/bin/env ruby
15
+
16
+ require 'BioDSL'
17
+
18
+ p = BP.new.
19
+ read_fasta(input: "input.fna").
20
+ grab(select: "ATC$", keys: :SEQ).
21
+ write_fasta(output: "output.fna").
22
+ run(progress: true)
23
+
24
+ Or use an interactive shell via the alias `ibp`, which you can create by
25
+ adding the following to your `~/.bashrc` file:
26
+
27
+ alias ibp="irb -r BioDSL --noinspect"
28
+
29
+ And then start the interactive shell:
30
+
31
+ $ ibp
32
+ irb(main):001:0> p = BP.new
33
+ => BP.new
34
+ irb(main):002:0> p.read_fasta(input: "input.fna")
35
+ => BP.new.read_fasta(input: "input.fna")
36
+ irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
37
+ => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
38
+ irb(main):004:0> p.write_fasta(output: "output.fna")
39
+ => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
40
+ irb(main):005:0> p.run(progress: true)
41
+ => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
42
+ irb(main):006:0>
43
+
44
+
45
+ Or chaining commands directly:
46
+
47
+ $ ibp
48
+ irb(main):001:0> BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
49
+ => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
50
+ irb(main):002:0>
51
+
52
+ Or run on the command line with the alias `bp`, which you can create by adding
53
+ the following to your `~/.bashrc` file:
54
+
55
+ alias bp="ruby -r BioDSL"
56
+
57
+ Then you can run the below from the command line:
58
+
59
+ $ bp -e 'BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
60
+
61
+ Available BioDSL commands
62
+ -------------------
63
+
64
+ * [add_key](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AddKey)
65
+ * [align_seq_mothur](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AlignSeqMothur)
66
+ * [analyze_residue_distribution](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AnalyzeResidueDistribution)
67
+ * [assemble_pairs](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssemblePairs)
68
+ * [assemble_seq_idba](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqIdba)
69
+ * [assemble_seq_ray](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqRay)
70
+ * [assemble_seq_spades](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqSpades)
71
+ * [classify_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClassifySeq)
72
+ * [classify_seq_mothur](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClassifySeqMothur)
73
+ * [clip_primer](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClipPrimer)
74
+ * [cluster_otus](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClusterOtus)
75
+ * [collapse_otus](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/CollapseOtus)
76
+ * [collect_otus](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/CollectOtus)
77
+ * [complement_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ComplementSeq)
78
+ * [count](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Count)
79
+ * [degap_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/DegapSeq)
80
+ * [dereplicate_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/DereplicateSeq)
81
+ * [dump](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Dump)
82
+ * [filter_rrna](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/FilterRrna)
83
+ * [genecall](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Genecall)
84
+ * [grab](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Grab)
85
+ * [index_taxonomy](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/IndexTaxonomy)
86
+ * [mean_scores](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MeanScores)
87
+ * [merge_pair_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergePairSeq)
88
+ * [merge_table](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergeTable)
89
+ * [merge_values](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergeValues)
90
+ * [plot_heatmap](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotHeatmap)
91
+ * [plot_histogram](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotHistogram)
92
+ * [plot_matches](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotMatches)
93
+ * [plot_residue_distribution](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotResidueDistribution)
94
+ * [plot_scores](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotScores)
95
+ * [random](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Random)
96
+ * [read_fasta](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadFasta)
97
+ * [read_fastq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadFastq)
98
+ * [read_table](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadTable)
99
+ * [reverse_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReverseSeq)
100
+ * [slice_align](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SliceAlign)
101
+ * [slice_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SliceSeq)
102
+ * [sort](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Sort)
103
+ * [split_pair_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SplitPairSeq)
104
+ * [split_values](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SplitValues)
105
+ * [trim_primer](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/TrimPrimer)
106
+ * [trim_seq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/TrimSeq)
107
+ * [uchime_ref](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UchimeRef)
108
+ * [unique_values](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UniqueValues)
109
+ * [usearch_global](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UsearchGlobal)
110
+ * [write_fasta](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteFasta)
111
+ * [write_fastq](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteFastq)
112
+ * [write_table](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteTable)
113
+ * [write_tree](http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteTree)
114
+
115
+ Log and History
116
+ ---------------
117
+
118
+ All BioDSL events are logged to `~/.BioDSL_log`.
119
+
120
+ BioDSL history is saved to `~/.BioDSL_history`.
121
+
122
+
123
+ Features
124
+ --------
125
+
126
+ Progress:
127
+
128
+ Show a nifty progress table with commands, records read and emitted, and time.
129
+
130
+ `BP.new.read_fasta(input: "input.fna").dump.run(progress: true)`
131
+
132
+ Verbose:
133
+
134
+ Output verbose messages from commands and the run status.
135
+
136
+ `BP.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
137
+
138
+ Debug:
139
+
140
+ Output debug messages from the commands that emit them.
141
+
142
+ `BP.new.read_fasta(input: "input.fna").dump.run(debug: true)`
143
+
144
+ E-mail notification:
145
+
146
+ Send an email when run is complete.
147
+
148
+ `BP.new.read_fasta(input: "input.fna").dump.run(email: "mail@maasha.dk", subject: "Script done!")`
149
+
150
+ Report:
151
+
152
+ Create an HTML report of the run stats:
153
+
154
+ `BP.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
155
+
156
+ Output dir:
157
+
158
+ All output files from commands are put in a specified dir:
159
+
160
+ `BP.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
161
+
162
+
163
+ Configuration File
164
+ ------------------
165
+
166
+ It is possible to pre-set options in a configuration file located in your $HOME
167
+ directory called `.BioDSLrc`. Thus if an option is not already set, its value
168
+ will fall back to the one set in the configuration file. The configuration file
169
+ contains three whitespace separated columns:
170
+
171
+ * Command name
172
+ * Option
173
+ * Option value
174
+
175
+ Lines starting with '#' are considered comments and are ignored.
176
+
177
+ An example:
178
+
179
+ maasha@mel:~$ cat ~/.BioDSLrc
180
+ uchime_ref database /home/maasha/Install/QIIME1.8/data/rdp_gold.fa
181
+ uchime_ref cpus 20
182
+
183
+ On compute clusters it is necessary to specify the max processor count, which
184
+ is otherwise determined as the number of cores on the current node. To override
185
+ this add the following line:
186
+
187
+ pipeline processor_count 1000
188
+
189
+ It is also possible to change the temporary directory from the system's default
190
+ by adding the following line:
191
+
192
+ pipeline tmp_dir /home/projects/ku_microbio/scratch/tmp
193
+
194
+ Contributing
195
+ ------------
196
+
197
+ Fork it
198
+
199
+ Create your feature branch (git checkout -b my-new-feature)
200
+
201
+ Commit your changes (git commit -am 'Add some feature')
202
+
203
+ Push to the branch (git push origin my-new-feature)
204
+
205
+ Create new Pull Request