genfrag 0.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/.bnsignore +16 -0
  2. data/History.txt +4 -0
  3. data/LICENSE.txt +58 -0
  4. data/README.rdoc +40 -0
  5. data/Rakefile +53 -0
  6. data/bin/genfrag +8 -0
  7. data/lib/genfrag.rb +129 -0
  8. data/lib/genfrag/app.rb +105 -0
  9. data/lib/genfrag/app/command.rb +145 -0
  10. data/lib/genfrag/app/index_command.rb +227 -0
  11. data/lib/genfrag/app/index_command/db.rb +105 -0
  12. data/lib/genfrag/app/search_command.rb +298 -0
  13. data/lib/genfrag/app/search_command/match.rb +165 -0
  14. data/lib/genfrag/app/search_command/process_file.rb +125 -0
  15. data/lib/genfrag/app/search_command/trim.rb +121 -0
  16. data/lib/genfrag/debug.rb +0 -0
  17. data/spec/data/index_command/in/a.fasta +109 -0
  18. data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
  19. data/spec/data/index_command/out/2-a_lookup.db +0 -0
  20. data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
  21. data/spec/data/index_command/out/4-a_lookup.db +0 -0
  22. data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
  23. data/spec/data/index_command/out/6-a_lookup.db +0 -0
  24. data/spec/data/index_command/out/a.fasta.db +0 -0
  25. data/spec/data/index_command/out/a.fasta.tdf +6 -0
  26. data/spec/genfrag/app/command_spec.rb +55 -0
  27. data/spec/genfrag/app/index_command_spec.rb +258 -0
  28. data/spec/genfrag/app/search_command/match_spec.rb +77 -0
  29. data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
  30. data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
  31. data/spec/genfrag/app/search_command_spec.rb +260 -0
  32. data/spec/genfrag/app_spec.rb +77 -0
  33. data/spec/genfrag_spec.rb +87 -0
  34. data/spec/spec_helper.rb +56 -0
  35. data/tasks/ann.rake +80 -0
  36. data/tasks/bones.rake +20 -0
  37. data/tasks/gem.rake +201 -0
  38. data/tasks/git.rake +40 -0
  39. data/tasks/notes.rake +27 -0
  40. data/tasks/post_load.rake +34 -0
  41. data/tasks/rdoc.rake +50 -0
  42. data/tasks/rubyforge.rake +55 -0
  43. data/tasks/setup.rb +300 -0
  44. data/tasks/spec.rake +54 -0
  45. data/tasks/svn.rake +47 -0
  46. data/tasks/test.rake +40 -0
  47. metadata +136 -0
@@ -0,0 +1,16 @@
1
+ # The list of files that should be ignored by Mr Bones.
2
+ # Lines that start with '#' are comments.
3
+ #
4
+ # A .gitignore file can be used instead by setting it as the ignore
5
+ # file in your Rakefile:
6
+ #
7
+ # PROJ.ignore_file = '.gitignore'
8
+ #
9
+ # For a project with a C extension, the following would be a good set of
10
+ # exclude patterns (uncomment them if you want to use them):
11
+ # *.[oa]
12
+ # *~
13
+ announcement.txt
14
+ coverage
15
+ doc
16
+ pkg
@@ -0,0 +1,4 @@
1
+ == 0.1.0 / 2009-02-04
2
+
3
+ * 1 major enhancement
4
+ - initialize
@@ -0,0 +1,58 @@
1
+ Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.co.jp>.
2
+ You can redistribute it and/or modify it under either the terms of the GPL
3
+ (see COPYING.txt file), or the conditions below:
4
+
5
+ 1. You may make and give away verbatim copies of the source form of the
6
+ software without restriction, provided that you duplicate all of the
7
+ original copyright notices and associated disclaimers.
8
+
9
+ 2. You may modify your copy of the software in any way, provided that
10
+ you do at least ONE of the following:
11
+
12
+ a) place your modifications in the Public Domain or otherwise
13
+ make them Freely Available, such as by posting said
14
+ modifications to Usenet or an equivalent medium, or by allowing
15
+ the author to include your modifications in the software.
16
+
17
+ b) use the modified software only within your corporation or
18
+ organization.
19
+
20
+ c) rename any non-standard executables so the names do not conflict
21
+ with standard executables, which must also be provided.
22
+
23
+ d) make other distribution arrangements with the author.
24
+
25
+ 3. You may distribute the software in object code or executable
26
+ form, provided that you do at least ONE of the following:
27
+
28
+ a) distribute the executables and library files of the software,
29
+ together with instructions (in the manual page or equivalent)
30
+ on where to get the original distribution.
31
+
32
+ b) accompany the distribution with the machine-readable source of
33
+ the software.
34
+
35
+ c) give non-standard executables non-standard names, with
36
+ instructions on where to get the original software distribution.
37
+
38
+ d) make other distribution arrangements with the author.
39
+
40
+ 4. You may modify and include the part of the software into any other
41
+ software (possibly commercial). But some files in the distribution
42
+ are not written by the author, so that they are not under this terms.
43
+
44
+ They are gc.c(partly), utils.c(partly), regex.[ch], st.[ch] and some
45
+ files under the ./missing directory. See each file for the copying
46
+ condition.
47
+
48
+ 5. The scripts and library files supplied as input to or produced as
49
+ output from the software do not automatically fall under the
50
+ copyright of the software, but belong to whomever generated them,
51
+ and may be sold commercially, and may be aggregated with this
52
+ software.
53
+
54
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
55
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
56
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
57
+ PURPOSE.
58
+
@@ -0,0 +1,40 @@
1
+ Genfrag version 0.0.0.1
2
+ by Pjotr Prins and Trevor Wennblom
3
+ http://genfrag.rubyforge.org
4
+ (the "Rough Draught" release)
5
+
6
+
7
+ == DESCRIPTION:
8
+
9
+ This is a development release. Few features are functional at this time.
10
+
11
+ Genfrag allows for rapid in-silico searching of fragments cut by
12
+ different restriction enzymes in large nucleic acid databases,
13
+ followed by matching specificity adapters which allow a further data
14
+ reduction when looking for differential expression of genes and
15
+ markers.
16
+
17
+
18
+ == USAGE:
19
+
20
+ The following commands currently work:
21
+ genfrag index -f example.fasta --re5 BstYI --re3 MseI
22
+ genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 tt
23
+
24
+
25
+ == REQUIREMENTS:
26
+
27
+ * bioruby
28
+ * sqlite3-ruby
29
+
30
+
31
+ == INSTALL:
32
+
33
+ * sudo gem install genfrag
34
+
35
+
36
+ == LICENSE:
37
+
38
+ Copyright (c) 2009 Pjotr Prins and Trevor Wennblom
39
+
40
+ Distributed under the same terms as the Ruby License - see LICENSE.txt
@@ -0,0 +1,53 @@
1
+ # Look in the tasks/setup.rb file for the various options that can be
2
+ # configured in this Rakefile. The .rake files in the tasks directory
3
+ # are where the options are used.
4
+
5
# Rakefile for the genfrag gem, built on the Mr Bones task skeleton.
#
# Look in the tasks/setup.rb file for the various options that can be
# configured in this Rakefile. The .rake files in the tasks directory
# are where the options are used.

# Prefer the installed "bones" gem; if it cannot be loaded, fall back to the
# copy of the task setup that lives in tasks/setup.rb. If neither is
# available the Rakefile cannot work, so fail with an explicit message.
begin
  require 'bones'
  Bones.setup
rescue LoadError
  begin
    load 'tasks/setup.rb'
  rescue LoadError
    raise RuntimeError, '### please install the "bones" gem ###'
  end
end

# Make lib/ loadable so that Genfrag::VERSION can be read below.
ensure_in_path 'lib'
require 'genfrag'

#task :default => 'spec:specdoc'
task :default => 'spec:run'

# Project metadata consumed by the tasks in tasks/*.rake.
PROJ.name = 'genfrag'
PROJ.authors = 'Pjotr Prins and Trevor Wennblom'
PROJ.email = 'trevor@corevx.com'
PROJ.url = 'http://genfrag.rubyforge.org'
PROJ.version = Genfrag::VERSION
PROJ.release_name = 'Rough Draught'
PROJ.ruby_opts = %w[-W0]
PROJ.readme_file = 'README.rdoc'
# NOTE(review): the released package lists a .bnsignore file, while this
# setting points at .gitignore -- confirm which ignore file is intended.
PROJ.ignore_file = '.gitignore'
PROJ.exclude << 'genfrag.gemspec'
PROJ.exclude << '.git'

PROJ.rubyforge.name = 'genfrag'

PROJ.spec.opts << '--color'

# Message stored under the gem's :post_install_message extra.
PROJ.gem.extras[:post_install_message] = <<-MSG
--------------------------------------------
Genfrag installed
Type 'genfrag -h' for a list of commands
--------------------------------------------
MSG

# The project name is switched to the capitalised form when the
# 'ann:prereqs' task runs.
task 'ann:prereqs' do
  PROJ.name = 'Genfrag'
end

# Gem dependencies.
# NOTE(review): lib/genfrag/app.rb requires 'bio' and (optionally) 'sqlite3',
# and the README lists sqlite3-ruby as a requirement -- confirm that the gem
# names declared here match what the library actually loads.
depend_on 'bioruby'
depend_on 'rake'


# EOF
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line launcher: load the library that sits in ../lib relative to
# this script, then hand the raw command-line arguments to the application.
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'genfrag'))

Genfrag::App.cli_run(ARGV)

# EOF
@@ -0,0 +1,129 @@
1
+
2
# Top-level namespace of the genfrag library. Besides the version and path
# constants it offers a timing helper (tracktime) and the helpers that derive
# the base names of the lookup, normalized-fasta and adapter files.
module Genfrag

  # :stopdoc:
  VERSION = '0.0.0.1'
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
  # :startdoc:

  # Returns the version string for the library.
  #
  def self.version
    VERSION
  end

  # Returns the library path for the module. If any arguments are given,
  # they will be joined to the end of the library path using
  # <tt>File.join</tt>.
  #
  def self.libpath( *args )
    args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
  end

  # Returns the path for the module. If any arguments are given,
  # they will be joined to the end of the path using
  # <tt>File.join</tt>.
  #
  def self.path( *args )
    args.empty? ? PATH : ::File.join(PATH, args.flatten)
  end

  # Utility method used to require all files ending in .rb that lie in the
  # directory below this file that has the same name as the filename passed
  # in. Optionally, a specific _directory_ name can be passed in such that
  # the _filename_ does not have to be equivalent to the directory.
  #
  # Files are required in sorted path order.
  #
  def self.require_all_libs_relative_to( fname, dir = nil )
    dir ||= ::File.basename(fname, '.*')
    search_me = ::File.expand_path(
        ::File.join(::File.dirname(fname), dir, '**', '*.rb'))

    Dir.glob(search_me).sort.each {|rb| require rb}
  end

  # Runs the given block and returns its value. On the way out -- also when
  # the block raises -- the elapsed wall-clock time is printed to _f_
  # (<tt>$stdout</tt> when _f_ is nil), unless _verbose_ is false.
  #
  # Info:: Pjotr's shared Ruby modules
  # Author:: Pjotr Prins
  # mail:: pjotr.public05@thebird.nl
  # Copyright:: July 2007
  # License:: Ruby License
  #
  def self.tracktime(verbose=true, f=nil)
    begin
      t1 = Time.now
      yield
    ensure
      t2 = Time.now
      dt = t2 - t1
      if verbose
        f = $stdout if f.nil?
        f.print "\nElapsed time "
        # Peel off whole hours, then whole minutes; the rest is seconds.
        hours = dt.to_i/3600
        dt -= hours*3600
        mins = dt.to_i/60
        dt -= mins*60
        secs = dt
        # Fractions are only shown for short remainders.
        secs = secs.to_i if secs > 25
        f.print hours.to_i," hours " if hours > 0
        f.print mins.to_i," minutes " if mins > 0
        f.print secs," seconds\n"
      end
    end
  end

  # Create a unique filename for the frequency file out of a combination of filenames
  #
  # Precedence: an explicit _filelookup_ wins (its .db/.tdf extension is
  # removed); otherwise the sorted _input_filenames_ are used; otherwise the
  # name is derived from _filefasta_. The last two forms append the
  # down-cased enzyme names and 'index', and replace every '/' by 'x'.
  #
  # Raises RuntimeError when _re5_ or _re3_ is missing for a derived name, or
  # when none of the three name sources is given.
  #
  def self.name_freq_lookup(input_filenames=[],filefasta=nil,filelookup=nil,re5=nil,re3=nil)
    input_filenames = [] if input_filenames.nil?

    # Non-destructive gsub (this used to be gsub!): the caller's string is
    # left untouched.
    return filelookup.gsub(/\.(db|tdf)$/, '') if filelookup

    unless !input_filenames.empty? || filefasta
      raise "--lookup undefined and no default filenames passed"
    end
    raise "re5 or re3 is undefined" unless re5 && re3

    base = input_filenames.empty? ? name_normalized_fasta(nil, filefasta) : input_filenames.sort
    [base, re5.downcase, re3.downcase, 'index'].join('_').gsub(/\//, 'x')
  end

  # Create a unique filename out of a combination of filenames
  #
  # An explicit _filefasta_ wins (its .db/.tdf extension is removed);
  # otherwise the sorted _input_filenames_ are joined with 'normalized' and
  # every '/' is replaced by 'x'.
  #
  # Raises RuntimeError when neither source is given. A nil _input_filenames_
  # is treated like an empty list, as in name_freq_lookup (which passes nil
  # explicitly).
  #
  def self.name_normalized_fasta(input_filenames=[],filefasta=nil)
    input_filenames = [] if input_filenames.nil?

    # Non-destructive gsub (this used to be gsub!): the caller's string is
    # left untouched.
    return filefasta.gsub(/\.(db|tdf)$/, '') if filefasta

    if input_filenames.empty?
      raise "--fasta undefined and no default filenames passed"
    end
    [input_filenames.sort, 'normalized'].join('_').gsub(/\//, 'x')
  end

  # Return the name of the adapters file without its extension
  # (nil when no file name is given).
  #
  def self.name_adapters(fileadapters=nil)
    return nil unless fileadapters
    fileadapters.gsub(/\.(db|tdf)$/, '')
  end

end # module Genfrag
126
+
127
+ Genfrag.require_all_libs_relative_to(__FILE__)
128
+
129
+ # EOF
@@ -0,0 +1,105 @@
1
+ if ENV['BIORUBY_HOME']
2
+ $: << File.join(ENV['BIORUBY_HOME'],'lib')
3
+ else
4
+ require 'rubygems'
5
+ end
6
+
7
+ begin
8
+ require 'sqlite3'
9
+ rescue LoadError
10
+ $stderr.print "Warning: no sqlite installed"
11
+ end
12
+
13
+ require 'fileutils'
14
+ require 'optparse'
15
+ require 'ostruct'
16
+ require 'bio'
17
+ require 'csv'
18
+
19
+
20
module Genfrag
class App

  # Convenience entry point: build an App on the default streams and run the
  # +genfrag+ application with the command line _args_.
  #
  def self.cli_run( args )
    new.cli_run(args)
  end

  # Set up the application; _out_ receives regular output and _err_ receives
  # error messages.
  #
  def initialize( out = STDOUT, err = STDERR )
    @out, @err = out, err
  end

  # Take the first element off _args_, treat it as the command name and run
  # the matching command object with the remaining arguments.
  #
  # No command, '-h' and '--help' print the top-level help; '-V' and
  # '--version' print the version. Any StandardError raised along the way is
  # reported on the error stream (class, message, backtrace) and the process
  # exits with status 1.
  #
  def cli_run( args )
    name = args.shift

    # Command objects are created lazily, only for the command asked for.
    command =
      case name
      when nil, '-h', '--help'
        help
      when '-V', '--version'
        @out.puts "Genfrag #{::Genfrag::VERSION}"
        nil
      when 'index'  then IndexCommand.new(@out, @err)
      when 'search' then SearchCommand.new(@out, @err)
      when 'info'   then InfoCommand.new(@out, @err)
      else
        raise "Unknown command #{name.inspect}"
      end

    command.cli_run(args) if command

  rescue StandardError => failure
    @err.puts "ERROR: While executing genfrag ... (#{failure.class})"
    @err.puts " #{failure}"
    @err.puts " #{failure.backtrace.join("\n\t")}"
    exit 1
  end

  # Print the top-level help message on the output stream. Returns nil, so
  # the caller has no command object to run afterwards.
  #
  def help
    @out.puts <<-MSG

GenFrag allows for rapid in-silico searching of fragments cut by
different restriction enzymes in large nucleotide acid databases,
followed by matching specificity adapters which allow a further data
reduction when looking for differential expression of genes and
markers.

Usage:
genfrag -h/--help
genfrag -V/--version
genfrag command [options] [arguments]

Examples:
genfrag index -f example.fasta --RE5 BstYI --RE3 MseI
genfrag search -f example.fasta --RE5 BstYI --RE3 MseI --adapter5 ct

Commands:
genfrag index initialize the index
genfrag search search FIXME
genfrag info show information about FIXME

Further Help:
Each command has a '--help' option that will provide detailed
information for that command.

http://genfrag.rubyforge.org/

    MSG
    nil
  end

end # class App
end # module Genfrag
102
+
103
+ Genfrag.require_all_libs_relative_to(__FILE__)
104
+
105
+ # EOF
@@ -0,0 +1,145 @@
1
+
2
module Genfrag
class App

# Base class of the genfrag sub-commands. It holds the output streams and the
# option containers, and describes the command-line switches shared by the
# commands; cli_run and run are left for subclasses to implement.
class Command

  # options:: a Hash used by optparse
  # ops::     an OpenStruct of the options
  attr_accessor :options, :ops

  # _out_ receives regular output, _err_ receives error messages. Both
  # option containers start out empty.
  #
  def initialize( out = STDOUT, err = STDERR )
    @out, @err = out, err
    @options = {}
    @ops = OpenStruct.new
  end

  # Command-line entry point; to be provided by subclasses.
  #
  def cli_run( args )
    raise NotImplementedError
  end

  # Programmatic entry point; to be provided by subclasses.
  #
  def run( args )
    raise NotImplementedError
  end

  # Print running output when used in command-line mode
  #
  # Returns false without printing when _cli_ is falsy or when the quiet
  # option is set on +ops+.
  #
  def cli_p(cli=true, str='')
    return false if !cli || @ops.quiet
    @out.puts str
  end

  # Print an error from the command-line options
  #
  # The message goes to the error stream; the blank lines around it and the
  # text returned by +opt_parser+ go to the output stream. +opt_parser+ is
  # not defined in this class.
  #
  def clierr_p(str)
    @out.puts
    @err.puts "Error: #{str}"
    @out.puts
    @out.puts opt_parser
  end

  # Define the command-line option available
  #
  # Returns a Hash that maps an option key to an Array of option-parser
  # arguments (switches, optional argument class, description lines) whose
  # last element is the lambda that records the value in +options+.
  #
  def standard_options
    {
      :verbose   => ['-v', '--verbose', 'enable verbose output', flag_setter(:verbose)],
      :tracktime => ['-m', '--tracktime', 'track execution time', flag_setter(:tracktime)],
      :quiet     => ['-q', '--quiet', 'silence output', flag_setter(:quiet)],
      :indir     => ['-i', '--in DIR', String, 'input directory', "(default #{Dir.pwd})",
                     value_setter(:indir)],
      :outdir    => ['-o', '--out DIR', String, 'output directory', "(default #{Dir.pwd})",
                     value_setter(:outdir)],
      :re5       => ['-5', '--re5 ENZYME', String, "5' restriction enzyme", value_setter(:re5)],
      :re3       => ['-3', '--re3 ENZYME', String, "3' restriction enzyme", value_setter(:re3)],
      :sqlite    => ['-t', '--sqlite', 'use sqlite', '(default is CSV)', flag_setter(:sqlite)],
      :filelookup => ['-l', '--lookup FILE', String,
                      "name of the frequency lookup file generated by 'index'",
                      value_setter(:filelookup)],
      :filefasta => ['-f', '--fasta FILE', String, 'name of the Fasta sequences file',
                     value_setter(:filefasta)],
      :size      => ['-s', '--size SIZE', Array, '', value_setter(:size)],

      :adapter5  => ['-y', '--adapter5 ADAPTER', String, '', value_setter(:adapter5)],
      :adapter3  => ['-z', '--adapter3 ADAPTER', String, '', value_setter(:adapter3)],

      :named_adapter5 => ['-b', '--named-adapter5 NAME', String, '',
                          value_setter(:named_adapter5)],
      :named_adapter3 => ['-c', '--named-adapter3 NAME', String, '',
                          value_setter(:named_adapter3)],

      :adapter5_size => ['-d', '--adapter5-size SIZE', Integer, '',
                         value_setter(:adapter5_size)],
      :adapter3_size => ['-e', '--adapter3-size SIZE', Integer, '',
                         value_setter(:adapter3_size)],

      :adapter5_sequence => ['-w', '--adapter5-sequence SEQUENCE', String, '',
                             value_setter(:adapter5_sequence)],
      :adapter3_sequence => ['-x', '--adapter3-sequence SEQUENCE', String, '',
                             value_setter(:adapter3_sequence)],

      :fileadapters => ['-a', '--adapters FILE', String, '', value_setter(:fileadapters)]
    }
  end

  private

  # Handler for a switch without argument: stores true under _key_.
  #
  def flag_setter(key)
    lambda {
      options[key] = true
    }
  end

  # Handler for a switch with an argument: stores the value under _key_.
  #
  def value_setter(key)
    lambda { |value|
      options[key] = value
    }
  end

end # class Command
end # class App
end # module Genfrag
144
+
145
+ # EOF