genfrag 0.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.bnsignore +16 -0
  2. data/History.txt +4 -0
  3. data/LICENSE.txt +58 -0
  4. data/README.rdoc +40 -0
  5. data/Rakefile +53 -0
  6. data/bin/genfrag +8 -0
  7. data/lib/genfrag.rb +129 -0
  8. data/lib/genfrag/app.rb +105 -0
  9. data/lib/genfrag/app/command.rb +145 -0
  10. data/lib/genfrag/app/index_command.rb +227 -0
  11. data/lib/genfrag/app/index_command/db.rb +105 -0
  12. data/lib/genfrag/app/search_command.rb +298 -0
  13. data/lib/genfrag/app/search_command/match.rb +165 -0
  14. data/lib/genfrag/app/search_command/process_file.rb +125 -0
  15. data/lib/genfrag/app/search_command/trim.rb +121 -0
  16. data/lib/genfrag/debug.rb +0 -0
  17. data/spec/data/index_command/in/a.fasta +109 -0
  18. data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
  19. data/spec/data/index_command/out/2-a_lookup.db +0 -0
  20. data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
  21. data/spec/data/index_command/out/4-a_lookup.db +0 -0
  22. data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
  23. data/spec/data/index_command/out/6-a_lookup.db +0 -0
  24. data/spec/data/index_command/out/a.fasta.db +0 -0
  25. data/spec/data/index_command/out/a.fasta.tdf +6 -0
  26. data/spec/genfrag/app/command_spec.rb +55 -0
  27. data/spec/genfrag/app/index_command_spec.rb +258 -0
  28. data/spec/genfrag/app/search_command/match_spec.rb +77 -0
  29. data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
  30. data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
  31. data/spec/genfrag/app/search_command_spec.rb +260 -0
  32. data/spec/genfrag/app_spec.rb +77 -0
  33. data/spec/genfrag_spec.rb +87 -0
  34. data/spec/spec_helper.rb +56 -0
  35. data/tasks/ann.rake +80 -0
  36. data/tasks/bones.rake +20 -0
  37. data/tasks/gem.rake +201 -0
  38. data/tasks/git.rake +40 -0
  39. data/tasks/notes.rake +27 -0
  40. data/tasks/post_load.rake +34 -0
  41. data/tasks/rdoc.rake +50 -0
  42. data/tasks/rubyforge.rake +55 -0
  43. data/tasks/setup.rb +300 -0
  44. data/tasks/spec.rake +54 -0
  45. data/tasks/svn.rake +47 -0
  46. data/tasks/test.rake +40 -0
  47. metadata +136 -0
@@ -0,0 +1,16 @@
1
+ # The list of files that should be ignored by Mr Bones.
2
+ # Lines that start with '#' are comments.
3
+ #
4
+ # A .gitignore file can be used instead by setting it as the ignore
5
+ # file in your Rakefile:
6
+ #
7
+ # PROJ.ignore_file = '.gitignore'
8
+ #
9
+ # For a project with a C extension, the following would be a good set of
10
+ # exclude patterns (uncomment them if you want to use them):
11
+ # *.[oa]
12
+ # *~
13
+ announcement.txt
14
+ coverage
15
+ doc
16
+ pkg
@@ -0,0 +1,4 @@
1
+ == 0.1.0 / 2009-02-04
2
+
3
+ * 1 major enhancement
4
+ - initialize
@@ -0,0 +1,58 @@
1
+ Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.co.jp>.
2
+ You can redistribute it and/or modify it under either the terms of the GPL
3
+ (see COPYING.txt file), or the conditions below:
4
+
5
+ 1. You may make and give away verbatim copies of the source form of the
6
+ software without restriction, provided that you duplicate all of the
7
+ original copyright notices and associated disclaimers.
8
+
9
+ 2. You may modify your copy of the software in any way, provided that
10
+ you do at least ONE of the following:
11
+
12
+ a) place your modifications in the Public Domain or otherwise
13
+ make them Freely Available, such as by posting said
14
+ modifications to Usenet or an equivalent medium, or by allowing
15
+ the author to include your modifications in the software.
16
+
17
+ b) use the modified software only within your corporation or
18
+ organization.
19
+
20
+ c) rename any non-standard executables so the names do not conflict
21
+ with standard executables, which must also be provided.
22
+
23
+ d) make other distribution arrangements with the author.
24
+
25
+ 3. You may distribute the software in object code or executable
26
+ form, provided that you do at least ONE of the following:
27
+
28
+ a) distribute the executables and library files of the software,
29
+ together with instructions (in the manual page or equivalent)
30
+ on where to get the original distribution.
31
+
32
+ b) accompany the distribution with the machine-readable source of
33
+ the software.
34
+
35
+ c) give non-standard executables non-standard names, with
36
+ instructions on where to get the original software distribution.
37
+
38
+ d) make other distribution arrangements with the author.
39
+
40
+ 4. You may modify and include the part of the software into any other
41
+ software (possibly commercial). But some files in the distribution
42
+ are not written by the author, so that they are not under this terms.
43
+
44
+ They are gc.c(partly), utils.c(partly), regex.[ch], st.[ch] and some
45
+ files under the ./missing directory. See each file for the copying
46
+ condition.
47
+
48
+ 5. The scripts and library files supplied as input to or produced as
49
+ output from the software do not automatically fall under the
50
+ copyright of the software, but belong to whomever generated them,
51
+ and may be sold commercially, and may be aggregated with this
52
+ software.
53
+
54
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
55
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
56
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
57
+ PURPOSE.
58
+
@@ -0,0 +1,40 @@
1
+ Genfrag version 0.0.0.1
2
+ by Pjotr Prins and Trevor Wennblom
3
+ http://genfrag.rubyforge.org
4
+ (the "Rough Draught" release)
5
+
6
+
7
+ == DESCRIPTION:
8
+
9
+ This is a development release. Few features are functional at this time.
10
+
11
+ Genfrag allows for rapid in-silico searching of fragments cut by
12
+ different restriction enzymes in large nucleic acid databases,
13
+ followed by matching specificity adapters which allow a further data
14
+ reduction when looking for differential expression of genes and
15
+ markers.
16
+
17
+
18
+ == USAGE:
19
+
20
+ This works
21
+ genfrag index -f example.fasta --re5 BstYI --re3 MseI
22
+ genfrag search -f example.fasta --re5 BstYI --re3 MseI --adapter5 tt
23
+
24
+
25
+ == REQUIREMENTS:
26
+
27
+ * bioruby
28
+ * sqlite3-ruby
29
+
30
+
31
+ == INSTALL:
32
+
33
+ * sudo gem install genfrag
34
+
35
+
36
+ == LICENSE:
37
+
38
+ Copyright (c) 2009 Pjotr Prins and Trevor Wennblom
39
+
40
+ Distributed under the same terms as the Ruby License - see LICENSE.txt
@@ -0,0 +1,53 @@
1
+ # Look in the tasks/setup.rb file for the various options that can be
2
+ # configured in this Rakefile. The .rake files in the tasks directory
3
+ # are where the options are used.
4
+
5
+ begin
6
+ require 'bones'
7
+ Bones.setup
8
+ rescue LoadError
9
+ begin
10
+ load 'tasks/setup.rb'
11
+ rescue LoadError
12
+ raise RuntimeError, '### please install the "bones" gem ###'
13
+ end
14
+ end
15
+
16
+ ensure_in_path 'lib'
17
+ require 'genfrag'
18
+
19
+ #task :default => 'spec:specdoc'
20
+ task :default => 'spec:run'
21
+
22
+ PROJ.name = 'genfrag'
23
+ PROJ.authors = 'Pjotr Prins and Trevor Wennblom'
24
+ PROJ.email = 'trevor@corevx.com'
25
+ PROJ.url = 'http://genfrag.rubyforge.org'
26
+ PROJ.version = Genfrag::VERSION
27
+ PROJ.release_name = 'Rough Draught'
28
+ PROJ.ruby_opts = %w[-W0]
29
+ PROJ.readme_file = 'README.rdoc'
30
+ PROJ.ignore_file = '.gitignore'
31
+ PROJ.exclude << 'genfrag.gemspec'
32
+ PROJ.exclude << '.git'
33
+
34
+ PROJ.rubyforge.name = 'genfrag'
35
+
36
+ PROJ.spec.opts << '--color'
37
+
38
+ PROJ.gem.extras[:post_install_message] = <<-MSG
39
+ --------------------------------------------
40
+ Genfrag installed
41
+ Type 'genfrag -h' for a list of commands
42
+ --------------------------------------------
43
+ MSG
44
+
45
+ task 'ann:prereqs' do
46
+ PROJ.name = 'Genfrag'
47
+ end
48
+
49
+ depend_on 'bioruby'
50
+ depend_on 'rake'
51
+
52
+
53
+ # EOF
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path(
4
+ File.join(File.dirname(__FILE__), %w[.. lib genfrag]))
5
+
6
+ Genfrag::App.cli_run ARGV
7
+
8
+ # EOF
@@ -0,0 +1,129 @@
1
+
2
+ module Genfrag
3
+
4
+ # :stopdoc:
5
+ VERSION = '0.0.0.1'
6
+ LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
7
+ PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
8
+ # :startdoc:
9
+
10
+ # Returns the version string for the library.
11
+ #
12
+ def self.version
13
+ VERSION
14
+ end
15
+
16
+ # Returns the library path for the module. If any arguments are given,
17
+ # they will be joined to the end of the library path using
18
+ # <tt>File.join</tt>.
19
+ #
20
+ def self.libpath( *args )
21
+ args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
22
+ end
23
+
24
+ # Returns the path for the module. If any arguments are given,
25
+ # they will be joined to the end of the path using
26
+ # <tt>File.join</tt>.
27
+ #
28
+ def self.path( *args )
29
+ args.empty? ? PATH : ::File.join(PATH, args.flatten)
30
+ end
31
+
32
+ # Utility method used to require all files ending in .rb that lie in the
33
+ # directory below this file that has the same name as the filename passed
34
+ # in. Optionally, a specific _directory_ name can be passed in such that
35
+ # the _filename_ does not have to be equivalent to the directory.
36
+ #
37
+ def self.require_all_libs_relative_to( fname, dir = nil )
38
+ dir ||= ::File.basename(fname, '.*')
39
+ search_me = ::File.expand_path(
40
+ ::File.join(::File.dirname(fname), dir, '**', '*.rb'))
41
+
42
+ Dir.glob(search_me).sort.each {|rb| require rb}
43
+ end
44
+
45
+ def self.tracktime verbose=true, f=nil
46
+ # Info:: Pjotr's shared Ruby modules
47
+ # Author:: Pjotr Prins
48
+ # mail:: pjotr.public05@thebird.nl
49
+ # Copyright:: July 2007
50
+ # License:: Ruby License
51
+ begin
52
+ t1 = Time.now
53
+ yield
54
+
55
+ ensure
56
+ t2 = Time.now
57
+ dt = t2 - t1
58
+ if verbose
59
+ if f == nil
60
+ f = $stdout
61
+ end
62
+ f.print "\nElapsed time "
63
+ hours = dt.to_i/3600
64
+ dt -= hours*3600
65
+ mins = dt.to_i/60
66
+ dt -= mins*60
67
+ secs = dt
68
+ secs = secs.to_i if secs > 25
69
+ if hours > 0
70
+ f.print hours.to_i," hours "
71
+ end
72
+ if mins > 0
73
+ f.print mins.to_i," minutes "
74
+ end
75
+ f.print secs," seconds\n"
76
+ end
77
+ end
78
+ end
79
+
80
+ # Create a unique filename for the frequency file out of a combination of filenames
81
+ #
82
+ def self.name_freq_lookup(input_filenames=[],filefasta=nil,filelookup=nil,re5=nil,re3=nil)
83
+ input_filenames = [] if input_filenames.nil?
84
+ if filelookup
85
+ # FIXME used to be gsub! - make sure it still works in code
86
+ return filelookup.gsub(/\.(db|tdf)$/, '')
87
+ elsif !input_filenames.empty?
88
+ if re5 and re3
89
+ [input_filenames.sort,re5.downcase,re3.downcase,'index'].join('_').gsub(/\//,'x')
90
+ else
91
+ raise "re5 or re3 is undefined"
92
+ end
93
+ elsif filefasta
94
+ # construct default name
95
+ if re5 and re3
96
+ [name_normalized_fasta(nil,filefasta),re5.downcase,re3.downcase,'index'].join('_').gsub(/\//,'x')
97
+ else
98
+ raise "re5 or re3 is undefined"
99
+ end
100
+ else
101
+ raise "--lookup undefined and no default filenames passed"
102
+ end
103
+ end
104
+
105
+ # Create a unique filename out of a combination of filenames
106
+ #
107
+ def self.name_normalized_fasta(input_filenames=[],filefasta=nil)
108
+ if filefasta
109
+ # FIXME used to be gsub! - make sure it still works in code
110
+ return filefasta.gsub(/\.(db|tdf)$/, '')
111
+ elsif !input_filenames.empty?
112
+ return [input_filenames.sort, 'normalized'].join('_').gsub(/\//,'x')
113
+ else
114
+ raise "--fasta undefined and no default filenames passed"
115
+ end
116
+ end
117
+
118
+ # Return the name of the adapters file without its extension
119
+ #
120
+ def self.name_adapters(fileadapters=nil)
121
+ return nil if !fileadapters
122
+ return fileadapters.gsub(/\.(db|tdf)$/, '')
123
+ end
124
+
125
+ end # module Genfrag
126
+
127
+ Genfrag.require_all_libs_relative_to(__FILE__)
128
+
129
+ # EOF
@@ -0,0 +1,105 @@
1
+ if ENV['BIORUBY_HOME']
2
+ $: << File.join(ENV['BIORUBY_HOME'],'lib')
3
+ else
4
+ require 'rubygems'
5
+ end
6
+
7
+ begin
8
+ require 'sqlite3'
9
+ rescue LoadError
10
+ $stderr.print "Warning: no sqlite installed"
11
+ end
12
+
13
+ require 'fileutils'
14
+ require 'optparse'
15
+ require 'ostruct'
16
+ require 'bio'
17
+ require 'csv'
18
+
19
+
20
+ module Genfrag
21
+ class App
22
+
23
+
24
+ # Create a new instance of App, and run the +genfrag+ application given
25
+ # the command line _args_.
26
+ #
27
+ def self.cli_run( args )
28
+ self.new.cli_run args
29
+ end
30
+
31
+ # Create a new main instance using _out_ for standard output and _err_ for
32
+ # error messages.
33
+ #
34
+ def initialize( out = STDOUT, err = STDERR )
35
+ @out = out
36
+ @err = err
37
+ end
38
+
39
+ # Parse the desired user command and run that command object.
40
+ #
41
+ def cli_run( args )
42
+ cmd_str = args.shift
43
+ cmd = case cmd_str
44
+ when 'index'; IndexCommand.new(@out, @err)
45
+ when 'search'; SearchCommand.new(@out, @err)
46
+ when 'info'; InfoCommand.new(@out, @err)
47
+ when nil, '-h', '--help'
48
+ help
49
+ when '-V', '--version'
50
+ @out.puts "Genfrag #{::Genfrag::VERSION}"
51
+ nil
52
+ else
53
+ raise "Unknown command #{cmd_str.inspect}"
54
+ end
55
+
56
+ cmd.cli_run args if cmd
57
+
58
+ rescue StandardError => err
59
+ @err.puts "ERROR: While executing genfrag ... (#{err.class})"
60
+ @err.puts " #{err.to_s}"
61
+ @err.puts %Q( #{err.backtrace.join("\n\t")})
62
+ exit 1
63
+ end
64
+
65
+ # Show the toplevel help message.
66
+ #
67
+ def help
68
+ @out.puts <<-MSG
69
+
70
+ GenFrag allows for rapid in-silico searching of fragments cut by
71
+ different restriction enzymes in large nucleotide acid databases,
72
+ followed by matching specificity adapters which allow a further data
73
+ reduction when looking for differential expression of genes and
74
+ markers.
75
+
76
+ Usage:
77
+ genfrag -h/--help
78
+ genfrag -V/--version
79
+ genfrag command [options] [arguments]
80
+
81
+ Examples:
82
+ genfrag index -f example.fasta --RE5 BstYI --RE3 MseI
83
+ genfrag search -f example.fasta --RE5 BstYI --RE3 MseI --adapter5 ct
84
+
85
+ Commands:
86
+ genfrag index initialize the index
87
+ genfrag search search FIXME
88
+ genfrag info show information about FIXME
89
+
90
+ Further Help:
91
+ Each command has a '--help' option that will provide detailed
92
+ information for that command.
93
+
94
+ http://genfrag.rubyforge.org/
95
+
96
+ MSG
97
+ nil
98
+ end
99
+
100
+ end # class App
101
+ end # module Genfrag
102
+
103
+ Genfrag.require_all_libs_relative_to(__FILE__)
104
+
105
+ # EOF
@@ -0,0 +1,145 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class Command
6
+
7
+ attr_accessor :options # a Hash used by optparse
8
+ attr_accessor :ops # an OpenStruct of the options
9
+
10
+ def initialize( out = STDOUT, err = STDERR )
11
+ @out = out
12
+ @err = err
13
+ @options = {
14
+ # :skeleton_dir => File.join(mrbones_dir, 'data'),
15
+ # :with_tasks => false,
16
+ # :verbose => false,
17
+ # :name => nil,
18
+ # :output_dir => nil
19
+ }
20
+ @ops = OpenStruct.new
21
+ #@options[:skeleton_dir] = ::Bones.path('data') unless test(?d, skeleton_dir)
22
+ end
23
+
24
+ def cli_run( args )
25
+ raise NotImplementedError
26
+ end
27
+
28
+ def run( args )
29
+ raise NotImplementedError
30
+ end
31
+
32
+ # Print running output when used in command-line mode
33
+ #
34
+ def cli_p(cli=true, str='')
35
+ return false if !cli or @ops.quiet
36
+ @out.puts str
37
+ end
38
+
39
+ # Print an error from the command-line options
40
+ #
41
+ def clierr_p(str)
42
+ @out.puts
43
+ @err.puts "Error: #{str}"
44
+ @out.puts
45
+ @out.puts opt_parser
46
+ end
47
+
48
+ # Define the available command-line options
49
+ #
50
+ def standard_options
51
+ {
52
+ :verbose => ['-v', '--verbose', 'enable verbose output',
53
+ lambda {
54
+ options[:verbose] = true
55
+ }],
56
+ :tracktime => ['-m', '--tracktime', 'track execution time',
57
+ lambda {
58
+ options[:tracktime] = true
59
+ }],
60
+ :quiet => ['-q', '--quiet', 'silence output',
61
+ lambda {
62
+ options[:quiet] = true
63
+ }],
64
+ :indir => ['-i', '--in DIR', String, 'input directory', "(default #{Dir.pwd})",
65
+ lambda{ |value|
66
+ options[:indir] = value
67
+ }],
68
+ :outdir => ['-o', '--out DIR', String, 'output directory', "(default #{Dir.pwd})",
69
+ lambda{ |value|
70
+ options[:outdir] = value
71
+ }],
72
+ :re5 => ['-5', '--re5 ENZYME', String, "5' restriction enzyme",
73
+ lambda { |value|
74
+ options[:re5] = value
75
+ }],
76
+ :re3 => ['-3', '--re3 ENZYME', String, "3' restriction enzyme",
77
+ lambda { |value|
78
+ options[:re3] = value
79
+ }],
80
+ :sqlite => ['-t', '--sqlite', 'use sqlite', '(default is CSV)',
81
+ lambda {
82
+ options[:sqlite] = true
83
+ }],
84
+ :filelookup => ['-l', '--lookup FILE', String, "name of the frequency lookup file generated by 'index'",
85
+ lambda { |value|
86
+ options[:filelookup] = value
87
+ }],
88
+ :filefasta => ['-f', '--fasta FILE', String, 'name of the Fasta sequences file',
89
+ lambda { |value|
90
+ options[:filefasta] = value
91
+ }],
92
+ :size => ['-s', '--size SIZE', Array, '',
93
+ lambda { |value|
94
+ options[:size] = value
95
+ }],
96
+
97
+ :adapter5 => ['-y', '--adapter5 ADAPTER', String, '',
98
+ lambda { |value|
99
+ options[:adapter5] = value
100
+ }],
101
+ :adapter3 => ['-z', '--adapter3 ADAPTER', String, '',
102
+ lambda { |value|
103
+ options[:adapter3] = value
104
+ }],
105
+
106
+ :named_adapter5 => ['-b', '--named-adapter5 NAME', String, '',
107
+ lambda { |value|
108
+ options[:named_adapter5] = value
109
+ }],
110
+ :named_adapter3 => ['-c', '--named-adapter3 NAME', String, '',
111
+ lambda { |value|
112
+ options[:named_adapter3] = value
113
+ }],
114
+
115
+ :adapter5_size => ['-d', '--adapter5-size SIZE', Integer, '',
116
+ lambda { |value|
117
+ options[:adapter5_size] = value
118
+ }],
119
+ :adapter3_size => ['-e', '--adapter3-size SIZE', Integer, '',
120
+ lambda { |value|
121
+ options[:adapter3_size] = value
122
+ }],
123
+
124
+ :adapter5_sequence => ['-w', '--adapter5-sequence SEQUENCE', String, '',
125
+ lambda { |value|
126
+ options[:adapter5_sequence] = value
127
+ }],
128
+ :adapter3_sequence => ['-x', '--adapter3-sequence SEQUENCE', String, '',
129
+ lambda { |value|
130
+ options[:adapter3_sequence] = value
131
+ }],
132
+
133
+ :fileadapters => ['-a', '--adapters FILE', String, '',
134
+ lambda { |value|
135
+ options[:fileadapters] = value
136
+ }]
137
+
138
+ }
139
+ end
140
+
141
+ end # class Command
142
+ end # class App
143
+ end # module Genfrag
144
+
145
+ # EOF