forage 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/bin/forage +16 -0
  2. data/lib/forage.rb +47 -0
  3. metadata +48 -0
data/bin/forage ADDED
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby

# Command-line front end for Forage: prints every match of an IUPAC query
# found in a FASTA file, one FASTA-style record per match.
#
# usage: forage filename.fasta query

require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'forage'))

# Kernel#abort writes the message to $stderr and exits with status 1, so
# calling scripts can detect the usage error (a bare `exit` reports success).
abort 'usage: forage filename.fasta query(regex)' if ARGV.length != 2

filename, query = ARGV

# Block form of File.open closes the handle even if parsing raises.
File.open(filename) do |handle|
  Forage.new(handle, query).each do |header, matches|
    # Each hit is emitted as ">header:index" followed by the matched text.
    matches.each_with_index do |match, i|
      puts ">#{header}:#{i}\n#{match}"
    end
  end
end
data/lib/forage.rb ADDED
@@ -0,0 +1,47 @@
# Forage is "grep for nucleotide data": it scans the sequences of a
# FASTA-formatted stream for a query written with IUPAC ambiguity codes.
#
# Iterating (#each) consumes the underlying handle; results are yielded as
# [header, matches] pairs for every record with at least one match.
class Forage
  include Enumerable

  # Pattern fragment meaning "any single character".
  WILDCARD = '.'.freeze

  # IUPAC ambiguity code => characters it may stand for. Each ambiguity code
  # also matches itself, so ambiguous bases in the sequence are found too.
  # Codes mapped to WILDCARD match any character.
  IUPAC_CODES = {
    'R' => 'AGR',
    'Y' => 'CTUY',
    'S' => 'GCS',
    'W' => 'ATUW',
    'K' => 'GTUK',
    'M' => 'ACM',
    'B' => 'CGTUB',
    'D' => 'AGTUD',
    'H' => 'ACTUH',
    'V' => 'ACGV',
    'N' => WILDCARD,
    '-' => WILDCARD
  }.freeze

  attr_accessor :handle, :iupac, :regex

  # @param handle [#each] source of FASTA lines (e.g. an open File, a
  #   StringIO, or an Array of Strings)
  # @param query [String] nucleotide query, one IUPAC code per character
  def initialize(handle, query)
    @handle = handle

    # Characters without an entry (A, C, G, T, ...) map to themselves.
    @iupac = Hash.new { |h, k| h[k] = k }
    @iupac.update(IUPAC_CODES)

    @regex = convert(query)
  end

  # Yields [header, matches] for every FASTA record whose sequence contains
  # at least one match of the query.
  #
  # @yieldparam result [Array(String, Array<String>)] header and matches
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    fasta do |header, sequence|
      matches = sequence.scan(@regex)
      yield [header, matches] unless matches.empty?
    end
  end

  # Converts an IUPAC query string into a Regexp. Has no side effects on the
  # receiver's current regex.
  #
  # @param s [String] query, one code per character
  # @return [Regexp]
  def convert(s)
    pattern = s.each_char.map do |code|
      allowed = @iupac[code]
      if allowed == WILDCARD
        # Must stay outside a character class: "[.]" would only match a
        # literal dot instead of any base.
        WILDCARD
      else
        # Escape so characters such as "^", "]" or "\" cannot corrupt the
        # character class.
        "[#{Regexp.escape(allowed)}]"
      end
    end.join

    Regexp.new(pattern)
  end

  # Parses FASTA-formatted data from the handle, yielding one
  # [header, sequence] pair per record. Spaces inside sequence lines are
  # removed; lines appearing before the first ">" header are ignored.
  #
  # @yieldparam record [Array(String, String)] header (without ">") and
  #   the concatenated sequence
  # @return [Enumerator] when called without a block
  def fasta
    return enum_for(:fasta) unless block_given?

    header = nil
    sequence = nil

    @handle.each do |line|
      if line.start_with?('>')
        # A new header closes the previous record, if there was one.
        yield [header, sequence] if header
        header = line[1..-1].strip
        sequence = ''.dup
      elsif header
        sequence << line.strip.delete(' ')
      end
    end

    # Flush the final record; nothing is yielded for header-less input.
    yield [header, sequence] if header
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: forage
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Austin G. Davis-Richardson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-07-06 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Forage is grep for nucleotide data. With Forage, you can grep nucleotide
15
+ sequences in FASTA using the IUPAC ambiguous nucleotides code.
16
+ email: harekrishna@gmail.com
17
+ executables:
18
+ - forage
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - bin/forage
23
+ - lib/forage.rb
24
+ homepage: http://www.github.com/audy/forage
25
+ licenses: []
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project: forage
44
+ rubygems_version: 1.8.5
45
+ signing_key:
46
+ specification_version: 3
47
+ summary: Grep for nucleotide data.
48
+ test_files: []