forage 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/bin/forage +16 -0
  2. data/lib/forage.rb +47 -0
  3. metadata +48 -0
data/bin/forage ADDED
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby

# Command-line front end for Forage.
#
# Usage: forage filename.fasta query
#
# Scans every record of the given FASTA file for the query and prints each
# match as its own FASTA record, with the header ">HEADER:INDEX" where INDEX
# is the zero-based position of the match within that record's match list.

require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'forage'))

# A wrong argument count is an error: report it on stderr and exit non-zero
# so that shell pipelines and scripts can detect the failure.
abort 'usage: forage filename.fasta query(regex)' if ARGV.length != 2

path, query = ARGV

begin
  # The block form of File.open closes the handle even if scanning raises.
  File.open(path) do |handle|
    Forage.new(handle, query).each do |header, matches|
      matches.each_with_index do |match, i|
        puts ">#{header}:#{i}\n#{match}"
      end
    end
  end
rescue Errno::ENOENT, Errno::EACCES, Errno::EISDIR => e
  # Turn the common "cannot read the input" failures into a short message
  # instead of a stack trace.
  abort "forage: #{e.message}"
end
data/lib/forage.rb ADDED
@@ -0,0 +1,47 @@
# Forage searches FASTA-formatted nucleotide data for a query written with
# IUPAC ambiguity codes.
#
# The query is translated character by character into a regular expression
# (see #convert); #each then yields the matches found in every record read
# from the handle. Matching is case-sensitive.
class Forage
  include Enumerable

  attr_accessor :handle, :iupac, :regex

  # Table value meaning "matches any single character".
  WILDCARD = '.'.freeze

  # IUPAC ambiguity code => characters it matches. Each code also matches
  # itself, so ambiguous bases already present in a sequence are found too.
  IUPAC_CODES = {
    'R' => 'AGR',
    'Y' => 'CTUY',
    'S' => 'GCS',
    'W' => 'ATUW',
    'K' => 'GTUK',
    'M' => 'ACM',
    'B' => 'CGTUB',
    'D' => 'AGTUD',
    'H' => 'ACTUH',
    'V' => 'ACGV',
    'N' => WILDCARD,
    '-' => WILDCARD
  }.freeze

  # handle - object whose #each yields the lines of FASTA data (e.g. an IO).
  # query  - String of nucleotide / IUPAC characters to search for.
  def initialize(handle, query)
    @handle = handle

    # Characters without an entry in the table stand for themselves.
    @iupac = Hash.new { |h, k| h[k] = k }.merge(IUPAC_CODES)

    @regex = convert(query)
  end

  # Yields [header, matches] for every record with at least one match, where
  # matches is the Array returned by String#scan for that record's sequence.
  #
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    fasta do |header, sequence|
      next if header.nil?

      matches = sequence.scan(@regex)
      yield [header, matches] unless matches.empty?
    end
  end

  # Converts a query String into a Regexp without touching @regex.
  #
  # Every character becomes one single-character pattern:
  # * a code mapped to the wildcard (N and - by default) becomes ".", so it
  #   really matches any character;
  # * anything else becomes a character class of the mapped characters,
  #   escaped so that "^", "]", "\" and a literal "." are matched literally.
  #
  # Returns the Regexp.
  def convert(s)
    pattern = s.each_char.map do |symbol|
      code = @iupac[symbol] || symbol
      if code == WILDCARD && symbol != WILDCARD
        WILDCARD
      else
        "[#{Regexp.escape(code)}]"
      end
    end

    Regexp.new(pattern.join)
  end

  # Parses FASTA-formatted data from the handle.
  #
  # Yields [header, sequence] once per record: header is the text after ">"
  # with surrounding whitespace stripped, sequence is the concatenation of
  # the record's lines with surrounding whitespace and spaces removed.
  # Lines that appear before the first header belong to no record and are
  # ignored. Nothing is yielded for input without any header.
  #
  # Returns an Enumerator when called without a block.
  def fasta
    return enum_for(:fasta) unless block_given?

    header = nil
    sequence = nil

    @handle.each do |line|
      if line.start_with?('>')
        # A new header closes the record collected so far, if there is one.
        yield [header, sequence] if header
        header = line[1..-1].strip
        sequence = ''.dup
      elsif header
        sequence << line.strip.delete(' ')
      end
    end

    # The last record has no following header to flush it.
    yield [header, sequence] if header
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: forage
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Austin G. Davis-Richardson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-07-06 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Forage is grep for nucleotide data. With Forage, you can grep nucleotide
15
+ sequences in FASTA using the IUPAC ambiguous nucleotides code.
16
+ email: harekrishna@gmail.com
17
+ executables:
18
+ - forage
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - bin/forage
23
+ - lib/forage.rb
24
+ homepage: http://www.github.com/audy/forage
25
+ licenses: []
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project: forage
44
+ rubygems_version: 1.8.5
45
+ signing_key:
46
+ specification_version: 3
47
+ summary: Grep for nucleotide data.
48
+ test_files: []