bio-ipcress 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +2 -3
- data/README.md +4 -3
- data/VERSION +1 -1
- data/bin/pcr.rb +135 -0
- data/test/test-script.rb +21 -0
- metadata +62 -51
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 04f49808a34b7c50343255fc72f11eda8ca54de7
|
4
|
+
data.tar.gz: a9e7a50b28bba9ad139c065349291353d1ace24b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5d29e0fef85c45118303a32469d305911815615300c018d3dd12c48537301bb92afeb4a6bb685aad0c6ddf522f2bb18efbec0e144502f49be22b5b8cc7d3a0b8
|
7
|
+
data.tar.gz: bf90fc0954bd8ba9f187f65d0c7d31676be8fa6abbf5d792b029745a17030c8e0520d54b05229180c058932dfaa03669efa3c9e3b20c8425d5c7d70e08837017
|
data/Gemfile
CHANGED
@@ -9,8 +9,7 @@ gem 'bio', '>=1.4.2'
|
|
9
9
|
group :development do
|
10
10
|
gem "shoulda", ">= 0"
|
11
11
|
gem "rdoc", "~> 3.12"
|
12
|
-
gem "jeweler", "~>
|
12
|
+
gem "jeweler", "~> 2.3", ">=2.3.2"
|
13
13
|
gem "bundler", ">= 1.0.21"
|
14
|
-
gem
|
15
|
-
gem "rdoc", "~> 3.12"
|
14
|
+
gem 'test-unit'
|
16
15
|
end
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ results = Bio::Ipcress.run(
|
|
17
17
|
primer_set,
|
18
18
|
'Methanocella_conradii_16s.fa', #this file is in the test/data/Ipcress directory
|
19
19
|
{:min_distance => 2, :max_distance => 10000})
|
20
|
-
#=>
|
20
|
+
#=> An array of Bio::Ipcress::Result objects, parsed from
|
21
21
|
#
|
22
22
|
#Ipcress result
|
23
23
|
#--------------
|
@@ -59,8 +59,9 @@ res.forward_mismatches #=> 1
|
|
59
59
|
res.reverse_mismatches #=> 1
|
60
60
|
```
|
61
61
|
|
62
|
-
There appears to be a slight bug in iPCRess, in the way it handles primers with 'wobble' bases like
|
63
|
-
|
62
|
+
There appears to be a slight bug in iPCRess, in the way it handles primers with 'wobble' bases like the
|
63
|
+
last base of AAACTY,
|
64
|
+
which indicates that both AAACTC and AAACTT are added as primers.
|
64
65
|
iPCRess always suggests that there is at least a single mismatch,
|
65
66
|
when this is not always the case. To work around this, the
|
66
67
|
```Result#recalculate_mismatches_from_alignments``` method re-computes the
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.1.0
|
data/bin/pcr.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-logger'
|
5
|
+
require 'bio-ipcress'
|
6
|
+
require 'bio'
|
7
|
+
|
8
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
|
9
|
+
|
10
|
+
# Parse command line options into the options hash
|
11
|
+
options = {
|
12
|
+
:logger => 'stderr',
|
13
|
+
:log_level => 'info',
|
14
|
+
:print_amplicon => false,
|
15
|
+
:num_mismatches => 3,
|
16
|
+
}
|
17
|
+
o = OptionParser.new do |opts|
|
18
|
+
opts.banner = "
|
19
|
+
Usage: #{SCRIPT_NAME} <arguments>
|
20
|
+
|
21
|
+
Take a set of primers and a fasta file of genome(s), and output each place that the primer hits \n\n"
|
22
|
+
|
23
|
+
opts.on("--primer1 PRIMER", "sequence of the forward primer [required]") do |arg|
|
24
|
+
options[:primer1] = arg
|
25
|
+
end
|
26
|
+
opts.on("--primer2 PRIMER", "sequence of the reverse primer [required]") do |arg|
|
27
|
+
options[:primer2] = arg
|
28
|
+
end
|
29
|
+
opts.on("--fasta FASTA_FILE[, FASTA_FILE2, ..]", Array, "sequence(s) being assayed [required]") do |arg|
|
30
|
+
options[:fasta] = arg
|
31
|
+
end
|
32
|
+
opts.on("--mismatches NUM", Integer, "max number of allowed mismatches. [default: #{options[:num_mismatches] }]") do |arg|
|
33
|
+
options[:num_mismatches] = arg
|
34
|
+
end
|
35
|
+
opts.on("--print-amplicon", "print out the sequence of the amplicon [default: #{options[:print_amplicon] }]") do
|
36
|
+
options[:print_amplicon] = true
|
37
|
+
end
|
38
|
+
|
39
|
+
# logger options
|
40
|
+
opts.separator "\nVerbosity:\n\n"
|
41
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
42
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
43
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
44
|
+
end; o.parse!
|
45
|
+
if ARGV.length != 0 or options[:primer1].nil? or options[:primer2].nil? or options[:fasta].nil?
|
46
|
+
$stderr.puts o
|
47
|
+
exit 1
|
48
|
+
end
|
49
|
+
# Setup logging
|
50
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
51
|
+
|
52
|
+
|
53
|
+
# make the primer set
|
54
|
+
primer_set = Bio::Ipcress::PrimerSet.new options[:primer1], options[:primer2]
|
55
|
+
|
56
|
+
to_gc_binary = lambda do |seq|
|
57
|
+
str = ''
|
58
|
+
seq.each_char do |char|
|
59
|
+
if %(G C).include?(char)
|
60
|
+
str="#{str}1"
|
61
|
+
else
|
62
|
+
str="#{str}0"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
str
|
66
|
+
end
|
67
|
+
to_gc_count = lambda do |seq|
|
68
|
+
count = 0
|
69
|
+
seq.each_char do |char|
|
70
|
+
count += 1 if %(G C).include?(char)
|
71
|
+
end
|
72
|
+
count
|
73
|
+
end
|
74
|
+
|
75
|
+
# output characters of each hit
|
76
|
+
headers = %w(
|
77
|
+
target
|
78
|
+
mismatches_fwd
|
79
|
+
mismatches_rev
|
80
|
+
length
|
81
|
+
gc_of_forward_matching
|
82
|
+
gc_of_reverse_matching
|
83
|
+
gc_positions_of_forward_matching
|
84
|
+
gc_positions_of_reverse_matching
|
85
|
+
)
|
86
|
+
headers += ['amplicon'] if options[:print_amplicon]
|
87
|
+
puts headers.join("\t")
|
88
|
+
|
89
|
+
options[:fasta].each do |fasta|
|
90
|
+
# run ipcress
|
91
|
+
mismatch_param = 3 #default to 3 so ipcress bug gets worked around, and filter later
|
92
|
+
mismatch_param = options[:num_mismatches] if options[:num_mismatches] > mismatch_param
|
93
|
+
|
94
|
+
results = Bio::Ipcress.run primer_set, fasta, :mismatches => mismatch_param
|
95
|
+
|
96
|
+
seqs = {}
|
97
|
+
if options[:print_amplicon]
|
98
|
+
Bio::FlatFile.foreach(fasta) do |e|
|
99
|
+
name = e.definition
|
100
|
+
seq = e.seq.seq
|
101
|
+
raise "Duplicate sequence name found: #{name}" if seqs.key?(name)
|
102
|
+
seqs[name] = seq
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
results.each do |res|
|
107
|
+
misses = res.recalculate_mismatches_from_alignments
|
108
|
+
next if misses.reduce(:+) > options[:num_mismatches]
|
109
|
+
|
110
|
+
to_print = [
|
111
|
+
res.target,
|
112
|
+
misses[0],
|
113
|
+
misses[1],
|
114
|
+
res.length,
|
115
|
+
to_gc_count.call(res.forward_matching_sequence),
|
116
|
+
to_gc_count.call(res.reverse_matching_sequence),
|
117
|
+
to_gc_binary.call(res.forward_matching_sequence),
|
118
|
+
to_gc_binary.call(res.reverse_matching_sequence),
|
119
|
+
]
|
120
|
+
if options[:print_amplicon]
|
121
|
+
name = res.target.gsub(':filter(unmasked)','') #H509DRAFT_scaffold00021.21:filter(unmasked)
|
122
|
+
raise "Unable to find sequence name #{name} in fasta file, possible programming error" if !seqs.key?(name)
|
123
|
+
seq = seqs[name]
|
124
|
+
amplicon = seq[(res.start)...(res.start+res.length)]
|
125
|
+
if res.result_type == 'forward'
|
126
|
+
to_print += [amplicon]
|
127
|
+
elsif res.result_type == 'revcomp'
|
128
|
+
to_print += [Bio::Sequence::NA.new(amplicon).reverse_complement.to_s.upcase]
|
129
|
+
else
|
130
|
+
raise "Unexpected ipcress result type: #{res.result_type}"
|
131
|
+
end
|
132
|
+
end
|
133
|
+
puts to_print.join("\t")
|
134
|
+
end
|
135
|
+
end
|
data/test/test-script.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'bio-commandeer'
|
3
|
+
|
4
|
+
class TestScript < Test::Unit::TestCase
|
5
|
+
should "test 1 result" do
|
6
|
+
path_to_script = File.join(File.dirname(__FILE__),'..','bin','pcr.rb')
|
7
|
+
path_to_data = File.join(File.dirname(__FILE__),'data')
|
8
|
+
|
9
|
+
expected = "target mismatches_fwd mismatches_rev length gc_of_forward_matching gc_of_reverse_matching gc_positions_of_forward_matching gc_positions_of_reverse_matching
|
10
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 418 6 7 11010101100 00110011111
|
11
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 733 6 5 11010101100 00111100001
|
12
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 348 7 10 11010101101 1000010110001001100111
|
13
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 123 8 10 11011101101 1000010110001001100111
|
14
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 418 6 7 11010101100 00110011111
|
15
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 733 6 5 11010101100 00111100001
|
16
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 348 7 10 11010101101 1000010110001001100111
|
17
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 123 8 10 11011101101 1000010110001001100111
|
18
|
+
"
|
19
|
+
assert_equal(expected, Bio::Commandeer.run("#{path_to_script} --primer1 GGTCACTGCTA --primer2 GGCTACCTTGTTACGACTTAAC --fasta #{path_to_data}/Ipcress/Methanocella_conradii_16s_twice.fa"))
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,109 +1,123 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-ipcress
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ben J Woodcroft
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2017-01-09 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bio
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 1.4.2
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.4.2
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: shoulda
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rdoc
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- - ~>
|
45
|
+
- - "~>"
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '3.12'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.12'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: jeweler
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.3.2
|
62
|
+
- - "~>"
|
53
63
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
64
|
+
version: '2.3'
|
55
65
|
type: :development
|
56
66
|
prerelease: false
|
57
|
-
version_requirements:
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 2.3.2
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.3'
|
58
75
|
- !ruby/object:Gem::Dependency
|
59
76
|
name: bundler
|
60
|
-
requirement:
|
61
|
-
none: false
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
62
78
|
requirements:
|
63
|
-
- -
|
79
|
+
- - ">="
|
64
80
|
- !ruby/object:Gem::Version
|
65
81
|
version: 1.0.21
|
66
82
|
type: :development
|
67
83
|
prerelease: false
|
68
|
-
version_requirements:
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: bio
|
71
|
-
requirement: &72405540 !ruby/object:Gem::Requirement
|
72
|
-
none: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
85
|
requirements:
|
74
|
-
- -
|
86
|
+
- - ">="
|
75
87
|
- !ruby/object:Gem::Version
|
76
|
-
version: 1.
|
77
|
-
type: :development
|
78
|
-
prerelease: false
|
79
|
-
version_requirements: *72405540
|
88
|
+
version: 1.0.21
|
80
89
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
82
|
-
requirement:
|
83
|
-
none: false
|
90
|
+
name: test-unit
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
84
92
|
requirements:
|
85
|
-
- -
|
93
|
+
- - ">="
|
86
94
|
- !ruby/object:Gem::Version
|
87
|
-
version: '
|
95
|
+
version: '0'
|
88
96
|
type: :development
|
89
97
|
prerelease: false
|
90
|
-
version_requirements:
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
91
103
|
description: a programmatic interface to the iPCRess in-silico PCR software. iPCRess
|
92
104
|
is part of the exonerate suite.
|
93
105
|
email: gmail.com after donttrustben
|
94
|
-
executables:
|
106
|
+
executables:
|
107
|
+
- pcr.rb
|
95
108
|
extensions: []
|
96
109
|
extra_rdoc_files:
|
97
110
|
- LICENSE.txt
|
98
111
|
- README.md
|
99
112
|
files:
|
100
|
-
- .document
|
101
|
-
- .travis.yml
|
113
|
+
- ".document"
|
114
|
+
- ".travis.yml"
|
102
115
|
- Gemfile
|
103
116
|
- LICENSE.txt
|
104
117
|
- README.md
|
105
118
|
- Rakefile
|
106
119
|
- VERSION
|
120
|
+
- bin/pcr.rb
|
107
121
|
- lib/bio-ipcress.rb
|
108
122
|
- lib/bio/appl/ipcress.rb
|
109
123
|
- test/data/Ipcress/Methanocella_conradii_16s.fa
|
@@ -113,33 +127,30 @@ files:
|
|
113
127
|
- test/data/Ipcress/input1_with_insert.txt
|
114
128
|
- test/data/Ipcress/input2.txt
|
115
129
|
- test/helper.rb
|
130
|
+
- test/test-script.rb
|
116
131
|
- test/test_bio-ipcress.rb
|
117
132
|
homepage: http://github.com/wwood/bioruby-ipcress
|
118
133
|
licenses:
|
119
134
|
- MIT
|
135
|
+
metadata: {}
|
120
136
|
post_install_message:
|
121
137
|
rdoc_options: []
|
122
138
|
require_paths:
|
123
139
|
- lib
|
124
140
|
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
-
none: false
|
126
141
|
requirements:
|
127
|
-
- -
|
142
|
+
- - ">="
|
128
143
|
- !ruby/object:Gem::Version
|
129
144
|
version: '0'
|
130
|
-
segments:
|
131
|
-
- 0
|
132
|
-
hash: 237777805
|
133
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
|
-
none: false
|
135
146
|
requirements:
|
136
|
-
- -
|
147
|
+
- - ">="
|
137
148
|
- !ruby/object:Gem::Version
|
138
149
|
version: '0'
|
139
150
|
requirements: []
|
140
151
|
rubyforge_project:
|
141
|
-
rubygems_version:
|
152
|
+
rubygems_version: 2.5.2
|
142
153
|
signing_key:
|
143
|
-
specification_version:
|
154
|
+
specification_version: 4
|
144
155
|
summary: a programmatic interface to the iPCRess in-silico PCR software
|
145
156
|
test_files: []
|