bio-ipcress 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -3
- data/README.md +4 -3
- data/VERSION +1 -1
- data/bin/pcr.rb +135 -0
- data/test/test-script.rb +21 -0
- metadata +62 -51
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 04f49808a34b7c50343255fc72f11eda8ca54de7
|
4
|
+
data.tar.gz: a9e7a50b28bba9ad139c065349291353d1ace24b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5d29e0fef85c45118303a32469d305911815615300c018d3dd12c48537301bb92afeb4a6bb685aad0c6ddf522f2bb18efbec0e144502f49be22b5b8cc7d3a0b8
|
7
|
+
data.tar.gz: bf90fc0954bd8ba9f187f65d0c7d31676be8fa6abbf5d792b029745a17030c8e0520d54b05229180c058932dfaa03669efa3c9e3b20c8425d5c7d70e08837017
|
data/Gemfile
CHANGED
@@ -9,8 +9,7 @@ gem 'bio', '>=1.4.2'
|
|
9
9
|
group :development do
|
10
10
|
gem "shoulda", ">= 0"
|
11
11
|
gem "rdoc", "~> 3.12"
|
12
|
-
gem "jeweler", "~>
|
12
|
+
gem "jeweler", "~> 2.3", ">=2.3.2"
|
13
13
|
gem "bundler", ">= 1.0.21"
|
14
|
-
gem
|
15
|
-
gem "rdoc", "~> 3.12"
|
14
|
+
gem 'test-unit'
|
16
15
|
end
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ results = Bio::Ipcress.run(
|
|
17
17
|
primer_set,
|
18
18
|
'Methanocella_conradii_16s.fa', #this file is in the test/data/Ipcress directory
|
19
19
|
{:min_distance => 2, :max_distance => 10000})
|
20
|
-
#=>
|
20
|
+
#=> An array of Bio::Ipcress::Result objects, parsed from
|
21
21
|
#
|
22
22
|
#Ipcress result
|
23
23
|
#--------------
|
@@ -59,8 +59,9 @@ res.forward_mismatches #=> 1
|
|
59
59
|
res.reverse_mismatches #=> 1
|
60
60
|
```
|
61
61
|
|
62
|
-
There appears to be a slight bug in iPCRess, in the way it handles primers with 'wobble' bases like
|
63
|
-
|
62
|
+
There appears to be a slight bug in iPCRess, in the way it handles primers with 'wobble' bases like the
|
63
|
+
last base of AAACTY,
|
64
|
+
which indicates that both AAACTC and AAACTT are added as primers.
|
64
65
|
iPCRess always suggests that there is at least a single mismatch,
|
65
66
|
when this is not always the case. To work around this, the
|
66
67
|
```Result#recalculate_mismatches_from_alignments``` method re-computes the
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bin/pcr.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-logger'
|
5
|
+
require 'bio-ipcress'
|
6
|
+
require 'bio'
|
7
|
+
|
8
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
|
9
|
+
|
10
|
+
# Parse command line options into the options hash
|
11
|
+
options = {
|
12
|
+
:logger => 'stderr',
|
13
|
+
:log_level => 'info',
|
14
|
+
:print_amplicon => false,
|
15
|
+
:num_mismatches => 3,
|
16
|
+
}
|
17
|
+
o = OptionParser.new do |opts|
|
18
|
+
opts.banner = "
|
19
|
+
Usage: #{SCRIPT_NAME} <arguments>
|
20
|
+
|
21
|
+
Take a set of primers and a fasta file of genome(s), and output each place that the primer hits \n\n"
|
22
|
+
|
23
|
+
opts.on("--primer1 PRIMER", "sequence of the forward primer [required]") do |arg|
|
24
|
+
options[:primer1] = arg
|
25
|
+
end
|
26
|
+
opts.on("--primer2 PRIMER", "sequence of the reverse primer [required]") do |arg|
|
27
|
+
options[:primer2] = arg
|
28
|
+
end
|
29
|
+
opts.on("--fasta FASTA_FILE[, FASTA_FILE2, ..]", Array, "sequence(s) being assayed [required]") do |arg|
|
30
|
+
options[:fasta] = arg
|
31
|
+
end
|
32
|
+
opts.on("--mismatches NUM", Integer, "max number of allowed mismatches. [default: #{options[:num_mismatches] }]") do |arg|
|
33
|
+
options[:num_mismatches] = arg
|
34
|
+
end
|
35
|
+
opts.on("--print-amplicon", "print out the sequence of the amplicon [default: #{options[:print_amplicon] }]") do
|
36
|
+
options[:print_amplicon] = true
|
37
|
+
end
|
38
|
+
|
39
|
+
# logger options
|
40
|
+
opts.separator "\nVerbosity:\n\n"
|
41
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
42
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
43
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
44
|
+
end; o.parse!
|
45
|
+
if ARGV.length != 0 or options[:primer1].nil? or options[:primer2].nil? or options[:fasta].nil?
|
46
|
+
$stderr.puts o
|
47
|
+
exit 1
|
48
|
+
end
|
49
|
+
# Setup logging
|
50
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
51
|
+
|
52
|
+
|
53
|
+
# make the primer set
|
54
|
+
primer_set = Bio::Ipcress::PrimerSet.new options[:primer1], options[:primer2]
|
55
|
+
|
56
|
+
to_gc_binary = lambda do |seq|
|
57
|
+
str = ''
|
58
|
+
seq.each_char do |char|
|
59
|
+
if %(G C).include?(char)
|
60
|
+
str="#{str}1"
|
61
|
+
else
|
62
|
+
str="#{str}0"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
str
|
66
|
+
end
|
67
|
+
to_gc_count = lambda do |seq|
|
68
|
+
count = 0
|
69
|
+
seq.each_char do |char|
|
70
|
+
count += 1 if %(G C).include?(char)
|
71
|
+
end
|
72
|
+
count
|
73
|
+
end
|
74
|
+
|
75
|
+
# output characters of each hit
|
76
|
+
headers = %w(
|
77
|
+
target
|
78
|
+
mismatches_fwd
|
79
|
+
mismatches_rev
|
80
|
+
length
|
81
|
+
gc_of_forward_matching
|
82
|
+
gc_of_reverse_matching
|
83
|
+
gc_positions_of_forward_matching
|
84
|
+
gc_positions_of_reverse_matching
|
85
|
+
)
|
86
|
+
headers += ['amplicon'] if options[:print_amplicon]
|
87
|
+
puts headers.join("\t")
|
88
|
+
|
89
|
+
options[:fasta].each do |fasta|
|
90
|
+
# run ipcress
|
91
|
+
mismatch_param = 3 #default to 3 so ipcress bug gets worked around, and filter later
|
92
|
+
mismatch_param = options[:num_mismatches] if options[:num_mismatches] > mismatch_param
|
93
|
+
|
94
|
+
results = Bio::Ipcress.run primer_set, fasta, :mismatches => mismatch_param
|
95
|
+
|
96
|
+
seqs = {}
|
97
|
+
if options[:print_amplicon]
|
98
|
+
Bio::FlatFile.foreach(fasta) do |e|
|
99
|
+
name = e.definition
|
100
|
+
seq = e.seq.seq
|
101
|
+
raise "Duplicate sequence name found: #{name}" if seqs.key?(name)
|
102
|
+
seqs[name] = seq
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
results.each do |res|
|
107
|
+
misses = res.recalculate_mismatches_from_alignments
|
108
|
+
next if misses.reduce(:+) > options[:num_mismatches]
|
109
|
+
|
110
|
+
to_print = [
|
111
|
+
res.target,
|
112
|
+
misses[0],
|
113
|
+
misses[1],
|
114
|
+
res.length,
|
115
|
+
to_gc_count.call(res.forward_matching_sequence),
|
116
|
+
to_gc_count.call(res.reverse_matching_sequence),
|
117
|
+
to_gc_binary.call(res.forward_matching_sequence),
|
118
|
+
to_gc_binary.call(res.reverse_matching_sequence),
|
119
|
+
]
|
120
|
+
if options[:print_amplicon]
|
121
|
+
name = res.target.gsub(':filter(unmasked)','') #H509DRAFT_scaffold00021.21:filter(unmasked)
|
122
|
+
raise "Unable to find sequence name #{name} in fasta file, possible programming error" if !seqs.key?(name)
|
123
|
+
seq = seqs[name]
|
124
|
+
amplicon = seq[(res.start)...(res.start+res.length)]
|
125
|
+
if res.result_type == 'forward'
|
126
|
+
to_print += [amplicon]
|
127
|
+
elsif res.result_type == 'revcomp'
|
128
|
+
to_print += [Bio::Sequence::NA.new(amplicon).reverse_complement.to_s.upcase]
|
129
|
+
else
|
130
|
+
raise "Unexpected ipcress result type: #{res.result_type}"
|
131
|
+
end
|
132
|
+
end
|
133
|
+
puts to_print.join("\t")
|
134
|
+
end
|
135
|
+
end
|
data/test/test-script.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'bio-commandeer'
|
3
|
+
|
4
|
+
class TestScript < Test::Unit::TestCase
|
5
|
+
should "test 1 result" do
|
6
|
+
path_to_script = File.join(File.dirname(__FILE__),'..','bin','pcr.rb')
|
7
|
+
path_to_data = File.join(File.dirname(__FILE__),'data')
|
8
|
+
|
9
|
+
expected = "target mismatches_fwd mismatches_rev length gc_of_forward_matching gc_of_reverse_matching gc_positions_of_forward_matching gc_positions_of_reverse_matching
|
10
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 418 6 7 11010101100 00110011111
|
11
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 733 6 5 11010101100 00111100001
|
12
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 348 7 10 11010101101 1000010110001001100111
|
13
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 123 8 10 11011101101 1000010110001001100111
|
14
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 418 6 7 11010101100 00110011111
|
15
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 0 3 733 6 5 11010101100 00111100001
|
16
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 348 7 10 11010101101 1000010110001001100111
|
17
|
+
gi|335929284|gb|JN048683.1|:filter(unmasked) Methanocella conradii HZ254 16S ribosomal RNA gene, partial sequence 3 0 123 8 10 11011101101 1000010110001001100111
|
18
|
+
"
|
19
|
+
assert_equal(expected, Bio::Commandeer.run("#{path_to_script} --primer1 GGTCACTGCTA --primer2 GGCTACCTTGTTACGACTTAAC --fasta #{path_to_data}/Ipcress/Methanocella_conradii_16s_twice.fa"))
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,109 +1,123 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-ipcress
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ben J Woodcroft
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2017-01-09 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bio
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 1.4.2
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.4.2
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: shoulda
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rdoc
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- - ~>
|
45
|
+
- - "~>"
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '3.12'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.12'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: jeweler
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.3.2
|
62
|
+
- - "~>"
|
53
63
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
64
|
+
version: '2.3'
|
55
65
|
type: :development
|
56
66
|
prerelease: false
|
57
|
-
version_requirements:
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 2.3.2
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.3'
|
58
75
|
- !ruby/object:Gem::Dependency
|
59
76
|
name: bundler
|
60
|
-
requirement:
|
61
|
-
none: false
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
62
78
|
requirements:
|
63
|
-
- -
|
79
|
+
- - ">="
|
64
80
|
- !ruby/object:Gem::Version
|
65
81
|
version: 1.0.21
|
66
82
|
type: :development
|
67
83
|
prerelease: false
|
68
|
-
version_requirements:
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: bio
|
71
|
-
requirement: &72405540 !ruby/object:Gem::Requirement
|
72
|
-
none: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
85
|
requirements:
|
74
|
-
- -
|
86
|
+
- - ">="
|
75
87
|
- !ruby/object:Gem::Version
|
76
|
-
version: 1.
|
77
|
-
type: :development
|
78
|
-
prerelease: false
|
79
|
-
version_requirements: *72405540
|
88
|
+
version: 1.0.21
|
80
89
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
82
|
-
requirement:
|
83
|
-
none: false
|
90
|
+
name: test-unit
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
84
92
|
requirements:
|
85
|
-
- -
|
93
|
+
- - ">="
|
86
94
|
- !ruby/object:Gem::Version
|
87
|
-
version: '
|
95
|
+
version: '0'
|
88
96
|
type: :development
|
89
97
|
prerelease: false
|
90
|
-
version_requirements:
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
91
103
|
description: a programmatic interface to the iPCRess in-silico PCR software. iPCRess
|
92
104
|
is part of the exonerate suite.
|
93
105
|
email: gmail.com after donttrustben
|
94
|
-
executables:
|
106
|
+
executables:
|
107
|
+
- pcr.rb
|
95
108
|
extensions: []
|
96
109
|
extra_rdoc_files:
|
97
110
|
- LICENSE.txt
|
98
111
|
- README.md
|
99
112
|
files:
|
100
|
-
- .document
|
101
|
-
- .travis.yml
|
113
|
+
- ".document"
|
114
|
+
- ".travis.yml"
|
102
115
|
- Gemfile
|
103
116
|
- LICENSE.txt
|
104
117
|
- README.md
|
105
118
|
- Rakefile
|
106
119
|
- VERSION
|
120
|
+
- bin/pcr.rb
|
107
121
|
- lib/bio-ipcress.rb
|
108
122
|
- lib/bio/appl/ipcress.rb
|
109
123
|
- test/data/Ipcress/Methanocella_conradii_16s.fa
|
@@ -113,33 +127,30 @@ files:
|
|
113
127
|
- test/data/Ipcress/input1_with_insert.txt
|
114
128
|
- test/data/Ipcress/input2.txt
|
115
129
|
- test/helper.rb
|
130
|
+
- test/test-script.rb
|
116
131
|
- test/test_bio-ipcress.rb
|
117
132
|
homepage: http://github.com/wwood/bioruby-ipcress
|
118
133
|
licenses:
|
119
134
|
- MIT
|
135
|
+
metadata: {}
|
120
136
|
post_install_message:
|
121
137
|
rdoc_options: []
|
122
138
|
require_paths:
|
123
139
|
- lib
|
124
140
|
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
-
none: false
|
126
141
|
requirements:
|
127
|
-
- -
|
142
|
+
- - ">="
|
128
143
|
- !ruby/object:Gem::Version
|
129
144
|
version: '0'
|
130
|
-
segments:
|
131
|
-
- 0
|
132
|
-
hash: 237777805
|
133
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
|
-
none: false
|
135
146
|
requirements:
|
136
|
-
- -
|
147
|
+
- - ">="
|
137
148
|
- !ruby/object:Gem::Version
|
138
149
|
version: '0'
|
139
150
|
requirements: []
|
140
151
|
rubyforge_project:
|
141
|
-
rubygems_version:
|
152
|
+
rubygems_version: 2.5.2
|
142
153
|
signing_key:
|
143
|
-
specification_version:
|
154
|
+
specification_version: 4
|
144
155
|
summary: a programmatic interface to the iPCRess in-silico PCR software
|
145
156
|
test_files: []
|