biodiversity19 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.rvmrc +1 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +49 -0
- data/VERSION +1 -1
- data/lib/biodiversity/parser.rb +25 -0
- data/spec/parser/scientific_name.spec.rb +19 -0
- metadata +54 -7
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use ruby-1.9.2-p290@biodiversity--create
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
archive-tar-minitar (0.5.2)
|
5
|
+
columnize (0.3.4)
|
6
|
+
diff-lcs (1.1.3)
|
7
|
+
facter (1.6.3)
|
8
|
+
git (1.2.5)
|
9
|
+
jeweler (1.6.4)
|
10
|
+
bundler (~> 1.0)
|
11
|
+
git (>= 1.2.5)
|
12
|
+
rake
|
13
|
+
linecache19 (0.5.12)
|
14
|
+
ruby_core_source (>= 0.1.4)
|
15
|
+
parallel (0.5.9)
|
16
|
+
polyglot (0.3.3)
|
17
|
+
rake (0.9.2.2)
|
18
|
+
rspec (2.7.0)
|
19
|
+
rspec-core (~> 2.7.0)
|
20
|
+
rspec-expectations (~> 2.7.0)
|
21
|
+
rspec-mocks (~> 2.7.0)
|
22
|
+
rspec-core (2.7.1)
|
23
|
+
rspec-expectations (2.7.0)
|
24
|
+
diff-lcs (~> 1.1.2)
|
25
|
+
rspec-mocks (2.7.0)
|
26
|
+
ruby-debug-base19 (0.11.25)
|
27
|
+
columnize (>= 0.3.1)
|
28
|
+
linecache19 (>= 0.5.11)
|
29
|
+
ruby_core_source (>= 0.1.4)
|
30
|
+
ruby-debug19 (0.11.6)
|
31
|
+
columnize (>= 0.3.1)
|
32
|
+
linecache19 (>= 0.5.11)
|
33
|
+
ruby-debug-base19 (>= 0.11.19)
|
34
|
+
ruby_core_source (0.1.5)
|
35
|
+
archive-tar-minitar (>= 0.5.2)
|
36
|
+
treetop (1.4.10)
|
37
|
+
polyglot
|
38
|
+
polyglot (>= 0.3.1)
|
39
|
+
|
40
|
+
PLATFORMS
|
41
|
+
ruby
|
42
|
+
|
43
|
+
DEPENDENCIES
|
44
|
+
facter
|
45
|
+
jeweler
|
46
|
+
parallel
|
47
|
+
rspec
|
48
|
+
ruby-debug19
|
49
|
+
treetop
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.5
|
1
|
+
1.0.6
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -23,6 +23,31 @@ module PreProcessor
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
+
class ParallelParser
|
27
|
+
|
28
|
+
def initialize
|
29
|
+
require 'facter'
|
30
|
+
require 'parallel'
|
31
|
+
cpu_num
|
32
|
+
@processes_num = cpu_num > 1 ? cpu_num - 1 : 1
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse(names_list)
|
36
|
+
parsed = Parallel.map(names_list.uniq, :in_processes => @processes_num) { |n| [n, parse_process(n)] }
|
37
|
+
parsed.inject({}) { |res, x| res[x[0]] = x[1]; res }
|
38
|
+
end
|
39
|
+
|
40
|
+
def cpu_num
|
41
|
+
@cpu_num ||= Facter.processorcount.to_i
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def parse_process(name)
|
46
|
+
p = ScientificNameParser.new
|
47
|
+
p.parse(name).to_json rescue {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
26
51
|
# we can use these expressions when we are ready to parse virus names
|
27
52
|
# class VirusParser
|
28
53
|
# def initialize
|
@@ -43,3 +43,22 @@ describe ScientificNameParser do
|
|
43
43
|
parse('Nile virus')[:scientificName][:parser_version].should_not be_nil
|
44
44
|
end
|
45
45
|
end
|
46
|
+
|
47
|
+
|
48
|
+
describe ParallelParser do
|
49
|
+
it "should find number of cpus" do
|
50
|
+
pparser = ParallelParser.new
|
51
|
+
pparser.cpu_num.should > 0
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should parse several names in parallel" do
|
55
|
+
names = []
|
56
|
+
read_test_file { |n| names << (n[:name]) if n[:name] }
|
57
|
+
names.uniq!
|
58
|
+
pparser = ParallelParser.new
|
59
|
+
res = pparser.parse(names)
|
60
|
+
names.size.should > 100
|
61
|
+
res.keys.size.should == names.size
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity19
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,55 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-17 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: jeweler
|
16
|
+
requirement: &70228377889480 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70228377889480
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: treetop
|
27
|
+
requirement: &70228377827560 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70228377827560
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: facter
|
38
|
+
requirement: &70228377827080 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70228377827080
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: parallel
|
49
|
+
requirement: &70228377826600 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70228377826600
|
14
58
|
- !ruby/object:Gem::Dependency
|
15
59
|
name: treetop
|
16
|
-
requirement: &
|
60
|
+
requirement: &70228377826120 !ruby/object:Gem::Requirement
|
17
61
|
none: false
|
18
62
|
requirements:
|
19
63
|
- - ! '>='
|
@@ -21,10 +65,10 @@ dependencies:
|
|
21
65
|
version: '0'
|
22
66
|
type: :runtime
|
23
67
|
prerelease: false
|
24
|
-
version_requirements: *
|
68
|
+
version_requirements: *70228377826120
|
25
69
|
- !ruby/object:Gem::Dependency
|
26
70
|
name: rspec
|
27
|
-
requirement: &
|
71
|
+
requirement: &70228377825640 !ruby/object:Gem::Requirement
|
28
72
|
none: false
|
29
73
|
requirements:
|
30
74
|
- - ! '>='
|
@@ -32,7 +76,7 @@ dependencies:
|
|
32
76
|
version: '0'
|
33
77
|
type: :development
|
34
78
|
prerelease: false
|
35
|
-
version_requirements: *
|
79
|
+
version_requirements: *70228377825640
|
36
80
|
description: Tools for biodiversity informatics
|
37
81
|
email: dmozzherin@gmail.com
|
38
82
|
executables:
|
@@ -44,6 +88,9 @@ extra_rdoc_files:
|
|
44
88
|
- README.rdoc
|
45
89
|
files:
|
46
90
|
- .document
|
91
|
+
- .rvmrc
|
92
|
+
- Gemfile
|
93
|
+
- Gemfile.lock
|
47
94
|
- LICENSE
|
48
95
|
- README.rdoc
|
49
96
|
- Rakefile
|
@@ -89,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
136
|
version: '0'
|
90
137
|
requirements: []
|
91
138
|
rubyforge_project:
|
92
|
-
rubygems_version: 1.8.
|
139
|
+
rubygems_version: 1.8.10
|
93
140
|
signing_key:
|
94
141
|
specification_version: 3
|
95
142
|
summary: Parser of scientific names
|