biodiversity19 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rvmrc +1 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +49 -0
- data/VERSION +1 -1
- data/lib/biodiversity/parser.rb +25 -0
- data/spec/parser/scientific_name.spec.rb +19 -0
- metadata +54 -7
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use ruby-1.9.2-p290@biodiversity --create
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
archive-tar-minitar (0.5.2)
|
5
|
+
columnize (0.3.4)
|
6
|
+
diff-lcs (1.1.3)
|
7
|
+
facter (1.6.3)
|
8
|
+
git (1.2.5)
|
9
|
+
jeweler (1.6.4)
|
10
|
+
bundler (~> 1.0)
|
11
|
+
git (>= 1.2.5)
|
12
|
+
rake
|
13
|
+
linecache19 (0.5.12)
|
14
|
+
ruby_core_source (>= 0.1.4)
|
15
|
+
parallel (0.5.9)
|
16
|
+
polyglot (0.3.3)
|
17
|
+
rake (0.9.2.2)
|
18
|
+
rspec (2.7.0)
|
19
|
+
rspec-core (~> 2.7.0)
|
20
|
+
rspec-expectations (~> 2.7.0)
|
21
|
+
rspec-mocks (~> 2.7.0)
|
22
|
+
rspec-core (2.7.1)
|
23
|
+
rspec-expectations (2.7.0)
|
24
|
+
diff-lcs (~> 1.1.2)
|
25
|
+
rspec-mocks (2.7.0)
|
26
|
+
ruby-debug-base19 (0.11.25)
|
27
|
+
columnize (>= 0.3.1)
|
28
|
+
linecache19 (>= 0.5.11)
|
29
|
+
ruby_core_source (>= 0.1.4)
|
30
|
+
ruby-debug19 (0.11.6)
|
31
|
+
columnize (>= 0.3.1)
|
32
|
+
linecache19 (>= 0.5.11)
|
33
|
+
ruby-debug-base19 (>= 0.11.19)
|
34
|
+
ruby_core_source (0.1.5)
|
35
|
+
archive-tar-minitar (>= 0.5.2)
|
36
|
+
treetop (1.4.10)
|
37
|
+
polyglot
|
38
|
+
polyglot (>= 0.3.1)
|
39
|
+
|
40
|
+
PLATFORMS
|
41
|
+
ruby
|
42
|
+
|
43
|
+
DEPENDENCIES
|
44
|
+
facter
|
45
|
+
jeweler
|
46
|
+
parallel
|
47
|
+
rspec
|
48
|
+
ruby-debug19
|
49
|
+
treetop
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.5
|
1
|
+
1.0.6
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -23,6 +23,31 @@ module PreProcessor
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
+
class ParallelParser
|
27
|
+
|
28
|
+
def initialize
|
29
|
+
require 'facter'
|
30
|
+
require 'parallel'
|
31
|
+
cpu_num
|
32
|
+
@processes_num = cpu_num > 1 ? cpu_num - 1 : 1
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse(names_list)
|
36
|
+
parsed = Parallel.map(names_list.uniq, :in_processes => @processes_num) { |n| [n, parse_process(n)] }
|
37
|
+
parsed.inject({}) { |res, x| res[x[0]] = x[1]; res }
|
38
|
+
end
|
39
|
+
|
40
|
+
def cpu_num
|
41
|
+
@cpu_num ||= Facter.processorcount.to_i
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def parse_process(name)
|
46
|
+
p = ScientificNameParser.new
|
47
|
+
p.parse(name).to_json rescue {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
26
51
|
# we can use these expressions when we are ready to parse virus names
|
27
52
|
# class VirusParser
|
28
53
|
# def initialize
|
@@ -43,3 +43,22 @@ describe ScientificNameParser do
|
|
43
43
|
parse('Nile virus')[:scientificName][:parser_version].should_not be_nil
|
44
44
|
end
|
45
45
|
end
|
46
|
+
|
47
|
+
|
48
|
+
describe ParallelParser do
|
49
|
+
it "should find number of cpus" do
|
50
|
+
pparser = ParallelParser.new
|
51
|
+
pparser.cpu_num.should > 0
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should parse several names in parallel" do
|
55
|
+
names = []
|
56
|
+
read_test_file { |n| names << (n[:name]) if n[:name] }
|
57
|
+
names.uniq!
|
58
|
+
pparser = ParallelParser.new
|
59
|
+
res = pparser.parse(names)
|
60
|
+
names.size.should > 100
|
61
|
+
res.keys.size.should == names.size
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity19
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,55 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-17 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: jeweler
|
16
|
+
requirement: &70228377889480 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70228377889480
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: treetop
|
27
|
+
requirement: &70228377827560 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70228377827560
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: facter
|
38
|
+
requirement: &70228377827080 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70228377827080
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: parallel
|
49
|
+
requirement: &70228377826600 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70228377826600
|
14
58
|
- !ruby/object:Gem::Dependency
|
15
59
|
name: treetop
|
16
|
-
requirement: &
|
60
|
+
requirement: &70228377826120 !ruby/object:Gem::Requirement
|
17
61
|
none: false
|
18
62
|
requirements:
|
19
63
|
- - ! '>='
|
@@ -21,10 +65,10 @@ dependencies:
|
|
21
65
|
version: '0'
|
22
66
|
type: :runtime
|
23
67
|
prerelease: false
|
24
|
-
version_requirements: *
|
68
|
+
version_requirements: *70228377826120
|
25
69
|
- !ruby/object:Gem::Dependency
|
26
70
|
name: rspec
|
27
|
-
requirement: &
|
71
|
+
requirement: &70228377825640 !ruby/object:Gem::Requirement
|
28
72
|
none: false
|
29
73
|
requirements:
|
30
74
|
- - ! '>='
|
@@ -32,7 +76,7 @@ dependencies:
|
|
32
76
|
version: '0'
|
33
77
|
type: :development
|
34
78
|
prerelease: false
|
35
|
-
version_requirements: *
|
79
|
+
version_requirements: *70228377825640
|
36
80
|
description: Tools for biodiversity informatics
|
37
81
|
email: dmozzherin@gmail.com
|
38
82
|
executables:
|
@@ -44,6 +88,9 @@ extra_rdoc_files:
|
|
44
88
|
- README.rdoc
|
45
89
|
files:
|
46
90
|
- .document
|
91
|
+
- .rvmrc
|
92
|
+
- Gemfile
|
93
|
+
- Gemfile.lock
|
47
94
|
- LICENSE
|
48
95
|
- README.rdoc
|
49
96
|
- Rakefile
|
@@ -89,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
136
|
version: '0'
|
90
137
|
requirements: []
|
91
138
|
rubyforge_project:
|
92
|
-
rubygems_version: 1.8.
|
139
|
+
rubygems_version: 1.8.10
|
93
140
|
signing_key:
|
94
141
|
specification_version: 3
|
95
142
|
summary: Parser of scientific names
|