dimus-biodiversity 0.5.11 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +3 -3
- data/VERSION +1 -1
- data/bin/nnparse +18 -4
- data/biodiversity.gemspec +2 -2
- data/lib/biodiversity/parser.rb +4 -0
- metadata +2 -2
data/README.rdoc
CHANGED
|
@@ -31,13 +31,13 @@ You can use it as a library
|
|
|
31
31
|
parser.all_json
|
|
32
32
|
|
|
33
33
|
# to clean name up
|
|
34
|
-
parser.parse(" Plantago major ")[:normalized]
|
|
34
|
+
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
|
35
35
|
|
|
36
36
|
# to get only cleaned up latin part of the name
|
|
37
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:canonical]
|
|
37
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
|
|
38
38
|
|
|
39
39
|
# to get detailed information about elements of the name
|
|
40
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:details]
|
|
40
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
|
41
41
|
|
|
42
42
|
# to resolve lsid and get back RDF file
|
|
43
43
|
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.5.11
|
|
1
|
+
0.5.13
|
data/bin/nnparse
CHANGED
|
@@ -6,6 +6,7 @@ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
|
6
6
|
require 'biodiversity'
|
|
7
7
|
require 'json'
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
if ARGV.empty?
|
|
10
11
|
puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
|
|
11
12
|
exit
|
|
@@ -14,16 +15,29 @@ end
|
|
|
14
15
|
input = ARGV[0]
|
|
15
16
|
output = ARGV[1] || 'parsed.json'
|
|
16
17
|
|
|
18
|
+
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
|
|
19
|
+
|
|
17
20
|
p = ScientificNameParser.new
|
|
18
21
|
o = open(output, 'w')
|
|
19
|
-
|
|
20
22
|
count = 0
|
|
23
|
+
puts 'Parsing...'
|
|
21
24
|
IO.foreach(input) do |line|
|
|
22
25
|
count += 1
|
|
23
26
|
puts("%s lines parsed" % count) if count % 10000 == 0
|
|
24
|
-
name = line.gsub(/^[\d]*\s*/, '').strip
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
name = line.gsub(/^[\d]*\s*/, '').strip
|
|
28
|
+
begin
|
|
29
|
+
if ruby_min_version < 19
|
|
30
|
+
old_kcode = $KCODE
|
|
31
|
+
$KCODE = 'NONE'
|
|
32
|
+
end
|
|
33
|
+
p.parse(name)
|
|
34
|
+
parsed_data = p.parsed.all_json rescue {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
|
|
35
|
+
if ruby_min_version < 19
|
|
36
|
+
$KCODE = old_kcode
|
|
37
|
+
end
|
|
38
|
+
rescue
|
|
39
|
+
parsed_data = {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
|
|
40
|
+
end
|
|
27
41
|
o.write parsed_data + "\n"
|
|
28
42
|
end
|
|
29
43
|
|
data/biodiversity.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{biodiversity}
|
|
8
|
-
s.version = "0.5.11"
|
|
8
|
+
s.version = "0.5.13"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Dmitry Mozzherin"]
|
|
12
|
-
s.date = %q{2009-08-
|
|
12
|
+
s.date = %q{2009-08-17}
|
|
13
13
|
s.default_executable = %q{nnparse}
|
|
14
14
|
s.description = %q{Tools for biodiversity informatics}
|
|
15
15
|
s.email = %q{dmozzherin@gmail.com}
|
data/lib/biodiversity/parser.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dimus-biodiversity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.11
|
|
4
|
+
version: 0.5.13
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dmitry Mozzherin
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-08-
|
|
12
|
+
date: 2009-08-17 00:00:00 -07:00
|
|
13
13
|
default_executable: nnparse
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|