dimus-biodiversity 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +2 -2
- data/bin/nnparse +29 -9
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -11,11 +11,11 @@ To install gem you need RubyGems >= 1.2.0
|
|
11
11
|
|
12
12
|
== Example usage
|
13
13
|
|
14
|
-
|
14
|
+
You can parse a file with species names from the command line. The file should contain one scientific name per line.
|
15
15
|
|
16
16
|
nnparse file_with_names
|
17
17
|
|
18
|
-
|
18
|
+
You can use it as a library
|
19
19
|
|
20
20
|
require 'biodiversity'
|
21
21
|
|
data/bin/nnparse
CHANGED
@@ -4,25 +4,45 @@ gem 'dimus-biodiversity'
|
|
4
4
|
|
5
5
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
6
6
|
require 'biodiversity'
|
7
|
+
require 'yaml'
|
7
8
|
|
8
9
|
if ARGV.empty?
|
9
|
-
puts "Usage:\n\nnnparse file_with_scientific_names\n\n"
|
10
|
+
puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.yml\n\n"
|
10
11
|
exit
|
11
12
|
end
|
12
13
|
|
13
14
|
|
14
15
|
parser = ScientificNameParser.new
|
15
|
-
|
16
|
+
|
17
|
+
output = ARGV[1] || 'parsed.yml'
|
18
|
+
o = File.open(output,'w')
|
19
|
+
|
16
20
|
# parse a file with names
|
17
21
|
count = count2 = 0
|
18
|
-
|
19
|
-
|
22
|
+
names = []
|
23
|
+
IO.foreach(ARGV[0]) do |n|
|
24
|
+
name_dict = {}
|
25
|
+
puts 'Parsing names' if count2 == 0
|
20
26
|
count2 += 1
|
21
|
-
|
27
|
+
p count2 if count2 % 5000 == 0
|
22
28
|
n.strip!
|
23
|
-
|
24
|
-
|
29
|
+
name_dict = {:input => n}
|
30
|
+
parsed = parser.parse n
|
31
|
+
unless parsed
|
32
|
+
name_dict[:details] = {:parsed => false}
|
33
|
+
names << name_dict
|
25
34
|
count += 1
|
26
|
-
|
35
|
+
else
|
36
|
+
name_dict[:output] = parsed.value
|
37
|
+
name_dict[:caononical] = parsed.canonical
|
38
|
+
name_dict[:details] = parsed.details
|
39
|
+
name_dict[:parsed => true]
|
40
|
+
names << name_dict
|
41
|
+
end
|
27
42
|
end
|
28
|
-
|
43
|
+
$KCODE = 'UTF8'
|
44
|
+
puts "Converting results to YAML"
|
45
|
+
results = YAML.dump(names)
|
46
|
+
puts "Writing restuls to #{output} file"
|
47
|
+
o.write(results)
|
48
|
+
puts "Found #{count2} records, #{count} of them could not be parsed."
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dimus-biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -23,8 +23,8 @@ dependencies:
|
|
23
23
|
version:
|
24
24
|
description: Biodiversity library provides a parser tool for scientific species names
|
25
25
|
email: dmozzherin {et} eol {dt} org
|
26
|
-
executables:
|
27
|
-
|
26
|
+
executables:
|
27
|
+
- - nnparse
|
28
28
|
extensions: []
|
29
29
|
|
30
30
|
extra_rdoc_files: []
|