dimus-biodiversity 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/README.rdoc +2 -2
  2. data/bin/nnparse +29 -9
  3. metadata +3 -3
data/README.rdoc CHANGED
@@ -11,11 +11,11 @@ To install gem you need RubyGems >= 1.2.0
11
11
 
12
12
  == Example usage
13
13
 
14
- * You can parse file with species names from command line. File should contain one scientific name per line
14
+ You can parse file with species names from command line. File should contain one scientific name per line
15
15
 
16
16
  nnparser file_with_names
17
17
 
18
- * You can use it as a library
18
+ You can use it as a library
19
19
 
20
20
  require 'biodiversity'
21
21
 
data/bin/nnparse CHANGED
@@ -4,25 +4,45 @@ gem 'dimus-biodiversity'
4
4
 
5
5
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
6
6
  require 'biodiversity'
7
+ require 'yaml'
7
8
 
8
9
  if ARGV.empty?
9
- puts "Usage:\n\nnnparse file_with_scientific_names\n\n"
10
+ puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.yml\n\n"
10
11
  exit
11
12
  end
12
13
 
13
14
 
14
15
  parser = ScientificNameParser.new
15
- debug = true
16
+
17
+ output = ARGV[1] || 'parsed.yml'
18
+ o = File.open(output,'w')
19
+
16
20
  # parse a file with names
17
21
  count = count2 = 0
18
- IO.foreach(ARGV[-1]) do |n|
19
- puts 'started' if count2 == 0 && debug
22
+ names = []
23
+ IO.foreach(ARGV[0]) do |n|
24
+ name_dict = {}
25
+ puts 'Parsing names' if count2 == 0
20
26
  count2 += 1
21
- puts count2.to_s + "/" + count.to_s if count2 % 10000 == 0 && debug
27
+ p count2 if count2 % 5000 == 0
22
28
  n.strip!
23
- unless parser.parse n
24
- puts n
29
+ name_dict = {:input => n}
30
+ parsed = parser.parse n
31
+ unless parsed
32
+ name_dict[:details] = {:parsed => false}
33
+ names << name_dict
25
34
  count += 1
26
- end
35
+ else
36
+ name_dict[:output] = parsed.value
37
+ name_dict[:caononical] = parsed.canonical
38
+ name_dict[:details] = parsed.details
39
+ name_dict[:parsed => true]
40
+ names << name_dict
41
+ end
27
42
  end
28
- puts count.to_s
43
+ $KCODE = 'UTF8'
44
+ puts "Converting results to YAML"
45
+ results = YAML.dump(names)
46
+ puts "Writing restuls to #{output} file"
47
+ o.write(results)
48
+ puts "Found #{count2} records, #{count} of them could not be parsed."
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -23,8 +23,8 @@ dependencies:
23
23
  version:
24
24
  description: Biodiversity library provides a parser tool for scientific species names
25
25
  email: dmozzherin {et} eol {dt} org
26
- executables: []
27
-
26
+ executables:
27
+ - nnparse
28
28
  extensions: []
29
29
 
30
30
  extra_rdoc_files: []