biodiversity 5.0.1 → 5.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 74596acd42fb5eda1472dabf4ec8976da1e9f90bfd23c07035d4a16608f50ef6
4
- data.tar.gz: 8ce1c6d68c47fddb41b27aa807a54a851ea2204a9b0cfdb7cb05f9be19019907
3
+ metadata.gz: 21aca2b78bc54f22390a57dbdef3550beeb2eb65e3407e8d4a518fedd62d23bf
4
+ data.tar.gz: 8a08979ecb6f77b99928966ed1aaaec34a12048e22effd3c5924a2b6b1820317
5
5
  SHA512:
6
- metadata.gz: af55ab0704fa84e02c490620dd55ae024f130136e096ab0333cd1d77506bef5e9ae2a3039d3be34c9a6ac739f03dee58510efc2f8440759663caab1924b67318
7
- data.tar.gz: 04e28487d5fe1031a34c4065a3e24677d2568bf60a3f46e177c2367e497caf1912688452223895d2a535a56362e7290cac0242ca3e2c102256decd1e3cf3d7ea
6
+ metadata.gz: a308d0af1aadb91ca19402bd61c4933da3555f774f78f4c6c1f9313ea150cf6f62f06c9245f01abaa832a8db5d102961a78e67b2e4e53f44c3110de21ff3c124
7
+ data.tar.gz: 122f7b88eae3c8fe9f4943f49b6bbca6ee46bad15687d65725a88d8e01823b56d8ec7792973d19236bfb67d3f37756a5b88a565a26c863949c845ac6a5aba782
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.0
1
+ 3.0.1
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 5.1.2 -- upgrade to gnparser v1.0.2
2
+
3
+ 5.1.1 -- simplified signature of methods
4
+
5
+ 5.1.0 -- changed signature of methods
6
+
1
7
  5.0.0 -- changed GNparser to v 1.0.0
2
8
 
3
9
  4.0.3 -- fix memory leak in parse_ary method
data/README.md CHANGED
@@ -1,5 +1,4 @@
1
- Biodiversity
2
- ============
1
+ # Biodiversity
3
2
 
4
3
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
5
4
  [![Gem Version][gem_svg]][gem_link]
@@ -8,12 +7,12 @@ Biodiversity
8
7
  Parses taxonomic scientific name and breaks it into semantic elements.
9
8
 
10
9
  **Important**: Biodiversity parser >= 4.0.0 uses binding to
11
- https://gitlab.com/gogna/gnparser and
10
+ `https://github.com/gnames/gnparser` and
12
11
  is not backward compatible with older versions. However it is much much faster
13
12
  and better than previous versions.
14
13
 
15
14
  This gem does not have a remote server or a command line executable anymore.
16
- For such features use https://gitlab.com/gogna/gnparser.
15
+ For such features use `https://github.com/gnames/gnparser`.
17
16
 
18
17
  - [Biodiversity](#biodiversity)
19
18
  - [Installation](#installation)
@@ -24,7 +23,9 @@ For such features use https://gitlab.com/gogna/gnparser.
24
23
 
25
24
  ## Installation
26
25
 
27
- sudo gem install biodiversity
26
+ ```bash
27
+ sudo gem install biodiversity
28
+ ```
28
29
 
29
30
  The gem should work on Linux, Mac and Windows (64bit) machines
30
31
 
@@ -64,7 +65,6 @@ million names on a 4CPU hyperthreaded laptop:
64
65
 
65
66
  You can use it as a library in Ruby:
66
67
 
67
-
68
68
  ```ruby
69
69
  require 'biodiversity'
70
70
 
@@ -92,7 +92,6 @@ Biodiversity::Parser.parse("Plantago").to_json
92
92
  # to clean name up
93
93
  Biodiversity::Parser.parse(" Plantago major ")[:normalized]
94
94
 
95
-
96
95
  # to get canonical form with or without infraspecies ranks, as well as
97
96
  # stemmed version.
98
97
  parsed = Biodiversity::Parser.parse("Seddera latifolia H. & S. var. latifolia")
@@ -103,6 +102,9 @@ parsed[:canonicalName][:stem]
103
102
  # to get detailed information about elements of the name
104
103
  Biodiversity::Parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
105
104
  Braun & Crous 2003")[:details]
105
+
106
+ # to parse a botanical cultivar
107
+ Biodiversity::Parser.parse("Sarracenia flava 'Maxima'", with_cultivars: true)
106
108
  ```
107
109
 
108
110
  'Surrogate' is a broad group which includes 'Barcode of Life' names, and various
@@ -111,6 +113,7 @@ undetermined names with cf. sp. spp. nr. in them:
111
113
  ```ruby
112
114
  parser.parse("Coleoptera BOLD:1234567")[:surrogate]
113
115
  ```
116
+
114
117
  ### What is "nameStringID" in the parsed results?
115
118
 
116
119
  ID field contains UUID v5 hexadecimal string. ID is generated out of bytes
@@ -118,18 +121,16 @@ from the name string itself, and identical id can be generated using [any
118
121
  popular programming language][uuid_examples]. You can read more about UUID
119
122
  version 5 in a [blog post][uuid_blog]
120
123
 
121
- For example "Homo sapiens" should generate "16f235a0-e4a3-529c-9b83-bd15fe722110" UUID
124
+ For example "Homo sapiens" should generate
125
+ "16f235a0-e4a3-529c-9b83-bd15fe722110" UUID
122
126
 
123
- Copyright
124
- ---------
127
+ ## Copyright
125
128
 
126
129
  Authors: [Dmitry Mozzherin][dimus]
127
130
 
128
131
  Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
129
132
 
130
-
131
-
132
- Copyright (c) 2008-2020 Dmitry Mozzherin. See [LICENSE][license]
133
+ Copyright (c) 2008-2021 Dmitry Mozzherin. See [LICENSE][license]
133
134
  for further details.
134
135
 
135
136
  [gem_svg]: https://badge.fury.io/rb/biodiversity.svg
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
19
19
  /* Start of preamble from import "C" comments. */
20
20
 
21
21
 
22
- #line 3 "main.go"
22
+ #line 5 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
25
 
@@ -80,7 +80,7 @@ extern "C" {
80
80
  // 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
81
81
  // parsed details are omitted, if it is 1 -- they are included.
82
82
  // true.
83
- extern char* ParseToString(char* name, char* f, int details);
83
+ extern char* ParseToString(char* name, char* f, int details, int cultivars);
84
84
 
85
85
  // FreeMemory takes a string pointer and frees its memory.
86
86
  extern void FreeMemory(char* p);
@@ -90,7 +90,7 @@ extern void FreeMemory(char* p);
90
90
  // either CSV or JSON format. Format argument can take values of 'csv',
91
91
  // 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
92
92
  // true.
93
- extern char* ParseAryToString(char** in, int length, char* f, int details);
93
+ extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
94
94
 
95
95
  #ifdef __cplusplus
96
96
  }
Binary file
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
19
19
  /* Start of preamble from import "C" comments. */
20
20
 
21
21
 
22
- #line 3 "main.go"
22
+ #line 5 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
25
 
@@ -80,20 +80,17 @@ extern "C" {
80
80
  // 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
81
81
  // parsed details are omitted, if it is 1 -- they are included.
82
82
  // true.
83
-
84
- extern char* ParseToString(char* p0, char* p1, int p2);
83
+ extern char* ParseToString(char* name, char* f, int details, int cultivars);
85
84
 
86
85
  // FreeMemory takes a string pointer and frees its memory.
87
-
88
- extern void FreeMemory(char* p0);
86
+ extern void FreeMemory(char* p);
89
87
 
90
88
  // ParseAryToString function takes an array of names, parsing format, and a
91
89
  // withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
92
90
  // either CSV or JSON format. Format argument can take values of 'csv',
93
91
  // 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
94
92
  // true.
95
-
96
- extern char* ParseAryToString(char** p0, int p1, char* p2, int p3);
93
+ extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
97
94
 
98
95
  #ifdef __cplusplus
99
96
  }
Binary file
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
19
19
  /* Start of preamble from import "C" comments. */
20
20
 
21
21
 
22
- #line 3 "main.go"
22
+ #line 5 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
25
 
@@ -80,17 +80,17 @@ extern "C" {
80
80
  // 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
81
81
  // parsed details are omitted, if it is 1 -- they are included.
82
82
  // true.
83
- extern char* ParseToString(char* name, char* f, int details);
83
+ extern __declspec(dllexport) char* ParseToString(char* name, char* f, int details, int cultivars);
84
84
 
85
85
  // FreeMemory takes a string pointer and frees its memory.
86
- extern void FreeMemory(char* p);
86
+ extern __declspec(dllexport) void FreeMemory(char* p);
87
87
 
88
88
  // ParseAryToString function takes an array of names, parsing format, and a
89
89
  // withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
90
90
  // either CSV or JSON format. Format argument can take values of 'csv',
91
91
  // 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
92
92
  // true.
93
- extern char* ParseAryToString(char** in, int length, char* f, int details);
93
+ extern __declspec(dllexport) char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
94
94
 
95
95
  #ifdef __cplusplus
96
96
  }
Binary file
@@ -28,28 +28,33 @@ module Biodiversity
28
28
  callback(:parser_callback, %i[string], :void)
29
29
 
30
30
  attach_function(:parse_go, :ParseToString,
31
- %i[string string int], :strptr)
31
+ %i[string string int int], :strptr)
32
32
  attach_function(:parse_ary_go, :ParseAryToString,
33
- %i[pointer int string int], :strptr)
33
+ %i[pointer int string int int], :strptr)
34
34
  attach_function(:free_mem, :FreeMemory, %i[pointer], :void)
35
35
 
36
- def self.parse(name, simple: false)
36
+ def self.parse(name, simple: false, with_cultivars: false)
37
37
  format = simple ? 'csv' : 'compact'
38
+ with_details = simple ? 0 : 1
39
+ with_cultivars = with_cultivars ? 1 : 0
38
40
 
39
- parsed, ptr = parse_go(name, format, 0)
41
+ parsed, ptr = parse_go(name, format, with_details, with_cultivars)
40
42
  free_mem(ptr)
41
43
  output(parsed, simple)
42
44
  end
43
45
 
44
- def self.parse_ary(ary, simple: false)
46
+ def self.parse_ary(ary, simple: false, with_cultivars: false)
45
47
  format = simple ? 'csv' : 'compact'
46
- in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
48
+ with_details = simple ? 0 : 1
49
+ with_cultivars = with_cultivars ? 1 : 0
47
50
 
51
+ in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
48
52
  in_ptr.write_array_of_pointer(
49
53
  ary.map { |s| FFI::MemoryPointer.from_string(s) }
50
54
  )
51
55
 
52
- parsed, ptr = parse_ary_go(in_ptr, ary.length, format, 0)
56
+ parsed, ptr = parse_ary_go(in_ptr, ary.length, format,
57
+ with_details, with_cultivars)
53
58
  free_mem(ptr)
54
59
  if simple
55
60
  CSV.new(parsed).map do |row|
@@ -62,11 +67,14 @@ module Biodiversity
62
67
 
63
68
  def self.output(parsed, simple)
64
69
  if simple
70
+ parsed = parsed.force_encoding('UTF-8')
65
71
  csv = CSV.new(parsed)
66
72
  row = csv.readlines[0]
67
73
  csv_row(row)
68
74
  else
69
- JSON.parse(parsed, symbolize_names: true)
75
+ parsed = JSON.parse(parsed, symbolize_names: true)
76
+ parsed[:parserVersion] = Biodiversity.gnparser_version
77
+ parsed
70
78
  end
71
79
  end
72
80
 
@@ -74,7 +82,7 @@ module Biodiversity
74
82
  {
75
83
  id: row[0],
76
84
  verbatim: row[1],
77
- cardinality: row[2],
85
+ cardinality: row[2].to_i,
78
86
  canonical: {
79
87
  stem: row[3],
80
88
  simple: row[4],
@@ -82,7 +90,7 @@ module Biodiversity
82
90
  },
83
91
  authorship: row[6],
84
92
  year: row[7],
85
- quality: row[8]
93
+ quality: row[8].to_i
86
94
  }
87
95
  end
88
96
  end
@@ -2,9 +2,14 @@
2
2
 
3
3
  # Biodiversity module provides a namespace for scientific name parser.
4
4
  module Biodiversity
5
- VERSION = '5.0.1'
5
+ VERSION = '5.3.0'
6
+ GNPARSER_VERSION = 'GNparser 1.3.0+'
6
7
 
7
8
  def self.version
8
9
  VERSION
9
10
  end
11
+
12
+ def self.gnparser_version
13
+ GNPARSER_VERSION
14
+ end
10
15
  end
@@ -4,25 +4,46 @@
4
4
 
5
5
  describe Biodiversity::Parser do
6
6
  describe('parse') do
7
- it 'parses name in simple format' do
7
+ it 'parses name in simple form' do
8
8
  parsed = subject.parse('Homo sapiens Linn.', simple: true)
9
9
  expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
10
10
  expect(parsed[:normalized]).to be_nil
11
11
  end
12
12
 
13
- it 'parsed name in full format' do
14
- parsed = subject.parse('Homo sapiens Linn.')
13
+ it 'parsed name in full form' do
14
+ parsed = subject.parse('Homo sapiens Linn. 1758')
15
15
  expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
16
- expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
16
+ expect(parsed[:normalized]).to eq 'Homo sapiens Linn. 1758'
17
+ expect(parsed[:authorship][:year]).to eq '1758'
18
+ expect(parsed[:words].size).to eq 4
17
19
  end
18
20
 
19
21
  it 'gets quality and year correctly in simple form' do
20
- parsed = subject.parse('Homo sapiens Linn. 1758', simple: true)
22
+ parsed = subject.parse('Homo sapiens Foo & Bar. 1758', simple: true)
21
23
  expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
22
24
  expect(parsed[:year]).to eq '1758'
23
- expect(parsed[:quality]).to eq '1'
25
+ expect(parsed[:authorship]).to eq 'Foo & Bar. 1758'
26
+ expect(parsed[:quality]).to eq 1
24
27
  expect(parsed[:normalized]).to be_nil
25
28
  end
29
+
30
+ it 'parses botanical cultivars in full form' do
31
+ parsed = subject.parse('Aus bus "White Russian"',
32
+ simple: false, with_cultivars: true)
33
+ expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
34
+ expect(parsed[:quality]).to eq 1
35
+ end
36
+
37
+ it 'parses botanical cultivars in simple form' do
38
+ parsed = subject.parse('Aus bus "White Russian"',
39
+ simple: true, with_cultivars: true)
40
+ expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
41
+ expect(parsed[:quality]).to eq 1
42
+ parsed = subject.parse('Aus bus "White Russian"',
43
+ simple: true, with_cultivars: false)
44
+ expect(parsed[:canonical][:simple]).to eq 'Aus bus'
45
+ expect(parsed[:quality]).to eq 2
46
+ end
26
47
  end
27
48
 
28
49
  describe('parse_ary') do
@@ -32,8 +53,9 @@ describe Biodiversity::Parser do
32
53
  simple: true
33
54
  )
34
55
  expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
35
- expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
36
56
  expect(parsed[0][:normalized]).to be_nil
57
+
58
+ expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
37
59
  end
38
60
 
39
61
  it 'parsed name in full format' do
@@ -42,8 +64,10 @@ describe Biodiversity::Parser do
42
64
  )
43
65
  expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
44
66
  expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
67
+ expect(parsed[0][:words].size).to eq 3
45
68
  expect(parsed[1][:parsed]).to be false
46
69
  expect(parsed[1][:virus]).to be true
70
+ expect(parsed[1][:words]).to be_nil
47
71
  end
48
72
  end
49
73
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.1
4
+ version: 5.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-19 00:00:00.000000000 Z
11
+ date: 2021-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
142
  - !ruby/object:Gem::Version
143
143
  version: '0'
144
144
  requirements: []
145
- rubygems_version: 3.2.3
145
+ rubygems_version: 3.2.15
146
146
  signing_key:
147
147
  specification_version: 4
148
148
  summary: Parser of scientific names