biodiversity 5.0.1 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG +6 -0
- data/README.md +14 -13
- data/clib/linux/libgnparser.h +3 -3
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +4 -7
- data/clib/mac/libgnparser.so +0 -0
- data/clib/win/libgnparser.h +4 -4
- data/clib/win/libgnparser.so +0 -0
- data/lib/biodiversity/parser.rb +18 -10
- data/lib/biodiversity/version.rb +6 -1
- data/spec/lib/parser_spec.rb +31 -7
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21aca2b78bc54f22390a57dbdef3550beeb2eb65e3407e8d4a518fedd62d23bf
|
4
|
+
data.tar.gz: 8a08979ecb6f77b99928966ed1aaaec34a12048e22effd3c5924a2b6b1820317
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a308d0af1aadb91ca19402bd61c4933da3555f774f78f4c6c1f9313ea150cf6f62f06c9245f01abaa832a8db5d102961a78e67b2e4e53f44c3110de21ff3c124
|
7
|
+
data.tar.gz: 122f7b88eae3c8fe9f4943f49b6bbca6ee46bad15687d65725a88d8e01823b56d8ec7792973d19236bfb67d3f37756a5b88a565a26c863949c845ac6a5aba782
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0.
|
1
|
+
3.0.1
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
Biodiversity
|
2
|
-
============
|
1
|
+
# Biodiversity
|
3
2
|
|
4
3
|
[](https://doi.org/10.5281/zenodo.3569596)
|
5
4
|
[![Gem Version][gem_svg]][gem_link]
|
@@ -8,12 +7,12 @@ Biodiversity
|
|
8
7
|
Parses taxonomic scientific name and breaks it into semantic elements.
|
9
8
|
|
10
9
|
**Important**: Biodiversity parser >= 4.0.0 uses binding to
|
11
|
-
https://
|
10
|
+
`https://github.com/gnames/gnparser` and
|
12
11
|
is not backward compatible with older versions. However it is much much faster
|
13
12
|
and better than previous versions.
|
14
13
|
|
15
14
|
This gem does not have a remote server or a command line executable anymore.
|
16
|
-
For such features use https://
|
15
|
+
For such features use `https://github.com/gnames/gnparser`.
|
17
16
|
|
18
17
|
- [Biodiversity](#biodiversity)
|
19
18
|
- [Installation](#installation)
|
@@ -24,7 +23,9 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
24
23
|
|
25
24
|
## Installation
|
26
25
|
|
27
|
-
|
26
|
+
```bash
|
27
|
+
sudo gem install biodiversity
|
28
|
+
```
|
28
29
|
|
29
30
|
The gem should work on Linux, Mac and Windows (64bit) machines
|
30
31
|
|
@@ -64,7 +65,6 @@ million names on a 4CPU hyperthreaded laptop:
|
|
64
65
|
|
65
66
|
You can use it as a library in Ruby:
|
66
67
|
|
67
|
-
|
68
68
|
```ruby
|
69
69
|
require 'biodiversity'
|
70
70
|
|
@@ -92,7 +92,6 @@ Biodiversity::Parser.parse("Plantago").to_json
|
|
92
92
|
# to clean name up
|
93
93
|
Biodiversity::Parser.parse(" Plantago major ")[:normalized]
|
94
94
|
|
95
|
-
|
96
95
|
# to get canonical form with or without infraspecies ranks, as well as
|
97
96
|
# stemmed version.
|
98
97
|
parsed = Biodiversity::Parser.parse("Seddera latifolia H. & S. var. latifolia")
|
@@ -103,6 +102,9 @@ parsed[:canonicalName][:stem]
|
|
103
102
|
# to get detailed information about elements of the name
|
104
103
|
Biodiversity::Parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
|
105
104
|
Braun & Crous 2003")[:details]
|
105
|
+
|
106
|
+
# to parse a botanical cultivar
|
107
|
+
Biodiversity::Parser.parse("Sarracenia flava 'Maxima'", with_cultivars: true)
|
106
108
|
```
|
107
109
|
|
108
110
|
'Surrogate' is a broad group which includes 'Barcode of Life' names, and various
|
@@ -111,6 +113,7 @@ undetermined names with cf. sp. spp. nr. in them:
|
|
111
113
|
```ruby
|
112
114
|
parser.parse("Coleoptera BOLD:1234567")[:surrogate]
|
113
115
|
```
|
116
|
+
|
114
117
|
### What is "nameStringID" in the parsed results?
|
115
118
|
|
116
119
|
ID field contains UUID v5 hexadecimal string. ID is generated out of bytes
|
@@ -118,18 +121,16 @@ from the name string itself, and identical id can be generated using [any
|
|
118
121
|
popular programming language][uuid_examples]. You can read more about UUID
|
119
122
|
version 5 in a [blog post][uuid_blog]
|
120
123
|
|
121
|
-
For example "Homo sapiens" should generate
|
124
|
+
For example "Homo sapiens" should generate
|
125
|
+
"16f235a0-e4a3-529c-9b83-bd15fe722110" UUID
|
122
126
|
|
123
|
-
Copyright
|
124
|
-
---------
|
127
|
+
## Copyright
|
125
128
|
|
126
129
|
Authors: [Dmitry Mozzherin][dimus]
|
127
130
|
|
128
131
|
Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
|
129
132
|
|
130
|
-
|
131
|
-
|
132
|
-
Copyright (c) 2008-2020 Dmitry Mozzherin. See [LICENSE][license]
|
133
|
+
Copyright (c) 2008-2021 Dmitry Mozzherin. See [LICENSE][license]
|
133
134
|
for further details.
|
134
135
|
|
135
136
|
[gem_svg]: https://badge.fury.io/rb/biodiversity.svg
|
data/clib/linux/libgnparser.h
CHANGED
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
19
19
|
/* Start of preamble from import "C" comments. */
|
20
20
|
|
21
21
|
|
22
|
-
#line
|
22
|
+
#line 5 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
25
|
|
@@ -80,7 +80,7 @@ extern "C" {
|
|
80
80
|
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
81
|
// parsed details are omitted, if it is 1 -- they are included.
|
82
82
|
// true.
|
83
|
-
extern char* ParseToString(char* name, char* f, int details);
|
83
|
+
extern char* ParseToString(char* name, char* f, int details, int cultivars);
|
84
84
|
|
85
85
|
// FreeMemory takes a string pointer and frees its memory.
|
86
86
|
extern void FreeMemory(char* p);
|
@@ -90,7 +90,7 @@ extern void FreeMemory(char* p);
|
|
90
90
|
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
91
|
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
92
|
// true.
|
93
|
-
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
|
94
94
|
|
95
95
|
#ifdef __cplusplus
|
96
96
|
}
|
data/clib/linux/libgnparser.so
CHANGED
Binary file
|
data/clib/mac/libgnparser.h
CHANGED
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
19
19
|
/* Start of preamble from import "C" comments. */
|
20
20
|
|
21
21
|
|
22
|
-
#line
|
22
|
+
#line 5 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
25
|
|
@@ -80,20 +80,17 @@ extern "C" {
|
|
80
80
|
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
81
|
// parsed details are omitted, if it is 1 -- they are included.
|
82
82
|
// true.
|
83
|
-
|
84
|
-
extern char* ParseToString(char* p0, char* p1, int p2);
|
83
|
+
extern char* ParseToString(char* name, char* f, int details, int cultivars);
|
85
84
|
|
86
85
|
// FreeMemory takes a string pointer and frees its memory.
|
87
|
-
|
88
|
-
extern void FreeMemory(char* p0);
|
86
|
+
extern void FreeMemory(char* p);
|
89
87
|
|
90
88
|
// ParseAryToString function takes an array of names, parsing format, and a
|
91
89
|
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
92
90
|
// either CSV or JSON format. Format argument can take values of 'csv',
|
93
91
|
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
94
92
|
// true.
|
95
|
-
|
96
|
-
extern char* ParseAryToString(char** p0, int p1, char* p2, int p3);
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
|
97
94
|
|
98
95
|
#ifdef __cplusplus
|
99
96
|
}
|
data/clib/mac/libgnparser.so
CHANGED
Binary file
|
data/clib/win/libgnparser.h
CHANGED
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
19
19
|
/* Start of preamble from import "C" comments. */
|
20
20
|
|
21
21
|
|
22
|
-
#line
|
22
|
+
#line 5 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
25
|
|
@@ -80,17 +80,17 @@ extern "C" {
|
|
80
80
|
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
81
|
// parsed details are omitted, if it is 1 -- they are included.
|
82
82
|
// true.
|
83
|
-
extern char* ParseToString(char* name, char* f, int details);
|
83
|
+
extern __declspec(dllexport) char* ParseToString(char* name, char* f, int details, int cultivars);
|
84
84
|
|
85
85
|
// FreeMemory takes a string pointer and frees its memory.
|
86
|
-
extern void FreeMemory(char* p);
|
86
|
+
extern __declspec(dllexport) void FreeMemory(char* p);
|
87
87
|
|
88
88
|
// ParseAryToString function takes an array of names, parsing format, and a
|
89
89
|
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
90
90
|
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
91
|
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
92
|
// true.
|
93
|
-
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
93
|
+
extern __declspec(dllexport) char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
|
94
94
|
|
95
95
|
#ifdef __cplusplus
|
96
96
|
}
|
data/clib/win/libgnparser.so
CHANGED
Binary file
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -28,28 +28,33 @@ module Biodiversity
|
|
28
28
|
callback(:parser_callback, %i[string], :void)
|
29
29
|
|
30
30
|
attach_function(:parse_go, :ParseToString,
|
31
|
-
%i[string string int], :strptr)
|
31
|
+
%i[string string int int], :strptr)
|
32
32
|
attach_function(:parse_ary_go, :ParseAryToString,
|
33
|
-
%i[pointer int string int], :strptr)
|
33
|
+
%i[pointer int string int int], :strptr)
|
34
34
|
attach_function(:free_mem, :FreeMemory, %i[pointer], :void)
|
35
35
|
|
36
|
-
def self.parse(name, simple: false)
|
36
|
+
def self.parse(name, simple: false, with_cultivars: false)
|
37
37
|
format = simple ? 'csv' : 'compact'
|
38
|
+
with_details = simple ? 0 : 1
|
39
|
+
with_cultivars = with_cultivars ? 1 : 0
|
38
40
|
|
39
|
-
parsed, ptr = parse_go(name, format,
|
41
|
+
parsed, ptr = parse_go(name, format, with_details, with_cultivars)
|
40
42
|
free_mem(ptr)
|
41
43
|
output(parsed, simple)
|
42
44
|
end
|
43
45
|
|
44
|
-
def self.parse_ary(ary, simple: false)
|
46
|
+
def self.parse_ary(ary, simple: false, with_cultivars: false)
|
45
47
|
format = simple ? 'csv' : 'compact'
|
46
|
-
|
48
|
+
with_details = simple ? 0 : 1
|
49
|
+
with_cultivars = with_cultivars ? 1 : 0
|
47
50
|
|
51
|
+
in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
|
48
52
|
in_ptr.write_array_of_pointer(
|
49
53
|
ary.map { |s| FFI::MemoryPointer.from_string(s) }
|
50
54
|
)
|
51
55
|
|
52
|
-
parsed, ptr = parse_ary_go(in_ptr, ary.length, format,
|
56
|
+
parsed, ptr = parse_ary_go(in_ptr, ary.length, format,
|
57
|
+
with_details, with_cultivars)
|
53
58
|
free_mem(ptr)
|
54
59
|
if simple
|
55
60
|
CSV.new(parsed).map do |row|
|
@@ -62,11 +67,14 @@ module Biodiversity
|
|
62
67
|
|
63
68
|
def self.output(parsed, simple)
|
64
69
|
if simple
|
70
|
+
parsed = parsed.force_encoding('UTF-8')
|
65
71
|
csv = CSV.new(parsed)
|
66
72
|
row = csv.readlines[0]
|
67
73
|
csv_row(row)
|
68
74
|
else
|
69
|
-
JSON.parse(parsed, symbolize_names: true)
|
75
|
+
parsed = JSON.parse(parsed, symbolize_names: true)
|
76
|
+
parsed[:parserVersion] = Biodiversity.gnparser_version
|
77
|
+
parsed
|
70
78
|
end
|
71
79
|
end
|
72
80
|
|
@@ -74,7 +82,7 @@ module Biodiversity
|
|
74
82
|
{
|
75
83
|
id: row[0],
|
76
84
|
verbatim: row[1],
|
77
|
-
cardinality: row[2],
|
85
|
+
cardinality: row[2].to_i,
|
78
86
|
canonical: {
|
79
87
|
stem: row[3],
|
80
88
|
simple: row[4],
|
@@ -82,7 +90,7 @@ module Biodiversity
|
|
82
90
|
},
|
83
91
|
authorship: row[6],
|
84
92
|
year: row[7],
|
85
|
-
quality: row[8]
|
93
|
+
quality: row[8].to_i
|
86
94
|
}
|
87
95
|
end
|
88
96
|
end
|
data/lib/biodiversity/version.rb
CHANGED
@@ -2,9 +2,14 @@
|
|
2
2
|
|
3
3
|
# Biodiversity module provides a namespace for scientific name parser.
|
4
4
|
module Biodiversity
|
5
|
-
VERSION = '5.0
|
5
|
+
VERSION = '5.3.0'
|
6
|
+
GNPARSER_VERSION = 'GNparser 1.3.0+'
|
6
7
|
|
7
8
|
def self.version
|
8
9
|
VERSION
|
9
10
|
end
|
11
|
+
|
12
|
+
def self.gnparser_version
|
13
|
+
GNPARSER_VERSION
|
14
|
+
end
|
10
15
|
end
|
data/spec/lib/parser_spec.rb
CHANGED
@@ -4,25 +4,46 @@
|
|
4
4
|
|
5
5
|
describe Biodiversity::Parser do
|
6
6
|
describe('parse') do
|
7
|
-
it 'parses name in simple
|
7
|
+
it 'parses name in simple form' do
|
8
8
|
parsed = subject.parse('Homo sapiens Linn.', simple: true)
|
9
9
|
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
10
10
|
expect(parsed[:normalized]).to be_nil
|
11
11
|
end
|
12
12
|
|
13
|
-
it 'parsed name in full
|
14
|
-
parsed = subject.parse('Homo sapiens Linn.')
|
13
|
+
it 'parsed name in full form' do
|
14
|
+
parsed = subject.parse('Homo sapiens Linn. 1758')
|
15
15
|
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
16
|
-
expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
|
16
|
+
expect(parsed[:normalized]).to eq 'Homo sapiens Linn. 1758'
|
17
|
+
expect(parsed[:authorship][:year]).to eq '1758'
|
18
|
+
expect(parsed[:words].size).to eq 4
|
17
19
|
end
|
18
20
|
|
19
21
|
it 'gets quality and year correctly in simple form' do
|
20
|
-
parsed = subject.parse('Homo sapiens
|
22
|
+
parsed = subject.parse('Homo sapiens Foo & Bar. 1758', simple: true)
|
21
23
|
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
22
24
|
expect(parsed[:year]).to eq '1758'
|
23
|
-
expect(parsed[:
|
25
|
+
expect(parsed[:authorship]).to eq 'Foo & Bar. 1758'
|
26
|
+
expect(parsed[:quality]).to eq 1
|
24
27
|
expect(parsed[:normalized]).to be_nil
|
25
28
|
end
|
29
|
+
|
30
|
+
it 'parses botanical cultivars in full form' do
|
31
|
+
parsed = subject.parse('Aus bus "White Russian"',
|
32
|
+
simple: false, with_cultivars: true)
|
33
|
+
expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
|
34
|
+
expect(parsed[:quality]).to eq 1
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'parses botanical cultivars in simple form' do
|
38
|
+
parsed = subject.parse('Aus bus "White Russian"',
|
39
|
+
simple: true, with_cultivars: true)
|
40
|
+
expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
|
41
|
+
expect(parsed[:quality]).to eq 1
|
42
|
+
parsed = subject.parse('Aus bus "White Russian"',
|
43
|
+
simple: true, with_cultivars: false)
|
44
|
+
expect(parsed[:canonical][:simple]).to eq 'Aus bus'
|
45
|
+
expect(parsed[:quality]).to eq 2
|
46
|
+
end
|
26
47
|
end
|
27
48
|
|
28
49
|
describe('parse_ary') do
|
@@ -32,8 +53,9 @@ describe Biodiversity::Parser do
|
|
32
53
|
simple: true
|
33
54
|
)
|
34
55
|
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
35
|
-
expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
|
36
56
|
expect(parsed[0][:normalized]).to be_nil
|
57
|
+
|
58
|
+
expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
|
37
59
|
end
|
38
60
|
|
39
61
|
it 'parsed name in full format' do
|
@@ -42,8 +64,10 @@ describe Biodiversity::Parser do
|
|
42
64
|
)
|
43
65
|
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
44
66
|
expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
|
67
|
+
expect(parsed[0][:words].size).to eq 3
|
45
68
|
expect(parsed[1][:parsed]).to be false
|
46
69
|
expect(parsed[1][:virus]).to be true
|
70
|
+
expect(parsed[1][:words]).to be_nil
|
47
71
|
end
|
48
72
|
end
|
49
73
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.0
|
4
|
+
version: 5.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
142
|
- !ruby/object:Gem::Version
|
143
143
|
version: '0'
|
144
144
|
requirements: []
|
145
|
-
rubygems_version: 3.2.
|
145
|
+
rubygems_version: 3.2.15
|
146
146
|
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Parser of scientific names
|