biodiversity 5.1.0 → 5.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -0
- data/.ruby-version +1 -1
- data/CHANGELOG +6 -0
- data/README.md +14 -13
- data/clib/linux/libgnparser.h +3 -3
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +4 -7
- data/clib/mac/libgnparser.so +0 -0
- data/clib/win/libgnparser.h +4 -4
- data/clib/win/libgnparser.so +0 -0
- data/lib/biodiversity/parser.rb +22 -13
- data/lib/biodiversity/version.rb +6 -1
- data/spec/lib/parser_spec.rb +44 -10
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b281d6c55701570148a6e42577c6e981c23e2bdf6558dafc27aaa9c89dfdcc0d
|
4
|
+
data.tar.gz: 0e54a072a72fe7f2e5b76917766d044d945a44859b61cdc51247c3ac95f386b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ce44a6dcefcaf1ab262c3d2fd8593d8fabadeab5a5f4c167e6b921f4b66ebf96212bacab809c1765b4eba87a4adc58539449a1aad95cf5e381c87f3edb0326b
|
7
|
+
data.tar.gz: b8c0077f1eaa45fb89055431ab3807869acb7a87c8d4f6955bdbdd9364647cb13dea0bb36f4949f8a571e31d89dd1ce2215cec1c053265376995e229e962e5a7
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0.
|
1
|
+
3.0.1
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
Biodiversity
|
2
|
-
============
|
1
|
+
# Biodiversity
|
3
2
|
|
4
3
|
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
|
5
4
|
[![Gem Version][gem_svg]][gem_link]
|
@@ -8,12 +7,12 @@ Biodiversity
|
|
8
7
|
Parses taxonomic scientific name and breaks it into semantic elements.
|
9
8
|
|
10
9
|
**Important**: Biodiversity parser >= 4.0.0 uses binding to
|
11
|
-
https://
|
10
|
+
`https://github.com/gnames/gnparser` and
|
12
11
|
is not backward compatible with older versions. However, it is much, much faster
|
13
12
|
and better than previous versions.
|
14
13
|
|
15
14
|
This gem does not have a remote server or a command line executable anymore.
|
16
|
-
For such features use https://gitlab.com/gogna/gnparser.
|
15
|
+
For such features use `https://github.com/gnames/gnparser`.
|
17
16
|
|
18
17
|
- [Biodiversity](#biodiversity)
|
19
18
|
- [Installation](#installation)
|
@@ -24,7 +23,9 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
24
23
|
|
25
24
|
## Installation
|
26
25
|
|
27
|
-
|
26
|
+
```bash
|
27
|
+
sudo gem install biodiversity
|
28
|
+
```
|
28
29
|
|
29
30
|
The gem should work on Linux, Mac and Windows (64bit) machines
|
30
31
|
|
@@ -64,7 +65,6 @@ million names on a 4CPU hyperthreaded laptop:
|
|
64
65
|
|
65
66
|
You can use it as a library in Ruby:
|
66
67
|
|
67
|
-
|
68
68
|
```ruby
|
69
69
|
require 'biodiversity'
|
70
70
|
|
@@ -92,7 +92,6 @@ Biodiversity::Parser.parse("Plantago").to_json
|
|
92
92
|
# to clean name up
|
93
93
|
Biodiversity::Parser.parse(" Plantago major ")[:normalized]
|
94
94
|
|
95
|
-
|
96
95
|
# to get canonical form with or without infraspecies ranks, as well as
|
97
96
|
# stemmed version.
|
98
97
|
parsed = Biodiversity::Parser.parse("Seddera latifolia H. & S. var. latifolia")
|
@@ -103,6 +102,9 @@ parsed[:canonicalName][:stem]
|
|
103
102
|
# to get detailed information about elements of the name
|
104
103
|
Biodiversity::Parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
|
105
104
|
Braun & Crous 2003")[:details]
|
105
|
+
|
106
|
+
# to parse a botanical cultivar
|
107
|
+
Biodiversity::Parser.parse("Sarracenia flava 'Maxima'", with_cultivars: true)
|
106
108
|
```
|
107
109
|
|
108
110
|
'Surrogate' is a broad group which includes 'Barcode of Life' names, and various
|
@@ -111,6 +113,7 @@ undetermined names with cf. sp. spp. nr. in them:
|
|
111
113
|
```ruby
|
112
114
|
parser.parse("Coleoptera BOLD:1234567")[:surrogate]
|
113
115
|
```
|
116
|
+
|
114
117
|
### What is "nameStringID" in the parsed results?
|
115
118
|
|
116
119
|
ID field contains UUID v5 hexadecimal string. ID is generated out of bytes
|
@@ -118,18 +121,16 @@ from the name string itself, and identical id can be generated using [any
|
|
118
121
|
popular programming language][uuid_examples]. You can read more about UUID
|
119
122
|
version 5 in a [blog post][uuid_blog]
|
120
123
|
|
121
|
-
For example "Homo sapiens" should generate
|
124
|
+
For example "Homo sapiens" should generate
|
125
|
+
"16f235a0-e4a3-529c-9b83-bd15fe722110" UUID
|
122
126
|
|
123
|
-
Copyright
|
124
|
-
---------
|
127
|
+
## Copyright
|
125
128
|
|
126
129
|
Authors: [Dmitry Mozzherin][dimus]
|
127
130
|
|
128
131
|
Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
|
129
132
|
|
130
|
-
|
131
|
-
|
132
|
-
Copyright (c) 2008-2020 Dmitry Mozzherin. See [LICENSE][license]
|
133
|
+
Copyright (c) 2008-2021 Dmitry Mozzherin. See [LICENSE][license]
|
133
134
|
for further details.
|
134
135
|
|
135
136
|
[gem_svg]: https://badge.fury.io/rb/biodiversity.svg
|
data/clib/linux/libgnparser.h
CHANGED
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
19
19
|
/* Start of preamble from import "C" comments. */
|
20
20
|
|
21
21
|
|
22
|
-
#line
|
22
|
+
#line 5 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
25
|
|
@@ -80,7 +80,7 @@ extern "C" {
|
|
80
80
|
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
81
|
// parsed details are omitted, if it is 1 -- they are included.
|
82
82
|
// true.
|
83
|
-
extern char* ParseToString(char* name, char* f, int details);
|
83
|
+
extern char* ParseToString(char* name, char* f, int details, int cultivars);
|
84
84
|
|
85
85
|
// FreeMemory takes a string pointer and frees its memory.
|
86
86
|
extern void FreeMemory(char* p);
|
@@ -90,7 +90,7 @@ extern void FreeMemory(char* p);
|
|
90
90
|
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
91
|
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
92
|
// true.
|
93
|
-
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
|
94
94
|
|
95
95
|
#ifdef __cplusplus
|
96
96
|
}
|
data/clib/linux/libgnparser.so
CHANGED
Binary file
|
data/clib/mac/libgnparser.h
CHANGED
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
19
19
|
/* Start of preamble from import "C" comments. */
|
20
20
|
|
21
21
|
|
22
|
-
#line
|
22
|
+
#line 5 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
25
|
|
@@ -80,20 +80,17 @@ extern "C" {
|
|
80
80
|
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
81
|
// parsed details are omitted, if it is 1 -- they are included.
|
82
82
|
// true.
|
83
|
-
|
84
|
-
extern char* ParseToString(char* p0, char* p1, int p2);
|
83
|
+
extern char* ParseToString(char* name, char* f, int details, int cultivars);
|
85
84
|
|
86
85
|
// FreeMemory takes a string pointer and frees its memory.
|
87
|
-
|
88
|
-
extern void FreeMemory(char* p0);
|
86
|
+
extern void FreeMemory(char* p);
|
89
87
|
|
90
88
|
// ParseAryToString function takes an array of names, parsing format, and a
|
91
89
|
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
92
90
|
// either CSV or JSON format. Format argument can take values of 'csv',
|
93
91
|
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
94
92
|
// true.
|
95
|
-
|
96
|
-
extern char* ParseAryToString(char** p0, int p1, char* p2, int p3);
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
|
97
94
|
|
98
95
|
#ifdef __cplusplus
|
99
96
|
}
|
data/clib/mac/libgnparser.so
CHANGED
Binary file
|
data/clib/win/libgnparser.h
CHANGED
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
19
19
|
/* Start of preamble from import "C" comments. */
|
20
20
|
|
21
21
|
|
22
|
-
#line
|
22
|
+
#line 5 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
25
|
|
@@ -80,17 +80,17 @@ extern "C" {
|
|
80
80
|
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
81
|
// parsed details are omitted, if it is 1 -- they are included.
|
82
82
|
// true.
|
83
|
-
extern char* ParseToString(char* name, char* f, int details);
|
83
|
+
extern __declspec(dllexport) char* ParseToString(char* name, char* f, int details, int cultivars);
|
84
84
|
|
85
85
|
// FreeMemory takes a string pointer and frees its memory.
|
86
|
-
extern void FreeMemory(char* p);
|
86
|
+
extern __declspec(dllexport) void FreeMemory(char* p);
|
87
87
|
|
88
88
|
// ParseAryToString function takes an array of names, parsing format, and a
|
89
89
|
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
90
90
|
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
91
|
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
92
|
// true.
|
93
|
-
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
93
|
+
extern __declspec(dllexport) char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
|
94
94
|
|
95
95
|
#ifdef __cplusplus
|
96
96
|
}
|
data/clib/win/libgnparser.so
CHANGED
Binary file
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -28,47 +28,56 @@ module Biodiversity
|
|
28
28
|
callback(:parser_callback, %i[string], :void)
|
29
29
|
|
30
30
|
attach_function(:parse_go, :ParseToString,
|
31
|
-
%i[string string int], :strptr)
|
31
|
+
%i[string string int int], :strptr)
|
32
32
|
attach_function(:parse_ary_go, :ParseAryToString,
|
33
|
-
%i[pointer int string int], :strptr)
|
33
|
+
%i[pointer int string int int], :strptr)
|
34
34
|
attach_function(:free_mem, :FreeMemory, %i[pointer], :void)
|
35
35
|
|
36
|
-
def self.parse(name, simple: false,
|
36
|
+
def self.parse(name, simple: false, with_cultivars: false)
|
37
37
|
format = simple ? 'csv' : 'compact'
|
38
|
-
with_details =
|
38
|
+
with_details = simple ? 0 : 1
|
39
|
+
with_cultivars = with_cultivars ? 1 : 0
|
39
40
|
|
40
|
-
parsed, ptr = parse_go(name, format, with_details)
|
41
|
+
parsed, ptr = parse_go(name, format, with_details, with_cultivars)
|
41
42
|
free_mem(ptr)
|
42
43
|
output(parsed, simple)
|
43
44
|
end
|
44
45
|
|
45
|
-
def self.parse_ary(ary, simple: false,
|
46
|
+
def self.parse_ary(ary, simple: false, with_cultivars: false)
|
46
47
|
format = simple ? 'csv' : 'compact'
|
47
|
-
with_details =
|
48
|
+
with_details = simple ? 0 : 1
|
49
|
+
with_cultivars = with_cultivars ? 1 : 0
|
48
50
|
|
49
51
|
in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
|
50
52
|
in_ptr.write_array_of_pointer(
|
51
53
|
ary.map { |s| FFI::MemoryPointer.from_string(s) }
|
52
54
|
)
|
53
55
|
|
54
|
-
parsed, ptr = parse_ary_go(in_ptr, ary.length, format,
|
56
|
+
parsed, ptr = parse_ary_go(in_ptr, ary.length, format,
|
57
|
+
with_details, with_cultivars)
|
55
58
|
free_mem(ptr)
|
56
59
|
if simple
|
57
|
-
CSV.new(parsed).map do |row|
|
60
|
+
CSV.new(parsed.force_encoding('UTF-8')).map do |row|
|
58
61
|
csv_row(row)
|
59
62
|
end
|
60
63
|
else
|
61
|
-
JSON.parse(parsed, symbolize_names: true)
|
64
|
+
JSON.parse(parsed, symbolize_names: true).map do |item|
|
65
|
+
item[:parserVersion] = Biodiversity.gnparser_version
|
66
|
+
item
|
67
|
+
end
|
62
68
|
end
|
63
69
|
end
|
64
70
|
|
65
71
|
def self.output(parsed, simple)
|
66
72
|
if simple
|
73
|
+
parsed = parsed.force_encoding('UTF-8')
|
67
74
|
csv = CSV.new(parsed)
|
68
75
|
row = csv.readlines[0]
|
69
76
|
csv_row(row)
|
70
77
|
else
|
71
|
-
JSON.parse(parsed, symbolize_names: true)
|
78
|
+
parsed = JSON.parse(parsed, symbolize_names: true)
|
79
|
+
parsed[:parserVersion] = Biodiversity.gnparser_version
|
80
|
+
parsed
|
72
81
|
end
|
73
82
|
end
|
74
83
|
|
@@ -76,7 +85,7 @@ module Biodiversity
|
|
76
85
|
{
|
77
86
|
id: row[0],
|
78
87
|
verbatim: row[1],
|
79
|
-
cardinality: row[2],
|
88
|
+
cardinality: row[2].to_i,
|
80
89
|
canonical: {
|
81
90
|
stem: row[3],
|
82
91
|
simple: row[4],
|
@@ -84,7 +93,7 @@ module Biodiversity
|
|
84
93
|
},
|
85
94
|
authorship: row[6],
|
86
95
|
year: row[7],
|
87
|
-
quality: row[8]
|
96
|
+
quality: row[8].to_i
|
88
97
|
}
|
89
98
|
end
|
90
99
|
end
|
data/lib/biodiversity/version.rb
CHANGED
@@ -2,9 +2,14 @@
|
|
2
2
|
|
3
3
|
# Biodiversity module provides a namespace for scientific name parser.
|
4
4
|
module Biodiversity
|
5
|
-
VERSION = '5.1.0'
|
5
|
+
VERSION = '5.3.1'
|
6
|
+
GNPARSER_VERSION = 'GNparser 1.3.0+'
|
6
7
|
|
7
8
|
def self.version
|
8
9
|
VERSION
|
9
10
|
end
|
11
|
+
|
12
|
+
def self.gnparser_version
|
13
|
+
GNPARSER_VERSION
|
14
|
+
end
|
10
15
|
end
|
data/spec/lib/parser_spec.rb
CHANGED
@@ -4,46 +4,80 @@
|
|
4
4
|
|
5
5
|
describe Biodiversity::Parser do
|
6
6
|
describe('parse') do
|
7
|
-
it 'parses name in simple
|
7
|
+
it 'parses name in simple form' do
|
8
8
|
parsed = subject.parse('Homo sapiens Linn.', simple: true)
|
9
9
|
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
10
10
|
expect(parsed[:normalized]).to be_nil
|
11
11
|
end
|
12
12
|
|
13
|
-
it 'parsed name in full
|
14
|
-
parsed = subject.parse('Homo sapiens Linn.')
|
13
|
+
it 'parsed name in full form' do
|
14
|
+
parsed = subject.parse('Homo sapiens Linn. 1758')
|
15
15
|
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
16
|
-
expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
|
16
|
+
expect(parsed[:normalized]).to eq 'Homo sapiens Linn. 1758'
|
17
|
+
expect(parsed[:authorship][:year]).to eq '1758'
|
18
|
+
expect(parsed[:words].size).to eq 4
|
17
19
|
end
|
18
20
|
|
19
21
|
it 'gets quality and year correctly in simple form' do
|
20
|
-
parsed = subject.parse('Homo sapiens
|
22
|
+
parsed = subject.parse('Homo sapiens Foo & Bar. 1758', simple: true)
|
21
23
|
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
22
24
|
expect(parsed[:year]).to eq '1758'
|
23
|
-
expect(parsed[:
|
25
|
+
expect(parsed[:authorship]).to eq 'Foo & Bar. 1758'
|
26
|
+
expect(parsed[:quality]).to eq 1
|
24
27
|
expect(parsed[:normalized]).to be_nil
|
25
28
|
end
|
29
|
+
|
30
|
+
it 'parses botanical cultivars in full form' do
|
31
|
+
parsed = subject.parse('Aus bus "White Russian"',
|
32
|
+
simple: false, with_cultivars: true)
|
33
|
+
expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
|
34
|
+
expect(parsed[:quality]).to eq 1
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'parses botanical cultivars in simple form' do
|
38
|
+
parsed = subject.parse('Aus bus "White Russian"',
|
39
|
+
simple: true, with_cultivars: true)
|
40
|
+
expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
|
41
|
+
expect(parsed[:quality]).to eq 1
|
42
|
+
parsed = subject.parse('Aus bus "White Russian"',
|
43
|
+
simple: true, with_cultivars: false)
|
44
|
+
expect(parsed[:canonical][:simple]).to eq 'Aus bus'
|
45
|
+
expect(parsed[:quality]).to eq 2
|
46
|
+
end
|
26
47
|
end
|
27
48
|
|
28
49
|
describe('parse_ary') do
|
29
50
|
it 'parses names in simple format' do
|
30
51
|
parsed = subject.parse_ary(
|
31
|
-
['Homo sapiens Linn.', 'Pardosa moesta'],
|
32
|
-
simple: true
|
52
|
+
['Homo sapiens Linn.', 'Pardosa moesta', 'Aus bus "White Russian"'],
|
53
|
+
simple: true, with_cultivars: true
|
33
54
|
)
|
34
55
|
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
35
|
-
expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
|
36
56
|
expect(parsed[0][:normalized]).to be_nil
|
57
|
+
|
58
|
+
expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
|
59
|
+
expect(parsed[2][:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
|
60
|
+
expect(parsed[2][:quality]).to eq 1
|
37
61
|
end
|
38
62
|
|
39
63
|
it 'parsed name in full format' do
|
40
64
|
parsed = subject.parse_ary(
|
41
|
-
[
|
65
|
+
[
|
66
|
+
'Homo sapiens Linn.',
|
67
|
+
'Tobacco Mosaic Virus',
|
68
|
+
"Aus bus 'White Russian'"
|
69
|
+
],
|
70
|
+
with_cultivars: true
|
42
71
|
)
|
43
72
|
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
44
73
|
expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
|
74
|
+
expect(parsed[0][:words].size).to eq 3
|
45
75
|
expect(parsed[1][:parsed]).to be false
|
46
76
|
expect(parsed[1][:virus]).to be true
|
77
|
+
expect(parsed[1][:words]).to be_nil
|
78
|
+
expect(parsed[2][:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
|
79
|
+
expect(parsed[2][:quality]).to eq 1
|
80
|
+
expect(parsed[2][:parserVersion]).to match(/GNparser/)
|
47
81
|
end
|
48
82
|
end
|
49
83
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.1.0
|
4
|
+
version: 5.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
142
|
- !ruby/object:Gem::Version
|
143
143
|
version: '0'
|
144
144
|
requirements: []
|
145
|
-
rubygems_version: 3.2.
|
145
|
+
rubygems_version: 3.2.15
|
146
146
|
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Parser of scientific names
|