biodiversity 4.0.1 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -1
- data/.ruby-version +1 -1
- data/.travis.yml +2 -1
- data/CHANGELOG +8 -0
- data/LICENSE +1 -1
- data/README.md +41 -2
- data/biodiversity.gemspec +6 -5
- data/clib/linux/libgnparser.h +16 -12
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +18 -11
- data/clib/mac/libgnparser.so +0 -0
- data/clib/win/libgnparser.h +97 -0
- data/clib/win/libgnparser.so +0 -0
- data/lib/biodiversity.rb +1 -0
- data/lib/biodiversity/parser.rb +43 -33
- data/lib/biodiversity/version.rb +1 -1
- data/spec/lib/parser_spec.rb +12 -9
- metadata +19 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74596acd42fb5eda1472dabf4ec8976da1e9f90bfd23c07035d4a16608f50ef6
|
4
|
+
data.tar.gz: 8ce1c6d68c47fddb41b27aa807a54a851ea2204a9b0cfdb7cb05f9be19019907
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af55ab0704fa84e02c490620dd55ae024f130136e096ab0333cd1d77506bef5e9ae2a3039d3be34c9a6ac739f03dee58510efc2f8440759663caab1924b67318
|
7
|
+
data.tar.gz: 04e28487d5fe1031a34c4065a3e24677d2568bf60a3f46e177c2367e497caf1912688452223895d2a535a56362e7290cac0242ca3e2c102256decd1e3cf3d7ea
|
data/.rubocop.yml
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
AllCops:
|
2
|
+
TargetRubyVersion: 2.6
|
3
|
+
NewCops: disable
|
4
|
+
SuggestExtensions: false
|
2
5
|
Exclude:
|
3
6
|
- .bundle/**/*
|
4
7
|
- bundle_bin/**/*
|
@@ -10,4 +13,10 @@ Metrics/MethodLength:
|
|
10
13
|
- lib/**/*
|
11
14
|
Metrics/BlockLength:
|
12
15
|
Exclude:
|
13
|
-
- spec/**/*
|
16
|
+
- spec/**/*
|
17
|
+
Style/HashEachMethods:
|
18
|
+
Enabled: true
|
19
|
+
Style/HashTransformKeys:
|
20
|
+
Enabled: true
|
21
|
+
Style/HashTransformValues:
|
22
|
+
Enabled: true
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
5.0.0 -- changed GNparser to v 1.0.0
|
2
|
+
|
3
|
+
4.0.3 -- fix memory leak in parse_ary method
|
4
|
+
|
5
|
+
4.0.2 -- add MS Windows libraries
|
6
|
+
|
7
|
+
4.0.1 -- fix for simple output
|
8
|
+
|
1
9
|
4.0.0 -- migrate code to gnparser C-shared library. This change breaks
|
2
10
|
backward compatibility, and makes parser dramatically faster.
|
3
11
|
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Biodiversity
|
2
2
|
============
|
3
3
|
|
4
|
-
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
|
5
5
|
[![Gem Version][gem_svg]][gem_link]
|
6
6
|
[![Continuous Integration Status][ci_svg]][ci_link]
|
7
7
|
|
@@ -17,6 +17,7 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
17
17
|
|
18
18
|
- [Biodiversity](#biodiversity)
|
19
19
|
- [Installation](#installation)
|
20
|
+
- [Benchmarks](#benchmarks)
|
20
21
|
- [Example usage](#example-usage)
|
21
22
|
- [What is "nameStringID" in the parsed results?](#what-is-%22namestringid%22-in-the-parsed-results)
|
22
23
|
- [Copyright](#copyright)
|
@@ -27,6 +28,38 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
27
28
|
|
28
29
|
The gem should work on Linux, Mac and Windows (64bit) machines
|
29
30
|
|
31
|
+
## Benchmarks
|
32
|
+
|
33
|
+
The fastest way to go through a massive amount of names is to use
|
34
|
+
`Biodiversity::Parser.parse_ary([big array], simple: true)` function.
|
35
|
+
|
36
|
+
For example, parsing a large file with one name per line:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
#!/usr/bin/env ruby
|
40
|
+
|
41
|
+
require 'biodiversity'
|
42
|
+
|
43
|
+
P = Biodiversity::Parser
|
44
|
+
count = 0
|
45
|
+
File.open('all_names.txt').each_slice(50_000) do |sl|
|
46
|
+
count += 1
|
47
|
+
res = P.parse_ary(sl, simple: true)
|
48
|
+
puts count * 50_000
|
49
|
+
puts res[0]
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
Here are comparative results of running parsers against a file with 24
|
54
|
+
million names on a 4CPU hyperthreaded laptop:
|
55
|
+
|
56
|
+
| Program | Version | Full/Simple | Names/min |
|
57
|
+
| ------------ | ------- | ----------- | --------: |
|
58
|
+
| gnparser | 0.12.0 | Simple | 3,000,000 |
|
59
|
+
| biodiversity | 4.0.1 | Simple | 2,000,000 |
|
60
|
+
| biodiversity | 4.0.1 | Full JSON | 800,000 |
|
61
|
+
| biodiversity | 3.5.1 | n/a | 40,000 |
|
62
|
+
|
30
63
|
## Example usage
|
31
64
|
|
32
65
|
You can use it as a library in Ruby:
|
@@ -92,7 +125,11 @@ Copyright
|
|
92
125
|
|
93
126
|
Authors: [Dmitry Mozzherin][dimus]
|
94
127
|
|
95
|
-
|
128
|
+
Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
Copyright (c) 2008-2020 Dmitry Mozzherin. See [LICENSE][license]
|
96
133
|
for further details.
|
97
134
|
|
98
135
|
[gem_svg]: https://badge.fury.io/rb/biodiversity.svg
|
@@ -100,6 +137,8 @@ for further details.
|
|
100
137
|
[ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
|
101
138
|
[ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
102
139
|
[dimus]: https://github.com/dimus
|
140
|
+
[pleary]: https://github.com/pleary
|
141
|
+
[hernan]: https://github.com/LocoDelAssembly
|
103
142
|
[license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
|
104
143
|
[uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
|
105
144
|
[uuid_blog]: http://globalnamesarchitecture.github.io/gna/uuid/2015/05/31/gn-uuid-0-5-0.html
|
data/biodiversity.gemspec
CHANGED
@@ -5,6 +5,7 @@ $LOAD_PATH.push File.expand_path('lib', __dir__)
|
|
5
5
|
require 'biodiversity/version'
|
6
6
|
|
7
7
|
Gem::Specification.new do |gem|
|
8
|
+
gem.required_ruby_version = '>= 2.6'
|
8
9
|
gem.name = 'biodiversity'
|
9
10
|
gem.version = Biodiversity::VERSION
|
10
11
|
gem.homepage = 'https://github.com/GlobalNamesArchitecture/biodiversity'
|
@@ -17,11 +18,11 @@ Gem::Specification.new do |gem|
|
|
17
18
|
gem.files = `git ls-files`.split("\n")
|
18
19
|
gem.require_paths = ['lib']
|
19
20
|
|
20
|
-
gem.add_runtime_dependency 'ffi', '~> 1.
|
21
|
+
gem.add_runtime_dependency 'ffi', '~> 1.14'
|
21
22
|
|
22
|
-
gem.add_development_dependency 'bundler', '~> 2.
|
23
|
-
gem.add_development_dependency 'byebug', '~> 11.
|
23
|
+
gem.add_development_dependency 'bundler', '~> 2.2'
|
24
|
+
gem.add_development_dependency 'byebug', '~> 11.1'
|
24
25
|
gem.add_development_dependency 'rake', '~> 13.0'
|
25
|
-
gem.add_development_dependency 'rspec', '~> 3.
|
26
|
-
gem.add_development_dependency 'rubocop', '~>
|
26
|
+
gem.add_development_dependency 'rspec', '~> 3.10'
|
27
|
+
gem.add_development_dependency 'rubocop', '~> 1.8'
|
27
28
|
end
|
data/clib/linux/libgnparser.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
2
|
|
3
|
-
/* package
|
3
|
+
/* package github.com/gnames/gnparser/binding */
|
4
4
|
|
5
5
|
|
6
6
|
#line 1 "cgo-builtin-export-prolog"
|
@@ -74,19 +74,23 @@ extern "C" {
|
|
74
74
|
#endif
|
75
75
|
|
76
76
|
|
77
|
-
// ParseToString function takes a name-string, desired format,
|
78
|
-
// the name-string to either JSON, or
|
79
|
-
// the desired format. Format can take values of
|
77
|
+
// ParseToString function takes a name-string, desired format, a withDetails
|
78
|
+
// flag as 0|1 integer. It parses the name-string to either JSON, or a CSV
|
79
|
+
// string, depending on the desired format. Format argument can take values of
|
80
|
+
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
|
+
// parsed details are omitted, if it is 1 -- they are included.
|
82
|
+
// true.
|
83
|
+
extern char* ParseToString(char* name, char* f, int details);
|
80
84
|
|
81
|
-
|
85
|
+
// FreeMemory takes a string pointer and frees its memory.
|
86
|
+
extern void FreeMemory(char* p);
|
82
87
|
|
83
|
-
//
|
84
|
-
//
|
85
|
-
//
|
86
|
-
//
|
87
|
-
//
|
88
|
-
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
|
88
|
+
// ParseAryToString function takes an array of names, parsing format, and a
|
89
|
+
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
90
|
+
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
|
+
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
|
+
// true.
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
90
94
|
|
91
95
|
#ifdef __cplusplus
|
92
96
|
}
|
data/clib/linux/libgnparser.so
CHANGED
Binary file
|
data/clib/mac/libgnparser.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
2
|
|
3
|
-
/* package
|
3
|
+
/* package github.com/gnames/gnparser/binding */
|
4
4
|
|
5
5
|
|
6
6
|
#line 1 "cgo-builtin-export-prolog"
|
@@ -74,19 +74,26 @@ extern "C" {
|
|
74
74
|
#endif
|
75
75
|
|
76
76
|
|
77
|
-
// ParseToString function takes a name-string, desired format,
|
78
|
-
// the name-string to either JSON, or
|
79
|
-
// the desired format. Format can take values of
|
77
|
+
// ParseToString function takes a name-string, desired format, a withDetails
|
78
|
+
// flag as 0|1 integer. It parses the name-string to either JSON, or a CSV
|
79
|
+
// string, depending on the desired format. Format argument can take values of
|
80
|
+
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
|
+
// parsed details are omitted, if it is 1 -- they are included.
|
82
|
+
// true.
|
80
83
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
84
|
+
extern char* ParseToString(char* p0, char* p1, int p2);
|
82
85
|
|
83
|
-
//
|
84
|
-
// reference to an output: an empty array of strings to return the the data
|
85
|
-
// back. It populates the output array with raw strings of either JSON or
|
86
|
-
// pipe-separated parsed values (depending on a given format). Format can take
|
87
|
-
// values of 'simple', 'compact', or 'pretty'.
|
86
|
+
// FreeMemory takes a string pointer and frees its memory.
|
88
87
|
|
89
|
-
extern void
|
88
|
+
extern void FreeMemory(char* p0);
|
89
|
+
|
90
|
+
// ParseAryToString function takes an array of names, parsing format, and a
|
91
|
+
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
92
|
+
// either CSV or JSON format. Format argument can take values of 'csv',
|
93
|
+
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
94
|
+
// true.
|
95
|
+
|
96
|
+
extern char* ParseAryToString(char** p0, int p1, char* p2, int p3);
|
90
97
|
|
91
98
|
#ifdef __cplusplus
|
92
99
|
}
|
data/clib/mac/libgnparser.so
CHANGED
Binary file
|
@@ -0,0 +1,97 @@
|
|
1
|
+
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
|
+
|
3
|
+
/* package github.com/gnames/gnparser/binding */
|
4
|
+
|
5
|
+
|
6
|
+
#line 1 "cgo-builtin-export-prolog"
|
7
|
+
|
8
|
+
#include <stddef.h> /* for ptrdiff_t below */
|
9
|
+
|
10
|
+
#ifndef GO_CGO_EXPORT_PROLOGUE_H
|
11
|
+
#define GO_CGO_EXPORT_PROLOGUE_H
|
12
|
+
|
13
|
+
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
14
|
+
typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
15
|
+
#endif
|
16
|
+
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* Start of preamble from import "C" comments. */
|
20
|
+
|
21
|
+
|
22
|
+
#line 3 "main.go"
|
23
|
+
|
24
|
+
#include "stdlib.h"
|
25
|
+
|
26
|
+
#line 1 "cgo-generated-wrapper"
|
27
|
+
|
28
|
+
|
29
|
+
/* End of preamble from import "C" comments. */
|
30
|
+
|
31
|
+
|
32
|
+
/* Start of boilerplate cgo prologue. */
|
33
|
+
#line 1 "cgo-gcc-export-header-prolog"
|
34
|
+
|
35
|
+
#ifndef GO_CGO_PROLOGUE_H
|
36
|
+
#define GO_CGO_PROLOGUE_H
|
37
|
+
|
38
|
+
typedef signed char GoInt8;
|
39
|
+
typedef unsigned char GoUint8;
|
40
|
+
typedef short GoInt16;
|
41
|
+
typedef unsigned short GoUint16;
|
42
|
+
typedef int GoInt32;
|
43
|
+
typedef unsigned int GoUint32;
|
44
|
+
typedef long long GoInt64;
|
45
|
+
typedef unsigned long long GoUint64;
|
46
|
+
typedef GoInt64 GoInt;
|
47
|
+
typedef GoUint64 GoUint;
|
48
|
+
typedef __SIZE_TYPE__ GoUintptr;
|
49
|
+
typedef float GoFloat32;
|
50
|
+
typedef double GoFloat64;
|
51
|
+
typedef float _Complex GoComplex64;
|
52
|
+
typedef double _Complex GoComplex128;
|
53
|
+
|
54
|
+
/*
|
55
|
+
static assertion to make sure the file is being used on architecture
|
56
|
+
at least with matching size of GoInt.
|
57
|
+
*/
|
58
|
+
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
|
59
|
+
|
60
|
+
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
61
|
+
typedef _GoString_ GoString;
|
62
|
+
#endif
|
63
|
+
typedef void *GoMap;
|
64
|
+
typedef void *GoChan;
|
65
|
+
typedef struct { void *t; void *v; } GoInterface;
|
66
|
+
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
|
67
|
+
|
68
|
+
#endif
|
69
|
+
|
70
|
+
/* End of boilerplate cgo prologue. */
|
71
|
+
|
72
|
+
#ifdef __cplusplus
|
73
|
+
extern "C" {
|
74
|
+
#endif
|
75
|
+
|
76
|
+
|
77
|
+
// ParseToString function takes a name-string, desired format, a withDetails
|
78
|
+
// flag as 0|1 integer. It parses the name-string to either JSON, or a CSV
|
79
|
+
// string, depending on the desired format. Format argument can take values of
|
80
|
+
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
|
+
// parsed details are omitted, if it is 1 -- they are included.
|
82
|
+
// true.
|
83
|
+
extern char* ParseToString(char* name, char* f, int details);
|
84
|
+
|
85
|
+
// FreeMemory takes a string pointer and frees its memory.
|
86
|
+
extern void FreeMemory(char* p);
|
87
|
+
|
88
|
+
// ParseAryToString function takes an array of names, parsing format, and a
|
89
|
+
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
90
|
+
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
|
+
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
|
+
// true.
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
94
|
+
|
95
|
+
#ifdef __cplusplus
|
96
|
+
}
|
97
|
+
#endif
|
Binary file
|
data/lib/biodiversity.rb
CHANGED
data/lib/biodiversity/parser.rb
CHANGED
@@ -17,7 +17,7 @@ module Biodiversity
|
|
17
17
|
'linux'
|
18
18
|
when 'darwin'
|
19
19
|
'mac'
|
20
|
-
when '
|
20
|
+
when 'mingw32'
|
21
21
|
'win'
|
22
22
|
else
|
23
23
|
raise "Unsupported platform: #{Gem.platforms[1].os}"
|
@@ -25,55 +25,65 @@ module Biodiversity
|
|
25
25
|
ffi_lib File.join(__dir__, '..', '..', 'clib', platform, 'libgnparser.so')
|
26
26
|
POINTER_SIZE = FFI.type_size(:pointer)
|
27
27
|
|
28
|
-
|
29
|
-
attach_function(:parse_ary_go, :ParseAryToStrings,
|
30
|
-
%i[pointer int string pointer], :void)
|
28
|
+
callback(:parser_callback, %i[string], :void)
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
attach_function(:parse_go, :ParseToString,
|
31
|
+
%i[string string int], :strptr)
|
32
|
+
attach_function(:parse_ary_go, :ParseAryToString,
|
33
|
+
%i[pointer int string int], :strptr)
|
34
|
+
attach_function(:free_mem, :FreeMemory, %i[pointer], :void)
|
35
|
+
|
36
|
+
def self.parse(name, simple: false)
|
37
|
+
format = simple ? 'csv' : 'compact'
|
38
|
+
|
39
|
+
parsed, ptr = parse_go(name, format, 0)
|
40
|
+
free_mem(ptr)
|
35
41
|
output(parsed, simple)
|
36
42
|
end
|
37
43
|
|
38
|
-
def self.parse_ary(ary, simple
|
39
|
-
format = simple ? '
|
44
|
+
def self.parse_ary(ary, simple: false)
|
45
|
+
format = simple ? 'csv' : 'compact'
|
40
46
|
in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
|
47
|
+
|
41
48
|
in_ptr.write_array_of_pointer(
|
42
49
|
ary.map { |s| FFI::MemoryPointer.from_string(s) }
|
43
50
|
)
|
44
|
-
out_var = FFI::MemoryPointer.new(:pointer)
|
45
|
-
parse_ary_go(in_ptr, ary.length, format, out_var)
|
46
51
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
parsed, ptr = parse_ary_go(in_ptr, ary.length, format, 0)
|
53
|
+
free_mem(ptr)
|
54
|
+
if simple
|
55
|
+
CSV.new(parsed).map do |row|
|
56
|
+
csv_row(row)
|
57
|
+
end
|
58
|
+
else
|
59
|
+
JSON.parse(parsed, symbolize_names: true)
|
55
60
|
end
|
56
|
-
CLib.free(out_var.read_pointer)
|
57
61
|
end
|
58
62
|
|
59
63
|
def self.output(parsed, simple)
|
60
64
|
if simple
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
verbatim: parsed[1],
|
65
|
-
canonicalName: {
|
66
|
-
full: parsed[2],
|
67
|
-
simple: parsed[3],
|
68
|
-
stem: parsed[4]
|
69
|
-
},
|
70
|
-
authorship: parsed[5],
|
71
|
-
year: parsed[6],
|
72
|
-
quality: parsed[7]
|
73
|
-
}
|
65
|
+
csv = CSV.new(parsed)
|
66
|
+
row = csv.readlines[0]
|
67
|
+
csv_row(row)
|
74
68
|
else
|
75
69
|
JSON.parse(parsed, symbolize_names: true)
|
76
70
|
end
|
77
71
|
end
|
72
|
+
|
73
|
+
def self.csv_row(row)
|
74
|
+
{
|
75
|
+
id: row[0],
|
76
|
+
verbatim: row[1],
|
77
|
+
cardinality: row[2],
|
78
|
+
canonical: {
|
79
|
+
stem: row[3],
|
80
|
+
simple: row[4],
|
81
|
+
full: row[5]
|
82
|
+
},
|
83
|
+
authorship: row[6],
|
84
|
+
year: row[7],
|
85
|
+
quality: row[8]
|
86
|
+
}
|
87
|
+
end
|
78
88
|
end
|
79
89
|
end
|
data/lib/biodiversity/version.rb
CHANGED
data/spec/lib/parser_spec.rb
CHANGED
@@ -5,20 +5,20 @@
|
|
5
5
|
describe Biodiversity::Parser do
|
6
6
|
describe('parse') do
|
7
7
|
it 'parses name in simple format' do
|
8
|
-
parsed = subject.parse('Homo sapiens Linn.', true)
|
9
|
-
expect(parsed[:
|
8
|
+
parsed = subject.parse('Homo sapiens Linn.', simple: true)
|
9
|
+
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
10
10
|
expect(parsed[:normalized]).to be_nil
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'parsed name in full format' do
|
14
14
|
parsed = subject.parse('Homo sapiens Linn.')
|
15
|
-
expect(parsed[:
|
15
|
+
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
16
16
|
expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'gets quality and year correctly in simple form' do
|
20
|
-
parsed = subject.parse('Homo sapiens Linn. 1758', true)
|
21
|
-
expect(parsed[:
|
20
|
+
parsed = subject.parse('Homo sapiens Linn. 1758', simple: true)
|
21
|
+
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
22
22
|
expect(parsed[:year]).to eq '1758'
|
23
23
|
expect(parsed[:quality]).to eq '1'
|
24
24
|
expect(parsed[:normalized]).to be_nil
|
@@ -27,9 +27,12 @@ describe Biodiversity::Parser do
|
|
27
27
|
|
28
28
|
describe('parse_ary') do
|
29
29
|
it 'parses names in simple format' do
|
30
|
-
parsed = subject.parse_ary(
|
31
|
-
|
32
|
-
|
30
|
+
parsed = subject.parse_ary(
|
31
|
+
['Homo sapiens Linn.', 'Pardosa moesta'],
|
32
|
+
simple: true
|
33
|
+
)
|
34
|
+
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
35
|
+
expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
|
33
36
|
expect(parsed[0][:normalized]).to be_nil
|
34
37
|
end
|
35
38
|
|
@@ -37,7 +40,7 @@ describe Biodiversity::Parser do
|
|
37
40
|
parsed = subject.parse_ary(
|
38
41
|
['Homo sapiens Linn.', 'Tobacco Mosaic Virus']
|
39
42
|
)
|
40
|
-
expect(parsed[0][:
|
43
|
+
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
41
44
|
expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
|
42
45
|
expect(parsed[1][:parsed]).to be false
|
43
46
|
expect(parsed[1][:virus]).to be true
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 5.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -16,42 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.14'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.14'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '2.
|
33
|
+
version: '2.2'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '2.
|
40
|
+
version: '2.2'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: byebug
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '11.
|
47
|
+
version: '11.1'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '11.
|
54
|
+
version: '11.1'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,28 +72,28 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '3.
|
75
|
+
version: '3.10'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '3.
|
82
|
+
version: '3.10'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rubocop
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '1.8'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
96
|
+
version: '1.8'
|
97
97
|
description: Parsing tool for biodiversity informatics
|
98
98
|
email: dmozzherin@gmail.com
|
99
99
|
executables: []
|
@@ -115,6 +115,8 @@ files:
|
|
115
115
|
- clib/linux/libgnparser.so
|
116
116
|
- clib/mac/libgnparser.h
|
117
117
|
- clib/mac/libgnparser.so
|
118
|
+
- clib/win/libgnparser.h
|
119
|
+
- clib/win/libgnparser.so
|
118
120
|
- lib/biodiversity.rb
|
119
121
|
- lib/biodiversity/parser.rb
|
120
122
|
- lib/biodiversity/version.rb
|
@@ -125,7 +127,7 @@ homepage: https://github.com/GlobalNamesArchitecture/biodiversity
|
|
125
127
|
licenses:
|
126
128
|
- MIT
|
127
129
|
metadata: {}
|
128
|
-
post_install_message:
|
130
|
+
post_install_message:
|
129
131
|
rdoc_options: []
|
130
132
|
require_paths:
|
131
133
|
- lib
|
@@ -133,16 +135,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
135
|
requirements:
|
134
136
|
- - ">="
|
135
137
|
- !ruby/object:Gem::Version
|
136
|
-
version: '
|
138
|
+
version: '2.6'
|
137
139
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
140
|
requirements:
|
139
141
|
- - ">="
|
140
142
|
- !ruby/object:Gem::Version
|
141
143
|
version: '0'
|
142
144
|
requirements: []
|
143
|
-
|
144
|
-
|
145
|
-
signing_key:
|
145
|
+
rubygems_version: 3.2.3
|
146
|
+
signing_key:
|
146
147
|
specification_version: 4
|
147
148
|
summary: Parser of scientific names
|
148
149
|
test_files: []
|