biodiversity 4.0.0 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -1
- data/.ruby-version +1 -1
- data/.travis.yml +3 -1
- data/CHANGELOG +8 -0
- data/LICENSE +1 -1
- data/README.md +48 -4
- data/biodiversity.gemspec +6 -5
- data/clib/linux/libgnparser.h +16 -12
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +18 -11
- data/clib/mac/libgnparser.so +0 -0
- data/clib/win/libgnparser.h +97 -0
- data/clib/win/libgnparser.so +0 -0
- data/lib/biodiversity.rb +1 -0
- data/lib/biodiversity/parser.rb +43 -32
- data/lib/biodiversity/version.rb +1 -1
- data/spec/lib/parser_spec.rb +18 -7
- metadata +19 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46c26c9c1ada65f95edf5e00365203f4e68d596e0b14f44b6995b46e127afab9
|
4
|
+
data.tar.gz: ce2cced66dbebc464ef47eee7f401a8a6b946d69d6664b7fcf1c8d5c413f7251
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea6b814f8fe7bf451a360c87674c53d63bf49a2351e23aa724b7974cf269986ab7206fd1987dc945224fe9bc9cf36605994f34d6d6fd61ad72c03783cb41629d
|
7
|
+
data.tar.gz: b456ef00e072b37d12f0312b347af29aea126a40f4f86ab0b735b42b185977e692ef6fc94c18072b42e75ee6b69d254ba5bb90fe0a4d7a7f0b057366af403a8e
|
data/.rubocop.yml
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
AllCops:
|
2
|
+
TargetRubyVersion: 2.6
|
3
|
+
NewCops: disable
|
4
|
+
SuggestExtensions: false
|
2
5
|
Exclude:
|
3
6
|
- .bundle/**/*
|
4
7
|
- bundle_bin/**/*
|
@@ -10,4 +13,10 @@ Metrics/MethodLength:
|
|
10
13
|
- lib/**/*
|
11
14
|
Metrics/BlockLength:
|
12
15
|
Exclude:
|
13
|
-
- spec/**/*
|
16
|
+
- spec/**/*
|
17
|
+
Style/HashEachMethods:
|
18
|
+
Enabled: true
|
19
|
+
Style/HashTransformKeys:
|
20
|
+
Enabled: true
|
21
|
+
Style/HashTransformValues:
|
22
|
+
Enabled: true
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
5.0.0 -- changed GNparser to v 1.0.0
|
2
|
+
|
3
|
+
4.0.3 -- fix memory leak in parse_ary method
|
4
|
+
|
5
|
+
4.0.2 -- add MS Windows libraries
|
6
|
+
|
7
|
+
4.0.1 -- fix for simple output
|
8
|
+
|
1
9
|
4.0.0 -- migrate code to gnparser C-shared library. This change breaks
|
2
10
|
backward compatibility, and makes parser dramatically faster.
|
3
11
|
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Biodiversity
|
2
2
|
============
|
3
3
|
|
4
|
-
[![DOI](https://zenodo.org/badge/
|
4
|
+
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
|
5
5
|
[![Gem Version][gem_svg]][gem_link]
|
6
6
|
[![Continuous Integration Status][ci_svg]][ci_link]
|
7
7
|
|
@@ -15,6 +15,12 @@ and better than previous versions.
|
|
15
15
|
This gem does not have a remote server or a command line executable anymore.
|
16
16
|
For such features use https://gitlab.com/gogna/gnparser.
|
17
17
|
|
18
|
+
- [Biodiversity](#biodiversity)
|
19
|
+
- [Installation](#installation)
|
20
|
+
- [Benchmarks](#benchmarks)
|
21
|
+
- [Example usage](#example-usage)
|
22
|
+
- [What is "nameStringID" in the parsed results?](#what-is-%22namestringid%22-in-the-parsed-results)
|
23
|
+
- [Copyright](#copyright)
|
18
24
|
|
19
25
|
## Installation
|
20
26
|
|
@@ -22,6 +28,38 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
22
28
|
|
23
29
|
The gem should work on Linux, Mac and Windows (64bit) machines
|
24
30
|
|
31
|
+
## Benchmarks
|
32
|
+
|
33
|
+
The fastest way to go through a massive amount of names is to use
|
34
|
+
`Biodiversity::Parser.parse_ary([big array], simple: true)` method.
|
35
|
+
|
36
|
+
For example, to parse a large file with one name per line:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
#!/usr/bin/env ruby
|
40
|
+
|
41
|
+
require 'biodiversity'
|
42
|
+
|
43
|
+
P = Biodiversity::Parser
|
44
|
+
count = 0
|
45
|
+
File.open('all_names.txt').each_slice(50_000) do |sl|
|
46
|
+
count += 1
|
47
|
+
res = P.parse_ary(sl, simple: true)
|
48
|
+
puts count * 50_000
|
49
|
+
puts res[0]
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
Here are comparative results of running parsers against a file with 24
|
54
|
+
million names on a 4CPU hyperthreaded laptop:
|
55
|
+
|
56
|
+
| Program | Version | Full/Simple | Names/min |
|
57
|
+
| ------------ | ------- | ----------- | --------: |
|
58
|
+
| gnparser | 0.12.0 | Simple | 3,000,000 |
|
59
|
+
| biodiversity | 4.0.1 | Simple | 2,000,000 |
|
60
|
+
| biodiversity | 4.0.1 | Full JSON | 800,000 |
|
61
|
+
| biodiversity | 3.5.1 | n/a | 40,000 |
|
62
|
+
|
25
63
|
## Example usage
|
26
64
|
|
27
65
|
You can use it as a library in Ruby:
|
@@ -40,13 +78,13 @@ Biodiversity.version
|
|
40
78
|
Biodiversity::Parser.parse("Plantago major", simple: true)
|
41
79
|
|
42
80
|
# to parse many scientific names using all computer CPUs
|
43
|
-
Biodiversity::Parser.
|
81
|
+
Biodiversity::Parser.parse_ary(["Plantago major", ... ], simple: true)
|
44
82
|
|
45
83
|
# to parse a scientific name into a very detailed Ruby hash
|
46
84
|
Biodiversity::Parser.parse("Plantago major")
|
47
85
|
|
48
86
|
# to parse many scientific names with all details using all computer CPUs
|
49
|
-
Biodiversity::Parser.
|
87
|
+
Biodiversity::Parser.parse_ary(["Plantago major", ... ])
|
50
88
|
|
51
89
|
# to get JSON representation
|
52
90
|
Biodiversity::Parser.parse("Plantago").to_json
|
@@ -87,7 +125,11 @@ Copyright
|
|
87
125
|
|
88
126
|
Authors: [Dmitry Mozzherin][dimus]
|
89
127
|
|
90
|
-
|
128
|
+
Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
Copyright (c) 2008-2020 Dmitry Mozzherin. See [LICENSE][license]
|
91
133
|
for further details.
|
92
134
|
|
93
135
|
[gem_svg]: https://badge.fury.io/rb/biodiversity.svg
|
@@ -95,6 +137,8 @@ for further details.
|
|
95
137
|
[ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
|
96
138
|
[ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
97
139
|
[dimus]: https://github.com/dimus
|
140
|
+
[pleary]: https://github.com/pleary
|
141
|
+
[hernan]: https://github.com/LocoDelAssembly
|
98
142
|
[license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
|
99
143
|
[uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
|
100
144
|
[uuid_blog]: http://globalnamesarchitecture.github.io/gna/uuid/2015/05/31/gn-uuid-0-5-0.html
|
data/biodiversity.gemspec
CHANGED
@@ -5,6 +5,7 @@ $LOAD_PATH.push File.expand_path('lib', __dir__)
|
|
5
5
|
require 'biodiversity/version'
|
6
6
|
|
7
7
|
Gem::Specification.new do |gem|
|
8
|
+
gem.required_ruby_version = '>= 2.6'
|
8
9
|
gem.name = 'biodiversity'
|
9
10
|
gem.version = Biodiversity::VERSION
|
10
11
|
gem.homepage = 'https://github.com/GlobalNamesArchitecture/biodiversity'
|
@@ -17,11 +18,11 @@ Gem::Specification.new do |gem|
|
|
17
18
|
gem.files = `git ls-files`.split("\n")
|
18
19
|
gem.require_paths = ['lib']
|
19
20
|
|
20
|
-
gem.add_runtime_dependency 'ffi', '~> 1.
|
21
|
+
gem.add_runtime_dependency 'ffi', '~> 1.14'
|
21
22
|
|
22
|
-
gem.add_development_dependency 'bundler', '~> 2.
|
23
|
-
gem.add_development_dependency 'byebug', '~> 11.
|
23
|
+
gem.add_development_dependency 'bundler', '~> 2.2'
|
24
|
+
gem.add_development_dependency 'byebug', '~> 11.1'
|
24
25
|
gem.add_development_dependency 'rake', '~> 13.0'
|
25
|
-
gem.add_development_dependency 'rspec', '~> 3.
|
26
|
-
gem.add_development_dependency 'rubocop', '~>
|
26
|
+
gem.add_development_dependency 'rspec', '~> 3.10'
|
27
|
+
gem.add_development_dependency 'rubocop', '~> 1.8'
|
27
28
|
end
|
data/clib/linux/libgnparser.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
2
|
|
3
|
-
/* package
|
3
|
+
/* package github.com/gnames/gnparser/binding */
|
4
4
|
|
5
5
|
|
6
6
|
#line 1 "cgo-builtin-export-prolog"
|
@@ -74,19 +74,23 @@ extern "C" {
|
|
74
74
|
#endif
|
75
75
|
|
76
76
|
|
77
|
-
// ParseToString function takes a name-string, desired format,
|
78
|
-
// the name-string to either JSON, or
|
79
|
-
// the desired format. Format can take values of
|
77
|
+
// ParseToString function takes a name-string, desired format, a withDetails
|
78
|
+
// flag as 0|1 integer. It parses the name-string to either JSON, or a CSV
|
79
|
+
// string, depending on the desired format. Format argument can take values of
|
80
|
+
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
|
+
// parsed details are omitted, if it is 1 -- they are included.
|
82
|
+
// true.
|
83
|
+
extern char* ParseToString(char* name, char* f, int details);
|
80
84
|
|
81
|
-
|
85
|
+
// FreeMemory takes a string pointer and frees its memory.
|
86
|
+
extern void FreeMemory(char* p);
|
82
87
|
|
83
|
-
//
|
84
|
-
//
|
85
|
-
//
|
86
|
-
//
|
87
|
-
//
|
88
|
-
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
|
88
|
+
// ParseAryToString function takes an array of names, parsing format, and a
|
89
|
+
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
90
|
+
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
|
+
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
|
+
// true.
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
90
94
|
|
91
95
|
#ifdef __cplusplus
|
92
96
|
}
|
data/clib/linux/libgnparser.so
CHANGED
Binary file
|
data/clib/mac/libgnparser.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
2
|
|
3
|
-
/* package
|
3
|
+
/* package github.com/gnames/gnparser/binding */
|
4
4
|
|
5
5
|
|
6
6
|
#line 1 "cgo-builtin-export-prolog"
|
@@ -74,19 +74,26 @@ extern "C" {
|
|
74
74
|
#endif
|
75
75
|
|
76
76
|
|
77
|
-
// ParseToString function takes a name-string, desired format,
|
78
|
-
// the name-string to either JSON, or
|
79
|
-
// the desired format. Format can take values of
|
77
|
+
// ParseToString function takes a name-string, desired format, a withDetails
|
78
|
+
// flag as 0|1 integer. It parses the name-string to either JSON, or a CSV
|
79
|
+
// string, depending on the desired format. Format argument can take values of
|
80
|
+
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
|
+
// parsed details are omitted, if it is 1 -- they are included.
|
82
|
+
// true.
|
80
83
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
84
|
+
extern char* ParseToString(char* p0, char* p1, int p2);
|
82
85
|
|
83
|
-
//
|
84
|
-
// reference to an output: an empty array of strings to return the the data
|
85
|
-
// back. It populates the output array with raw strings of either JSON or
|
86
|
-
// pipe-separated parsed values (depending on a given format). Format can take
|
87
|
-
// values of 'simple', 'compact', or 'pretty'.
|
86
|
+
// FreeMemory takes a string pointer and frees its memory.
|
88
87
|
|
89
|
-
extern void
|
88
|
+
extern void FreeMemory(char* p0);
|
89
|
+
|
90
|
+
// ParseAryToString function takes an array of names, parsing format, and a
|
91
|
+
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
92
|
+
// either CSV or JSON format. Format argument can take values of 'csv',
|
93
|
+
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
94
|
+
// true.
|
95
|
+
|
96
|
+
extern char* ParseAryToString(char** p0, int p1, char* p2, int p3);
|
90
97
|
|
91
98
|
#ifdef __cplusplus
|
92
99
|
}
|
data/clib/mac/libgnparser.so
CHANGED
Binary file
|
@@ -0,0 +1,97 @@
|
|
1
|
+
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
|
+
|
3
|
+
/* package github.com/gnames/gnparser/binding */
|
4
|
+
|
5
|
+
|
6
|
+
#line 1 "cgo-builtin-export-prolog"
|
7
|
+
|
8
|
+
#include <stddef.h> /* for ptrdiff_t below */
|
9
|
+
|
10
|
+
#ifndef GO_CGO_EXPORT_PROLOGUE_H
|
11
|
+
#define GO_CGO_EXPORT_PROLOGUE_H
|
12
|
+
|
13
|
+
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
14
|
+
typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
15
|
+
#endif
|
16
|
+
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* Start of preamble from import "C" comments. */
|
20
|
+
|
21
|
+
|
22
|
+
#line 3 "main.go"
|
23
|
+
|
24
|
+
#include "stdlib.h"
|
25
|
+
|
26
|
+
#line 1 "cgo-generated-wrapper"
|
27
|
+
|
28
|
+
|
29
|
+
/* End of preamble from import "C" comments. */
|
30
|
+
|
31
|
+
|
32
|
+
/* Start of boilerplate cgo prologue. */
|
33
|
+
#line 1 "cgo-gcc-export-header-prolog"
|
34
|
+
|
35
|
+
#ifndef GO_CGO_PROLOGUE_H
|
36
|
+
#define GO_CGO_PROLOGUE_H
|
37
|
+
|
38
|
+
typedef signed char GoInt8;
|
39
|
+
typedef unsigned char GoUint8;
|
40
|
+
typedef short GoInt16;
|
41
|
+
typedef unsigned short GoUint16;
|
42
|
+
typedef int GoInt32;
|
43
|
+
typedef unsigned int GoUint32;
|
44
|
+
typedef long long GoInt64;
|
45
|
+
typedef unsigned long long GoUint64;
|
46
|
+
typedef GoInt64 GoInt;
|
47
|
+
typedef GoUint64 GoUint;
|
48
|
+
typedef __SIZE_TYPE__ GoUintptr;
|
49
|
+
typedef float GoFloat32;
|
50
|
+
typedef double GoFloat64;
|
51
|
+
typedef float _Complex GoComplex64;
|
52
|
+
typedef double _Complex GoComplex128;
|
53
|
+
|
54
|
+
/*
|
55
|
+
static assertion to make sure the file is being used on architecture
|
56
|
+
at least with matching size of GoInt.
|
57
|
+
*/
|
58
|
+
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
|
59
|
+
|
60
|
+
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
61
|
+
typedef _GoString_ GoString;
|
62
|
+
#endif
|
63
|
+
typedef void *GoMap;
|
64
|
+
typedef void *GoChan;
|
65
|
+
typedef struct { void *t; void *v; } GoInterface;
|
66
|
+
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
|
67
|
+
|
68
|
+
#endif
|
69
|
+
|
70
|
+
/* End of boilerplate cgo prologue. */
|
71
|
+
|
72
|
+
#ifdef __cplusplus
|
73
|
+
extern "C" {
|
74
|
+
#endif
|
75
|
+
|
76
|
+
|
77
|
+
// ParseToString function takes a name-string, desired format, a withDetails
|
78
|
+
// flag as 0|1 integer. It parses the name-string to either JSON, or a CSV
|
79
|
+
// string, depending on the desired format. Format argument can take values of
|
80
|
+
// 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
|
81
|
+
// parsed details are omitted, if it is 1 -- they are included.
|
82
|
+
// true.
|
83
|
+
extern char* ParseToString(char* name, char* f, int details);
|
84
|
+
|
85
|
+
// FreeMemory takes a string pointer and frees its memory.
|
86
|
+
extern void FreeMemory(char* p);
|
87
|
+
|
88
|
+
// ParseAryToString function takes an array of names, parsing format, and a
|
89
|
+
// withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
|
90
|
+
// either CSV or JSON format. Format argument can take values of 'csv',
|
91
|
+
// 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
|
92
|
+
// true.
|
93
|
+
extern char* ParseAryToString(char** in, int length, char* f, int details);
|
94
|
+
|
95
|
+
#ifdef __cplusplus
|
96
|
+
}
|
97
|
+
#endif
|
Binary file
|
data/lib/biodiversity.rb
CHANGED
data/lib/biodiversity/parser.rb
CHANGED
@@ -17,7 +17,7 @@ module Biodiversity
|
|
17
17
|
'linux'
|
18
18
|
when 'darwin'
|
19
19
|
'mac'
|
20
|
-
when '
|
20
|
+
when 'mingw32'
|
21
21
|
'win'
|
22
22
|
else
|
23
23
|
raise "Unsupported platform: #{Gem.platforms[1].os}"
|
@@ -25,54 +25,65 @@ module Biodiversity
|
|
25
25
|
ffi_lib File.join(__dir__, '..', '..', 'clib', platform, 'libgnparser.so')
|
26
26
|
POINTER_SIZE = FFI.type_size(:pointer)
|
27
27
|
|
28
|
-
|
29
|
-
attach_function(:parse_ary_go, :ParseAryToStrings,
|
30
|
-
%i[pointer int string pointer], :void)
|
28
|
+
callback(:parser_callback, %i[string], :void)
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
attach_function(:parse_go, :ParseToString,
|
31
|
+
%i[string string int], :strptr)
|
32
|
+
attach_function(:parse_ary_go, :ParseAryToString,
|
33
|
+
%i[pointer int string int], :strptr)
|
34
|
+
attach_function(:free_mem, :FreeMemory, %i[pointer], :void)
|
35
|
+
|
36
|
+
def self.parse(name, simple: false)
|
37
|
+
format = simple ? 'csv' : 'compact'
|
38
|
+
|
39
|
+
parsed, ptr = parse_go(name, format, 0)
|
40
|
+
free_mem(ptr)
|
35
41
|
output(parsed, simple)
|
36
42
|
end
|
37
43
|
|
38
|
-
def self.parse_ary(ary, simple
|
39
|
-
format = simple ? '
|
44
|
+
def self.parse_ary(ary, simple: false)
|
45
|
+
format = simple ? 'csv' : 'compact'
|
40
46
|
in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
|
47
|
+
|
41
48
|
in_ptr.write_array_of_pointer(
|
42
49
|
ary.map { |s| FFI::MemoryPointer.from_string(s) }
|
43
50
|
)
|
44
|
-
out_var = FFI::MemoryPointer.new(:pointer)
|
45
|
-
parse_ary_go(in_ptr, ary.length, format, out_var)
|
46
51
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
parsed, ptr = parse_ary_go(in_ptr, ary.length, format, 0)
|
53
|
+
free_mem(ptr)
|
54
|
+
if simple
|
55
|
+
CSV.new(parsed).map do |row|
|
56
|
+
csv_row(row)
|
57
|
+
end
|
58
|
+
else
|
59
|
+
JSON.parse(parsed, symbolize_names: true)
|
55
60
|
end
|
56
|
-
CLib.free(out_var.read_pointer)
|
57
61
|
end
|
58
62
|
|
59
63
|
def self.output(parsed, simple)
|
60
64
|
if simple
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
verbatim: parsed[1],
|
65
|
-
canonicalName: {
|
66
|
-
full: parsed[2],
|
67
|
-
simple: parsed[3],
|
68
|
-
stem: parsed[4]
|
69
|
-
},
|
70
|
-
authorship: parsed[5],
|
71
|
-
quality: parsed[6]
|
72
|
-
}
|
65
|
+
csv = CSV.new(parsed)
|
66
|
+
row = csv.readlines[0]
|
67
|
+
csv_row(row)
|
73
68
|
else
|
74
69
|
JSON.parse(parsed, symbolize_names: true)
|
75
70
|
end
|
76
71
|
end
|
72
|
+
|
73
|
+
def self.csv_row(row)
|
74
|
+
{
|
75
|
+
id: row[0],
|
76
|
+
verbatim: row[1],
|
77
|
+
cardinality: row[2],
|
78
|
+
canonical: {
|
79
|
+
stem: row[3],
|
80
|
+
simple: row[4],
|
81
|
+
full: row[5]
|
82
|
+
},
|
83
|
+
authorship: row[6],
|
84
|
+
year: row[7],
|
85
|
+
quality: row[8]
|
86
|
+
}
|
87
|
+
end
|
77
88
|
end
|
78
89
|
end
|
data/lib/biodiversity/version.rb
CHANGED
data/spec/lib/parser_spec.rb
CHANGED
@@ -5,23 +5,34 @@
|
|
5
5
|
describe Biodiversity::Parser do
|
6
6
|
describe('parse') do
|
7
7
|
it 'parses name in simple format' do
|
8
|
-
parsed = subject.parse('Homo sapiens Linn.', true)
|
9
|
-
expect(parsed[:
|
8
|
+
parsed = subject.parse('Homo sapiens Linn.', simple: true)
|
9
|
+
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
10
10
|
expect(parsed[:normalized]).to be_nil
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'parsed name in full format' do
|
14
14
|
parsed = subject.parse('Homo sapiens Linn.')
|
15
|
-
expect(parsed[:
|
15
|
+
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
16
16
|
expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
|
17
17
|
end
|
18
|
+
|
19
|
+
it 'gets quality and year correctly in simple form' do
|
20
|
+
parsed = subject.parse('Homo sapiens Linn. 1758', simple: true)
|
21
|
+
expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
|
22
|
+
expect(parsed[:year]).to eq '1758'
|
23
|
+
expect(parsed[:quality]).to eq '1'
|
24
|
+
expect(parsed[:normalized]).to be_nil
|
25
|
+
end
|
18
26
|
end
|
19
27
|
|
20
28
|
describe('parse_ary') do
|
21
29
|
it 'parses names in simple format' do
|
22
|
-
parsed = subject.parse_ary(
|
23
|
-
|
24
|
-
|
30
|
+
parsed = subject.parse_ary(
|
31
|
+
['Homo sapiens Linn.', 'Pardosa moesta'],
|
32
|
+
simple: true
|
33
|
+
)
|
34
|
+
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
35
|
+
expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
|
25
36
|
expect(parsed[0][:normalized]).to be_nil
|
26
37
|
end
|
27
38
|
|
@@ -29,7 +40,7 @@ describe Biodiversity::Parser do
|
|
29
40
|
parsed = subject.parse_ary(
|
30
41
|
['Homo sapiens Linn.', 'Tobacco Mosaic Virus']
|
31
42
|
)
|
32
|
-
expect(parsed[0][:
|
43
|
+
expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
|
33
44
|
expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
|
34
45
|
expect(parsed[1][:parsed]).to be false
|
35
46
|
expect(parsed[1][:virus]).to be true
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 5.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -16,42 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.14'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.14'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '2.
|
33
|
+
version: '2.2'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '2.
|
40
|
+
version: '2.2'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: byebug
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '11.
|
47
|
+
version: '11.1'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '11.
|
54
|
+
version: '11.1'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,28 +72,28 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '3.
|
75
|
+
version: '3.10'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '3.
|
82
|
+
version: '3.10'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rubocop
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '1.8'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
96
|
+
version: '1.8'
|
97
97
|
description: Parsing tool for biodiversity informatics
|
98
98
|
email: dmozzherin@gmail.com
|
99
99
|
executables: []
|
@@ -115,6 +115,8 @@ files:
|
|
115
115
|
- clib/linux/libgnparser.so
|
116
116
|
- clib/mac/libgnparser.h
|
117
117
|
- clib/mac/libgnparser.so
|
118
|
+
- clib/win/libgnparser.h
|
119
|
+
- clib/win/libgnparser.so
|
118
120
|
- lib/biodiversity.rb
|
119
121
|
- lib/biodiversity/parser.rb
|
120
122
|
- lib/biodiversity/version.rb
|
@@ -125,7 +127,7 @@ homepage: https://github.com/GlobalNamesArchitecture/biodiversity
|
|
125
127
|
licenses:
|
126
128
|
- MIT
|
127
129
|
metadata: {}
|
128
|
-
post_install_message:
|
130
|
+
post_install_message:
|
129
131
|
rdoc_options: []
|
130
132
|
require_paths:
|
131
133
|
- lib
|
@@ -133,16 +135,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
135
|
requirements:
|
134
136
|
- - ">="
|
135
137
|
- !ruby/object:Gem::Version
|
136
|
-
version: '
|
138
|
+
version: '2.6'
|
137
139
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
140
|
requirements:
|
139
141
|
- - ">="
|
140
142
|
- !ruby/object:Gem::Version
|
141
143
|
version: '0'
|
142
144
|
requirements: []
|
143
|
-
|
144
|
-
|
145
|
-
signing_key:
|
145
|
+
rubygems_version: 3.2.3
|
146
|
+
signing_key:
|
146
147
|
specification_version: 4
|
147
148
|
summary: Parser of scientific names
|
148
149
|
test_files: []
|