biodiversity 4.0.2 → 4.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.ruby-version +1 -1
- data/CHANGELOG +2 -0
- data/README.md +40 -1
- data/clib/linux/callback_bridge.h +3 -0
- data/clib/linux/libgnparser.h +4 -2
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/callback_bridge.h +3 -0
- data/clib/mac/libgnparser.h +4 -2
- data/clib/mac/libgnparser.so +0 -0
- data/clib/win/callback_bridge.h +3 -0
- data/clib/win/libgnparser.h +4 -2
- data/clib/win/libgnparser.so +0 -0
- data/lib/biodiversity.rb +1 -0
- data/lib/biodiversity/parser.rb +17 -15
- data/lib/biodiversity/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d1bb6dd3170d4a3d036cf5068a6d663606ec85f29003b24408e252e782ff4fd
|
4
|
+
data.tar.gz: 2ee9a9e168b107ecd851302c32c5b06cbafe25df83bae385fe00d620fbfd8cde
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2643c3916407220055a52581d357e09519a94be555e4703a9ecf4a8709f99f35a8be9232b3612913baa5a30e42812fcbf66d223f7824def39eee0bdef7e7da8
|
7
|
+
data.tar.gz: faaebcafb79ccc3273a28ace4c00f642e43b7813dd7477c4ec19c1ddf4252c8c871f7185746e40a9c94e37b04fc422cfd9c53aea53f97b532c74324b2fc66387
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.5
|
1
|
+
2.6.5
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Biodiversity
|
2
2
|
============
|
3
3
|
|
4
|
-
[DOI: 10.5281/zenodo.3569596](https://doi.org/10.5281/zenodo.3569596)
|
5
5
|
[![Gem Version][gem_svg]][gem_link]
|
6
6
|
[![Continuous Integration Status][ci_svg]][ci_link]
|
7
7
|
|
@@ -17,6 +17,7 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
17
17
|
|
18
18
|
- [Biodiversity](#biodiversity)
|
19
19
|
- [Installation](#installation)
|
20
|
+
- [Benchmarks](#benchmarks)
|
20
21
|
- [Example usage](#example-usage)
|
21
22
|
- [What is "nameStringID" in the parsed results?](#what-is-%22namestringid%22-in-the-parsed-results)
|
22
23
|
- [Copyright](#copyright)
|
@@ -27,6 +28,38 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
27
28
|
|
28
29
|
The gem should work on Linux, Mac and Windows (64bit) machines
|
29
30
|
|
31
|
+
## Benchmarks
|
32
|
+
|
33
|
+
The fastest way to go through a massive number of names is to use the
|
34
|
+
`Biodiversity::Parser.parse_ary([big array], simple = true)` function.
|
35
|
+
|
36
|
+
For example, parsing a large file with one name per line:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
#!/usr/bin/env ruby
|
40
|
+
|
41
|
+
require 'biodiversity'
|
42
|
+
|
43
|
+
P = Biodiversity::Parser
|
44
|
+
count = 0
|
45
|
+
File.open('all_names.txt').each_slice(50_000) do |sl|
|
46
|
+
count += 1
|
47
|
+
res = P.parse_ary(sl, true)
|
48
|
+
puts count * 50_000
|
49
|
+
puts res[0]
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
Here are comparative results of running parsers against a file with 24
|
54
|
+
million names on a 4CPU hyperthreaded laptop:
|
55
|
+
|
56
|
+
| Program | Version | Full/Simple | Names/min |
|
57
|
+
| ------------ | ------- | ----------- | --------: |
|
58
|
+
| gnparser | 0.12.0 | Simple | 3,000,000 |
|
59
|
+
| biodiversity | 4.0.1 | Simple | 2,000,000 |
|
60
|
+
| biodiversity | 4.0.1 | Full JSON | 800,000 |
|
61
|
+
| biodiversity | 3.5.1 | n/a | 40,000 |
|
62
|
+
|
30
63
|
## Example usage
|
31
64
|
|
32
65
|
You can use it as a library in Ruby:
|
@@ -92,6 +125,10 @@ Copyright
|
|
92
125
|
|
93
126
|
Authors: [Dmitry Mozzherin][dimus]
|
94
127
|
|
128
|
+
Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
|
129
|
+
|
130
|
+
|
131
|
+
|
95
132
|
Copyright (c) 2008-2019 Dmitry Mozzherin. See [LICENSE][license]
|
96
133
|
for further details.
|
97
134
|
|
@@ -100,6 +137,8 @@ for further details.
|
|
100
137
|
[ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
|
101
138
|
[ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
102
139
|
[dimus]: https://github.com/dimus
|
140
|
+
[pleary]: https://github.com/pleary
|
141
|
+
[hernan]: https://github.com/LocoDelAssembly
|
103
142
|
[license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
|
104
143
|
[uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
|
105
144
|
[uuid_blog]: http://globalnamesarchitecture.github.io/gna/uuid/2015/05/31/gn-uuid-0-5-0.html
|
data/clib/linux/libgnparser.h
CHANGED
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
22
22
|
#line 3 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
|
+
#include "callback_bridge.h"
|
25
26
|
|
26
27
|
#line 1 "cgo-generated-wrapper"
|
27
28
|
|
@@ -77,8 +78,9 @@ extern "C" {
|
|
77
78
|
// ParseToString function takes a name-string, desired format, and parses
|
78
79
|
// the name-string to either JSON, or pipe-separated values, depending on
|
79
80
|
// the desired format. Format can take values of 'simple', 'compact', 'pretty'.
|
81
|
+
// NOTE: Read callback type as "void (*callback)(char *parsed)"
|
80
82
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
83
|
+
extern void ParseToString(char* p0, char* p1, void* p2);
|
82
84
|
|
83
85
|
// ParseAryToStrings function takes an array of names, parsing format and a
|
84
86
|
// reference to an output: an empty array of strings to return the the data
|
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
|
|
86
88
|
// pipe-separated parsed values (depending on a given format). Format can take
|
87
89
|
// values of 'simple', 'compact', or 'pretty'.
|
88
90
|
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2,
|
91
|
+
extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
|
90
92
|
|
91
93
|
#ifdef __cplusplus
|
92
94
|
}
|
data/clib/linux/libgnparser.so
CHANGED
Binary file
|
data/clib/mac/libgnparser.h
CHANGED
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
22
22
|
#line 3 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
|
+
#include "callback_bridge.h"
|
25
26
|
|
26
27
|
#line 1 "cgo-generated-wrapper"
|
27
28
|
|
@@ -77,8 +78,9 @@ extern "C" {
|
|
77
78
|
// ParseToString function takes a name-string, desired format, and parses
|
78
79
|
// the name-string to either JSON, or pipe-separated values, depending on
|
79
80
|
// the desired format. Format can take values of 'simple', 'compact', 'pretty'.
|
81
|
+
// NOTE: Read callback type as "void (*callback)(char *parsed)"
|
80
82
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
83
|
+
extern void ParseToString(char* p0, char* p1, void* p2);
|
82
84
|
|
83
85
|
// ParseAryToStrings function takes an array of names, parsing format and a
|
84
86
|
// reference to an output: an empty array of strings to return the data
|
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
|
|
86
88
|
// pipe-separated parsed values (depending on a given format). Format can take
|
87
89
|
// values of 'simple', 'compact', or 'pretty'.
|
88
90
|
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2,
|
91
|
+
extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
|
90
92
|
|
91
93
|
#ifdef __cplusplus
|
92
94
|
}
|
data/clib/mac/libgnparser.so
CHANGED
Binary file
|
data/clib/win/libgnparser.h
CHANGED
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
22
22
|
#line 3 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
|
+
#include "callback_bridge.h"
|
25
26
|
|
26
27
|
#line 1 "cgo-generated-wrapper"
|
27
28
|
|
@@ -77,8 +78,9 @@ extern "C" {
|
|
77
78
|
// ParseToString function takes a name-string, desired format, and parses
|
78
79
|
// the name-string to either JSON, or pipe-separated values, depending on
|
79
80
|
// the desired format. Format can take values of 'simple', 'compact', 'pretty'.
|
81
|
+
// NOTE: Read callback type as "void (*callback)(char *parsed)"
|
80
82
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
83
|
+
extern void ParseToString(char* p0, char* p1, void* p2);
|
82
84
|
|
83
85
|
// ParseAryToStrings function takes an array of names, parsing format and a
|
84
86
|
// reference to an output: an empty array of strings to return the data
|
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
|
|
86
88
|
// pipe-separated parsed values (depending on a given format). Format can take
|
87
89
|
// values of 'simple', 'compact', or 'pretty'.
|
88
90
|
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2,
|
91
|
+
extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
|
90
92
|
|
91
93
|
#ifdef __cplusplus
|
92
94
|
}
|
data/clib/win/libgnparser.so
CHANGED
Binary file
|
data/lib/biodiversity.rb
CHANGED
data/lib/biodiversity/parser.rb
CHANGED
@@ -25,40 +25,42 @@ module Biodiversity
|
|
25
25
|
ffi_lib File.join(__dir__, '..', '..', 'clib', platform, 'libgnparser.so')
|
26
26
|
POINTER_SIZE = FFI.type_size(:pointer)
|
27
27
|
|
28
|
-
|
28
|
+
callback(:parser_callback, %i[string], :void)
|
29
|
+
|
30
|
+
attach_function(:parse_go, :ParseToString,
|
31
|
+
%i[string string parser_callback], :void)
|
29
32
|
attach_function(:parse_ary_go, :ParseAryToStrings,
|
30
|
-
%i[pointer int string
|
33
|
+
%i[pointer int string parser_callback], :void)
|
31
34
|
|
32
35
|
def self.parse(name, simple = false)
|
33
36
|
format = simple ? 'simple' : 'compact'
|
34
|
-
|
37
|
+
|
38
|
+
parsed = nil
|
39
|
+
callback = FFI::Function.new(:void, [:string]) { |str| parsed = str }
|
40
|
+
parse_go(name, format, callback)
|
35
41
|
output(parsed, simple)
|
36
42
|
end
|
37
43
|
|
38
44
|
def self.parse_ary(ary, simple = false)
|
39
45
|
format = simple ? 'simple' : 'compact'
|
40
46
|
in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
|
47
|
+
|
41
48
|
in_ptr.write_array_of_pointer(
|
42
49
|
ary.map { |s| FFI::MemoryPointer.from_string(s) }
|
43
50
|
)
|
44
|
-
out_var = FFI::MemoryPointer.new(:pointer)
|
45
|
-
parse_ary_go(in_ptr, ary.length, format, out_var)
|
46
51
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
a << output(prsd, simple)
|
51
|
-
end
|
52
|
-
ensure
|
53
|
-
out_var.read_pointer.get_array_of_pointer(0, ary.length).each do |p|
|
54
|
-
CLib.free(p)
|
52
|
+
out_ary = []
|
53
|
+
callback = FFI::Function.new(:void, [:string]) do |str|
|
54
|
+
out_ary << output(str, simple)
|
55
55
|
end
|
56
|
-
|
56
|
+
parse_ary_go(in_ptr, ary.length, format, callback)
|
57
|
+
out_ary
|
57
58
|
end
|
58
59
|
|
59
60
|
def self.output(parsed, simple)
|
60
61
|
if simple
|
61
|
-
|
62
|
+
csv = CSV.new(parsed)
|
63
|
+
parsed = csv.read[0]
|
62
64
|
{
|
63
65
|
id: parsed[0],
|
64
66
|
verbatim: parsed[1],
|
data/lib/biodiversity/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.2
|
4
|
+
version: 4.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -111,10 +111,13 @@ files:
|
|
111
111
|
- README.md
|
112
112
|
- Rakefile
|
113
113
|
- biodiversity.gemspec
|
114
|
+
- clib/linux/callback_bridge.h
|
114
115
|
- clib/linux/libgnparser.h
|
115
116
|
- clib/linux/libgnparser.so
|
117
|
+
- clib/mac/callback_bridge.h
|
116
118
|
- clib/mac/libgnparser.h
|
117
119
|
- clib/mac/libgnparser.so
|
120
|
+
- clib/win/callback_bridge.h
|
118
121
|
- clib/win/libgnparser.h
|
119
122
|
- clib/win/libgnparser.so
|
120
123
|
- lib/biodiversity.rb
|
@@ -142,8 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
145
|
- !ruby/object:Gem::Version
|
143
146
|
version: '0'
|
144
147
|
requirements: []
|
145
|
-
|
146
|
-
rubygems_version: 2.7.6.2
|
148
|
+
rubygems_version: 3.0.3
|
147
149
|
signing_key:
|
148
150
|
specification_version: 4
|
149
151
|
summary: Parser of scientific names
|