biodiversity 4.0.2 → 4.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.ruby-version +1 -1
- data/CHANGELOG +2 -0
- data/README.md +40 -1
- data/clib/linux/callback_bridge.h +3 -0
- data/clib/linux/libgnparser.h +4 -2
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/callback_bridge.h +3 -0
- data/clib/mac/libgnparser.h +4 -2
- data/clib/mac/libgnparser.so +0 -0
- data/clib/win/callback_bridge.h +3 -0
- data/clib/win/libgnparser.h +4 -2
- data/clib/win/libgnparser.so +0 -0
- data/lib/biodiversity.rb +1 -0
- data/lib/biodiversity/parser.rb +17 -15
- data/lib/biodiversity/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d1bb6dd3170d4a3d036cf5068a6d663606ec85f29003b24408e252e782ff4fd
|
4
|
+
data.tar.gz: 2ee9a9e168b107ecd851302c32c5b06cbafe25df83bae385fe00d620fbfd8cde
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2643c3916407220055a52581d357e09519a94be555e4703a9ecf4a8709f99f35a8be9232b3612913baa5a30e42812fcbf66d223f7824def39eee0bdef7e7da8
|
7
|
+
data.tar.gz: faaebcafb79ccc3273a28ace4c00f642e43b7813dd7477c4ec19c1ddf4252c8c871f7185746e40a9c94e37b04fc422cfd9c53aea53f97b532c74324b2fc66387
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.5
|
1
|
+
2.6.5
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Biodiversity
|
2
2
|
============
|
3
3
|
|
4
|
-
[![DOI](https://zenodo.org/badge/
|
4
|
+
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
|
5
5
|
[![Gem Version][gem_svg]][gem_link]
|
6
6
|
[![Continuous Integration Status][ci_svg]][ci_link]
|
7
7
|
|
@@ -17,6 +17,7 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
17
17
|
|
18
18
|
- [Biodiversity](#biodiversity)
|
19
19
|
- [Installation](#installation)
|
20
|
+
- [Benchmarks](#benchmarks)
|
20
21
|
- [Example usage](#example-usage)
|
21
22
|
- [What is "nameStringID" in the parsed results?](#what-is-%22namestringid%22-in-the-parsed-results)
|
22
23
|
- [Copyright](#copyright)
|
@@ -27,6 +28,38 @@ For such features use https://gitlab.com/gogna/gnparser.
|
|
27
28
|
|
28
29
|
The gem should work on Linux, Mac and Windows (64bit) machines
|
29
30
|
|
31
|
+
## Benchmarks
|
32
|
+
|
33
|
+
The fastest way to go through a massive amount of names is to use the
|
34
|
+
`Biodiversity::Parser.parse_ary([big array], simple = true)` function.
|
35
|
+
|
36
|
+
For example, parsing a large file with one name per line:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
#!/usr/bin/env ruby
|
40
|
+
|
41
|
+
require 'biodiversity'
|
42
|
+
|
43
|
+
P = Biodiversity::Parser
|
44
|
+
count = 0
|
45
|
+
File.open('all_names.txt').each_slice(50_000) do |sl|
|
46
|
+
count += 1
|
47
|
+
res = P.parse_ary(sl, true)
|
48
|
+
puts count * 50_000
|
49
|
+
puts res[0]
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
Here are comparative results of running parsers against a file with 24
|
54
|
+
million names on a 4CPU hyperthreaded laptop:
|
55
|
+
|
56
|
+
| Program | Version | Full/Simple | Names/min |
|
57
|
+
| ------------ | ------- | ----------- | --------: |
|
58
|
+
| gnparser | 0.12.0 | Simple | 3,000,000 |
|
59
|
+
| biodiversity | 4.0.1 | Simple | 2,000,000 |
|
60
|
+
| biodiversity | 4.0.1 | Full JSON | 800,000 |
|
61
|
+
| biodiversity | 3.5.1 | n/a | 40,000 |
|
62
|
+
|
30
63
|
## Example usage
|
31
64
|
|
32
65
|
You can use it as a library in Ruby:
|
@@ -92,6 +125,10 @@ Copyright
|
|
92
125
|
|
93
126
|
Authors: [Dmitry Mozzherin][dimus]
|
94
127
|
|
128
|
+
Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
|
129
|
+
|
130
|
+
|
131
|
+
|
95
132
|
Copyright (c) 2008-2019 Dmitry Mozzherin. See [LICENSE][license]
|
96
133
|
for further details.
|
97
134
|
|
@@ -100,6 +137,8 @@ for further details.
|
|
100
137
|
[ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
|
101
138
|
[ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
102
139
|
[dimus]: https://github.com/dimus
|
140
|
+
[pleary]: https://github.com/pleary
|
141
|
+
[hernan]: https://github.com/LocoDelAssembly
|
103
142
|
[license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
|
104
143
|
[uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
|
105
144
|
[uuid_blog]: http://globalnamesarchitecture.github.io/gna/uuid/2015/05/31/gn-uuid-0-5-0.html
|
data/clib/linux/libgnparser.h
CHANGED
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
22
22
|
#line 3 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
|
+
#include "callback_bridge.h"
|
25
26
|
|
26
27
|
#line 1 "cgo-generated-wrapper"
|
27
28
|
|
@@ -77,8 +78,9 @@ extern "C" {
|
|
77
78
|
// ParseToString function takes a name-string, desired format, and parses
|
78
79
|
// the name-string to either JSON, or pipe-separated values, depending on
|
79
80
|
// the desired format. Format can take values of 'simple', 'compact', 'pretty'.
|
81
|
+
// NOTE: Read callback type as "void (*callback)(char *parsed)"
|
80
82
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
83
|
+
extern void ParseToString(char* p0, char* p1, void* p2);
|
82
84
|
|
83
85
|
// ParseAryToStrings function takes an array of names, parsing format and a
|
84
86
|
// reference to an output: an empty array of strings to return the data
|
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
|
|
86
88
|
// pipe-separated parsed values (depending on a given format). Format can take
|
87
89
|
// values of 'simple', 'compact', or 'pretty'.
|
88
90
|
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2,
|
91
|
+
extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
|
90
92
|
|
91
93
|
#ifdef __cplusplus
|
92
94
|
}
|
data/clib/linux/libgnparser.so
CHANGED
Binary file
|
data/clib/mac/libgnparser.h
CHANGED
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
22
22
|
#line 3 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
|
+
#include "callback_bridge.h"
|
25
26
|
|
26
27
|
#line 1 "cgo-generated-wrapper"
|
27
28
|
|
@@ -77,8 +78,9 @@ extern "C" {
|
|
77
78
|
// ParseToString function takes a name-string, desired format, and parses
|
78
79
|
// the name-string to either JSON, or pipe-separated values, depending on
|
79
80
|
// the desired format. Format can take values of 'simple', 'compact', 'pretty'.
|
81
|
+
// NOTE: Read callback type as "void (*callback)(char *parsed)"
|
80
82
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
83
|
+
extern void ParseToString(char* p0, char* p1, void* p2);
|
82
84
|
|
83
85
|
// ParseAryToStrings function takes an array of names, parsing format and a
|
84
86
|
// reference to an output: an empty array of strings to return the data
|
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
|
|
86
88
|
// pipe-separated parsed values (depending on a given format). Format can take
|
87
89
|
// values of 'simple', 'compact', or 'pretty'.
|
88
90
|
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2,
|
91
|
+
extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
|
90
92
|
|
91
93
|
#ifdef __cplusplus
|
92
94
|
}
|
data/clib/mac/libgnparser.so
CHANGED
Binary file
|
data/clib/win/libgnparser.h
CHANGED
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
|
22
22
|
#line 3 "main.go"
|
23
23
|
|
24
24
|
#include "stdlib.h"
|
25
|
+
#include "callback_bridge.h"
|
25
26
|
|
26
27
|
#line 1 "cgo-generated-wrapper"
|
27
28
|
|
@@ -77,8 +78,9 @@ extern "C" {
|
|
77
78
|
// ParseToString function takes a name-string, desired format, and parses
|
78
79
|
// the name-string to either JSON, or pipe-separated values, depending on
|
79
80
|
// the desired format. Format can take values of 'simple', 'compact', 'pretty'.
|
81
|
+
// NOTE: Read callback type as "void (*callback)(char *parsed)"
|
80
82
|
|
81
|
-
extern char* ParseToString(char* p0, char* p1);
|
83
|
+
extern void ParseToString(char* p0, char* p1, void* p2);
|
82
84
|
|
83
85
|
// ParseAryToStrings function takes an array of names, parsing format and a
|
84
86
|
// reference to an output: an empty array of strings to return the data
|
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
|
|
86
88
|
// pipe-separated parsed values (depending on a given format). Format can take
|
87
89
|
// values of 'simple', 'compact', or 'pretty'.
|
88
90
|
|
89
|
-
extern void ParseAryToStrings(char** p0, int p1, char* p2,
|
91
|
+
extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
|
90
92
|
|
91
93
|
#ifdef __cplusplus
|
92
94
|
}
|
data/clib/win/libgnparser.so
CHANGED
Binary file
|
data/lib/biodiversity.rb
CHANGED
data/lib/biodiversity/parser.rb
CHANGED
@@ -25,40 +25,42 @@ module Biodiversity
|
|
25
25
|
ffi_lib File.join(__dir__, '..', '..', 'clib', platform, 'libgnparser.so')
|
26
26
|
POINTER_SIZE = FFI.type_size(:pointer)
|
27
27
|
|
28
|
-
|
28
|
+
callback(:parser_callback, %i[string], :void)
|
29
|
+
|
30
|
+
attach_function(:parse_go, :ParseToString,
|
31
|
+
%i[string string parser_callback], :void)
|
29
32
|
attach_function(:parse_ary_go, :ParseAryToStrings,
|
30
|
-
%i[pointer int string
|
33
|
+
%i[pointer int string parser_callback], :void)
|
31
34
|
|
32
35
|
def self.parse(name, simple = false)
|
33
36
|
format = simple ? 'simple' : 'compact'
|
34
|
-
|
37
|
+
|
38
|
+
parsed = nil
|
39
|
+
callback = FFI::Function.new(:void, [:string]) { |str| parsed = str }
|
40
|
+
parse_go(name, format, callback)
|
35
41
|
output(parsed, simple)
|
36
42
|
end
|
37
43
|
|
38
44
|
def self.parse_ary(ary, simple = false)
|
39
45
|
format = simple ? 'simple' : 'compact'
|
40
46
|
in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
|
47
|
+
|
41
48
|
in_ptr.write_array_of_pointer(
|
42
49
|
ary.map { |s| FFI::MemoryPointer.from_string(s) }
|
43
50
|
)
|
44
|
-
out_var = FFI::MemoryPointer.new(:pointer)
|
45
|
-
parse_ary_go(in_ptr, ary.length, format, out_var)
|
46
51
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
a << output(prsd, simple)
|
51
|
-
end
|
52
|
-
ensure
|
53
|
-
out_var.read_pointer.get_array_of_pointer(0, ary.length).each do |p|
|
54
|
-
CLib.free(p)
|
52
|
+
out_ary = []
|
53
|
+
callback = FFI::Function.new(:void, [:string]) do |str|
|
54
|
+
out_ary << output(str, simple)
|
55
55
|
end
|
56
|
-
|
56
|
+
parse_ary_go(in_ptr, ary.length, format, callback)
|
57
|
+
out_ary
|
57
58
|
end
|
58
59
|
|
59
60
|
def self.output(parsed, simple)
|
60
61
|
if simple
|
61
|
-
|
62
|
+
csv = CSV.new(parsed)
|
63
|
+
parsed = csv.read[0]
|
62
64
|
{
|
63
65
|
id: parsed[0],
|
64
66
|
verbatim: parsed[1],
|
data/lib/biodiversity/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.2
|
4
|
+
version: 4.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -111,10 +111,13 @@ files:
|
|
111
111
|
- README.md
|
112
112
|
- Rakefile
|
113
113
|
- biodiversity.gemspec
|
114
|
+
- clib/linux/callback_bridge.h
|
114
115
|
- clib/linux/libgnparser.h
|
115
116
|
- clib/linux/libgnparser.so
|
117
|
+
- clib/mac/callback_bridge.h
|
116
118
|
- clib/mac/libgnparser.h
|
117
119
|
- clib/mac/libgnparser.so
|
120
|
+
- clib/win/callback_bridge.h
|
118
121
|
- clib/win/libgnparser.h
|
119
122
|
- clib/win/libgnparser.so
|
120
123
|
- lib/biodiversity.rb
|
@@ -142,8 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
145
|
- !ruby/object:Gem::Version
|
143
146
|
version: '0'
|
144
147
|
requirements: []
|
145
|
-
|
146
|
-
rubygems_version: 2.7.6.2
|
148
|
+
rubygems_version: 3.0.3
|
147
149
|
signing_key:
|
148
150
|
specification_version: 4
|
149
151
|
summary: Parser of scientific names
|