biodiversity 4.0.2 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e1a5a67bcea7c0e234726d7702c4224b109a7852899a2d3b84d25b38ee9ad84
4
- data.tar.gz: 69e7781d05858faa63ed40b4d964499e0718bec3b6fecc9e5620879bd72acf34
3
+ metadata.gz: 8d1bb6dd3170d4a3d036cf5068a6d663606ec85f29003b24408e252e782ff4fd
4
+ data.tar.gz: 2ee9a9e168b107ecd851302c32c5b06cbafe25df83bae385fe00d620fbfd8cde
5
5
  SHA512:
6
- metadata.gz: be191bc9156447da5883c7a910b83012422869bbe7fdb6950833e2090b1ad33fb521dd5196376fba821d2991857a5c0adcc06b80f5752b7434c428a449d662b1
7
- data.tar.gz: 4bc7244b2d4c24146aa19f191c78e9645779298cfb67a18904aaddae3c0baa866035ecfed82827bc2e78cde54e8ee4fd9dccdedd76f8a43d222f06212a07ce35
6
+ metadata.gz: f2643c3916407220055a52581d357e09519a94be555e4703a9ecf4a8709f99f35a8be9232b3612913baa5a30e42812fcbf66d223f7824def39eee0bdef7e7da8
7
+ data.tar.gz: faaebcafb79ccc3273a28ace4c00f642e43b7813dd7477c4ec19c1ddf4252c8c871f7185746e40a9c94e37b04fc422cfd9c53aea53f97b532c74324b2fc66387
@@ -10,4 +10,10 @@ Metrics/MethodLength:
10
10
  - lib/**/*
11
11
  Metrics/BlockLength:
12
12
  Exclude:
13
- - spec/**/*
13
+ - spec/**/*
14
+ Style/HashEachMethods:
15
+ Enabled: true
16
+ Style/HashTransformKeys:
17
+ Enabled: true
18
+ Style/HashTransformValues:
19
+ Enabled: true
@@ -1 +1 @@
1
- 2.5.7
1
+ 2.6.5
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ 4.0.3 -- fix memory leak in parse_ary method
2
+
1
3
  4.0.2 -- add MS Windows libraries
2
4
 
3
5
  4.0.1 -- fix for simple output
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  Biodiversity
2
2
  ============
3
3
 
4
- [![DOI](https://zenodo.org/badge/19435/GlobalNamesArchitecture/biodiversity.svg)](https://zenodo.org/badge/latestdoi/19435/GlobalNamesArchitecture/biodiversity)
4
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
5
5
  [![Gem Version][gem_svg]][gem_link]
6
6
  [![Continuous Integration Status][ci_svg]][ci_link]
7
7
 
@@ -17,6 +17,7 @@ For such features use https://gitlab.com/gogna/gnparser.
17
17
 
18
18
  - [Biodiversity](#biodiversity)
19
19
  - [Installation](#installation)
20
+ - [Benchmarks](#benchmarks)
20
21
  - [Example usage](#example-usage)
21
22
  - [What is "nameStringID" in the parsed results?](#what-is-%22namestringid%22-in-the-parsed-results)
22
23
  - [Copyright](#copyright)
@@ -27,6 +28,38 @@ For such features use https://gitlab.com/gogna/gnparser.
27
28
 
28
29
  The gem should work on Linux, Mac and Windows (64-bit) machines.
29
30
 
31
+ ## Benchmarks
32
+
33
+ The fastest way to go through a massive number of names is to use the
34
+ `Biodiversity::Parser.parse_ary([big array], simple = true)` function.
35
+
36
+ For example, to parse a large file with one name per line:
37
+
38
+ ```ruby
39
+ #!/usr/bin/env ruby
40
+
41
+ require 'biodiversity'
42
+
43
+ P = Biodiversity::Parser
44
+ count = 0
45
+ File.open('all_names.txt').each_slice(50_000) do |sl|
46
+ count += 1
47
+ res = P.parse_ary(sl, true)
48
+ puts count * 50_000
49
+ puts res[0]
50
+ end
51
+ ```
52
+
53
+ Here are comparative results of running parsers against a file with 24
54
+ million names on a 4-CPU hyperthreaded laptop:
55
+
56
+ | Program | Version | Full/Simple | Names/min |
57
+ | ------------ | ------- | ----------- | --------: |
58
+ | gnparser | 0.12.0 | Simple | 3,000,000 |
59
+ | biodiversity | 4.0.1 | Simple | 2,000,000 |
60
+ | biodiversity | 4.0.1 | Full JSON | 800,000 |
61
+ | biodiversity | 3.5.1 | n/a | 40,000 |
62
+
30
63
  ## Example usage
31
64
 
32
65
  You can use it as a library in Ruby:
@@ -92,6 +125,10 @@ Copyright
92
125
 
93
126
  Authors: [Dmitry Mozzherin][dimus]
94
127
 
128
+ Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
129
+
130
+
131
+
95
132
  Copyright (c) 2008-2019 Dmitry Mozzherin. See [LICENSE][license]
96
133
  for further details.
97
134
 
@@ -100,6 +137,8 @@ for further details.
100
137
  [ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
101
138
  [ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
102
139
  [dimus]: https://github.com/dimus
140
+ [pleary]: https://github.com/pleary
141
+ [hernan]: https://github.com/LocoDelAssembly
103
142
  [license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
104
143
  [uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
105
144
  [uuid_blog]: http://globalnamesarchitecture.github.io/gna/uuid/2015/05/31/gn-uuid-0-5-0.html
@@ -0,0 +1,3 @@
1
+ typedef void Callback(char *parsed);
2
+
3
+ void callback_bridge(void *callback, char *parsed);
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
22
22
  #line 3 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
+ #include "callback_bridge.h"
25
26
 
26
27
  #line 1 "cgo-generated-wrapper"
27
28
 
@@ -77,8 +78,9 @@ extern "C" {
77
78
  // ParseToString function takes a name-string, desired format, and parses
78
79
  // the name-string to either JSON, or pipe-separated values, depending on
79
80
  // the desired format. Format can take values of 'simple', 'compact', 'pretty'.
81
+ // NOTE: Read callback type as "void (*callback)(char *parsed)"
80
82
 
81
- extern char* ParseToString(char* p0, char* p1);
83
+ extern void ParseToString(char* p0, char* p1, void* p2);
82
84
 
83
85
  // ParseAryToStrings function takes an array of names, parsing format and a
84
86
  // reference to an output: an empty array of strings to return the data
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
86
88
  // pipe-separated parsed values (depending on a given format). Format can take
87
89
  // values of 'simple', 'compact', or 'pretty'.
88
90
 
89
- extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
91
+ extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
90
92
 
91
93
  #ifdef __cplusplus
92
94
  }
Binary file
@@ -0,0 +1,3 @@
1
+ typedef void Callback(char *parsed);
2
+
3
+ void callback_bridge(void *callback, char *parsed);
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
22
22
  #line 3 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
+ #include "callback_bridge.h"
25
26
 
26
27
  #line 1 "cgo-generated-wrapper"
27
28
 
@@ -77,8 +78,9 @@ extern "C" {
77
78
  // ParseToString function takes a name-string, desired format, and parses
78
79
  // the name-string to either JSON, or pipe-separated values, depending on
79
80
  // the desired format. Format can take values of 'simple', 'compact', 'pretty'.
81
+ // NOTE: Read callback type as "void (*callback)(char *parsed)"
80
82
 
81
- extern char* ParseToString(char* p0, char* p1);
83
+ extern void ParseToString(char* p0, char* p1, void* p2);
82
84
 
83
85
  // ParseAryToStrings function takes an array of names, parsing format and a
84
86
  // reference to an output: an empty array of strings to return the data
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
86
88
  // pipe-separated parsed values (depending on a given format). Format can take
87
89
  // values of 'simple', 'compact', or 'pretty'.
88
90
 
89
- extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
91
+ extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
90
92
 
91
93
  #ifdef __cplusplus
92
94
  }
Binary file
@@ -0,0 +1,3 @@
1
+ typedef void Callback(char *parsed);
2
+
3
+ void callback_bridge(void *callback, char *parsed);
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
22
22
  #line 3 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
+ #include "callback_bridge.h"
25
26
 
26
27
  #line 1 "cgo-generated-wrapper"
27
28
 
@@ -77,8 +78,9 @@ extern "C" {
77
78
  // ParseToString function takes a name-string, desired format, and parses
78
79
  // the name-string to either JSON, or pipe-separated values, depending on
79
80
  // the desired format. Format can take values of 'simple', 'compact', 'pretty'.
81
+ // NOTE: Read callback type as "void (*callback)(char *parsed)"
80
82
 
81
- extern char* ParseToString(char* p0, char* p1);
83
+ extern void ParseToString(char* p0, char* p1, void* p2);
82
84
 
83
85
  // ParseAryToStrings function takes an array of names, parsing format and a
84
86
  // reference to an output: an empty array of strings to return the data
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
86
88
  // pipe-separated parsed values (depending on a given format). Format can take
87
89
  // values of 'simple', 'compact', or 'pretty'.
88
90
 
89
- extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
91
+ extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
90
92
 
91
93
  #ifdef __cplusplus
92
94
  }
Binary file
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'ffi'
4
4
  require 'json'
5
+ require 'csv'
5
6
  require_relative 'biodiversity/version'
6
7
  require_relative 'biodiversity/parser'
7
8
 
@@ -25,40 +25,42 @@ module Biodiversity
25
25
  ffi_lib File.join(__dir__, '..', '..', 'clib', platform, 'libgnparser.so')
26
26
  POINTER_SIZE = FFI.type_size(:pointer)
27
27
 
28
- attach_function(:parse_go, :ParseToString, %i[string string], :string)
28
+ callback(:parser_callback, %i[string], :void)
29
+
30
+ attach_function(:parse_go, :ParseToString,
31
+ %i[string string parser_callback], :void)
29
32
  attach_function(:parse_ary_go, :ParseAryToStrings,
30
- %i[pointer int string pointer], :void)
33
+ %i[pointer int string parser_callback], :void)
31
34
 
32
35
  def self.parse(name, simple = false)
33
36
  format = simple ? 'simple' : 'compact'
34
- parsed = parse_go(name, format)
37
+
38
+ parsed = nil
39
+ callback = FFI::Function.new(:void, [:string]) { |str| parsed = str }
40
+ parse_go(name, format, callback)
35
41
  output(parsed, simple)
36
42
  end
37
43
 
38
44
  def self.parse_ary(ary, simple = false)
39
45
  format = simple ? 'simple' : 'compact'
40
46
  in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
47
+
41
48
  in_ptr.write_array_of_pointer(
42
49
  ary.map { |s| FFI::MemoryPointer.from_string(s) }
43
50
  )
44
- out_var = FFI::MemoryPointer.new(:pointer)
45
- parse_ary_go(in_ptr, ary.length, format, out_var)
46
51
 
47
- out_var.read_pointer
48
- .get_array_of_string(0, ary.length)
49
- .each_with_object([]) do |prsd, a|
50
- a << output(prsd, simple)
51
- end
52
- ensure
53
- out_var.read_pointer.get_array_of_pointer(0, ary.length).each do |p|
54
- CLib.free(p)
52
+ out_ary = []
53
+ callback = FFI::Function.new(:void, [:string]) do |str|
54
+ out_ary << output(str, simple)
55
55
  end
56
- CLib.free(out_var.read_pointer)
56
+ parse_ary_go(in_ptr, ary.length, format, callback)
57
+ out_ary
57
58
  end
58
59
 
59
60
  def self.output(parsed, simple)
60
61
  if simple
61
- parsed = parsed.split('|')
62
+ csv = CSV.new(parsed)
63
+ parsed = csv.read[0]
62
64
  {
63
65
  id: parsed[0],
64
66
  verbatim: parsed[1],
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Biodiversity module provides a namespace for scientific name parser.
4
4
  module Biodiversity
5
- VERSION = '4.0.2'
5
+ VERSION = '4.0.3'
6
6
 
7
7
  def self.version
8
8
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.2
4
+ version: 4.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-18 00:00:00.000000000 Z
11
+ date: 2020-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -111,10 +111,13 @@ files:
111
111
  - README.md
112
112
  - Rakefile
113
113
  - biodiversity.gemspec
114
+ - clib/linux/callback_bridge.h
114
115
  - clib/linux/libgnparser.h
115
116
  - clib/linux/libgnparser.so
117
+ - clib/mac/callback_bridge.h
116
118
  - clib/mac/libgnparser.h
117
119
  - clib/mac/libgnparser.so
120
+ - clib/win/callback_bridge.h
118
121
  - clib/win/libgnparser.h
119
122
  - clib/win/libgnparser.so
120
123
  - lib/biodiversity.rb
@@ -142,8 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
145
  - !ruby/object:Gem::Version
143
146
  version: '0'
144
147
  requirements: []
145
- rubyforge_project:
146
- rubygems_version: 2.7.6.2
148
+ rubygems_version: 3.0.3
147
149
  signing_key:
148
150
  specification_version: 4
149
151
  summary: Parser of scientific names