biodiversity 4.0.2 → 4.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e1a5a67bcea7c0e234726d7702c4224b109a7852899a2d3b84d25b38ee9ad84
4
- data.tar.gz: 69e7781d05858faa63ed40b4d964499e0718bec3b6fecc9e5620879bd72acf34
3
+ metadata.gz: 8d1bb6dd3170d4a3d036cf5068a6d663606ec85f29003b24408e252e782ff4fd
4
+ data.tar.gz: 2ee9a9e168b107ecd851302c32c5b06cbafe25df83bae385fe00d620fbfd8cde
5
5
  SHA512:
6
- metadata.gz: be191bc9156447da5883c7a910b83012422869bbe7fdb6950833e2090b1ad33fb521dd5196376fba821d2991857a5c0adcc06b80f5752b7434c428a449d662b1
7
- data.tar.gz: 4bc7244b2d4c24146aa19f191c78e9645779298cfb67a18904aaddae3c0baa866035ecfed82827bc2e78cde54e8ee4fd9dccdedd76f8a43d222f06212a07ce35
6
+ metadata.gz: f2643c3916407220055a52581d357e09519a94be555e4703a9ecf4a8709f99f35a8be9232b3612913baa5a30e42812fcbf66d223f7824def39eee0bdef7e7da8
7
+ data.tar.gz: faaebcafb79ccc3273a28ace4c00f642e43b7813dd7477c4ec19c1ddf4252c8c871f7185746e40a9c94e37b04fc422cfd9c53aea53f97b532c74324b2fc66387
@@ -10,4 +10,10 @@ Metrics/MethodLength:
10
10
  - lib/**/*
11
11
  Metrics/BlockLength:
12
12
  Exclude:
13
- - spec/**/*
13
+ - spec/**/*
14
+ Style/HashEachMethods:
15
+ Enabled: true
16
+ Style/HashTransformKeys:
17
+ Enabled: true
18
+ Style/HashTransformValues:
19
+ Enabled: true
@@ -1 +1 @@
1
- 2.5.7
1
+ 2.6.5
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ 4.0.3 -- fix memory leak in parse_ary method
2
+
1
3
  4.0.2 -- add MS Windows libraries
2
4
 
3
5
  4.0.1 -- fix for simple output
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  Biodiversity
2
2
  ============
3
3
 
4
- [![DOI](https://zenodo.org/badge/19435/GlobalNamesArchitecture/biodiversity.svg)](https://zenodo.org/badge/latestdoi/19435/GlobalNamesArchitecture/biodiversity)
4
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
5
5
  [![Gem Version][gem_svg]][gem_link]
6
6
  [![Continuous Integration Status][ci_svg]][ci_link]
7
7
 
@@ -17,6 +17,7 @@ For such features use https://gitlab.com/gogna/gnparser.
17
17
 
18
18
  - [Biodiversity](#biodiversity)
19
19
  - [Installation](#installation)
20
+ - [Benchmarks](#benchmarks)
20
21
  - [Example usage](#example-usage)
21
22
  - [What is "nameStringID" in the parsed results?](#what-is-%22namestringid%22-in-the-parsed-results)
22
23
  - [Copyright](#copyright)
@@ -27,6 +28,38 @@ For such features use https://gitlab.com/gogna/gnparser.
27
28
 
28
29
  The gem should work on Linux, Mac and Windows (64bit) machines
29
30
 
31
+ ## Benchmarks
32
+
33
+ The fastest way to go through a massive number of names is to use the
34
+ `Biodiversity::Parser.parse_ary([big array], simple = true)` function.
35
+
36
+ For example parsing a large file with one name per line:
37
+
38
+ ```ruby
39
+ #!/usr/bin/env ruby
40
+
41
+ require 'biodiversity'
42
+
43
+ P = Biodiversity::Parser
44
+ count = 0
45
+ File.open('all_names.txt').each_slice(50_000) do |sl|
46
+ count += 1
47
+ res = P.parse_ary(sl, true)
48
+ puts count * 50_000
49
+ puts res[0]
50
+ end
51
+ ```
52
+
53
+ Here are comparative results of running parsers against a file with 24
54
+ million names on a 4CPU hyperthreaded laptop:
55
+
56
+ | Program | Version | Full/Simple | Names/min |
57
+ | ------------ | ------- | ----------- | --------: |
58
+ | gnparser | 0.12.0 | Simple | 3,000,000 |
59
+ | biodiversity | 4.0.1 | Simple | 2,000,000 |
60
+ | biodiversity | 4.0.1 | Full JSON | 800,000 |
61
+ | biodiversity | 3.5.1 | n/a | 40,000 |
62
+
30
63
  ## Example usage
31
64
 
32
65
  You can use it as a library in Ruby:
@@ -92,6 +125,10 @@ Copyright
92
125
 
93
126
  Authors: [Dmitry Mozzherin][dimus]
94
127
 
128
+ Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
129
+
130
+
131
+
95
132
  Copyright (c) 2008-2019 Dmitry Mozzherin. See [LICENSE][license]
96
133
  for further details.
97
134
 
@@ -100,6 +137,8 @@ for further details.
100
137
  [ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
101
138
  [ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
102
139
  [dimus]: https://github.com/dimus
140
+ [pleary]: https://github.com/pleary
141
+ [hernan]: https://github.com/LocoDelAssembly
103
142
  [license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
104
143
  [uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
105
144
  [uuid_blog]: http://globalnamesarchitecture.github.io/gna/uuid/2015/05/31/gn-uuid-0-5-0.html
@@ -0,0 +1,3 @@
1
+ typedef void Callback(char *parsed);
2
+
3
+ void callback_bridge(void *callback, char *parsed);
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
22
22
  #line 3 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
+ #include "callback_bridge.h"
25
26
 
26
27
  #line 1 "cgo-generated-wrapper"
27
28
 
@@ -77,8 +78,9 @@ extern "C" {
77
78
  // ParseToString function takes a name-string, desired format, and parses
78
79
  // the name-string to either JSON, or pipe-separated values, depending on
79
80
  // the desired format. Format can take values of 'simple', 'compact', 'pretty'.
81
+ // NOTE: Read callback type as "void (*callback)(char *parsed)"
80
82
 
81
- extern char* ParseToString(char* p0, char* p1);
83
+ extern void ParseToString(char* p0, char* p1, void* p2);
82
84
 
83
85
  // ParseAryToStrings function takes an array of names, parsing format and a
84
86
  // reference to an output: an empty array of strings to return the data
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
86
88
  // pipe-separated parsed values (depending on a given format). Format can take
87
89
  // values of 'simple', 'compact', or 'pretty'.
88
90
 
89
- extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
91
+ extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
90
92
 
91
93
  #ifdef __cplusplus
92
94
  }
Binary file
@@ -0,0 +1,3 @@
1
+ typedef void Callback(char *parsed);
2
+
3
+ void callback_bridge(void *callback, char *parsed);
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
22
22
  #line 3 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
+ #include "callback_bridge.h"
25
26
 
26
27
  #line 1 "cgo-generated-wrapper"
27
28
 
@@ -77,8 +78,9 @@ extern "C" {
77
78
  // ParseToString function takes a name-string, desired format, and parses
78
79
  // the name-string to either JSON, or pipe-separated values, depending on
79
80
  // the desired format. Format can take values of 'simple', 'compact', 'pretty'.
81
+ // NOTE: Read callback type as "void (*callback)(char *parsed)"
80
82
 
81
- extern char* ParseToString(char* p0, char* p1);
83
+ extern void ParseToString(char* p0, char* p1, void* p2);
82
84
 
83
85
  // ParseAryToStrings function takes an array of names, parsing format and a
84
86
  // reference to an output: an empty array of strings to return the data
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
86
88
  // pipe-separated parsed values (depending on a given format). Format can take
87
89
  // values of 'simple', 'compact', or 'pretty'.
88
90
 
89
- extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
91
+ extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
90
92
 
91
93
  #ifdef __cplusplus
92
94
  }
Binary file
@@ -0,0 +1,3 @@
1
+ typedef void Callback(char *parsed);
2
+
3
+ void callback_bridge(void *callback, char *parsed);
@@ -22,6 +22,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
22
22
  #line 3 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
+ #include "callback_bridge.h"
25
26
 
26
27
  #line 1 "cgo-generated-wrapper"
27
28
 
@@ -77,8 +78,9 @@ extern "C" {
77
78
  // ParseToString function takes a name-string, desired format, and parses
78
79
  // the name-string to either JSON, or pipe-separated values, depending on
79
80
  // the desired format. Format can take values of 'simple', 'compact', 'pretty'.
81
+ // NOTE: Read callback type as "void (*callback)(char *parsed)"
80
82
 
81
- extern char* ParseToString(char* p0, char* p1);
83
+ extern void ParseToString(char* p0, char* p1, void* p2);
82
84
 
83
85
  // ParseAryToStrings function takes an array of names, parsing format and a
84
86
  // reference to an output: an empty array of strings to return the data
@@ -86,7 +88,7 @@ extern char* ParseToString(char* p0, char* p1);
86
88
  // pipe-separated parsed values (depending on a given format). Format can take
87
89
  // values of 'simple', 'compact', or 'pretty'.
88
90
 
89
- extern void ParseAryToStrings(char** p0, int p1, char* p2, char*** p3);
91
+ extern void ParseAryToStrings(char** p0, int p1, char* p2, void* p3);
90
92
 
91
93
  #ifdef __cplusplus
92
94
  }
Binary file
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'ffi'
4
4
  require 'json'
5
+ require 'csv'
5
6
  require_relative 'biodiversity/version'
6
7
  require_relative 'biodiversity/parser'
7
8
 
@@ -25,40 +25,42 @@ module Biodiversity
25
25
  ffi_lib File.join(__dir__, '..', '..', 'clib', platform, 'libgnparser.so')
26
26
  POINTER_SIZE = FFI.type_size(:pointer)
27
27
 
28
- attach_function(:parse_go, :ParseToString, %i[string string], :string)
28
+ callback(:parser_callback, %i[string], :void)
29
+
30
+ attach_function(:parse_go, :ParseToString,
31
+ %i[string string parser_callback], :void)
29
32
  attach_function(:parse_ary_go, :ParseAryToStrings,
30
- %i[pointer int string pointer], :void)
33
+ %i[pointer int string parser_callback], :void)
31
34
 
32
35
  def self.parse(name, simple = false)
33
36
  format = simple ? 'simple' : 'compact'
34
- parsed = parse_go(name, format)
37
+
38
+ parsed = nil
39
+ callback = FFI::Function.new(:void, [:string]) { |str| parsed = str }
40
+ parse_go(name, format, callback)
35
41
  output(parsed, simple)
36
42
  end
37
43
 
38
44
  def self.parse_ary(ary, simple = false)
39
45
  format = simple ? 'simple' : 'compact'
40
46
  in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
47
+
41
48
  in_ptr.write_array_of_pointer(
42
49
  ary.map { |s| FFI::MemoryPointer.from_string(s) }
43
50
  )
44
- out_var = FFI::MemoryPointer.new(:pointer)
45
- parse_ary_go(in_ptr, ary.length, format, out_var)
46
51
 
47
- out_var.read_pointer
48
- .get_array_of_string(0, ary.length)
49
- .each_with_object([]) do |prsd, a|
50
- a << output(prsd, simple)
51
- end
52
- ensure
53
- out_var.read_pointer.get_array_of_pointer(0, ary.length).each do |p|
54
- CLib.free(p)
52
+ out_ary = []
53
+ callback = FFI::Function.new(:void, [:string]) do |str|
54
+ out_ary << output(str, simple)
55
55
  end
56
- CLib.free(out_var.read_pointer)
56
+ parse_ary_go(in_ptr, ary.length, format, callback)
57
+ out_ary
57
58
  end
58
59
 
59
60
  def self.output(parsed, simple)
60
61
  if simple
61
- parsed = parsed.split('|')
62
+ csv = CSV.new(parsed)
63
+ parsed = csv.read[0]
62
64
  {
63
65
  id: parsed[0],
64
66
  verbatim: parsed[1],
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Biodiversity module provides a namespace for scientific name parser.
4
4
  module Biodiversity
5
- VERSION = '4.0.2'
5
+ VERSION = '4.0.3'
6
6
 
7
7
  def self.version
8
8
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.2
4
+ version: 4.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-18 00:00:00.000000000 Z
11
+ date: 2020-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -111,10 +111,13 @@ files:
111
111
  - README.md
112
112
  - Rakefile
113
113
  - biodiversity.gemspec
114
+ - clib/linux/callback_bridge.h
114
115
  - clib/linux/libgnparser.h
115
116
  - clib/linux/libgnparser.so
117
+ - clib/mac/callback_bridge.h
116
118
  - clib/mac/libgnparser.h
117
119
  - clib/mac/libgnparser.so
120
+ - clib/win/callback_bridge.h
118
121
  - clib/win/libgnparser.h
119
122
  - clib/win/libgnparser.so
120
123
  - lib/biodiversity.rb
@@ -142,8 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
145
  - !ruby/object:Gem::Version
143
146
  version: '0'
144
147
  requirements: []
145
- rubyforge_project:
146
- rubygems_version: 2.7.6.2
148
+ rubygems_version: 3.0.3
147
149
  signing_key:
148
150
  specification_version: 4
149
151
  summary: Parser of scientific names