biodiversity 5.1.0 → 5.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b9df90a14b8fcc999def9b89b58ea277e37749eca303f858ebaee03159bb60d8
4
- data.tar.gz: b6f8ed59091c85d12fb660a608444f94c2974c4f7720e7b085b5c02265cad296
3
+ metadata.gz: b281d6c55701570148a6e42577c6e981c23e2bdf6558dafc27aaa9c89dfdcc0d
4
+ data.tar.gz: 0e54a072a72fe7f2e5b76917766d044d945a44859b61cdc51247c3ac95f386b8
5
5
  SHA512:
6
- metadata.gz: b2c749612aed3f60f3fc3041a4685a7f45651ba438d7058687433a8b039443487b3e2e9ff90cf9a976cab5b183bac513ae611d238ceed2353e3ad491f963e81c
7
- data.tar.gz: 2765731a3d3a9dfd5bb346246ce8afe741bf9a0b74e9c50bced34d3f82da9ce5e1a56c6d671373b6c0973e4afc63f913bc7badf2f7073f059a2c38aeebfc817c
6
+ metadata.gz: 5ce44a6dcefcaf1ab262c3d2fd8593d8fabadeab5a5f4c167e6b921f4b66ebf96212bacab809c1765b4eba87a4adc58539449a1aad95cf5e381c87f3edb0326b
7
+ data.tar.gz: b8c0077f1eaa45fb89055431ab3807869acb7a87c8d4f6955bdbdd9364647cb13dea0bb36f4949f8a571e31d89dd1ce2215cec1c053265376995e229e962e5a7
data/.rubocop.yml CHANGED
@@ -14,6 +14,10 @@ Metrics/MethodLength:
14
14
  Metrics/BlockLength:
15
15
  Exclude:
16
16
  - spec/**/*
17
+ Metrics/CyclomaticComplexity:
18
+ Enabled: false
19
+ Metrics/PerceivedComplexity:
20
+ Enabled: false
17
21
  Style/HashEachMethods:
18
22
  Enabled: true
19
23
  Style/HashTransformKeys:
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.0
1
+ 3.0.1
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 5.1.2 -- upgrade to gnparser v1.0.2
2
+
3
+ 5.1.1 -- simplified signature of methods
4
+
5
+ 5.1.0 -- changed signature of methods
6
+
1
7
  5.0.0 -- changed GNparser to v 1.0.0
2
8
 
3
9
  4.0.3 -- fix memory leak in parse_ary method
data/README.md CHANGED
@@ -1,5 +1,4 @@
1
- Biodiversity
2
- ============
1
+ # Biodiversity
3
2
 
4
3
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3569596.svg)](https://doi.org/10.5281/zenodo.3569596)
5
4
  [![Gem Version][gem_svg]][gem_link]
@@ -8,12 +7,12 @@ Biodiversity
8
7
  Parses taxonomic scientific name and breaks it into semantic elements.
9
8
 
10
9
  **Important**: Biodiversity parser >= 4.0.0 uses binding to
11
- https://gitlab.com/gogna/gnparser and
10
+ `https://github.com/gnames/gnparser` and
12
11
  is not backward compatible with older versions. However, it is much, much faster
13
12
  and better than previous versions.
14
13
 
15
14
  This gem does not have a remote server or a command line executable anymore.
16
- For such features use https://gitlab.com/gogna/gnparser.
15
+ For such features use `https://github.com/gnames/gnparser`.
17
16
 
18
17
  - [Biodiversity](#biodiversity)
19
18
  - [Installation](#installation)
@@ -24,7 +23,9 @@ For such features use https://gitlab.com/gogna/gnparser.
24
23
 
25
24
  ## Installation
26
25
 
27
- sudo gem install biodiversity
26
+ ```bash
27
+ sudo gem install biodiversity
28
+ ```
28
29
 
29
30
  The gem should work on Linux, Mac and Windows (64bit) machines
30
31
 
@@ -64,7 +65,6 @@ million names on a 4CPU hyperthreaded laptop:
64
65
 
65
66
  You can use it as a library in Ruby:
66
67
 
67
-
68
68
  ```ruby
69
69
  require 'biodiversity'
70
70
 
@@ -92,7 +92,6 @@ Biodiversity::Parser.parse("Plantago").to_json
92
92
  # to clean name up
93
93
  Biodiversity::Parser.parse(" Plantago major ")[:normalized]
94
94
 
95
-
96
95
  # to get canonical form with or without infraspecies ranks, as well as
97
96
  # stemmed version.
98
97
  parsed = Biodiversity::Parser.parse("Seddera latifolia H. & S. var. latifolia")
@@ -103,6 +102,9 @@ parsed[:canonicalName][:stem]
103
102
  # to get detailed information about elements of the name
104
103
  Biodiversity::Parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
105
104
  Braun & Crous 2003")[:details]
105
+
106
+ # to parse a botanical cultivar
107
+ Biodiversity::Parser.parse("Sarracenia flava 'Maxima'", with_cultivars: true)
106
108
  ```
107
109
 
108
110
  'Surrogate' is a broad group which includes 'Barcode of Life' names, and various
@@ -111,6 +113,7 @@ undetermined names with cf. sp. spp. nr. in them:
111
113
  ```ruby
112
114
  parser.parse("Coleoptera BOLD:1234567")[:surrogate]
113
115
  ```
116
+
114
117
  ### What is "nameStringID" in the parsed results?
115
118
 
116
119
  ID field contains UUID v5 hexadecimal string. ID is generated out of bytes
@@ -118,18 +121,16 @@ from the name string itself, and identical id can be generated using [any
118
121
  popular programming language][uuid_examples]. You can read more about UUID
119
122
  version 5 in a [blog post][uuid_blog]
120
123
 
121
- For example "Homo sapiens" should generate "16f235a0-e4a3-529c-9b83-bd15fe722110" UUID
124
+ For example "Homo sapiens" should generate
125
+ "16f235a0-e4a3-529c-9b83-bd15fe722110" UUID
122
126
 
123
- Copyright
124
- ---------
127
+ ## Copyright
125
128
 
126
129
  Authors: [Dmitry Mozzherin][dimus]
127
130
 
128
131
  Contributors: [Patrick Leary][pleary], [Hernán Lucas Pereira][hernan]
129
132
 
130
-
131
-
132
- Copyright (c) 2008-2020 Dmitry Mozzherin. See [LICENSE][license]
133
+ Copyright (c) 2008-2021 Dmitry Mozzherin. See [LICENSE][license]
133
134
  for further details.
134
135
 
135
136
  [gem_svg]: https://badge.fury.io/rb/biodiversity.svg
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
19
19
  /* Start of preamble from import "C" comments. */
20
20
 
21
21
 
22
- #line 3 "main.go"
22
+ #line 5 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
25
 
@@ -80,7 +80,7 @@ extern "C" {
80
80
  // 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
81
81
  // parsed details are omitted, if it is 1 -- they are included.
82
82
  // true.
83
- extern char* ParseToString(char* name, char* f, int details);
83
+ extern char* ParseToString(char* name, char* f, int details, int cultivars);
84
84
 
85
85
  // FreeMemory takes a string pointer and frees its memory.
86
86
  extern void FreeMemory(char* p);
@@ -90,7 +90,7 @@ extern void FreeMemory(char* p);
90
90
  // either CSV or JSON format. Format argument can take values of 'csv',
91
91
  // 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
92
92
  // true.
93
- extern char* ParseAryToString(char** in, int length, char* f, int details);
93
+ extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
94
94
 
95
95
  #ifdef __cplusplus
96
96
  }
Binary file
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
19
19
  /* Start of preamble from import "C" comments. */
20
20
 
21
21
 
22
- #line 3 "main.go"
22
+ #line 5 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
25
 
@@ -80,20 +80,17 @@ extern "C" {
80
80
  // 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
81
81
  // parsed details are omitted, if it is 1 -- they are included.
82
82
  // true.
83
-
84
- extern char* ParseToString(char* p0, char* p1, int p2);
83
+ extern char* ParseToString(char* name, char* f, int details, int cultivars);
85
84
 
86
85
  // FreeMemory takes a string pointer and frees its memory.
87
-
88
- extern void FreeMemory(char* p0);
86
+ extern void FreeMemory(char* p);
89
87
 
90
88
  // ParseAryToString function takes an array of names, parsing format, and a
91
89
  // withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
92
90
  // either CSV or JSON format. Format argument can take values of 'csv',
93
91
  // 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
94
92
  // true.
95
-
96
- extern char* ParseAryToString(char** p0, int p1, char* p2, int p3);
93
+ extern char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
97
94
 
98
95
  #ifdef __cplusplus
99
96
  }
Binary file
@@ -19,7 +19,7 @@ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
19
19
  /* Start of preamble from import "C" comments. */
20
20
 
21
21
 
22
- #line 3 "main.go"
22
+ #line 5 "main.go"
23
23
 
24
24
  #include "stdlib.h"
25
25
 
@@ -80,17 +80,17 @@ extern "C" {
80
80
  // 'csv', 'compact', 'pretty'. If withDetails argument is 0, additional
81
81
  // parsed details are omitted, if it is 1 -- they are included.
82
82
  // true.
83
- extern char* ParseToString(char* name, char* f, int details);
83
+ extern __declspec(dllexport) char* ParseToString(char* name, char* f, int details, int cultivars);
84
84
 
85
85
  // FreeMemory takes a string pointer and frees its memory.
86
- extern void FreeMemory(char* p);
86
+ extern __declspec(dllexport) void FreeMemory(char* p);
87
87
 
88
88
  // ParseAryToString function takes an array of names, parsing format, and a
89
89
  // withDetails flag as 0|1 integer. Parsed outputs are sent as a string in
90
90
  // either CSV or JSON format. Format argument can take values of 'csv',
91
91
  // 'compact', or 'pretty'. For withDetails argument 0 means false, 1 means
92
92
  // true.
93
- extern char* ParseAryToString(char** in, int length, char* f, int details);
93
+ extern __declspec(dllexport) char* ParseAryToString(char** in, int length, char* f, int details, int cultivars);
94
94
 
95
95
  #ifdef __cplusplus
96
96
  }
Binary file
@@ -28,47 +28,56 @@ module Biodiversity
28
28
  callback(:parser_callback, %i[string], :void)
29
29
 
30
30
  attach_function(:parse_go, :ParseToString,
31
- %i[string string int], :strptr)
31
+ %i[string string int int], :strptr)
32
32
  attach_function(:parse_ary_go, :ParseAryToString,
33
- %i[pointer int string int], :strptr)
33
+ %i[pointer int string int int], :strptr)
34
34
  attach_function(:free_mem, :FreeMemory, %i[pointer], :void)
35
35
 
36
- def self.parse(name, simple: false, details: false)
36
+ def self.parse(name, simple: false, with_cultivars: false)
37
37
  format = simple ? 'csv' : 'compact'
38
- with_details = details ? 1 : 0
38
+ with_details = simple ? 0 : 1
39
+ with_cultivars = with_cultivars ? 1 : 0
39
40
 
40
- parsed, ptr = parse_go(name, format, with_details)
41
+ parsed, ptr = parse_go(name, format, with_details, with_cultivars)
41
42
  free_mem(ptr)
42
43
  output(parsed, simple)
43
44
  end
44
45
 
45
- def self.parse_ary(ary, simple: false, details: false)
46
+ def self.parse_ary(ary, simple: false, with_cultivars: false)
46
47
  format = simple ? 'csv' : 'compact'
47
- with_details = details ? 1 : 0
48
+ with_details = simple ? 0 : 1
49
+ with_cultivars = with_cultivars ? 1 : 0
48
50
 
49
51
  in_ptr = FFI::MemoryPointer.new(:pointer, ary.length)
50
52
  in_ptr.write_array_of_pointer(
51
53
  ary.map { |s| FFI::MemoryPointer.from_string(s) }
52
54
  )
53
55
 
54
- parsed, ptr = parse_ary_go(in_ptr, ary.length, format, with_details)
56
+ parsed, ptr = parse_ary_go(in_ptr, ary.length, format,
57
+ with_details, with_cultivars)
55
58
  free_mem(ptr)
56
59
  if simple
57
- CSV.new(parsed).map do |row|
60
+ CSV.new(parsed.force_encoding('UTF-8')).map do |row|
58
61
  csv_row(row)
59
62
  end
60
63
  else
61
- JSON.parse(parsed, symbolize_names: true)
64
+ JSON.parse(parsed, symbolize_names: true).map do |item|
65
+ item[:parserVersion] = Biodiversity.gnparser_version
66
+ item
67
+ end
62
68
  end
63
69
  end
64
70
 
65
71
  def self.output(parsed, simple)
66
72
  if simple
73
+ parsed = parsed.force_encoding('UTF-8')
67
74
  csv = CSV.new(parsed)
68
75
  row = csv.readlines[0]
69
76
  csv_row(row)
70
77
  else
71
- JSON.parse(parsed, symbolize_names: true)
78
+ parsed = JSON.parse(parsed, symbolize_names: true)
79
+ parsed[:parserVersion] = Biodiversity.gnparser_version
80
+ parsed
72
81
  end
73
82
  end
74
83
 
@@ -76,7 +85,7 @@ module Biodiversity
76
85
  {
77
86
  id: row[0],
78
87
  verbatim: row[1],
79
- cardinality: row[2],
88
+ cardinality: row[2].to_i,
80
89
  canonical: {
81
90
  stem: row[3],
82
91
  simple: row[4],
@@ -84,7 +93,7 @@ module Biodiversity
84
93
  },
85
94
  authorship: row[6],
86
95
  year: row[7],
87
- quality: row[8]
96
+ quality: row[8].to_i
88
97
  }
89
98
  end
90
99
  end
@@ -2,9 +2,14 @@
2
2
 
3
3
  # Biodiversity module provides a namespace for scientific name parser.
4
4
  module Biodiversity
5
- VERSION = '5.1.0'
5
+ VERSION = '5.3.1'
6
+ GNPARSER_VERSION = 'GNparser 1.3.0+'
6
7
 
7
8
  def self.version
8
9
  VERSION
9
10
  end
11
+
12
+ def self.gnparser_version
13
+ GNPARSER_VERSION
14
+ end
10
15
  end
@@ -4,46 +4,80 @@
4
4
 
5
5
  describe Biodiversity::Parser do
6
6
  describe('parse') do
7
- it 'parses name in simple format' do
7
+ it 'parses name in simple form' do
8
8
  parsed = subject.parse('Homo sapiens Linn.', simple: true)
9
9
  expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
10
10
  expect(parsed[:normalized]).to be_nil
11
11
  end
12
12
 
13
- it 'parsed name in full format' do
14
- parsed = subject.parse('Homo sapiens Linn.')
13
+ it 'parsed name in full form' do
14
+ parsed = subject.parse('Homo sapiens Linn. 1758')
15
15
  expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
16
- expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
16
+ expect(parsed[:normalized]).to eq 'Homo sapiens Linn. 1758'
17
+ expect(parsed[:authorship][:year]).to eq '1758'
18
+ expect(parsed[:words].size).to eq 4
17
19
  end
18
20
 
19
21
  it 'gets quality and year correctly in simple form' do
20
- parsed = subject.parse('Homo sapiens Linn. 1758', simple: true)
22
+ parsed = subject.parse('Homo sapiens Foo & Bar. 1758', simple: true)
21
23
  expect(parsed[:canonical][:simple]).to eq 'Homo sapiens'
22
24
  expect(parsed[:year]).to eq '1758'
23
- expect(parsed[:quality]).to eq '1'
25
+ expect(parsed[:authorship]).to eq 'Foo & Bar. 1758'
26
+ expect(parsed[:quality]).to eq 1
24
27
  expect(parsed[:normalized]).to be_nil
25
28
  end
29
+
30
+ it 'parses botanical cultivars in full form' do
31
+ parsed = subject.parse('Aus bus "White Russian"',
32
+ simple: false, with_cultivars: true)
33
+ expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
34
+ expect(parsed[:quality]).to eq 1
35
+ end
36
+
37
+ it 'parses botanical cultivars in simple form' do
38
+ parsed = subject.parse('Aus bus "White Russian"',
39
+ simple: true, with_cultivars: true)
40
+ expect(parsed[:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
41
+ expect(parsed[:quality]).to eq 1
42
+ parsed = subject.parse('Aus bus "White Russian"',
43
+ simple: true, with_cultivars: false)
44
+ expect(parsed[:canonical][:simple]).to eq 'Aus bus'
45
+ expect(parsed[:quality]).to eq 2
46
+ end
26
47
  end
27
48
 
28
49
  describe('parse_ary') do
29
50
  it 'parses names in simple format' do
30
51
  parsed = subject.parse_ary(
31
- ['Homo sapiens Linn.', 'Pardosa moesta'],
32
- simple: true
52
+ ['Homo sapiens Linn.', 'Pardosa moesta', 'Aus bus "White Russian"'],
53
+ simple: true, with_cultivars: true
33
54
  )
34
55
  expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
35
- expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
36
56
  expect(parsed[0][:normalized]).to be_nil
57
+
58
+ expect(parsed[1][:canonical][:simple]).to eq 'Pardosa moesta'
59
+ expect(parsed[2][:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
60
+ expect(parsed[2][:quality]).to eq 1
37
61
  end
38
62
 
39
63
  it 'parsed name in full format' do
40
64
  parsed = subject.parse_ary(
41
- ['Homo sapiens Linn.', 'Tobacco Mosaic Virus']
65
+ [
66
+ 'Homo sapiens Linn.',
67
+ 'Tobacco Mosaic Virus',
68
+ "Aus bus 'White Russian'"
69
+ ],
70
+ with_cultivars: true
42
71
  )
43
72
  expect(parsed[0][:canonical][:simple]).to eq 'Homo sapiens'
44
73
  expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
74
+ expect(parsed[0][:words].size).to eq 3
45
75
  expect(parsed[1][:parsed]).to be false
46
76
  expect(parsed[1][:virus]).to be true
77
+ expect(parsed[1][:words]).to be_nil
78
+ expect(parsed[2][:canonical][:simple]).to eq 'Aus bus ‘White Russian’'
79
+ expect(parsed[2][:quality]).to eq 1
80
+ expect(parsed[2][:parserVersion]).to match(/GNparser/)
47
81
  end
48
82
  end
49
83
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.1.0
4
+ version: 5.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-19 00:00:00.000000000 Z
11
+ date: 2021-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
142
  - !ruby/object:Gem::Version
143
143
  version: '0'
144
144
  requirements: []
145
- rubygems_version: 3.2.3
145
+ rubygems_version: 3.2.15
146
146
  signing_key:
147
147
  specification_version: 4
148
148
  summary: Parser of scientific names