dwc_agent 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c955cb02feb025b039612097594c1bbb2dc94647349dcf3dd566ed9111314d88
4
- data.tar.gz: 37749c3f89b1d11334362e8be70ce5a996d66285eb637ae0a4a6bd9866674f02
3
+ metadata.gz: 5175936a78d6dc64f17a2827dccfaa80c66f07242b17639b3668e6403e9a19b7
4
+ data.tar.gz: a13ead08f2756db93e5abaff55d2cd9316b7ee0739926571045d52abfc4243c8
5
5
  SHA512:
6
- metadata.gz: 956ab7dedbba9b87fd62294290ae3b14b8f35c19b7768d6318237cb61545e27dbe8cdd9d64000c72b3931b738446f03ef070ee6526876ae6e46cddf8b2a9f0bf
7
- data.tar.gz: 15f9ae4819ea5180c3c046a4d763005d69999c20eab8e362051a4c5406bf0c1dc54389ede6f61462d6898911fc10ed2f5fc66942d30df0a716de10651ab33f9b
6
+ metadata.gz: 368ea5fd8755f19dafd4b651fc68ef89a0e34246deedd6261b4cd97ec3dbf6b3f4eb13bcc3e7552ce46d77f25b721e9269e4082b861681486dfb12a797e8c254
7
+ data.tar.gz: f61d5cc267f5b210a327431eec44da687d03d991455e6c7797aa6d1aac1fca47c2664e8579143e46951c1413cafb068acc5d8fb13ad9c0c76442ccc578581833
@@ -7,7 +7,7 @@ require 'json'
7
7
  names = []
8
8
  DwcAgent.parse(ARGV[0]).each do |r|
9
9
  name = DwcAgent.clean(r)
10
- if !name[:family].nil? && name[:family].length >= 3
10
+ if !name[:family].nil? && name[:family].length >= 2
11
11
  names << name
12
12
  end
13
13
  end
@@ -7,6 +7,9 @@ module DwcAgent
7
7
  end
8
8
  end
9
9
 
10
+ def initialize
11
+ end
12
+
10
13
  # Cleans the passed-in namae object from the parse method and
11
14
  # re-organizes it to better match expected Darwin Core output.
12
15
  #
@@ -45,7 +48,7 @@ module DwcAgent
45
48
  (parsed_namae.given == parsed_namae.given.upcase ||
46
49
  parsed_namae.given == parsed_namae.given.downcase) &&
47
50
  !parsed_namae.given.include?(".") &&
48
- parsed_namae.given.gsub(".","").length >= 4
51
+ parsed_namae.given.tr(".","").length >= 4
49
52
  parsed_namae.given = NameCase(parsed_namae.given)
50
53
  end
51
54
 
@@ -50,7 +50,7 @@ module DwcAgent
50
50
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
51
51
  \b\s*(?i:maybe)\s*\b|
52
52
  \b\s*(?i:prob)\.\s*\b|
53
- \(?(?i:collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
53
+ \(?[,;]?\s*?(?i:(local)?\s?collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
54
54
  (?i:fide)\:?\s*\b|
55
55
  (?i:game\s+dept)\.?\s*\b|
56
56
  (?i:see\s+notes?\s*(inside)?)|
@@ -7,6 +7,15 @@ module DwcAgent
7
7
  end
8
8
  end
9
9
 
10
+ def initialize
11
+ options = {
12
+ prefer_comma_as_separator: true,
13
+ separator: SPLIT_BY,
14
+ title: TITLE
15
+ }
16
+ @namae = Namae::Parser.new(options)
17
+ end
18
+
10
19
  # Parses the passed-in string and returns a list of names.
11
20
  #
12
21
  # @param names [String] the name or names to be parsed
@@ -14,20 +23,15 @@ module DwcAgent
14
23
  def parse(name)
15
24
  return [] if name.nil? || name == ""
16
25
  residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
17
- cleaned = name.gsub(STRIP_OUT, ' ')
18
- .gsub(/[#{CHAR_SUBS.keys.join('\\')}]/, CHAR_SUBS)
19
- .gsub(/(#{PHRASE_SUBS.keys.join('|')})/, PHRASE_SUBS)
20
- .gsub(/([A-Z]{1}\.)([[:alpha:]]{2,})/, '\1 \2')
21
- .gsub(COMPLEX_SEPARATORS, '\1 | \2')
22
- .gsub(residual_terminators_regex, '')
23
- .squeeze(' ').strip
24
- options = {
25
- prefer_comma_as_separator: true,
26
- separator: SPLIT_BY,
27
- title: TITLE
28
- }
29
- namae = Namae::Parser.new(options)
30
- namae.parse(cleaned)
26
+ name.gsub!(STRIP_OUT, ' ')
27
+ name.gsub!(/[#{CHAR_SUBS.keys.join('\\')}]/, CHAR_SUBS)
28
+ name.gsub!(/(#{PHRASE_SUBS.keys.join('|')})/, PHRASE_SUBS)
29
+ name.gsub!(/([A-Z]{1}\.)([[:alpha:]]{2,})/, '\1 \2')
30
+ name.gsub!(COMPLEX_SEPARATORS, '\1 | \2')
31
+ name.gsub!(residual_terminators_regex, '')
32
+ name.squeeze!(' ')
33
+ name.strip!
34
+ @namae.parse(name)
31
35
  end
32
36
 
33
37
  end
@@ -7,6 +7,9 @@ module DwcAgent
7
7
  end
8
8
  end
9
9
 
10
+ def initialize
11
+ end
12
+
10
13
  # Produces a similarity score of two given names
11
14
  # Logic inspired by R.D.M. Page, https://orcid.org/0000-0002-7101-9767
12
15
  # At https://linen-baseball.glitch.me/
@@ -15,10 +18,12 @@ module DwcAgent
15
18
  # @param given2 [String] a second given name
16
19
  # @return [Float] the similarity score
17
20
  def similarity_score(given1, given2)
18
- given1_parts = given1.gsub(/\.\s+/,".").split(/[\.\s]/)
19
- given2_parts = given2.gsub(/\.\s+/,".").split(/[\.\s]/)
20
- largest = [given1_parts,given2_parts].max
21
- smallest = [given1_parts,given2_parts].min
21
+ given1.gsub!(/\.\s+/,".")
22
+ g1_arr = given1.split(/[\.\s]/)
23
+ given2.gsub!(/\.\s+/,".")
24
+ g2_arr = given2.split(/[\.\s]/)
25
+ largest = [g1_arr,g2_arr].max
26
+ smallest = [g1_arr,g2_arr].min
22
27
 
23
28
  score = 0
24
29
  largest.each_with_index do |val,index|
@@ -3,7 +3,7 @@ module DwcAgent
3
3
 
4
4
  MAJOR = 0
5
5
  MINOR = 2
6
- PATCH = 1
6
+ PATCH = 2
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-16 00:00:00.000000000 Z
11
+ date: 2019-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae