dwc_agent 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c955cb02feb025b039612097594c1bbb2dc94647349dcf3dd566ed9111314d88
4
- data.tar.gz: 37749c3f89b1d11334362e8be70ce5a996d66285eb637ae0a4a6bd9866674f02
3
+ metadata.gz: 5175936a78d6dc64f17a2827dccfaa80c66f07242b17639b3668e6403e9a19b7
4
+ data.tar.gz: a13ead08f2756db93e5abaff55d2cd9316b7ee0739926571045d52abfc4243c8
5
5
  SHA512:
6
- metadata.gz: 956ab7dedbba9b87fd62294290ae3b14b8f35c19b7768d6318237cb61545e27dbe8cdd9d64000c72b3931b738446f03ef070ee6526876ae6e46cddf8b2a9f0bf
7
- data.tar.gz: 15f9ae4819ea5180c3c046a4d763005d69999c20eab8e362051a4c5406bf0c1dc54389ede6f61462d6898911fc10ed2f5fc66942d30df0a716de10651ab33f9b
6
+ metadata.gz: 368ea5fd8755f19dafd4b651fc68ef89a0e34246deedd6261b4cd97ec3dbf6b3f4eb13bcc3e7552ce46d77f25b721e9269e4082b861681486dfb12a797e8c254
7
+ data.tar.gz: f61d5cc267f5b210a327431eec44da687d03d991455e6c7797aa6d1aac1fca47c2664e8579143e46951c1413cafb068acc5d8fb13ad9c0c76442ccc578581833
@@ -7,7 +7,7 @@ require 'json'
7
7
  names = []
8
8
  DwcAgent.parse(ARGV[0]).each do |r|
9
9
  name = DwcAgent.clean(r)
10
- if !name[:family].nil? && name[:family].length >= 3
10
+ if !name[:family].nil? && name[:family].length >= 2
11
11
  names << name
12
12
  end
13
13
  end
@@ -7,6 +7,9 @@ module DwcAgent
7
7
  end
8
8
  end
9
9
 
10
+ def initialize
11
+ end
12
+
10
13
  # Cleans the passed-in namae object from the parse method and
11
14
  # re-organizes it to better match expected Darwin Core output.
12
15
  #
@@ -45,7 +48,7 @@ module DwcAgent
45
48
  (parsed_namae.given == parsed_namae.given.upcase ||
46
49
  parsed_namae.given == parsed_namae.given.downcase) &&
47
50
  !parsed_namae.given.include?(".") &&
48
- parsed_namae.given.gsub(".","").length >= 4
51
+ parsed_namae.given.tr(".","").length >= 4
49
52
  parsed_namae.given = NameCase(parsed_namae.given)
50
53
  end
51
54
 
@@ -50,7 +50,7 @@ module DwcAgent
50
50
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
51
51
  \b\s*(?i:maybe)\s*\b|
52
52
  \b\s*(?i:prob)\.\s*\b|
53
- \(?(?i:collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
53
+ \(?[,;]?\s*?(?i:(local)?\s?collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
54
54
  (?i:fide)\:?\s*\b|
55
55
  (?i:game\s+dept)\.?\s*\b|
56
56
  (?i:see\s+notes?\s*(inside)?)|
@@ -7,6 +7,15 @@ module DwcAgent
7
7
  end
8
8
  end
9
9
 
10
+ def initialize
11
+ options = {
12
+ prefer_comma_as_separator: true,
13
+ separator: SPLIT_BY,
14
+ title: TITLE
15
+ }
16
+ @namae = Namae::Parser.new(options)
17
+ end
18
+
10
19
  # Parses the passed-in string and returns a list of names.
11
20
  #
12
21
  # @param names [String] the name or names to be parsed
@@ -14,20 +23,15 @@ module DwcAgent
14
23
  def parse(name)
15
24
  return [] if name.nil? || name == ""
16
25
  residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
17
- cleaned = name.gsub(STRIP_OUT, ' ')
18
- .gsub(/[#{CHAR_SUBS.keys.join('\\')}]/, CHAR_SUBS)
19
- .gsub(/(#{PHRASE_SUBS.keys.join('|')})/, PHRASE_SUBS)
20
- .gsub(/([A-Z]{1}\.)([[:alpha:]]{2,})/, '\1 \2')
21
- .gsub(COMPLEX_SEPARATORS, '\1 | \2')
22
- .gsub(residual_terminators_regex, '')
23
- .squeeze(' ').strip
24
- options = {
25
- prefer_comma_as_separator: true,
26
- separator: SPLIT_BY,
27
- title: TITLE
28
- }
29
- namae = Namae::Parser.new(options)
30
- namae.parse(cleaned)
26
+ name.gsub!(STRIP_OUT, ' ')
27
+ name.gsub!(/[#{CHAR_SUBS.keys.join('\\')}]/, CHAR_SUBS)
28
+ name.gsub!(/(#{PHRASE_SUBS.keys.join('|')})/, PHRASE_SUBS)
29
+ name.gsub!(/([A-Z]{1}\.)([[:alpha:]]{2,})/, '\1 \2')
30
+ name.gsub!(COMPLEX_SEPARATORS, '\1 | \2')
31
+ name.gsub!(residual_terminators_regex, '')
32
+ name.squeeze!(' ')
33
+ name.strip!
34
+ @namae.parse(name)
31
35
  end
32
36
 
33
37
  end
@@ -7,6 +7,9 @@ module DwcAgent
7
7
  end
8
8
  end
9
9
 
10
+ def initialize
11
+ end
12
+
10
13
  # Produces a similarity score of two given names
11
14
  # Logic inspired by R.D.M. Page, https://orcid.org/0000-0002-7101-9767
12
15
  # At https://linen-baseball.glitch.me/
@@ -15,10 +18,12 @@ module DwcAgent
15
18
  # @param given2 [String] a second given name
16
19
  # @return [Float] the similarity score
17
20
  def similarity_score(given1, given2)
18
- given1_parts = given1.gsub(/\.\s+/,".").split(/[\.\s]/)
19
- given2_parts = given2.gsub(/\.\s+/,".").split(/[\.\s]/)
20
- largest = [given1_parts,given2_parts].max
21
- smallest = [given1_parts,given2_parts].min
21
+ given1.gsub!(/\.\s+/,".")
22
+ g1_arr = given1.split(/[\.\s]/)
23
+ given2.gsub!(/\.\s+/,".")
24
+ g2_arr = given2.split(/[\.\s]/)
25
+ largest = [g1_arr,g2_arr].max
26
+ smallest = [g1_arr,g2_arr].min
22
27
 
23
28
  score = 0
24
29
  largest.each_with_index do |val,index|
@@ -3,7 +3,7 @@ module DwcAgent
3
3
 
4
4
  MAJOR = 0
5
5
  MINOR = 2
6
- PATCH = 1
6
+ PATCH = 2
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-16 00:00:00.000000000 Z
11
+ date: 2019-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae