dwc_agent 0.4.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c4ea46a2cca2719aebba4a99251aaf02f6d2fb36f21f3e6ea28b76584fc7345a
4
- data.tar.gz: 72cde7bbdf5c8f93923710f887f299a4618e32c5d129e8cac0bbcc1a285492fd
3
+ metadata.gz: 89ee1173b349dee8f3ef6fad429449acf1fb3816ae49972b0fb35e7eb9a37720
4
+ data.tar.gz: c4dc07d7b1e113835ca065f17cb86e394a313b5b1fc335c2bdbca7ec8bafbdb3
5
5
  SHA512:
6
- metadata.gz: 77c1027c302f5b853641266a833d197c1d81045ffad0ca0a2b3f4161d1fc4443fff944e6493d9c1089e710ac5aaff33bd0075698c7b70ff0ddf90d79602c8442
7
- data.tar.gz: 9b92955bf9421e4b5f7a0c5cc5fa04265a393f04de6d7f23217cea739732b81bd8f9fb9042c02db20fe13118f2c9f3b75e1a374787b83d8e0ee1350e3e2a6c7b
6
+ metadata.gz: b5c450fcb300b84bf942643ed527658840c8c2981cc4d3766a0ff9c3d4f4f5e27c940342f7a344ca502029f0b51e61b1580a08b8c6a68a19db55279cfe52fba6
7
+ data.tar.gz: 02a17d7b41313802dc6e14efbb0f54604768a53c7d5637114204d058112748189809fc20b557b772cf1650045c4856ec6ba49bac162a28d3768e729cf872092c
data/bin/dwcagent CHANGED
@@ -5,7 +5,7 @@ require 'dwc_agent'
5
5
  require 'json'
6
6
 
7
7
  names = []
8
- DwcAgent.parse(ARGV[0]).each do |r|
8
+ DwcAgent.parse(ARGV[0].dup).each do |r|
9
9
  name = DwcAgent.clean(r)
10
10
  if !name[:family].nil? && name[:family].length >= 2
11
11
  names << name
@@ -3,4 +3,4 @@
3
3
 
4
4
  require 'dwc_agent'
5
5
 
6
- puts DwcAgent.similarity_score(ARGV[0],ARGV[1])
6
+ puts DwcAgent.similarity_score(ARGV[0].dup,ARGV[1].dup)
@@ -16,16 +16,12 @@ module DwcAgent
16
16
  # @param parsed_namae [Object] the namae object
17
17
  # @return [Hash] the given, family hash
18
18
  def clean(parsed_namae)
19
- blank_name = { given: nil, family: nil }
19
+ blank_name = { given: nil, family: nil, particle: nil }
20
20
 
21
21
  if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
22
22
  return blank_name
23
23
  end
24
24
 
25
- if parsed_namae.family && parsed_namae.family.length < 2 && parsed_namae.family.count('.') == 0
26
- return blank_name
27
- end
28
-
29
25
  if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
30
26
  return blank_name
31
27
  end
@@ -99,6 +95,10 @@ module DwcAgent
99
95
  particle = nil
100
96
  end
101
97
 
98
+ if !particle.nil? && particle.include?(".")
99
+ particle = nil
100
+ end
101
+
102
102
  if !family.nil? && (family == family.upcase || family == family.downcase)
103
103
  family = NameCase(family)
104
104
  end
@@ -115,7 +115,7 @@ module DwcAgent
115
115
  return blank_name
116
116
  end
117
117
 
118
- { given: given, family: family }
118
+ { given: given, family: family, particle: particle }
119
119
  end
120
120
 
121
121
  end
@@ -81,6 +81,7 @@ module DwcAgent
81
81
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
82
82
  (?i:university|museum|exhibits?)|
83
83
  (?i:uqam)|
84
+ (?i:sem\s+(colec?tor|data))|
84
85
  \b[,;]\s+\d+\z|
85
86
  ["!@?]|
86
87
  [,]?\d+|
@@ -158,7 +159,7 @@ module DwcAgent
158
159
  }
159
160
 
160
161
  COMPLEX_SEPARATORS = %r{
161
- ^([A-Za-z]{4,},\s+(?:[A-Z]\.\s*){1,})\s+([A-Za-z]{4,},\s+(?:[A-Z]\.\s*){1,})$
162
+ ^(\S{4,},\s+(?:\S\.\s*){1,})\s+(\S{4,},\s+(?:\S\.\s*){1,})$
162
163
  }x
163
164
 
164
165
  BLACKLIST = %r{
@@ -227,7 +228,9 @@ module DwcAgent
227
228
  "von",
228
229
  "the",
229
230
  "of",
230
- "curators"
231
+ "curators",
232
+ "nomenclatural",
233
+ "adjustment"
231
234
  ]
232
235
 
233
236
  TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
@@ -20,7 +20,7 @@ module DwcAgent
20
20
  @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join('\\')].to_s
21
21
  @phrase_subs_regex = Regexp.new (PHRASE_SUBS.keys.join('|')).to_s
22
22
  @complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
23
- @add_separators_regex = Regexp.new %r{([A-Z]{1}\.)([[:alpha:]]{2,})}.to_s
23
+ @add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
24
24
  end
25
25
 
26
26
  # Parses the passed-in string and returns a list of names.
@@ -1,9 +1,9 @@
1
1
  module DwcAgent
2
2
  class Version
3
3
 
4
- MAJOR = 0
5
- MINOR = 4
6
- PATCH = 3
4
+ MAJOR = 1
5
+ MINOR = 3
6
+ PATCH = 1
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-27 00:00:00.000000000 Z
11
+ date: 2019-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae