dwc_agent 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +7 -1
- data/lib/dwc_agent/parser.rb +7 -1
- data/lib/dwc_agent/similarity.rb +7 -1
- data/lib/dwc_agent/utility.rb +4 -4
- data/lib/dwc_agent/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70f9fec903afada2550d3f18bfc6580cdf25f33d
|
4
|
+
data.tar.gz: 49603abaf0ce0329271b1d851287325a2b19c85d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 879bd9099f120ea3dccacd9341f522e5b0952385c800673ae25a886c511c7de09813b2fb6e982acc835d2ca024116b022b30cc27c55301913a8f58c855a21cd5
|
7
|
+
data.tar.gz: b1d76e637c3e129798780b117cc38984e25b5f1f86a388b84e3a0b51e61dad67db936592859aa17945e2f9a6abc994c7b5658f47d6950deb7c1d5abc27fdb234
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
1
|
module DwcAgent
|
2
2
|
class Cleaner
|
3
3
|
|
4
|
+
class << self
|
5
|
+
def instance
|
6
|
+
Thread.current[:dwc_agent_cleaner] ||= new
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
4
10
|
# Cleans the passed-in namae object from the parse method and
|
5
11
|
# re-organizes it to better match expected Darwin Core output.
|
6
12
|
#
|
7
13
|
# @param parsed_namae [Object] the namae object
|
8
14
|
# @return [Hash] the given, family hash
|
9
|
-
def self.clean(parsed_namae)
|
15
|
+
def clean(parsed_namae)
|
10
16
|
blank_name = { given: nil, family: nil }
|
11
17
|
|
12
18
|
if parsed_namae.family && parsed_namae.family.length < 3
|
data/lib/dwc_agent/parser.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
module DwcAgent
|
2
2
|
class Parser
|
3
3
|
|
4
|
+
class << self
|
5
|
+
def instance
|
6
|
+
Thread.current[:dwc_agent_parser] ||= new
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
4
10
|
# Parses the passed-in string and returns a list of names.
|
5
11
|
#
|
6
12
|
# @param names [String] the name or names to be parsed
|
7
13
|
# @return [Array] the list of parsed names
|
8
|
-
def self.parse(name)
|
14
|
+
def parse(name)
|
9
15
|
return [] if name.nil? || name == ""
|
10
16
|
cleaned = name.gsub(STRIP_OUT, ' ')
|
11
17
|
.gsub(/[#{CHAR_SUBS.keys.join('\\')}]/, CHAR_SUBS)
|
data/lib/dwc_agent/similarity.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
module DwcAgent
|
2
2
|
class Similarity
|
3
3
|
|
4
|
+
class << self
|
5
|
+
def instance
|
6
|
+
Thread.current[:dwc_agent_similarity] ||= new
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
4
10
|
# Produces a similarity score of two given names
|
5
11
|
# Logic inspired by R.D.M. Page, https://orcid.org/0000-0002-7101-9767
|
6
12
|
# At https://linen-baseball.glitch.me/
|
@@ -8,7 +14,7 @@ module DwcAgent
|
|
8
14
|
# @param given1 [String] one given name
|
9
15
|
# @param given2 [String] a second given name
|
10
16
|
# @return [Float] the similarity score
|
11
|
-
def self.similarity_score(given1, given2)
|
17
|
+
def similarity_score(given1, given2)
|
12
18
|
given1_parts = given1.gsub(/\.\s+/,".").split(/[\.\s]/)
|
13
19
|
given2_parts = given2.gsub(/\.\s+/,".").split(/[\.\s]/)
|
14
20
|
largest = [given1_parts,given2_parts].max
|
data/lib/dwc_agent/utility.rb
CHANGED
@@ -3,15 +3,15 @@ module DwcAgent
|
|
3
3
|
module_function
|
4
4
|
|
5
5
|
def parse(names)
|
6
|
-
Parser.parse(names)
|
6
|
+
Parser.instance.parse(names)
|
7
7
|
end
|
8
8
|
|
9
|
-
def clean(
|
10
|
-
Cleaner.clean(
|
9
|
+
def clean(parsed_name)
|
10
|
+
Cleaner.instance.clean(parsed_name)
|
11
11
|
end
|
12
12
|
|
13
13
|
def similarity_score(given1, given2)
|
14
|
-
Similarity.similarity_score(given1, given2)
|
14
|
+
Similarity.instance.similarity_score(given1, given2)
|
15
15
|
end
|
16
16
|
|
17
17
|
end
|
data/lib/dwc_agent/version.rb
CHANGED