matlock 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/matlock.rb +1 -1
- data/lib/matlock/data.rb +7 -1
- data/lib/matlock/version.rb +1 -1
- data/test/data_test.rb +1 -0
- data/test/matlock_test.rb +7 -1
- metadata +3 -3
data/lib/matlock.rb
CHANGED
@@ -47,7 +47,7 @@ class Matlock
|
|
47
47
|
names = []
|
48
48
|
|
49
49
|
# Split content into words.
|
50
|
-
words = content.split(
|
50
|
+
words = content.split(/[^-a-z0-9]+/i).select {|v| v.index(/^[-a-z]+$/i)}
|
51
51
|
|
52
52
|
# Loop over each bigram and check if the words are title cased and if at
|
53
53
|
# least one of the words is a first or last name.
|
data/lib/matlock/data.rb
CHANGED
@@ -76,7 +76,13 @@ class Matlock
|
|
76
76
|
#
|
77
77
|
# @return [Boolean] true if the string is a surname, otherwise false.
|
78
78
|
def self.surname?(str)
|
79
|
-
|
79
|
+
if !str.nil?
|
80
|
+
str.upcase.split(/\-+/).each do |name|
|
81
|
+
return true unless surnames[name].nil?
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
return false
|
80
86
|
end
|
81
87
|
|
82
88
|
# Determines if a string is a first name.
|
data/lib/matlock/version.rb
CHANGED
data/test/data_test.rb
CHANGED
data/test/matlock_test.rb
CHANGED
@@ -9,7 +9,7 @@ class TestMatlock < MiniTest::Unit::TestCase
|
|
9
9
|
# Extract Names
|
10
10
|
######################################
|
11
11
|
|
12
|
-
def
|
12
|
+
def test_simple_name_extraction
|
13
13
|
html = <<-BLOCK
|
14
14
|
<html>
|
15
15
|
<body>
|
@@ -21,4 +21,10 @@ class TestMatlock < MiniTest::Unit::TestCase
|
|
21
21
|
names = @matlock.extract_names(html)
|
22
22
|
assert_equal ["Jeremy Wilson", "Jenny Smith"], names
|
23
23
|
end
|
24
|
+
|
25
|
+
def test_name_extraction_with_hyphenated_last_name
|
26
|
+
html = "My friend, Delloreen Ennis-London, is an awesome person."
|
27
|
+
names = @matlock.extract_names(html)
|
28
|
+
assert_equal ["Delloreen Ennis-London"], names
|
29
|
+
end
|
24
30
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: matlock
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -170,7 +170,7 @@ rubyforge_project:
|
|
170
170
|
rubygems_version: 1.8.24
|
171
171
|
signing_key:
|
172
172
|
specification_version: 3
|
173
|
-
summary:
|
173
|
+
summary: Simple name extraction utility.
|
174
174
|
test_files:
|
175
175
|
- test/data_test.rb
|
176
176
|
- test/matlock_test.rb
|