matlock 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/matlock.rb +1 -1
- data/lib/matlock/data.rb +7 -1
- data/lib/matlock/version.rb +1 -1
- data/test/data_test.rb +1 -0
- data/test/matlock_test.rb +7 -1
- metadata +3 -3
data/lib/matlock.rb
CHANGED
@@ -47,7 +47,7 @@ class Matlock
|
|
47
47
|
names = []
|
48
48
|
|
49
49
|
# Split content into words.
|
50
|
-
words = content.split(
|
50
|
+
words = content.split(/[^-a-z0-9]+/i).select {|v| v.index(/^[-a-z]+$/i)}
|
51
51
|
|
52
52
|
# Loop over each bigram and check if the words are title cased and if at
|
53
53
|
# least one of the words is a first or last name.
|
data/lib/matlock/data.rb
CHANGED
@@ -76,7 +76,13 @@ class Matlock
|
|
76
76
|
#
|
77
77
|
# @return [Boolean] true if the string is a surname, otherwise false.
|
78
78
|
def self.surname?(str)
|
79
|
-
|
79
|
+
if !str.nil?
|
80
|
+
str.upcase.split(/\-+/).each do |name|
|
81
|
+
return true unless surnames[name].nil?
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
return false
|
80
86
|
end
|
81
87
|
|
82
88
|
# Determines if a string is a first name.
|
data/lib/matlock/version.rb
CHANGED
data/test/data_test.rb
CHANGED
data/test/matlock_test.rb
CHANGED
@@ -9,7 +9,7 @@ class TestMatlock < MiniTest::Unit::TestCase
|
|
9
9
|
# Extract Names
|
10
10
|
######################################
|
11
11
|
|
12
|
-
def
|
12
|
+
def test_simple_name_extraction
|
13
13
|
html = <<-BLOCK
|
14
14
|
<html>
|
15
15
|
<body>
|
@@ -21,4 +21,10 @@ class TestMatlock < MiniTest::Unit::TestCase
|
|
21
21
|
names = @matlock.extract_names(html)
|
22
22
|
assert_equal ["Jeremy Wilson", "Jenny Smith"], names
|
23
23
|
end
|
24
|
+
|
25
|
+
def test_name_extraction_with_hyphenated_last_name
|
26
|
+
html = "My friend, Delloreen Ennis-London, is an awesome person."
|
27
|
+
names = @matlock.extract_names(html)
|
28
|
+
assert_equal ["Delloreen Ennis-London"], names
|
29
|
+
end
|
24
30
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: matlock
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -170,7 +170,7 @@ rubyforge_project:
|
|
170
170
|
rubygems_version: 1.8.24
|
171
171
|
signing_key:
|
172
172
|
specification_version: 3
|
173
|
-
summary:
|
173
|
+
summary: Simple name extraction utility.
|
174
174
|
test_files:
|
175
175
|
- test/data_test.rb
|
176
176
|
- test/matlock_test.rb
|