matlock 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -47,7 +47,7 @@ class Matlock
47
47
  names = []
48
48
 
49
49
  # Split content into words.
50
- words = content.split(/\b+/).select {|v| v.index(/^[a-z]+$/i)}
50
+ words = content.split(/[^-a-z0-9]+/i).select {|v| v.index(/^[-a-z]+$/i)}
51
51
 
52
52
  # Loop over each bigram and check if the words are title cased and if at
53
53
  # least one of the words is a first or last name.
@@ -76,7 +76,13 @@ class Matlock
76
76
  #
77
77
  # @return [Boolean] true if the string is a surname, otherwise false.
78
78
  def self.surname?(str)
79
- return !str.nil? && !surnames[str.upcase].nil?
79
+ if !str.nil?
80
+ str.upcase.split(/\-+/).each do |name|
81
+ return true unless surnames[name].nil?
82
+ end
83
+ end
84
+
85
+ return false
80
86
  end
81
87
 
82
88
  # Determines if a string is a first name.
@@ -1,3 +1,3 @@
1
1
  class Matlock
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -12,6 +12,7 @@ class TestData < MiniTest::Unit::TestCase
12
12
  def test_is_surname
13
13
  assert Matlock::Data.surname?("Johnson")
14
14
  assert Matlock::Data.surname?("Smith")
15
+ assert Matlock::Data.surname?("Ennis-London")
15
16
  assert !Matlock::Data.surname?("Crabapple")
16
17
  end
17
18
 
@@ -9,7 +9,7 @@ class TestMatlock < MiniTest::Unit::TestCase
9
9
  # Extract Names
10
10
  ######################################
11
11
 
12
- def test_extract_names
12
+ def test_simple_name_extraction
13
13
  html = <<-BLOCK
14
14
  <html>
15
15
  <body>
@@ -21,4 +21,10 @@ class TestMatlock < MiniTest::Unit::TestCase
21
21
  names = @matlock.extract_names(html)
22
22
  assert_equal ["Jeremy Wilson", "Jenny Smith"], names
23
23
  end
24
+
25
+ def test_name_extraction_with_hyphenated_last_name
26
+ html = "My friend, Delloreen Ennis-London, is an awesome person."
27
+ names = @matlock.extract_names(html)
28
+ assert_equal ["Delloreen Ennis-London"], names
29
+ end
24
30
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: matlock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-07 00:00:00.000000000 Z
12
+ date: 2013-01-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -170,7 +170,7 @@ rubyforge_project:
170
170
  rubygems_version: 1.8.24
171
171
  signing_key:
172
172
  specification_version: 3
173
- summary: An intelligent web scraper
173
+ summary: Simple name extraction utility.
174
174
  test_files:
175
175
  - test/data_test.rb
176
176
  - test/matlock_test.rb