matlock 0.1.0 → 0.1.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -47,7 +47,7 @@ class Matlock
47
47
  names = []
48
48
 
49
49
  # Split content into words.
50
- words = content.split(/\b+/).select {|v| v.index(/^[a-z]+$/i)}
50
+ words = content.split(/[^-a-z0-9]+/i).select {|v| v.index(/^[-a-z]+$/i)}
51
51
 
52
52
  # Loop over each bigram and check if the words are title cased and if at
53
53
  # least one of the words is a first or last name.
@@ -76,7 +76,13 @@ class Matlock
76
76
  #
77
77
  # @return [Boolean] true if the string is a surname, otherwise false.
78
78
  def self.surname?(str)
79
- return !str.nil? && !surnames[str.upcase].nil?
79
+ if !str.nil?
80
+ str.upcase.split(/\-+/).each do |name|
81
+ return true unless surnames[name].nil?
82
+ end
83
+ end
84
+
85
+ return false
80
86
  end
81
87
 
82
88
  # Determines if a string is a first name.
@@ -1,3 +1,3 @@
1
1
  class Matlock
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -12,6 +12,7 @@ class TestData < MiniTest::Unit::TestCase
12
12
  def test_is_surname
13
13
  assert Matlock::Data.surname?("Johnson")
14
14
  assert Matlock::Data.surname?("Smith")
15
+ assert Matlock::Data.surname?("Ennis-London")
15
16
  assert !Matlock::Data.surname?("Crabapple")
16
17
  end
17
18
 
@@ -9,7 +9,7 @@ class TestMatlock < MiniTest::Unit::TestCase
9
9
  # Extract Names
10
10
  ######################################
11
11
 
12
- def test_extract_names
12
+ def test_simple_name_extraction
13
13
  html = <<-BLOCK
14
14
  <html>
15
15
  <body>
@@ -21,4 +21,10 @@ class TestMatlock < MiniTest::Unit::TestCase
21
21
  names = @matlock.extract_names(html)
22
22
  assert_equal ["Jeremy Wilson", "Jenny Smith"], names
23
23
  end
24
+
25
+ def test_name_extraction_with_hyphenated_last_name
26
+ html = "My friend, Delloreen Ennis-London, is an awesome person."
27
+ names = @matlock.extract_names(html)
28
+ assert_equal ["Delloreen Ennis-London"], names
29
+ end
24
30
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: matlock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-07 00:00:00.000000000 Z
12
+ date: 2013-01-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -170,7 +170,7 @@ rubyforge_project:
170
170
  rubygems_version: 1.8.24
171
171
  signing_key:
172
172
  specification_version: 3
173
- summary: An intelligent web scraper
173
+ summary: Simple name extraction utility.
174
174
  test_files:
175
175
  - test/data_test.rb
176
176
  - test/matlock_test.rb