ocr_challenge 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +36 -0
- data/LICENSE +22 -0
- data/README.md +81 -0
- data/lib/ocr_challenge/basic_land_and_cell_number_parser.rb +26 -0
- data/lib/ocr_challenge/basic_ten_digit_telecom_parser.rb +27 -0
- data/lib/ocr_challenge/email_parser.rb +18 -0
- data/lib/ocr_challenge/i_business_card_parser.rb +18 -0
- data/lib/ocr_challenge/i_contact_info.rb +45 -0
- data/lib/ocr_challenge/name_parser.rb +28 -0
- data/lib/ocr_challenge/version.rb +3 -0
- data/lib/ocr_challenge.rb +12 -0
- data/names/names.txt +4408 -0
- data/names/other-names.txt +5559 -0
- data/names/surnames.txt +6260 -0
- data/ocr_challenge.gemspec +20 -0
- data/spec/basic_land_and_cell_number_parser_spec.rb +25 -0
- data/spec/email_parser_spec.rb +25 -0
- data/spec/i_business_card_parser_spec.rb +20 -0
- data/spec/i_contect_info_spec.rb +26 -0
- data/spec/name_parser_spec.rb +34 -0
- data/spec/spec_helper.rb +17 -0
- metadata +115 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
$:.unshift File.expand_path('../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'ocr_challenge/version'
|
4
|
+
|
5
|
+
# Packaging manifest for the ocr_challenge gem.
Gem::Specification.new do |spec|
  # Identity and licensing.
  spec.name    = 'ocr_challenge'
  spec.version = OcrChallenge::VERSION
  spec.license = 'MIT'

  # Author and project links.
  spec.authors  = ['Alexander Vanadio']
  spec.email    = 'execdd17@gmail.com'
  spec.homepage = 'https://github.com/execdd17/ocr_challenge'

  # Text shown on the registry page.
  spec.summary     = 'A solution to the challenge here: http://www.asymmetrik.com/programming-challenges/business-card-ocr.html'
  spec.description = 'This gem allows you to parse text blobs to find names, email addresses, and phone numbers'

  # Packaged files come from whatever git tracks, so the gem must be built
  # from inside a git checkout.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- spec/*`.split("\n")
  spec.require_paths = ['lib']

  # Development-only tooling, registered in the same order as before.
  {
    'rspec'     => '~> 2.99',
    'simplecov' => '~> 0.9',
    'faker'     => '~> 1.4'
  }.each do |gem_name, requirement|
    spec.add_development_dependency(gem_name, requirement)
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the BasicLandAndCellNumberParser mixin.
describe OcrChallenge::BasicLandAndCellNumberParser do

  # Anonymous host class for the mixin. A named top-level TestParser would be
  # reopened by the other parser specs, so the object under test would carry
  # every parser mixin whenever the whole suite is loaded.
  let(:parser_class) do
    Class.new do
      include OcrChallenge::BasicLandAndCellNumberParser
      attr_accessor :lines
    end
  end

  subject { parser_class.new }

  describe '.parse_phone_numbers' do

    # One example per sample; each example draws a fresh number from Faker.
    SAMPLE_SIZE.times do
      it 'correctly finds the number' do
        random_number = Faker::PhoneNumber.cell_phone
        subject.lines = [random_number + "\n"]
        numbers = subject.parse_phone_numbers

        # The expected value comes from the parser's own formatter, reached
        # through send (presumably because it is not public -- verify).
        formatted_expected_number = subject.send(:format, random_number, '-')
        numbers.first.should eq formatted_expected_number
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the EmailParser mixin.
describe OcrChallenge::EmailParser do

  # Anonymous host class for the mixin. A named top-level TestParser would be
  # reopened by the other parser specs, so the object under test would carry
  # every parser mixin whenever the whole suite is loaded.
  let(:parser_class) do
    Class.new do
      include OcrChallenge::EmailParser
      attr_accessor :lines
    end
  end

  subject { parser_class.new }

  describe '.parse_email_addresses' do

    # One example per sample; each example draws a fresh address from Faker.
    SAMPLE_SIZE.times do
      it 'correctly finds the email' do
        random_email = Faker::Internet.email
        subject.lines = [random_email + "\n"]
        email_addresses = subject.parse_email_addresses

        email_addresses.first.should eq random_email
      end
    end
  end
end
|
25
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the IBusinessCardParser entry point.
describe IBusinessCardParser do

  # Card fragments, each padded with spaces and terminated by a newline.
  let(:full_name)  { " Alexander Vanadio \n" }
  let(:email)      { " execdd17@gmail.com \n" }
  let(:phone)      { " 410.349.7308 \n" }
  let(:blank_line) { " \n" }

  # TODO: document 10 number assumption

  describe '.get_contact_info' do
    it 'returns an IContactInfo' do
      card_text = blank_line + full_name + email + phone + blank_line
      result = IBusinessCardParser.get_contact_info(card_text)

      # be_a reports the actual class on failure, unlike a bare true/false check.
      result.should be_a(IContactInfo)
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks the labelled strings IContactInfo reports for a parsed card.
describe IContactInfo do

  # Card fragments, each padded with spaces and terminated by a newline.
  let(:blank_line) { " \n" }
  let(:full_name)  { " Alexander Vanadio \n" }
  let(:email)      { " execdd17@gmail.com \n" }
  let(:phone)      { " 410.349.7308 \n" }

  # The whole card: the three fields wrapped in a blank line on either side.
  let(:card_text) { [blank_line, full_name, email, phone, blank_line].join }
  let(:parser)    { IBusinessCardParser.new(card_text) }

  subject { IContactInfo.new(parser) }

  it 'returns the correct name' do
    expected = "Name: Alexander Vanadio"
    subject.get_name.should eq expected
  end

  it 'returns the correct email' do
    expected = "Email: execdd17@gmail.com"
    subject.get_email_address.should eq expected
  end

  it 'returns the correct phone number' do
    expected = "Phone: 410-349-7308"
    subject.get_phone_number.should eq expected
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the NameParser mixin.
describe OcrChallenge::NameParser do

  # Anonymous host class for the mixin. A named top-level TestParser would be
  # reopened by the other parser specs, so the object under test would carry
  # every parser mixin whenever the whole suite is loaded.
  let(:parser_class) do
    Class.new do
      include OcrChallenge::NameParser
      attr_accessor :lines
    end
  end

  # The gem's bundled names/ directory, resolved relative to this spec file so
  # the suite does not depend on one developer's checkout location.
  let(:names_dir) { File.expand_path('../../names', __FILE__) }

  subject { parser_class.new }

  describe '.parse_names' do

    #let(:random_name) { Faker::Name.name }
    let(:random_name) { "Alexander Vanadio" }

    # The input is a fixed literal, so a single example is enough. Wrap this in
    # SAMPLE_SIZE.times again if the Faker-generated name above is re-enabled.
    it 'correctly finds the name' do
      subject.lines = [random_name + "\n"]
      names = subject.parse_names(names_dir)

      names.first.should eq random_name
    end

    it 'filters a non-name' do
      subject.lines = [random_name + "\n", "Software Engineer\n", "234\n", "\n"]
      names = subject.parse_names(names_dir)

      # eq compares by value; be would compare object identity.
      names.size.should eq 1
    end
  end
end
|
34
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
|
3
|
+
# Coverage is started before the library is required below; keep that order.
# Files under /spec/ are filtered out of the coverage report.
SimpleCov.start { add_filter "/spec/" }

# Runner output settings: colour on, and treat the output stream as a TTY.
RSpec.configure do |rspec|
  rspec.tty   = true
  rspec.color = true
end

# Load the library under test, then the fake-data generator used by the specs.
%w[ocr_challenge faker].each { |lib| require lib }

# Mixed into the top level so specs can name OcrChallenge constants unqualified.
include OcrChallenge

# How many examples each randomized spec generates.
SAMPLE_SIZE = 1000
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ocr_challenge
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1.0'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alexander Vanadio
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.99'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.99'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: simplecov
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.9'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.9'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faker
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.4'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.4'
|
55
|
+
description: This gem allows you to parse text blobs to find names, email addresses,
|
56
|
+
and phone numbers
|
57
|
+
email: execdd17@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- .gitignore
|
63
|
+
- Gemfile
|
64
|
+
- Gemfile.lock
|
65
|
+
- LICENSE
|
66
|
+
- README.md
|
67
|
+
- lib/ocr_challenge.rb
|
68
|
+
- lib/ocr_challenge/basic_land_and_cell_number_parser.rb
|
69
|
+
- lib/ocr_challenge/basic_ten_digit_telecom_parser.rb
|
70
|
+
- lib/ocr_challenge/email_parser.rb
|
71
|
+
- lib/ocr_challenge/i_business_card_parser.rb
|
72
|
+
- lib/ocr_challenge/i_contact_info.rb
|
73
|
+
- lib/ocr_challenge/name_parser.rb
|
74
|
+
- lib/ocr_challenge/version.rb
|
75
|
+
- names/names.txt
|
76
|
+
- names/other-names.txt
|
77
|
+
- names/surnames.txt
|
78
|
+
- ocr_challenge.gemspec
|
79
|
+
- spec/basic_land_and_cell_number_parser_spec.rb
|
80
|
+
- spec/email_parser_spec.rb
|
81
|
+
- spec/i_business_card_parser_spec.rb
|
82
|
+
- spec/i_contect_info_spec.rb
|
83
|
+
- spec/name_parser_spec.rb
|
84
|
+
- spec/spec_helper.rb
|
85
|
+
homepage: https://github.com/execdd17/ocr_challenge
|
86
|
+
licenses:
|
87
|
+
- MIT
|
88
|
+
metadata: {}
|
89
|
+
post_install_message:
|
90
|
+
rdoc_options: []
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
requirements: []
|
104
|
+
rubyforge_project:
|
105
|
+
rubygems_version: 2.2.2
|
106
|
+
signing_key:
|
107
|
+
specification_version: 4
|
108
|
+
summary: 'A solution to the challenge here: http://www.asymmetrik.com/programming-challenges/business-card-ocr.html'
|
109
|
+
test_files:
|
110
|
+
- spec/basic_land_and_cell_number_parser_spec.rb
|
111
|
+
- spec/email_parser_spec.rb
|
112
|
+
- spec/i_business_card_parser_spec.rb
|
113
|
+
- spec/i_contect_info_spec.rb
|
114
|
+
- spec/name_parser_spec.rb
|
115
|
+
- spec/spec_helper.rb
|