guess_who 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +99 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/files/females.txt +4275 -0
- data/files/initials.txt +26 -0
- data/files/males.txt +1219 -0
- data/files/surnames.txt +88799 -0
- data/guess_who.gemspec +76 -0
- data/lib/guess_who/comparator.rb +13 -0
- data/lib/guess_who/names_loader.rb +26 -0
- data/lib/guess_who/profiler.rb +53 -0
- data/lib/guess_who/scorer.rb +34 -0
- data/lib/guess_who/tokenizer.rb +28 -0
- data/lib/guess_who.rb +7 -0
- data/spec/lib/profiler_spec.rb +47 -0
- data/spec/spec_helper.rb +29 -0
- metadata +150 -0
data/guess_who.gemspec
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: guess_who 0.1.0 ruby lib
|
6
|
+
|
7
|
+
# Gem specification for guess_who. This file is generated by jeweler (see the
# header above), so changes belong in the Rakefile's Jeweler::Tasks block.
Gem::Specification.new do |s|
  s.name = "guess_who".freeze
  s.version = "0.1.0"

  # Only set when the running RubyGems exposes this attribute.
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
  s.require_paths = ["lib".freeze]
  s.authors = ["odina".freeze]
  s.date = "2017-10-14"
  s.description = "Extracts name from username or email".freeze
  s.email = "odina.abellana@gmail.com".freeze
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.rdoc"
  ]
  # Everything packaged into the gem, including the name lists under files/.
  s.files = [
    ".document",
    ".rspec",
    "Gemfile",
    "Gemfile.lock",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "files/females.txt",
    "files/initials.txt",
    "files/males.txt",
    "files/surnames.txt",
    "guess_who.gemspec",
    "lib/guess_who.rb",
    "lib/guess_who/comparator.rb",
    "lib/guess_who/names_loader.rb",
    "lib/guess_who/profiler.rb",
    "lib/guess_who/scorer.rb",
    "lib/guess_who/tokenizer.rb",
    "spec/lib/profiler_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/odina/guess_who".freeze
  s.licenses = ["MIT".freeze]
  s.rubygems_version = "2.6.10".freeze
  s.summary = "Extract name from email".freeze

  # Dependency declarations come in three variants so the spec loads on
  # RubyGems versions with different APIs. amatch is the only runtime
  # dependency; the rest are development tools.
  if s.respond_to? :specification_version then
    s.specification_version = 4

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      # RubyGems >= 1.2.0: runtime and development dependencies are distinguished.
      s.add_runtime_dependency(%q<amatch>.freeze, ["~> 0.4.0"])
      s.add_development_dependency(%q<rspec>.freeze, ["~> 3.5.0"])
      s.add_development_dependency(%q<rdoc>.freeze, ["~> 3.12"])
      s.add_development_dependency(%q<bundler>.freeze, ["~> 1.15"])
      s.add_development_dependency(%q<jeweler>.freeze, ["~> 2.2.0"])
      s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
    else
      # Older RubyGems: every dependency is declared through add_dependency.
      s.add_dependency(%q<amatch>.freeze, ["~> 0.4.0"])
      s.add_dependency(%q<rspec>.freeze, ["~> 3.5.0"])
      s.add_dependency(%q<rdoc>.freeze, ["~> 3.12"])
      s.add_dependency(%q<bundler>.freeze, ["~> 1.15"])
      s.add_dependency(%q<jeweler>.freeze, ["~> 2.2.0"])
      s.add_dependency(%q<simplecov>.freeze, [">= 0"])
    end
  else
    # No specification_version support: same flat dependency list.
    s.add_dependency(%q<amatch>.freeze, ["~> 0.4.0"])
    s.add_dependency(%q<rspec>.freeze, ["~> 3.5.0"])
    s.add_dependency(%q<rdoc>.freeze, ["~> 3.12"])
    s.add_dependency(%q<bundler>.freeze, ["~> 1.15"])
    s.add_dependency(%q<jeweler>.freeze, ["~> 2.2.0"])
    s.add_dependency(%q<simplecov>.freeze, [">= 0"])
  end
end
|
76
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module GuessWho
  # Decides whether a newly scored candidate should replace the current best.
  module Comparator
    # Compares a candidate against the current best.
    #
    # A strictly higher score always wins. On an equal score the candidate
    # wins only when it is at least as simple, i.e. its length is not greater
    # than the best's length.
    #
    # @param contender [Numeric] score of the candidate
    # @param best [Numeric] score of the current best
    # @param contender_length [Integer] size of the candidate
    # @param best_length [Integer] size of the current best
    # @return [Boolean] true when the candidate should replace the best;
    #   always a real boolean (a losing candidate yields false, not nil)
    def self.better?(contender, best, contender_length, best_length)
      return false unless contender >= best
      return true unless contender == best # not the same score: we have a winner

      # Same score: go with the simpler option.
      contender_length <= best_length
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require "amatch"
|
2
|
+
|
3
|
+
module GuessWho
  # Loads the known-name lists shipped with the gem and keeps them in memory.
  module NamesLoader
    # Root of the gem (two levels above lib/guess_who), where files/ lives.
    # Resolving against this directory instead of the process working
    # directory lets the lists load no matter where the caller runs from.
    DEFAULT_BASE_DIR = File.expand_path("../..", __dir__).freeze

    # Name lists, relative to the base directory, in load order.
    FILENAMES = [
      "files/initials.txt",
      "files/females.txt",
      "files/males.txt",
      "files/surnames.txt"
    ].freeze

    # Reads every name list and caches the result for {names}.
    #
    # Only the first whitespace-separated field of each line is kept; blank
    # lines are skipped so the list never contains nil.
    #
    # @param base_dir [String] directory containing the files/ folder;
    #   defaults to the gem root (pass Dir.pwd for working-directory lookup)
    # @return [Array<String>] all loaded names
    # @raise [SystemCallError] (e.g. Errno::ENOENT) when a list cannot be read
    def self.load!(base_dir = DEFAULT_BASE_DIR)
      loaded = []

      FILENAMES.each do |filename|
        # File.foreach closes the file itself, even if the block raises.
        File.foreach(File.join(base_dir, filename)) do |line|
          name = line.split(" ").first
          loaded << name if name
        end
      end

      @names = loaded
    end

    # @return [Array<String>] the cached names, loading them on first use
    def self.names
      @names ||= load!
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module GuessWho
  # Guesses a person's full name from an email address or bare username.
  class Profiler
    attr_reader :full_name, :email

    # Builds a profiler for +email+ and runs it.
    #
    # @param email [String] email address or username
    # @return [Profiler] the profiled instance (read #full_name for the guess)
    def self.profile!(email)
      new(email).profile!
    end

    # @param email [String] email address or username
    def initialize(email)
      @email = email
      @full_name = ""
    end

    # Computes #full_name from the part of the email before the first "@".
    #
    # That part is cut at every non-letter character; each remaining chunk is
    # tokenized, every tokenization is scored, and the best-scoring one
    # supplies the name parts. An empty or nil email yields an empty name
    # instead of raising.
    #
    # @return [Profiler] self
    def profile!
      # to_s on both ends: a nil email and "".split("@").first (nil) are safe.
      local_part = email.to_s.split("@").first.to_s
      # Consecutive separators produce empty chunks, which carry no name.
      chunks = local_part.split(/[^a-zA-Z]/).reject(&:empty?)

      name_parts = chunks.flat_map { |chunk| best_parts_for(chunk) }
      @full_name = name_parts.map(&:capitalize).join(" ")

      self
    end

    private

    # Returns the tokens of the best-scoring tokenization of +chunk+
    # (an empty array when no tokenization beats the initial score of 0).
    def best_parts_for(chunk)
      best_score = 0
      best_parts = []

      Scorer.score!(Tokenizer.tokenize!(chunk)) do |score, tokens|
        if Comparator.better?(score, best_score, tokens.size, best_parts.size)
          best_score = score
          best_parts = tokens
        end
      end

      best_parts
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module GuessWho
  # Scores a tokenization by how closely its tokens resemble known names.
  class Scorer
    # Scores every token array, yielding each result as it is computed.
    #
    # @param token_arrays [Array<Array<String>>] candidate tokenizations
    # @yield [score, token_array] called once per candidate when a block is given
    # @return [Array<Numeric>] the scores, in the order of +token_arrays+
    def self.score!(token_arrays)
      token_arrays.map do |token_array|
        score = new(token_array).score!
        yield score, token_array if block_given?
        score
      end
    end

    # @param token_array [Array<String>] tokens of one candidate tokenization
    def initialize(token_array)
      @token_array = token_array
    end

    # Averages, over all tokens, the best Jaro-Winkler match of the token
    # against the names provided by NamesLoader.
    #
    # @return [Numeric] the average best match; 0.0 for an empty token array
    def score!
      # Guard the division below against an empty candidate.
      return 0.0 if @token_array.empty?

      names = NamesLoader.names

      total = @token_array.inject(0) do |sum, token|
        matcher = Amatch::JaroWinkler.new(token)
        # max is nil when no names are loaded; count that as no match at all.
        best = names.map { |name| matcher.match(name) }.max || 0.0
        sum + best
      end

      total / @token_array.length
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module GuessWho
  # Enumerates candidate ways of cutting a string into up to three name parts.
  class Tokenizer
    # @param str [String] the string to cut up
    # @return [Array<Array<String>>] unique candidate tokenizations
    def self.tokenize!(str)
      new(str).tokenize!
    end

    # @param str [String] the string to cut up (nil is treated as "")
    def initialize(str)
      @raw_str = str.to_s
    end

    # Builds every candidate split of the form
    # [first, middle, rest] with empty parts dropped.
    #
    # +first+ is each non-empty prefix of the string. +middle+ takes up to
    # +max_middle+ of the letters that follow, where +max_middle+ runs from
    # the prefix length to the string length, and +rest+ is the remaining
    # run of letters. The match is anchored at the start of the string, so a
    # prefix that recurs later (e.g. "jo" in "jojo") no longer contributes
    # extra, duplicated tokens.
    #
    # @return [Array<Array<String>>] unique candidates, in generation order
    def tokenize!
      candidates = []
      total_length = @raw_str.length

      (1..total_length).each do |first_length|
        # Escape so a prefix containing regex metacharacters matches literally.
        first = Regexp.escape(@raw_str[0, first_length])

        (first_length..total_length).each do |max_middle|
          pattern = /\A(#{first})([a-zA-Z]{,#{max_middle}})([a-zA-Z]*)/
          found = pattern.match(@raw_str)
          next unless found

          parts = found.captures.reject(&:empty?)
          candidates << parts unless parts.empty?
        end
      end

      candidates.uniq
    end
  end
end
|
data/lib/guess_who.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# End-to-end examples for GuessWho::Profiler, written with the expect syntax
# (the implicit `should` syntax is deprecated in RSpec 3).
describe GuessWho::Profiler do
  it "should generate a guess for the name from an email with 1 word" do
    email = "john.doe@gmail.com"
    profiler = GuessWho::Profiler.profile!(email)

    expect(profiler.full_name).to eq("John Doe")
    expect(profiler.email).to eq("john.doe@gmail.com")
  end

  it "should extract all test names correctly" do
    # Username => expected name parts.
    test_names = {
      "christianbundy" => %w[Christian Bundy],
      "emmitt.a.riggin" => %w[Emmitt A Riggin],
      "meghannyunker" => %w[Meghann Yunker],
      "huechadwick" => %w[Hue Chadwick],
      "delphia.a.kimmer" => %w[Delphia A Kimmer],
      "lilymendel" => %w[Lily Mendel],
      "natisha.pedroza" => %w[Natisha Pedroza],
      "leoramauch" => %w[Leora Mauch],
      "jamika.mcgranahan" => %w[Jamika Mcgranahan],
      "celestinachittenden" => %w[Celestina Chittenden],
      "jadacwalson" => %w[Ja Dac Walson],
      "denese.d.eichler" => %w[Denese D Eichler],
      "marybethgant" => %w[Marybeth Gant],
      "ashleamondy" => %w[Ashlea Mondy],
      "brittanynowakowski" => %w[Brittany Nowakowski],
      "nelliersepeda" => %w[Nellier Sepeda],
      "anastasia.matchett" => %w[Anastasia Matchett],
      "glory.mclester" => %w[Glory Mclester],
      "wilburn.f.hinkson" => %w[Wilburn F Hinkson],
      "grace.k.baham" => %w[Grace K Baham],
      "haroldcrick" => %w[Harold Crick]
    }

    # Check every name and report all mismatches, not just the first one.
    aggregate_failures do
      test_names.each do |name, extracted|
        expected = extracted.join(" ")
        actual = GuessWho::Profiler.profile!(name).full_name

        expect(actual).to eq(expected), "#{name}: expected #{expected.inspect}, got #{actual.inspect}"
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Test bootstrap: configures SimpleCov, puts lib/ and spec/ on the load path,
# then loads RSpec and the library under test.
require 'simplecov'

# Reopens SimpleCov's configuration module to add a helper that resets the
# @filters list to an empty array.
module SimpleCov::Configuration
  def clean_filters
    @filters = []
  end
end

SimpleCov.configure do
  clean_filters
  load_adapter 'test_frameworks'
end

# Coverage is started only when the COVERAGE environment variable is set.
ENV["COVERAGE"] && SimpleCov.start do
  add_filter "/.rvm/"
end
# Make lib/ (one level up) and this spec directory requirable by bare name.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

require 'rspec'
require 'guess_who'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}

# Placeholder for RSpec settings; nothing is configured here.
RSpec.configure do |config|

end
|
metadata
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: guess_who
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- odina
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-10-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: amatch
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.4.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.4.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.5.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.5.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rdoc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.12'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.12'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.15'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.15'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: jeweler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 2.2.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.2.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Extracts name from username or email
|
98
|
+
email: odina.abellana@gmail.com
|
99
|
+
executables: []
|
100
|
+
extensions: []
|
101
|
+
extra_rdoc_files:
|
102
|
+
- LICENSE.txt
|
103
|
+
- README.rdoc
|
104
|
+
files:
|
105
|
+
- ".document"
|
106
|
+
- ".rspec"
|
107
|
+
- Gemfile
|
108
|
+
- Gemfile.lock
|
109
|
+
- LICENSE.txt
|
110
|
+
- README.rdoc
|
111
|
+
- Rakefile
|
112
|
+
- VERSION
|
113
|
+
- files/females.txt
|
114
|
+
- files/initials.txt
|
115
|
+
- files/males.txt
|
116
|
+
- files/surnames.txt
|
117
|
+
- guess_who.gemspec
|
118
|
+
- lib/guess_who.rb
|
119
|
+
- lib/guess_who/comparator.rb
|
120
|
+
- lib/guess_who/names_loader.rb
|
121
|
+
- lib/guess_who/profiler.rb
|
122
|
+
- lib/guess_who/scorer.rb
|
123
|
+
- lib/guess_who/tokenizer.rb
|
124
|
+
- spec/lib/profiler_spec.rb
|
125
|
+
- spec/spec_helper.rb
|
126
|
+
homepage: http://github.com/odina/guess_who
|
127
|
+
licenses:
|
128
|
+
- MIT
|
129
|
+
metadata: {}
|
130
|
+
post_install_message:
|
131
|
+
rdoc_options: []
|
132
|
+
require_paths:
|
133
|
+
- lib
|
134
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
requirements: []
|
145
|
+
rubyforge_project:
|
146
|
+
rubygems_version: 2.6.10
|
147
|
+
signing_key:
|
148
|
+
specification_version: 4
|
149
|
+
summary: Extract name from email
|
150
|
+
test_files: []
|