guess_who_no_fuzzy 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +87 -0
- data/LICENSE.txt +20 -0
- data/README.md +12 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/files/females.txt +4275 -0
- data/files/initials.txt +26 -0
- data/files/males.txt +1219 -0
- data/files/surnames.txt +88799 -0
- data/guess_who_no_fuzzy.gemspec +73 -0
- data/lib/guess_who_no_fuzzy.rb +7 -0
- data/lib/guess_who_no_fuzzy/comparator.rb +13 -0
- data/lib/guess_who_no_fuzzy/names_loader.rb +30 -0
- data/lib/guess_who_no_fuzzy/profiler.rb +53 -0
- data/lib/guess_who_no_fuzzy/scorer.rb +37 -0
- data/lib/guess_who_no_fuzzy/tokenizer.rb +42 -0
- data/spec/lib/profiler_spec.rb +47 -0
- data/spec/spec_helper.rb +29 -0
- metadata +139 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: guess_who_no_fuzzy 0.2.0 ruby lib
|
6
|
+
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.name = "guess_who_no_fuzzy"
|
9
|
+
s.version = "0.2.0"
|
10
|
+
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
13
|
+
s.authors = ["odina", "roy"]
|
14
|
+
s.date = "2017-10-19"
|
15
|
+
s.description = "Extracts name from username or email"
|
16
|
+
s.email = ["odina.abellana@gmail.com", "roy.evan.sia@gmail.com"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE.txt",
|
19
|
+
"README.md"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
".rspec",
|
24
|
+
"Gemfile",
|
25
|
+
"Gemfile.lock",
|
26
|
+
"LICENSE.txt",
|
27
|
+
"README.md",
|
28
|
+
"Rakefile",
|
29
|
+
"VERSION",
|
30
|
+
"files/females.txt",
|
31
|
+
"files/initials.txt",
|
32
|
+
"files/males.txt",
|
33
|
+
"files/surnames.txt",
|
34
|
+
"guess_who_no_fuzzy.gemspec",
|
35
|
+
"lib/guess_who_no_fuzzy.rb",
|
36
|
+
"lib/guess_who_no_fuzzy/comparator.rb",
|
37
|
+
"lib/guess_who_no_fuzzy/names_loader.rb",
|
38
|
+
"lib/guess_who_no_fuzzy/profiler.rb",
|
39
|
+
"lib/guess_who_no_fuzzy/scorer.rb",
|
40
|
+
"lib/guess_who_no_fuzzy/tokenizer.rb",
|
41
|
+
"spec/lib/profiler_spec.rb",
|
42
|
+
"spec/spec_helper.rb"
|
43
|
+
]
|
44
|
+
s.homepage = "http://github.com/darklight721/guess_who"
|
45
|
+
s.licenses = ["MIT"]
|
46
|
+
s.rubygems_version = "2.5.1"
|
47
|
+
s.summary = "Extract name from email"
|
48
|
+
|
49
|
+
if s.respond_to? :specification_version then
|
50
|
+
s.specification_version = 4
|
51
|
+
|
52
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
|
+
s.add_development_dependency(%q<rspec>, ["~> 3.5.0"])
|
54
|
+
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
55
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.15"])
|
56
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.2.0"])
|
57
|
+
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
58
|
+
else
|
59
|
+
s.add_dependency(%q<rspec>, ["~> 3.5.0"])
|
60
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
61
|
+
s.add_dependency(%q<bundler>, ["~> 1.15"])
|
62
|
+
s.add_dependency(%q<jeweler>, ["~> 2.2.0"])
|
63
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
64
|
+
end
|
65
|
+
else
|
66
|
+
s.add_dependency(%q<rspec>, ["~> 3.5.0"])
|
67
|
+
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
68
|
+
s.add_dependency(%q<bundler>, ["~> 1.15"])
|
69
|
+
s.add_dependency(%q<jeweler>, ["~> 2.2.0"])
|
70
|
+
s.add_dependency(%q<simplecov>, [">= 0"])
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module GuessWhoNoFuzzy
  # Decides whether a candidate result should replace the current best one.
  module Comparator
    # Compares a contender against the current best, first by score and then,
    # on a tie, by length (the shorter or equally long option wins).
    #
    # @param contender [Numeric] score of the candidate
    # @param best [Numeric] score of the current best
    # @param contender_length [Integer] length of the candidate
    # @param best_length [Integer] length of the current best
    # @return [Boolean] true when the contender should replace the best;
    #   always a real boolean, never nil
    def self.better?(contender, best, contender_length, best_length)
      return false unless contender >= best
      # Not the same score: we have a winner.
      return true unless contender == best

      # Same score: go with the simpler option.
      contender_length <= best_length
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module GuessWhoNoFuzzy
  # Reads the name list files and indexes every name by its first character.
  module NamesLoader
    # Name list files, relative to the data root, in the order they are read.
    FILENAMES = [
      "files/initials.txt",
      "files/females.txt",
      "files/males.txt",
      "files/surnames.txt"
    ].freeze

    # Default data root: two directory levels above this source file.
    DEFAULT_ROOT = File.join(File.dirname(__FILE__), '../../')

    # Reads every file in FILENAMES and rebuilds the index.
    #
    # Only the first whitespace-separated field of each line is used as the
    # name; lines without any field (blank lines) are skipped.
    #
    # @param root [String] directory that contains the "files" directory;
    #   defaults to DEFAULT_ROOT
    # @return [Hash{String => Array<String>}] names grouped by first character
    # @raise [SystemCallError] if a list file cannot be opened
    def self.load!(root = DEFAULT_ROOT)
      index = {}

      FILENAMES.each do |filename|
        # File.foreach closes the file even when the block raises.
        File.foreach(File.join(root, filename)) do |line|
          name = line.split(" ").first
          next if name.nil? # blank line: nothing to index

          (index[name[0]] ||= []) << name
        end
      end

      @names_by_first_letter = index
    end

    # Returns the names starting with the given character.
    # Loads the default lists on first use if load! has not been called yet.
    #
    # @param letter [String, nil] first character to look up
    # @return [Array<String>] matching names, or an empty array when none exist
    def self.names_by_first_letter(letter)
      (@names_by_first_letter || load!)[letter] || []
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module GuessWhoNoFuzzy
  # Guesses a full name from the part of an email address before the "@"
  # (a bare username without "@" is used as a whole).
  class Profiler
    attr_reader :full_name,
                :email

    # Builds a profiler for the email and runs it.
    #
    # @param email [String, nil] email address or username
    # @return [Profiler] the profiled instance
    def self.profile!(email)
      new(email).profile!
    end

    # @param email [String, nil] email address or username
    def initialize(email)
      @email = email
      @full_name = ""
    end

    # Splits the upcased local part on every non-letter character, picks the
    # best tokenization for each piece and stores the capitalized tokens,
    # joined by spaces, in full_name. A nil or empty email yields "".
    #
    # @return [Profiler] self
    def profile!
      # to_s on both ends: email may be nil, and "".split("@").first is nil.
      local_part = email.to_s.split("@").first.to_s.upcase
      pieces = local_part.split(/[^A-Z]/)

      @full_name = pieces.flat_map { |piece| best_parts(piece) }
                         .map(&:capitalize)
                         .join(" ")

      self
    end

    private

    # Returns the tokens of the best candidate split of str according to
    # Scorer and Comparator, or [] when no candidate beats the empty start.
    def best_parts(str)
      best = { score: 0, parts: [], count: 0 }

      Scorer.score!(Tokenizer.tokenize!(str)) do |score, tokens|
        next unless Comparator.better?(score, best[:score], tokens.size, best[:count])

        best = { score: score, parts: tokens, count: tokens.size }
      end

      best[:parts]
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module GuessWhoNoFuzzy
  # Scores a token array by the fraction of its tokens that are known names.
  class Scorer
    # Scores every token array, yielding each score together with its tokens
    # when a block is given.
    #
    # @param token_arrays [Array<Array<String>>] candidate tokenizations
    # @yieldparam score [Float] score of the token array
    # @yieldparam token_array [Array<String>] the tokens that were scored
    # @return [Array<Float>] the scores, in the order of token_arrays
    def self.score!(token_arrays, &block)
      token_arrays.map do |token_array|
        score = new(token_array).score!
        block.call(score, token_array) if block
        score
      end
    end

    # @param token_array [Array<String>] tokens to score
    def initialize(token_array)
      @token_array = token_array
    end

    # @return [Float] share of tokens found in the name lists, between 0.0
    #   and 1.0; 0.0 for an empty token array
    def score!
      # Guard the division below against an empty token array.
      return 0.0 if @token_array.empty?

      matches = @token_array.count { |token| included_in_names?(token) }
      matches.to_f / @token_array.length
    end

    private

    # True when the token appears among the names that share its first character.
    def included_in_names?(token)
      NamesLoader.names_by_first_letter(token[0]).include?(token)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module GuessWhoNoFuzzy
  # Enumerates the ways a string can be cut into one, two or three
  # consecutive, non-empty parts.
  class Tokenizer
    # @param str [String, nil] string to split
    # @return [Array<Array<String>>] see #tokenize!
    def self.tokenize!(str)
      new(str).tokenize!
    end

    # @param str [String, nil] string to split; nil is treated as ""
    def initialize(str)
      @str = str.to_s
    end

    # Lists every split of the string, ordered by growing first part and,
    # within one first part, by growing middle part. Each split appears
    # exactly once; the unsplit string comes last.
    #
    #   "ABC" => [["A", "B", "C"], ["A", "BC"], ["AB", "C"], ["ABC"]]
    #
    # @return [Array<Array<String>>] all splits, [] for an empty string
    def tokenize!
      (1..@str.size).flat_map do |first_size|
        first_name = @str[0, first_size]
        rest = @str[first_size..-1]

        # The first part consumed the whole string: only the unsplit form remains.
        next [[first_name]] if rest.empty?

        (1..rest.size).map do |mid_size|
          last_name = rest[mid_size..-1]
          parts = [first_name, rest[0, mid_size]]
          parts << last_name unless last_name.empty?
          parts
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "spec_helper"

describe GuessWhoNoFuzzy::Profiler do
  # Username (or email local part) => name parts the profiler should extract.
  expected_names = {
    "christianbundy" => %w[Christian Bundy],
    "emmitt.a.riggin" => %w[Emmitt A Riggin],
    "meghannyunker" => %w[Meghann Yunker],
    "huechadwick" => %w[Hue Chadwick],
    "delphia.a.kimmer" => %w[Delphia A Kimmer],
    "lilymendel" => %w[Lily Mendel],
    "natisha.pedroza" => %w[Natisha Pedroza],
    "leoramauch" => %w[Leora Mauch],
    "jamika.mcgranahan" => %w[Jamika Mcgranahan],
    "celestinachittenden" => %w[Celestina Chittenden],
    "jadacwalson" => %w[Jada C Walson],
    "denese.d.eichler" => %w[Denese D Eichler],
    "marybethgant" => %w[Marybeth Gant],
    "ashleamondy" => %w[Ashlea Mondy],
    "brittanynowakowski" => %w[Brittany Nowakowski],
    "nelliersepeda" => %w[Nellie R Sepeda],
    "anastasia.matchett" => %w[Anastasia Matchett],
    "glory.mclester" => %w[Glory Mclester],
    "wilburn.f.hinkson" => %w[Wilburn F Hinkson],
    "grace.k.baham" => %w[Grace K Baham],
    "haroldcrick" => %w[Harold Crick]
  }

  it "guesses the full name from a dotted email address and keeps the email" do
    email = "john.doe@gmail.com"
    profiler = GuessWhoNoFuzzy::Profiler.profile!(email)

    expect(profiler.full_name).to eq("John Doe")
    expect(profiler.email).to eq("john.doe@gmail.com")
  end

  # One example per name, so a single wrong guess does not hide the others.
  expected_names.each do |username, parts|
    full_name = parts.join(" ")

    it "extracts #{full_name.inspect} from #{username.inspect}" do
      profiler = GuessWhoNoFuzzy::Profiler.profile!(username)

      expect(profiler.full_name).to eq(full_name)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'simplecov'

# Extends SimpleCov's configuration with a helper that discards every filter
# registered so far.
module SimpleCov::Configuration
  def clean_filters
    @filters = []
  end
end

SimpleCov.configure do
  clean_filters
  # load_profile is the current name of the deprecated load_adapter.
  load_profile 'test_frameworks'
end

# Coverage is only collected when the COVERAGE environment variable is set.
ENV["COVERAGE"] && SimpleCov.start do
  add_filter "/.rvm/"
end
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

require 'rspec'
require 'guess_who_no_fuzzy'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }

RSpec.configure do |config|
end
|
metadata
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: guess_who_no_fuzzy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- odina
|
8
|
+
- roy
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2017-10-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 3.5.0
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 3.5.0
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rdoc
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '3.12'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '3.12'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: bundler
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '1.15'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '1.15'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: jeweler
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 2.2.0
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 2.2.0
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: simplecov
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
description: Extracts name from username or email
|
85
|
+
email:
|
86
|
+
- odina.abellana@gmail.com
|
87
|
+
- roy.evan.sia@gmail.com
|
88
|
+
executables: []
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files:
|
91
|
+
- LICENSE.txt
|
92
|
+
- README.md
|
93
|
+
files:
|
94
|
+
- ".document"
|
95
|
+
- ".rspec"
|
96
|
+
- Gemfile
|
97
|
+
- Gemfile.lock
|
98
|
+
- LICENSE.txt
|
99
|
+
- README.md
|
100
|
+
- Rakefile
|
101
|
+
- VERSION
|
102
|
+
- files/females.txt
|
103
|
+
- files/initials.txt
|
104
|
+
- files/males.txt
|
105
|
+
- files/surnames.txt
|
106
|
+
- guess_who_no_fuzzy.gemspec
|
107
|
+
- lib/guess_who_no_fuzzy.rb
|
108
|
+
- lib/guess_who_no_fuzzy/comparator.rb
|
109
|
+
- lib/guess_who_no_fuzzy/names_loader.rb
|
110
|
+
- lib/guess_who_no_fuzzy/profiler.rb
|
111
|
+
- lib/guess_who_no_fuzzy/scorer.rb
|
112
|
+
- lib/guess_who_no_fuzzy/tokenizer.rb
|
113
|
+
- spec/lib/profiler_spec.rb
|
114
|
+
- spec/spec_helper.rb
|
115
|
+
homepage: http://github.com/darklight721/guess_who
|
116
|
+
licenses:
|
117
|
+
- MIT
|
118
|
+
metadata: {}
|
119
|
+
post_install_message:
|
120
|
+
rdoc_options: []
|
121
|
+
require_paths:
|
122
|
+
- lib
|
123
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
requirements:
|
125
|
+
- - ">="
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
128
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0'
|
133
|
+
requirements: []
|
134
|
+
rubyforge_project:
|
135
|
+
rubygems_version: 2.5.1
|
136
|
+
signing_key:
|
137
|
+
specification_version: 4
|
138
|
+
summary: Extract name from email
|
139
|
+
test_files: []
|