ladyboy 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +65 -0
- data/Rakefile +10 -0
- data/data/name2name.csv +3024 -0
- data/data/name2sex.csv +1307 -0
- data/ladyboy.gemspec +25 -0
- data/lib/ladyboy.rb +45 -0
- data/lib/ladyboy/parser.rb +36 -0
- data/lib/ladyboy/version.rb +3 -0
- data/spec/ladyboy/parser_spec.rb +62 -0
- data/spec/spec_helper.rb +4 -0
- metadata +101 -0
data/ladyboy.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'ladyboy/version'
|
5
|
+
|
6
|
+
# Gem specification for ladyboy. The version comes from Ladyboy::VERSION,
# which is loaded by the require above this block.
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = "ladyboy"
  gem.version     = Ladyboy::VERSION
  gem.authors     = ["Peter Yanovich"]
  gem.email       = ["fl00r@yandex.ru"]
  gem.summary     = "Hard choice: boy or girl"
  gem.description = "Detecting sex by name"
  gem.homepage    = ""
  gem.license     = "Floctory"

  # Package every file tracked by git; `-z` separates paths with NUL bytes
  # so unusual file names survive the split.
  gem.files         = `git ls-files -z`.split("\x0")
  # Anything under bin/ is exposed as an executable, by base name.
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w[lib]

  # Needed only when developing the gem itself.
  gem.add_development_dependency "bundler", "~> 1.6"
  gem.add_development_dependency "rake"

  # Runtime dependency: lib/ladyboy.rb requires "unicode".
  gem.add_dependency "unicode"
end
|
data/lib/ladyboy.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require "ladyboy/version"
|
2
|
+
require "unicode"
|
3
|
+
|
4
|
+
# Lookup tables that map nicknames to canonical first names and first names
# to a sex code, loaded lazily from the CSV files under the gem's data/
# directory.
#
# The module extends itself, so every public method is callable as
# Ladyboy.<method>.
module Ladyboy
  extend self

  # Numeric sex codes accepted from name2sex.csv and the value each one maps
  # to. Rows whose code is not a key of this hash are ignored by #sexes.
  SEX = {
    0 => nil,
    1 => :male,
    2 => :female
  }.freeze

  # The CSV files are always decoded with this encoding. Relying on the
  # process default external encoding would yield keys that never match
  # UTF-8 lookups when the locale is not UTF-8 (e.g. LANG=C).
  DATA_ENCODING = "UTF-8"

  # Returns the absolute path two levels above this file (the gem root when
  # this file lives at lib/ladyboy.rb).
  def root
    File.expand_path("../../", __FILE__)
  end

  # Nickname => canonical name, built from data/name2name.csv.
  #
  # Each row is "nick,name,type"; only rows whose type is exactly "first"
  # are kept. When a nickname occurs more than once, the last row wins.
  # The table is built once and memoized.
  #
  # https://github.com/flocktory/Rinatolytics/blob/master/social/names_substitutes.csv
  def names
    @names ||= begin
      table = {}
      each_data_line("name2name.csv") do |line, _index|
        nick, name, type = line.split(",")
        # An empty nickname can only ever be hit by a token that normalizes
        # to "" (e.g. digits only), which would be a false positive.
        next if nick.nil? || nick.empty?
        table[nick] = name if type == "first"
      end
      table
    end
  end

  # Name => [sex_code, priority], built from data/name2sex.csv.
  #
  # Each row is "name,code". The code is converted with to_i (so a missing
  # or non-numeric code becomes 0) and the row is kept only when the code is
  # a key of SEX. priority is the zero-based line index of the row, blank
  # lines included. The table is built once and memoized.
  #
  # https://github.com/flocktory/Rinatolytics/blob/master/social/firstnames.csv
  def sexes
    @sexes ||= begin
      table = {}
      each_data_line("name2sex.csv") do |line, priority|
        name, code = line.split(",")
        # Skip blank rows instead of storing a nil / "" key.
        next if name.nil? || name.empty?
        code = code.to_i
        table[name] = [code, priority] if SEX.key?(code)
      end
      table
    end
  end

  private

  # Yields every line of data/<file_name> (line ending removed) together
  # with its zero-based line index.
  def each_data_line(file_name)
    path = File.join(root, "data", file_name)
    File.read(path, encoding: DATA_ENCODING).each_line.with_index do |line, index|
      yield line.chomp, index
    end
  end
end
|
44
|
+
|
45
|
+
require File.join(Ladyboy.root, 'lib', 'ladyboy', 'parser')
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Ladyboy
  # Extracts a canonical first name and a gender from free-form text such as
  # a full name or an e-mail address.
  #
  # The input is split into word tokens. Each token is normalized and looked
  # up in Ladyboy.names / Ladyboy.sexes, and the recognized token with the
  # lowest priority (the earliest line of the name2sex table) wins.
  #
  #   parser = Ladyboy::Parser.new("vasya1985@mail.ru")
  #   parser.first_name # capitalized canonical name, or nil if none found
  #   parser.gender     # :male, :female or nil (also available as #sex)
  class Parser
    attr_reader :first_name, :gender, :full_name
    alias_method :sex, :gender

    # full_name - any object; it is converted with to_s before parsing, so
    #             nil is accepted and simply yields no name and no gender.
    def initialize(full_name)
      @full_name = full_name
      parse!
    end

    private

    # Lower-cases the token, strips digits and resolves it to a canonical
    # name: the nickname table is consulted first, then the token itself is
    # accepted if it is a key of the sex table. Returns nil when the token
    # is not recognized.
    def normalize_name(name)
      key = Unicode.downcase(name.to_s).gsub(/\d+/, '') # .gsub("ё", "е")
      Ladyboy.names[key] || (Ladyboy.sexes[key] && key)
    end

    # Fills @first_name and @gender from the best candidate; leaves both nil
    # when no token of the input is recognized.
    def parse!
      name, sex_code = major_name_and_sex_for_full_name(@full_name)
      return unless name

      @first_name = Unicode.capitalize(name)
      @gender = SEX[sex_code]
    end

    # Returns [name, sex_code, priority] for the recognized token with the
    # lowest priority, or nil when nothing is recognized.
    #
    # A canonical name missing from the sex table gets a nil sex code and an
    # infinite priority, so it only wins when no better candidate exists.
    # min_by keeps the first of equally ranked candidates, i.e. the one that
    # appears earliest in the input.
    def major_name_and_sex_for_full_name(full_name)
      candidates = full_name.to_s.scan(/[[:word:]]+/).map do |token|
        name = normalize_name(token)
        next unless name

        [name, *(Ladyboy.sexes[name] || [nil, Float::INFINITY])]
      end

      candidates.compact.min_by(&:last)
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Behavioural examples for Ladyboy::Parser. The expected names and genders
# depend on the tables shipped in data/name2name.csv and data/name2sex.csv.
#
# Each scenario is its own example so that one failure does not hide the
# others. nil expectations use must_be_nil, because `must_equal nil` is
# deprecated in Minitest 5 and fails in Minitest 6.
describe Ladyboy::Parser do
  it "resolves a plain first name and exposes sex as an alias of gender" do
    example = Ladyboy::Parser.new 'Петр'
    example.first_name.must_equal 'Петр'
    example.gender.must_equal :male
    example.sex.must_equal :male # alias to gender
  end

  it "maps a nickname to its canonical name" do
    example = Ladyboy::Parser.new 'Оля'
    example.first_name.must_equal 'Ольга'
    example.gender.must_equal :female
  end

  it "returns a nil gender for unisex names" do
    example = Ladyboy::Parser.new 'zhenya'
    example.first_name.must_equal 'Женя'
    example.gender.must_be_nil # unisex

    example = Ladyboy::Parser.new 'Саша'
    example.first_name.must_equal 'Саша'
    example.gender.must_be_nil
  end

  it "returns nothing when no token is a known name" do
    example = Ladyboy::Parser.new 'Рамзанкадыров'
    example.first_name.must_be_nil
    example.gender.must_be_nil

    example = Ladyboy::Parser.new 'Рамзанкадыров Тут Проходил'
    example.first_name.must_be_nil
    example.gender.must_be_nil
  end

  it "picks the major name out of several tokens" do
    example = Ladyboy::Parser.new 'пётр, Абулгары Эрнест '
    example.first_name.must_equal 'Петр'
    example.gender.must_equal :male

    example = Ladyboy::Parser.new 'Абулгары Эрнест!'
    example.first_name.must_equal 'Эрнест'
    example.gender.must_equal :male

    example = Ladyboy::Parser.new 'Пётр, Абулгары Наташа!'
    example.first_name.must_equal 'Наталья'
    example.gender.must_equal :female
  end

  it "extracts names from e-mail addresses" do
    example = Ladyboy::Parser.new 'vasya1985@mail.ru'
    example.first_name.must_equal 'Василий'
    example.gender.must_equal :male

    example = Ladyboy::Parser.new 'kolya.petrov@google.com'
    example.first_name.must_equal 'Николай'
    example.gender.must_equal :male

    example = Ladyboy::Parser.new 'olga.petrov@yandex.ru'
    example.first_name.must_equal 'Ольга'
    example.gender.must_equal :female

    example = Ladyboy::Parser.new 'dark.imperior@live.com'
    example.first_name.must_be_nil
    example.gender.must_be_nil
  end

  it "accepts nil input" do
    example = Ladyboy::Parser.new nil
    example.first_name.must_be_nil
    example.gender.must_be_nil
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ladyboy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter Yanovich
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: unicode
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Detecting sex by name
|
56
|
+
email:
|
57
|
+
- fl00r@yandex.ru
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE.txt
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- data/name2name.csv
|
68
|
+
- data/name2sex.csv
|
69
|
+
- ladyboy.gemspec
|
70
|
+
- lib/ladyboy.rb
|
71
|
+
- lib/ladyboy/parser.rb
|
72
|
+
- lib/ladyboy/version.rb
|
73
|
+
- spec/ladyboy/parser_spec.rb
|
74
|
+
- spec/spec_helper.rb
|
75
|
+
homepage: ''
|
76
|
+
licenses:
|
77
|
+
- Floctory
|
78
|
+
metadata: {}
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options: []
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 2.5.0
|
96
|
+
signing_key:
|
97
|
+
specification_version: 4
|
98
|
+
summary: 'Hard choice: boy or girl'
|
99
|
+
test_files:
|
100
|
+
- spec/ladyboy/parser_spec.rb
|
101
|
+
- spec/spec_helper.rb
|