gender_detector 0.1.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/gender_detector.rb +65 -48
- data/lib/gender_detector/version.rb +1 -11
- metadata +58 -38
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fdee5d9e457b4a5c4c9e6dd8f52e64cd86fe1bff
|
4
|
+
data.tar.gz: 41b06696c8af71759cfffc69c9268471a8cb578d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c799a9cb5b317826b83f3ed794ca4cdf9e501956fcd111cd8f9762ccc49bfcad68a4b49a42fdc888968b7e4edef49df3824d9335fc4989aa7d2fa69cf8e0cf3d
|
7
|
+
data.tar.gz: 609ff37bf50849e52ee38922101f81b4fe8db973765b5f7f4b638c63adc6406f59423c29f2f3ce60cf5bed7caa4e09de323d0b35ef3a58d788e7e5d757c5da7a
|
data/lib/gender_detector.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
require 'gender_detector/version'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
# Main class for interacting with the data file
|
5
4
|
class GenderDetector
|
6
|
-
COUNTRIES = [
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
COUNTRIES = [:great_britain, :ireland, :usa, :italy, :malta, :portugal,
|
6
|
+
:spain, :france, :belgium, :luxembourg, :the_netherlands,
|
7
|
+
:east_frisia, :germany, :austria, :swiss, :iceland, :denmark,
|
8
|
+
:norway, :sweden, :finland, :estonia, :latvia, :lithuania,
|
9
|
+
:poland, :czech_republic, :slovakia, :hungary, :romania,
|
10
|
+
:bulgaria, :bosniaand, :croatia, :kosovo, :macedonia,
|
11
|
+
:montenegro, :serbia, :slovenia, :albania, :greece, :russia,
|
12
|
+
:belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia,
|
13
|
+
:the_stans, :turkey, :arabia, :israel, :china, :india, :japan,
|
14
|
+
:korea, :vietnam, :other_countries].freeze
|
11
15
|
|
12
16
|
ISO_3166_MAPPING = {
|
13
17
|
'AE' => :arabia, 'AL' => :albania, 'AM' => :armenia, 'AT' => :austria,
|
@@ -19,20 +23,21 @@ class GenderDetector
|
|
19
23
|
'GR' => :greece, 'HK' => :china, 'HR' => :croatia, 'HU' => :hungary,
|
20
24
|
'IE' => :ireland, 'IL' => :israel, 'IN' => :india, 'IS' => :iceland,
|
21
25
|
'IT' => :italy, 'JP' => :japan, 'KP' => :korea, 'KR' => :korea,
|
22
|
-
'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
|
23
|
-
'
|
24
|
-
'
|
25
|
-
'
|
26
|
-
'
|
27
|
-
'
|
28
|
-
'UZ' => :the_stans, 'VN' => :vietnam
|
29
|
-
}
|
26
|
+
'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
|
27
|
+
'LV' => :latvia, 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia,
|
28
|
+
'MT' => :malta, 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland,
|
29
|
+
'PT' => :portugal, 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia,
|
30
|
+
'RU' => :russia, 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia,
|
31
|
+
'SK' => :slovakia, 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine,
|
32
|
+
'US' => :usa, 'UZ' => :the_stans, 'VN' => :vietnam
|
33
|
+
}.freeze
|
30
34
|
|
31
35
|
def initialize(opts = {})
|
36
|
+
relpath = '../gender_detector/data/nam_dict.txt'
|
32
37
|
opts = {
|
33
|
-
:
|
34
|
-
:
|
35
|
-
:
|
38
|
+
filename: File.expand_path(relpath, __FILE__),
|
39
|
+
case_sensitive: true,
|
40
|
+
unknown_value: :andy
|
36
41
|
}.merge(opts)
|
37
42
|
@filename = opts[:filename]
|
38
43
|
@case_sensitive = opts[:case_sensitive]
|
@@ -42,31 +47,31 @@ class GenderDetector
|
|
42
47
|
|
43
48
|
def parse(fname)
|
44
49
|
@names = {}
|
45
|
-
open(fname,
|
46
|
-
f.each_line
|
50
|
+
open(fname, 'r:iso8859-1:utf-8') do |f|
|
51
|
+
f.each_line do |line|
|
47
52
|
eat_name_line line
|
48
|
-
|
49
|
-
|
53
|
+
end
|
54
|
+
end
|
50
55
|
end
|
51
56
|
|
52
57
|
def knows_country?(country)
|
53
|
-
COUNTRIES.include?(country)
|
58
|
+
COUNTRIES.include?(country) || ISO_3166_MAPPING.include?(country)
|
54
59
|
end
|
55
60
|
|
56
61
|
def name_exists?(name)
|
57
|
-
name =
|
58
|
-
@names.
|
62
|
+
name = downcase(name) unless @case_sensitive
|
63
|
+
@names.key?(name) ? name : false
|
59
64
|
end
|
60
65
|
|
61
66
|
def get_gender(name, country = nil)
|
62
|
-
name =
|
67
|
+
name = downcase(name) unless @case_sensitive
|
63
68
|
|
64
|
-
if
|
69
|
+
if !name_exists?(name)
|
65
70
|
@unknown_value
|
66
71
|
elsif country.nil?
|
67
|
-
most_popular_gender(name)
|
68
|
-
country_values.split(
|
69
|
-
|
72
|
+
most_popular_gender(name) do |country_values|
|
73
|
+
country_values.split('').select { |l| l.strip != '' }.length
|
74
|
+
end
|
70
75
|
elsif COUNTRIES.include?(country)
|
71
76
|
most_popular_gender_in_country(name, country)
|
72
77
|
elsif ISO_3166_MAPPING.include?(country)
|
@@ -77,57 +82,69 @@ class GenderDetector
|
|
77
82
|
end
|
78
83
|
|
79
84
|
def inspect
|
80
|
-
"#<#{self.class.name} filename=\"#{@filename}\"
|
85
|
+
"#<#{self.class.name} filename=\"#{@filename}\" " \
|
86
|
+
" case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
|
81
87
|
end
|
82
88
|
|
83
89
|
private
|
90
|
+
|
84
91
|
def most_popular_gender_in_country(name, country)
|
85
92
|
index = COUNTRIES.index(country)
|
86
|
-
most_popular_gender(name)
|
93
|
+
most_popular_gender(name) do |country_values|
|
87
94
|
country_values[index].ord
|
88
|
-
|
95
|
+
end
|
89
96
|
end
|
90
97
|
|
91
98
|
def eat_name_line(line)
|
92
|
-
return if line.start_with?(
|
99
|
+
return if line.start_with?('#', '=')
|
93
100
|
|
94
|
-
parts = line.split(
|
101
|
+
parts = line.split(' ').select { |p| p.strip != '' }
|
95
102
|
country_values = line.slice(30, line.length)
|
96
|
-
name = @case_sensitive ? parts[1] :
|
103
|
+
name = @case_sensitive ? parts[1] : downcase(parts[1])
|
97
104
|
|
98
105
|
case parts[0]
|
99
|
-
when
|
100
|
-
when
|
101
|
-
when
|
102
|
-
when
|
103
|
-
when
|
106
|
+
when 'M' then set(name, :male, country_values)
|
107
|
+
when '1M', '?M' then set(name, :mostly_male, country_values)
|
108
|
+
when 'F' then set(name, :female, country_values)
|
109
|
+
when '1F', '?F' then set(name, :mostly_female, country_values)
|
110
|
+
when '?' then set(name, :andy, country_values)
|
104
111
|
else raise "Not sure what to do with a gender of #{parts[0]}"
|
105
112
|
end
|
106
113
|
end
|
107
114
|
|
108
115
|
def most_popular_gender(name)
|
109
|
-
return @unknown_value unless @names.
|
116
|
+
return @unknown_value unless @names.key?(name)
|
110
117
|
|
111
118
|
max = 0
|
112
119
|
best = @names[name].keys.first
|
113
|
-
@names[name].each
|
120
|
+
@names[name].each do |gender, country_values|
|
114
121
|
count = yield country_values
|
115
122
|
if count > max
|
116
123
|
max = count
|
117
124
|
best = gender
|
118
125
|
end
|
119
|
-
|
126
|
+
end
|
120
127
|
best
|
121
128
|
end
|
122
129
|
|
123
130
|
def set(name, gender, country_values)
|
124
|
-
if name.include?
|
125
|
-
[
|
126
|
-
set name.gsub(
|
127
|
-
|
131
|
+
if name.include? '+'
|
132
|
+
['', '-', ' '].each do |replacement|
|
133
|
+
set name.gsub('+', replacement), gender, country_values
|
134
|
+
end
|
128
135
|
else
|
129
136
|
@names[name] ||= {}
|
130
137
|
@names[name][gender] = country_values
|
131
138
|
end
|
132
139
|
end
|
140
|
+
|
141
|
+
def downcase(name)
|
142
|
+
if defined?(UnicodeUtils)
|
143
|
+
UnicodeUtils.downcase(name)
|
144
|
+
elsif defined?(ActiveSupport::Multibyte::Chars)
|
145
|
+
name.mb_chars.downcase.to_s
|
146
|
+
else
|
147
|
+
name.downcase
|
148
|
+
end
|
149
|
+
end
|
133
150
|
end
|
metadata
CHANGED
@@ -1,112 +1,132 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gender_detector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Brian Muller
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-08-25 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
14
|
+
name: rubocop
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
22
|
-
type: :
|
19
|
+
version: '0.42'
|
20
|
+
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version:
|
26
|
+
version: '0.42'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: minitest
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - "~>"
|
36
32
|
- !ruby/object:Gem::Version
|
37
|
-
version: '
|
33
|
+
version: '5.9'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - "~>"
|
44
39
|
- !ruby/object:Gem::Version
|
45
|
-
version: '
|
40
|
+
version: '5.9'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: rake
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
|
-
name:
|
56
|
+
name: minitest-stub-const
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - "~>"
|
68
60
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
61
|
+
version: '0.5'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - "~>"
|
76
67
|
- !ruby/object:Gem::Version
|
77
|
-
version: '0'
|
78
|
-
|
68
|
+
version: '0.5'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: unicode_utils
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.3'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.3'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '5.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '5.0'
|
97
|
+
description: Guess gender from first name, with multilingual support.
|
79
98
|
email:
|
80
99
|
- bamuller@gmail.com
|
81
100
|
executables: []
|
82
101
|
extensions: []
|
83
102
|
extra_rdoc_files: []
|
84
103
|
files:
|
85
|
-
- lib/gender_detector/version.rb
|
86
104
|
- lib/gender_detector.rb
|
87
105
|
- lib/gender_detector/data/nam_dict.txt
|
106
|
+
- lib/gender_detector/version.rb
|
88
107
|
homepage: https://github.com/bmuller/gender_detector
|
89
|
-
licenses:
|
90
|
-
|
108
|
+
licenses:
|
109
|
+
- MIT
|
110
|
+
metadata: {}
|
111
|
+
post_install_message: For unicode support you'll need to also install the unicode_utils
|
112
|
+
or activesupport gem
|
91
113
|
rdoc_options: []
|
92
114
|
require_paths:
|
93
115
|
- lib
|
94
116
|
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
-
none: false
|
96
117
|
requirements:
|
97
|
-
- -
|
118
|
+
- - ">="
|
98
119
|
- !ruby/object:Gem::Version
|
99
120
|
version: 1.9.0
|
100
121
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
-
none: false
|
102
122
|
requirements:
|
103
|
-
- -
|
123
|
+
- - ">="
|
104
124
|
- !ruby/object:Gem::Version
|
105
125
|
version: '0'
|
106
126
|
requirements: []
|
107
127
|
rubyforge_project:
|
108
|
-
rubygems_version:
|
128
|
+
rubygems_version: 2.5.1
|
109
129
|
signing_key:
|
110
|
-
specification_version:
|
130
|
+
specification_version: 4
|
111
131
|
summary: Get gender from first name.
|
112
132
|
test_files: []
|