gender_detector 0.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/gender_detector/version.rb +1 -11
- data/lib/gender_detector.rb +66 -47
- metadata +46 -40
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7c5163b78567fcc01065054f923931d101f83015
|
4
|
+
data.tar.gz: 28bd20c35b5946ac08c33c413a462e4dfd70e86e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 613a60d4e62eac81e1e2465e103798ea853f5ba4f490161cf79502390d2f13dff7ac6eb06f4adcebff3cbb9d007c9bdba8850561dd6a603fe95d4ba70b3b37de
|
7
|
+
data.tar.gz: 4a4418adabcffdb57f8f09d3f10112747e40c0546a0ca542fcefb5f9b755f1f56e5c11fd4f5ee0260301013a0d9a8da6c2e81a58611d06634491f46eb0740df5
|
data/lib/gender_detector.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
require 'gender_detector/version'
|
2
2
|
|
3
|
-
|
3
|
+
if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.4.0')
|
4
|
+
require 'active_support/core_ext/string/multibyte'
|
5
|
+
end
|
4
6
|
|
7
|
+
# Main class for interacting with the data file
|
5
8
|
class GenderDetector
|
6
|
-
COUNTRIES = [
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
COUNTRIES = %i[great_britain ireland usa italy malta portugal
|
10
|
+
spain france belgium luxembourg the_netherlands
|
11
|
+
east_frisia germany austria swiss iceland denmark
|
12
|
+
norway sweden finland estonia latvia lithuania
|
13
|
+
poland czech_republic slovakia hungary romania
|
14
|
+
bulgaria bosniaand croatia kosovo macedonia
|
15
|
+
montenegro serbia slovenia albania greece russia
|
16
|
+
belarus moldova ukraine armenia azerbaijan georgia
|
17
|
+
the_stans turkey arabia israel china india japan
|
18
|
+
korea vietnam other_countries].freeze
|
11
19
|
|
12
20
|
ISO_3166_MAPPING = {
|
13
21
|
'AE' => :arabia, 'AL' => :albania, 'AM' => :armenia, 'AT' => :austria,
|
@@ -19,20 +27,21 @@ class GenderDetector
|
|
19
27
|
'GR' => :greece, 'HK' => :china, 'HR' => :croatia, 'HU' => :hungary,
|
20
28
|
'IE' => :ireland, 'IL' => :israel, 'IN' => :india, 'IS' => :iceland,
|
21
29
|
'IT' => :italy, 'JP' => :japan, 'KP' => :korea, 'KR' => :korea,
|
22
|
-
'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
|
23
|
-
'
|
24
|
-
'
|
25
|
-
'
|
26
|
-
'
|
27
|
-
'
|
28
|
-
'UZ' => :the_stans, 'VN' => :vietnam
|
29
|
-
}
|
30
|
+
'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
|
31
|
+
'LV' => :latvia, 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia,
|
32
|
+
'MT' => :malta, 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland,
|
33
|
+
'PT' => :portugal, 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia,
|
34
|
+
'RU' => :russia, 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia,
|
35
|
+
'SK' => :slovakia, 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine,
|
36
|
+
'US' => :usa, 'UZ' => :the_stans, 'VN' => :vietnam
|
37
|
+
}.freeze
|
30
38
|
|
31
39
|
def initialize(opts = {})
|
40
|
+
relpath = '../gender_detector/data/nam_dict.txt'
|
32
41
|
opts = {
|
33
|
-
:
|
34
|
-
:
|
35
|
-
:
|
42
|
+
filename: File.expand_path(relpath, __FILE__),
|
43
|
+
case_sensitive: true,
|
44
|
+
unknown_value: :andy
|
36
45
|
}.merge(opts)
|
37
46
|
@filename = opts[:filename]
|
38
47
|
@case_sensitive = opts[:case_sensitive]
|
@@ -42,31 +51,31 @@ class GenderDetector
|
|
42
51
|
|
43
52
|
def parse(fname)
|
44
53
|
@names = {}
|
45
|
-
open(fname,
|
46
|
-
f.each_line
|
54
|
+
open(fname, 'r:iso8859-1:utf-8') do |f|
|
55
|
+
f.each_line do |line|
|
47
56
|
eat_name_line line
|
48
|
-
|
49
|
-
|
57
|
+
end
|
58
|
+
end
|
50
59
|
end
|
51
60
|
|
52
61
|
def knows_country?(country)
|
53
|
-
COUNTRIES.include?(country)
|
62
|
+
COUNTRIES.include?(country) || ISO_3166_MAPPING.include?(country)
|
54
63
|
end
|
55
64
|
|
56
65
|
def name_exists?(name)
|
57
|
-
name =
|
58
|
-
@names.
|
66
|
+
name = downcase(name) unless @case_sensitive
|
67
|
+
@names.key?(name) ? name : false
|
59
68
|
end
|
60
69
|
|
61
70
|
def get_gender(name, country = nil)
|
62
|
-
name =
|
71
|
+
name = downcase(name) unless @case_sensitive
|
63
72
|
|
64
|
-
if
|
73
|
+
if !name_exists?(name)
|
65
74
|
@unknown_value
|
66
75
|
elsif country.nil?
|
67
|
-
most_popular_gender(name)
|
68
|
-
country_values.split(
|
69
|
-
|
76
|
+
most_popular_gender(name) do |country_values|
|
77
|
+
country_values.split('').reject { |l| l.strip == '' }.length
|
78
|
+
end
|
70
79
|
elsif COUNTRIES.include?(country)
|
71
80
|
most_popular_gender_in_country(name, country)
|
72
81
|
elsif ISO_3166_MAPPING.include?(country)
|
@@ -77,57 +86,67 @@ class GenderDetector
|
|
77
86
|
end
|
78
87
|
|
79
88
|
def inspect
|
80
|
-
"#<#{self.class.name} filename=\"#{@filename}\"
|
89
|
+
"#<#{self.class.name} filename=\"#{@filename}\" " \
|
90
|
+
" case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
|
81
91
|
end
|
82
92
|
|
83
93
|
private
|
94
|
+
|
84
95
|
def most_popular_gender_in_country(name, country)
|
85
96
|
index = COUNTRIES.index(country)
|
86
|
-
most_popular_gender(name)
|
97
|
+
most_popular_gender(name) do |country_values|
|
87
98
|
country_values[index].ord
|
88
|
-
|
99
|
+
end
|
89
100
|
end
|
90
101
|
|
91
102
|
def eat_name_line(line)
|
92
|
-
return if line.start_with?(
|
103
|
+
return if line.start_with?('#', '=')
|
93
104
|
|
94
|
-
parts = line.split(
|
105
|
+
parts = line.split(' ').reject { |p| p.strip == '' }
|
95
106
|
country_values = line.slice(30, line.length)
|
96
|
-
name = @case_sensitive ? parts[1] :
|
107
|
+
name = @case_sensitive ? parts[1] : downcase(parts[1])
|
97
108
|
|
98
109
|
case parts[0]
|
99
|
-
when
|
100
|
-
when
|
101
|
-
when
|
102
|
-
when
|
103
|
-
when
|
110
|
+
when 'M' then set(name, :male, country_values)
|
111
|
+
when '1M', '?M' then set(name, :mostly_male, country_values)
|
112
|
+
when 'F' then set(name, :female, country_values)
|
113
|
+
when '1F', '?F' then set(name, :mostly_female, country_values)
|
114
|
+
when '?' then set(name, :andy, country_values)
|
104
115
|
else raise "Not sure what to do with a gender of #{parts[0]}"
|
105
116
|
end
|
106
117
|
end
|
107
118
|
|
108
119
|
def most_popular_gender(name)
|
109
|
-
return @unknown_value unless @names.
|
120
|
+
return @unknown_value unless @names.key?(name)
|
110
121
|
|
111
122
|
max = 0
|
112
123
|
best = @names[name].keys.first
|
113
|
-
@names[name].each
|
124
|
+
@names[name].each do |gender, country_values|
|
114
125
|
count = yield country_values
|
115
126
|
if count > max
|
116
127
|
max = count
|
117
128
|
best = gender
|
118
129
|
end
|
119
|
-
|
130
|
+
end
|
120
131
|
best
|
121
132
|
end
|
122
133
|
|
123
134
|
def set(name, gender, country_values)
|
124
|
-
if name.include?
|
125
|
-
[
|
126
|
-
set name.gsub(
|
127
|
-
|
135
|
+
if name.include? '+'
|
136
|
+
['', '-', ' '].each do |replacement|
|
137
|
+
set name.gsub('+', replacement), gender, country_values
|
138
|
+
end
|
128
139
|
else
|
129
140
|
@names[name] ||= {}
|
130
141
|
@names[name][gender] = country_values
|
131
142
|
end
|
132
143
|
end
|
144
|
+
|
145
|
+
def downcase(name)
|
146
|
+
if defined?(ActiveSupport::Multibyte::Chars)
|
147
|
+
name.mb_chars.downcase.to_s
|
148
|
+
else
|
149
|
+
name.downcase
|
150
|
+
end
|
151
|
+
end
|
133
152
|
end
|
metadata
CHANGED
@@ -1,112 +1,118 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gender_detector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 2.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Brian Muller
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2017-09-17 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
14
|
+
name: rubocop
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
22
|
-
type: :
|
19
|
+
version: '0.50'
|
20
|
+
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version:
|
26
|
+
version: '0.50'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: minitest
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - "~>"
|
36
32
|
- !ruby/object:Gem::Version
|
37
|
-
version: '
|
33
|
+
version: '5.10'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - "~>"
|
44
39
|
- !ruby/object:Gem::Version
|
45
|
-
version: '
|
40
|
+
version: '5.10'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: rake
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '12.1'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '12.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activesupport
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
52
60
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
61
|
+
version: '5.1'
|
54
62
|
type: :development
|
55
63
|
prerelease: false
|
56
64
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
65
|
requirements:
|
59
|
-
- -
|
66
|
+
- - "~>"
|
60
67
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
68
|
+
version: '5.1'
|
62
69
|
- !ruby/object:Gem::Dependency
|
63
|
-
name:
|
70
|
+
name: minitest-stub-const
|
64
71
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
72
|
requirements:
|
67
|
-
- -
|
73
|
+
- - "~>"
|
68
74
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
75
|
+
version: '0.6'
|
70
76
|
type: :development
|
71
77
|
prerelease: false
|
72
78
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
79
|
requirements:
|
75
|
-
- -
|
80
|
+
- - "~>"
|
76
81
|
- !ruby/object:Gem::Version
|
77
|
-
version: '0'
|
78
|
-
description:
|
82
|
+
version: '0.6'
|
83
|
+
description: Guess gender from first name, with multilingual support.
|
79
84
|
email:
|
80
85
|
- bamuller@gmail.com
|
81
86
|
executables: []
|
82
87
|
extensions: []
|
83
88
|
extra_rdoc_files: []
|
84
89
|
files:
|
85
|
-
- lib/gender_detector/version.rb
|
86
90
|
- lib/gender_detector.rb
|
87
91
|
- lib/gender_detector/data/nam_dict.txt
|
92
|
+
- lib/gender_detector/version.rb
|
88
93
|
homepage: https://github.com/bmuller/gender_detector
|
89
|
-
licenses:
|
90
|
-
|
94
|
+
licenses:
|
95
|
+
- MIT
|
96
|
+
metadata: {}
|
97
|
+
post_install_message: For unicode support you'll need to also install the unicode_utils
|
98
|
+
or activesupport gem
|
91
99
|
rdoc_options: []
|
92
100
|
require_paths:
|
93
101
|
- lib
|
94
102
|
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
-
none: false
|
96
103
|
requirements:
|
97
|
-
- -
|
104
|
+
- - ">="
|
98
105
|
- !ruby/object:Gem::Version
|
99
106
|
version: 1.9.0
|
100
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
-
none: false
|
102
108
|
requirements:
|
103
|
-
- -
|
109
|
+
- - ">="
|
104
110
|
- !ruby/object:Gem::Version
|
105
111
|
version: '0'
|
106
112
|
requirements: []
|
107
113
|
rubyforge_project:
|
108
|
-
rubygems_version:
|
114
|
+
rubygems_version: 2.6.13
|
109
115
|
signing_key:
|
110
|
-
specification_version:
|
116
|
+
specification_version: 4
|
111
117
|
summary: Get gender from first name.
|
112
118
|
test_files: []
|