gender_detector 0.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7c5163b78567fcc01065054f923931d101f83015
4
+ data.tar.gz: 28bd20c35b5946ac08c33c413a462e4dfd70e86e
5
+ SHA512:
6
+ metadata.gz: 613a60d4e62eac81e1e2465e103798ea853f5ba4f490161cf79502390d2f13dff7ac6eb06f4adcebff3cbb9d007c9bdba8850561dd6a603fe95d4ba70b3b37de
7
+ data.tar.gz: 4a4418adabcffdb57f8f09d3f10112747e40c0546a0ca542fcefb5f9b755f1f56e5c11fd4f5ee0260301013a0d9a8da6c2e81a58611d06634491f46eb0740df5
@@ -1,13 +1,3 @@
1
1
  class GenderDetector
2
- class Version
3
- MAJOR = 0
4
- MINOR = 1
5
- PATCH = 2
6
-
7
- def self.to_s
8
- [MAJOR, MINOR, PATCH].compact.join('.')
9
- end
10
- end
11
-
12
- VERSION = Version.to_s
2
+ VERSION = '2.0.0'.freeze
13
3
  end
@@ -1,13 +1,21 @@
1
1
  require 'gender_detector/version'
2
2
 
3
- require "unicode_utils/downcase"
3
+ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.4.0')
4
+ require 'active_support/core_ext/string/multibyte'
5
+ end
4
6
 
7
+ # Main class for interacting with the data file
5
8
  class GenderDetector
6
- COUNTRIES = [ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
7
- :germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
8
- :slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
9
- :greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
10
- :india, :japan, :korea, :vietnam, :other_countries ]
9
+ COUNTRIES = %i[great_britain ireland usa italy malta portugal
10
+ spain france belgium luxembourg the_netherlands
11
+ east_frisia germany austria swiss iceland denmark
12
+ norway sweden finland estonia latvia lithuania
13
+ poland czech_republic slovakia hungary romania
14
+ bulgaria bosniaand croatia kosovo macedonia
15
+ montenegro serbia slovenia albania greece russia
16
+ belarus moldova ukraine armenia azerbaijan georgia
17
+ the_stans turkey arabia israel china india japan
18
+ korea vietnam other_countries].freeze
11
19
 
12
20
  ISO_3166_MAPPING = {
13
21
  'AE' => :arabia, 'AL' => :albania, 'AM' => :armenia, 'AT' => :austria,
@@ -19,20 +27,21 @@ class GenderDetector
19
27
  'GR' => :greece, 'HK' => :china, 'HR' => :croatia, 'HU' => :hungary,
20
28
  'IE' => :ireland, 'IL' => :israel, 'IN' => :india, 'IS' => :iceland,
21
29
  'IT' => :italy, 'JP' => :japan, 'KP' => :korea, 'KR' => :korea,
22
- 'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg, 'LV' => :latvia,
23
- 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia, 'MT' => :malta,
24
- 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland, 'PT' => :portugal,
25
- 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia, 'RU' => :russia,
26
- 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia, 'SK' => :slovakia,
27
- 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine, 'US' => :usa,
28
- 'UZ' => :the_stans, 'VN' => :vietnam
29
- }
30
+ 'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
31
+ 'LV' => :latvia, 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia,
32
+ 'MT' => :malta, 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland,
33
+ 'PT' => :portugal, 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia,
34
+ 'RU' => :russia, 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia,
35
+ 'SK' => :slovakia, 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine,
36
+ 'US' => :usa, 'UZ' => :the_stans, 'VN' => :vietnam
37
+ }.freeze
30
38
 
31
39
  def initialize(opts = {})
40
+ relpath = '../gender_detector/data/nam_dict.txt'
32
41
  opts = {
33
- :filename => File.expand_path('../gender_detector/data/nam_dict.txt', __FILE__),
34
- :case_sensitive => true,
35
- :unknown_value => :andy
42
+ filename: File.expand_path(relpath, __FILE__),
43
+ case_sensitive: true,
44
+ unknown_value: :andy
36
45
  }.merge(opts)
37
46
  @filename = opts[:filename]
38
47
  @case_sensitive = opts[:case_sensitive]
@@ -42,31 +51,31 @@ class GenderDetector
42
51
 
43
52
  def parse(fname)
44
53
  @names = {}
45
- open(fname, "r:iso8859-1:utf-8") { |f|
46
- f.each_line { |line|
54
+ open(fname, 'r:iso8859-1:utf-8') do |f|
55
+ f.each_line do |line|
47
56
  eat_name_line line
48
- }
49
- }
57
+ end
58
+ end
50
59
  end
51
60
 
52
61
  def knows_country?(country)
53
- COUNTRIES.include?(country) or ISO_3166_MAPPING.include?(country)
62
+ COUNTRIES.include?(country) || ISO_3166_MAPPING.include?(country)
54
63
  end
55
64
 
56
65
  def name_exists?(name)
57
- name = UnicodeUtils.downcase(name) unless @case_sensitive
58
- @names.has_key?(name) ? name : false
66
+ name = downcase(name) unless @case_sensitive
67
+ @names.key?(name) ? name : false
59
68
  end
60
69
 
61
70
  def get_gender(name, country = nil)
62
- name = UnicodeUtils.downcase(name) unless @case_sensitive
71
+ name = downcase(name) unless @case_sensitive
63
72
 
64
- if not name_exists?(name)
73
+ if !name_exists?(name)
65
74
  @unknown_value
66
75
  elsif country.nil?
67
- most_popular_gender(name) { |country_values|
68
- country_values.split("").select { |l| l.strip != "" }.length
69
- }
76
+ most_popular_gender(name) do |country_values|
77
+ country_values.split('').reject { |l| l.strip == '' }.length
78
+ end
70
79
  elsif COUNTRIES.include?(country)
71
80
  most_popular_gender_in_country(name, country)
72
81
  elsif ISO_3166_MAPPING.include?(country)
@@ -77,57 +86,67 @@ class GenderDetector
77
86
  end
78
87
 
79
88
  def inspect
80
- "#<#{self.class.name} filename=\"#{@filename}\" case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
89
+ "#<#{self.class.name} filename=\"#{@filename}\" " \
90
+ " case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
81
91
  end
82
92
 
83
93
  private
94
+
84
95
  def most_popular_gender_in_country(name, country)
85
96
  index = COUNTRIES.index(country)
86
- most_popular_gender(name) { |country_values|
97
+ most_popular_gender(name) do |country_values|
87
98
  country_values[index].ord
88
- }
99
+ end
89
100
  end
90
101
 
91
102
  def eat_name_line(line)
92
- return if line.start_with?("#") or line.start_with?("=")
103
+ return if line.start_with?('#', '=')
93
104
 
94
- parts = line.split(" ").select { |p| p.strip != "" }
105
+ parts = line.split(' ').reject { |p| p.strip == '' }
95
106
  country_values = line.slice(30, line.length)
96
- name = @case_sensitive ? parts[1] : UnicodeUtils.downcase(parts[1])
107
+ name = @case_sensitive ? parts[1] : downcase(parts[1])
97
108
 
98
109
  case parts[0]
99
- when "M" then set(name, :male, country_values)
100
- when "1M", "?M" then set(name, :mostly_male, country_values)
101
- when "F" then set(name, :female, country_values)
102
- when "1F", "?F" then set(name, :mostly_female, country_values)
103
- when "?" then set(name, :andy, country_values)
110
+ when 'M' then set(name, :male, country_values)
111
+ when '1M', '?M' then set(name, :mostly_male, country_values)
112
+ when 'F' then set(name, :female, country_values)
113
+ when '1F', '?F' then set(name, :mostly_female, country_values)
114
+ when '?' then set(name, :andy, country_values)
104
115
  else raise "Not sure what to do with a gender of #{parts[0]}"
105
116
  end
106
117
  end
107
118
 
108
119
  def most_popular_gender(name)
109
- return @unknown_value unless @names.has_key?(name)
120
+ return @unknown_value unless @names.key?(name)
110
121
 
111
122
  max = 0
112
123
  best = @names[name].keys.first
113
- @names[name].each { |gender, country_values|
124
+ @names[name].each do |gender, country_values|
114
125
  count = yield country_values
115
126
  if count > max
116
127
  max = count
117
128
  best = gender
118
129
  end
119
- }
130
+ end
120
131
  best
121
132
  end
122
133
 
123
134
  def set(name, gender, country_values)
124
- if name.include? "+"
125
- [ '', '-', ' ' ].each { |replacement|
126
- set name.gsub("+", replacement), gender, country_values
127
- }
135
+ if name.include? '+'
136
+ ['', '-', ' '].each do |replacement|
137
+ set name.gsub('+', replacement), gender, country_values
138
+ end
128
139
  else
129
140
  @names[name] ||= {}
130
141
  @names[name][gender] = country_values
131
142
  end
132
143
  end
144
+
145
+ def downcase(name)
146
+ if defined?(ActiveSupport::Multibyte::Chars)
147
+ name.mb_chars.downcase.to_s
148
+ else
149
+ name.downcase
150
+ end
151
+ end
133
152
  end
metadata CHANGED
@@ -1,112 +1,118 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gender_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
5
- prerelease:
4
+ version: 2.0.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Brian Muller
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-11-17 00:00:00.000000000 Z
11
+ date: 2017-09-17 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: unicode_utils
14
+ name: rubocop
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: 1.3.0
22
- type: :runtime
19
+ version: '0.50'
20
+ type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: 1.3.0
26
+ version: '0.50'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: minitest
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
- version: '0'
33
+ version: '5.10'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
- version: '0'
40
+ version: '5.10'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rake
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '12.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '12.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: activesupport
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
52
60
  - !ruby/object:Gem::Version
53
- version: '0'
61
+ version: '5.1'
54
62
  type: :development
55
63
  prerelease: false
56
64
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
65
  requirements:
59
- - - ! '>='
66
+ - - "~>"
60
67
  - !ruby/object:Gem::Version
61
- version: '0'
68
+ version: '5.1'
62
69
  - !ruby/object:Gem::Dependency
63
- name: rdoc
70
+ name: minitest-stub-const
64
71
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
72
  requirements:
67
- - - ! '>='
73
+ - - "~>"
68
74
  - !ruby/object:Gem::Version
69
- version: '0'
75
+ version: '0.6'
70
76
  type: :development
71
77
  prerelease: false
72
78
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
79
  requirements:
75
- - - ! '>='
80
+ - - "~>"
76
81
  - !ruby/object:Gem::Version
77
- version: '0'
78
- description: Get gender from first name.
82
+ version: '0.6'
83
+ description: Guess gender from first name, with multilingual support.
79
84
  email:
80
85
  - bamuller@gmail.com
81
86
  executables: []
82
87
  extensions: []
83
88
  extra_rdoc_files: []
84
89
  files:
85
- - lib/gender_detector/version.rb
86
90
  - lib/gender_detector.rb
87
91
  - lib/gender_detector/data/nam_dict.txt
92
+ - lib/gender_detector/version.rb
88
93
  homepage: https://github.com/bmuller/gender_detector
89
- licenses: []
90
- post_install_message:
94
+ licenses:
95
+ - MIT
96
+ metadata: {}
97
+ post_install_message: For unicode support you'll need to also install the unicode_utils
98
+ or activesupport gem
91
99
  rdoc_options: []
92
100
  require_paths:
93
101
  - lib
94
102
  required_ruby_version: !ruby/object:Gem::Requirement
95
- none: false
96
103
  requirements:
97
- - - ! '>='
104
+ - - ">="
98
105
  - !ruby/object:Gem::Version
99
106
  version: 1.9.0
100
107
  required_rubygems_version: !ruby/object:Gem::Requirement
101
- none: false
102
108
  requirements:
103
- - - ! '>='
109
+ - - ">="
104
110
  - !ruby/object:Gem::Version
105
111
  version: '0'
106
112
  requirements: []
107
113
  rubyforge_project:
108
- rubygems_version: 1.8.25
114
+ rubygems_version: 2.6.13
109
115
  signing_key:
110
- specification_version: 3
116
+ specification_version: 4
111
117
  summary: Get gender from first name.
112
118
  test_files: []