gender_detector 0.1.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fdee5d9e457b4a5c4c9e6dd8f52e64cd86fe1bff
4
+ data.tar.gz: 41b06696c8af71759cfffc69c9268471a8cb578d
5
+ SHA512:
6
+ metadata.gz: c799a9cb5b317826b83f3ed794ca4cdf9e501956fcd111cd8f9762ccc49bfcad68a4b49a42fdc888968b7e4edef49df3824d9335fc4989aa7d2fa69cf8e0cf3d
7
+ data.tar.gz: 609ff37bf50849e52ee38922101f81b4fe8db973765b5f7f4b638c63adc6406f59423c29f2f3ce60cf5bed7caa4e09de323d0b35ef3a58d788e7e5d757c5da7a
@@ -1,13 +1,17 @@
1
1
  require 'gender_detector/version'
2
2
 
3
- require "unicode_utils/downcase"
4
-
3
+ # Main class for interacting with the data file
5
4
  class GenderDetector
6
- COUNTRIES = [ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
7
- :germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
8
- :slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
9
- :greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
10
- :india, :japan, :korea, :vietnam, :other_countries ]
5
+ COUNTRIES = [:great_britain, :ireland, :usa, :italy, :malta, :portugal,
6
+ :spain, :france, :belgium, :luxembourg, :the_netherlands,
7
+ :east_frisia, :germany, :austria, :swiss, :iceland, :denmark,
8
+ :norway, :sweden, :finland, :estonia, :latvia, :lithuania,
9
+ :poland, :czech_republic, :slovakia, :hungary, :romania,
10
+ :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia,
11
+ :montenegro, :serbia, :slovenia, :albania, :greece, :russia,
12
+ :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia,
13
+ :the_stans, :turkey, :arabia, :israel, :china, :india, :japan,
14
+ :korea, :vietnam, :other_countries].freeze
11
15
 
12
16
  ISO_3166_MAPPING = {
13
17
  'AE' => :arabia, 'AL' => :albania, 'AM' => :armenia, 'AT' => :austria,
@@ -19,20 +23,21 @@ class GenderDetector
19
23
  'GR' => :greece, 'HK' => :china, 'HR' => :croatia, 'HU' => :hungary,
20
24
  'IE' => :ireland, 'IL' => :israel, 'IN' => :india, 'IS' => :iceland,
21
25
  'IT' => :italy, 'JP' => :japan, 'KP' => :korea, 'KR' => :korea,
22
- 'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg, 'LV' => :latvia,
23
- 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia, 'MT' => :malta,
24
- 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland, 'PT' => :portugal,
25
- 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia, 'RU' => :russia,
26
- 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia, 'SK' => :slovakia,
27
- 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine, 'US' => :usa,
28
- 'UZ' => :the_stans, 'VN' => :vietnam
29
- }
26
+ 'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
27
+ 'LV' => :latvia, 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia,
28
+ 'MT' => :malta, 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland,
29
+ 'PT' => :portugal, 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia,
30
+ 'RU' => :russia, 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia,
31
+ 'SK' => :slovakia, 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine,
32
+ 'US' => :usa, 'UZ' => :the_stans, 'VN' => :vietnam
33
+ }.freeze
30
34
 
31
35
  def initialize(opts = {})
36
+ relpath = '../gender_detector/data/nam_dict.txt'
32
37
  opts = {
33
- :filename => File.expand_path('../gender_detector/data/nam_dict.txt', __FILE__),
34
- :case_sensitive => true,
35
- :unknown_value => :andy
38
+ filename: File.expand_path(relpath, __FILE__),
39
+ case_sensitive: true,
40
+ unknown_value: :andy
36
41
  }.merge(opts)
37
42
  @filename = opts[:filename]
38
43
  @case_sensitive = opts[:case_sensitive]
@@ -42,31 +47,31 @@ class GenderDetector
42
47
 
43
48
  def parse(fname)
44
49
  @names = {}
45
- open(fname, "r:iso8859-1:utf-8") { |f|
46
- f.each_line { |line|
50
+ open(fname, 'r:iso8859-1:utf-8') do |f|
51
+ f.each_line do |line|
47
52
  eat_name_line line
48
- }
49
- }
53
+ end
54
+ end
50
55
  end
51
56
 
52
57
  def knows_country?(country)
53
- COUNTRIES.include?(country) or ISO_3166_MAPPING.include?(country)
58
+ COUNTRIES.include?(country) || ISO_3166_MAPPING.include?(country)
54
59
  end
55
60
 
56
61
  def name_exists?(name)
57
- name = UnicodeUtils.downcase(name) unless @case_sensitive
58
- @names.has_key?(name) ? name : false
62
+ name = downcase(name) unless @case_sensitive
63
+ @names.key?(name) ? name : false
59
64
  end
60
65
 
61
66
  def get_gender(name, country = nil)
62
- name = UnicodeUtils.downcase(name) unless @case_sensitive
67
+ name = downcase(name) unless @case_sensitive
63
68
 
64
- if not name_exists?(name)
69
+ if !name_exists?(name)
65
70
  @unknown_value
66
71
  elsif country.nil?
67
- most_popular_gender(name) { |country_values|
68
- country_values.split("").select { |l| l.strip != "" }.length
69
- }
72
+ most_popular_gender(name) do |country_values|
73
+ country_values.split('').select { |l| l.strip != '' }.length
74
+ end
70
75
  elsif COUNTRIES.include?(country)
71
76
  most_popular_gender_in_country(name, country)
72
77
  elsif ISO_3166_MAPPING.include?(country)
@@ -77,57 +82,69 @@ class GenderDetector
77
82
  end
78
83
 
79
84
  def inspect
80
- "#<#{self.class.name} filename=\"#{@filename}\" case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
85
+ "#<#{self.class.name} filename=\"#{@filename}\" " \
86
+ " case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
81
87
  end
82
88
 
83
89
  private
90
+
84
91
  def most_popular_gender_in_country(name, country)
85
92
  index = COUNTRIES.index(country)
86
- most_popular_gender(name) { |country_values|
93
+ most_popular_gender(name) do |country_values|
87
94
  country_values[index].ord
88
- }
95
+ end
89
96
  end
90
97
 
91
98
  def eat_name_line(line)
92
- return if line.start_with?("#") or line.start_with?("=")
99
+ return if line.start_with?('#', '=')
93
100
 
94
- parts = line.split(" ").select { |p| p.strip != "" }
101
+ parts = line.split(' ').select { |p| p.strip != '' }
95
102
  country_values = line.slice(30, line.length)
96
- name = @case_sensitive ? parts[1] : UnicodeUtils.downcase(parts[1])
103
+ name = @case_sensitive ? parts[1] : downcase(parts[1])
97
104
 
98
105
  case parts[0]
99
- when "M" then set(name, :male, country_values)
100
- when "1M", "?M" then set(name, :mostly_male, country_values)
101
- when "F" then set(name, :female, country_values)
102
- when "1F", "?F" then set(name, :mostly_female, country_values)
103
- when "?" then set(name, :andy, country_values)
106
+ when 'M' then set(name, :male, country_values)
107
+ when '1M', '?M' then set(name, :mostly_male, country_values)
108
+ when 'F' then set(name, :female, country_values)
109
+ when '1F', '?F' then set(name, :mostly_female, country_values)
110
+ when '?' then set(name, :andy, country_values)
104
111
  else raise "Not sure what to do with a gender of #{parts[0]}"
105
112
  end
106
113
  end
107
114
 
108
115
  def most_popular_gender(name)
109
- return @unknown_value unless @names.has_key?(name)
116
+ return @unknown_value unless @names.key?(name)
110
117
 
111
118
  max = 0
112
119
  best = @names[name].keys.first
113
- @names[name].each { |gender, country_values|
120
+ @names[name].each do |gender, country_values|
114
121
  count = yield country_values
115
122
  if count > max
116
123
  max = count
117
124
  best = gender
118
125
  end
119
- }
126
+ end
120
127
  best
121
128
  end
122
129
 
123
130
  def set(name, gender, country_values)
124
- if name.include? "+"
125
- [ '', '-', ' ' ].each { |replacement|
126
- set name.gsub("+", replacement), gender, country_values
127
- }
131
+ if name.include? '+'
132
+ ['', '-', ' '].each do |replacement|
133
+ set name.gsub('+', replacement), gender, country_values
134
+ end
128
135
  else
129
136
  @names[name] ||= {}
130
137
  @names[name][gender] = country_values
131
138
  end
132
139
  end
140
+
141
+ def downcase(name)
142
+ if defined?(UnicodeUtils)
143
+ UnicodeUtils.downcase(name)
144
+ elsif defined?(ActiveSupport::Multibyte::Chars)
145
+ name.mb_chars.downcase.to_s
146
+ else
147
+ name.downcase
148
+ end
149
+ end
133
150
  end
@@ -1,13 +1,3 @@
1
1
  class GenderDetector
2
- class Version
3
- MAJOR = 0
4
- MINOR = 1
5
- PATCH = 2
6
-
7
- def self.to_s
8
- [MAJOR, MINOR, PATCH].compact.join('.')
9
- end
10
- end
11
-
12
- VERSION = Version.to_s
2
+ VERSION = '1.0.0'.freeze
13
3
  end
metadata CHANGED
@@ -1,112 +1,132 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gender_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
5
- prerelease:
4
+ version: 1.0.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Brian Muller
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-11-17 00:00:00.000000000 Z
11
+ date: 2016-08-25 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: unicode_utils
14
+ name: rubocop
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: 1.3.0
22
- type: :runtime
19
+ version: '0.42'
20
+ type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: 1.3.0
26
+ version: '0.42'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: minitest
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
- version: '0'
33
+ version: '5.9'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
- version: '0'
40
+ version: '5.9'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rake
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: '0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ! '>='
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  - !ruby/object:Gem::Dependency
63
- name: rdoc
56
+ name: minitest-stub-const
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ! '>='
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
- version: '0'
61
+ version: '0.5'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ! '>='
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
- version: '0'
78
- description: Get gender from first name.
68
+ version: '0.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: unicode_utils
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '5.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '5.0'
97
+ description: Guess gender from first name, with multilingual support.
79
98
  email:
80
99
  - bamuller@gmail.com
81
100
  executables: []
82
101
  extensions: []
83
102
  extra_rdoc_files: []
84
103
  files:
85
- - lib/gender_detector/version.rb
86
104
  - lib/gender_detector.rb
87
105
  - lib/gender_detector/data/nam_dict.txt
106
+ - lib/gender_detector/version.rb
88
107
  homepage: https://github.com/bmuller/gender_detector
89
- licenses: []
90
- post_install_message:
108
+ licenses:
109
+ - MIT
110
+ metadata: {}
111
+ post_install_message: For unicode support you'll need to also install the unicode_utils
112
+ or activesupport gem
91
113
  rdoc_options: []
92
114
  require_paths:
93
115
  - lib
94
116
  required_ruby_version: !ruby/object:Gem::Requirement
95
- none: false
96
117
  requirements:
97
- - - ! '>='
118
+ - - ">="
98
119
  - !ruby/object:Gem::Version
99
120
  version: 1.9.0
100
121
  required_rubygems_version: !ruby/object:Gem::Requirement
101
- none: false
102
122
  requirements:
103
- - - ! '>='
123
+ - - ">="
104
124
  - !ruby/object:Gem::Version
105
125
  version: '0'
106
126
  requirements: []
107
127
  rubyforge_project:
108
- rubygems_version: 1.8.25
128
+ rubygems_version: 2.5.1
109
129
  signing_key:
110
- specification_version: 3
130
+ specification_version: 4
111
131
  summary: Get gender from first name.
112
132
  test_files: []