gender_detector 0.1.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fdee5d9e457b4a5c4c9e6dd8f52e64cd86fe1bff
4
+ data.tar.gz: 41b06696c8af71759cfffc69c9268471a8cb578d
5
+ SHA512:
6
+ metadata.gz: c799a9cb5b317826b83f3ed794ca4cdf9e501956fcd111cd8f9762ccc49bfcad68a4b49a42fdc888968b7e4edef49df3824d9335fc4989aa7d2fa69cf8e0cf3d
7
+ data.tar.gz: 609ff37bf50849e52ee38922101f81b4fe8db973765b5f7f4b638c63adc6406f59423c29f2f3ce60cf5bed7caa4e09de323d0b35ef3a58d788e7e5d757c5da7a
@@ -1,13 +1,17 @@
1
1
  require 'gender_detector/version'
2
2
 
3
- require "unicode_utils/downcase"
4
-
3
+ # Main class for interacting with the data file
5
4
  class GenderDetector
6
- COUNTRIES = [ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
7
- :germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
8
- :slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
9
- :greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
10
- :india, :japan, :korea, :vietnam, :other_countries ]
5
+ COUNTRIES = [:great_britain, :ireland, :usa, :italy, :malta, :portugal,
6
+ :spain, :france, :belgium, :luxembourg, :the_netherlands,
7
+ :east_frisia, :germany, :austria, :swiss, :iceland, :denmark,
8
+ :norway, :sweden, :finland, :estonia, :latvia, :lithuania,
9
+ :poland, :czech_republic, :slovakia, :hungary, :romania,
10
+ :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia,
11
+ :montenegro, :serbia, :slovenia, :albania, :greece, :russia,
12
+ :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia,
13
+ :the_stans, :turkey, :arabia, :israel, :china, :india, :japan,
14
+ :korea, :vietnam, :other_countries].freeze
11
15
 
12
16
  ISO_3166_MAPPING = {
13
17
  'AE' => :arabia, 'AL' => :albania, 'AM' => :armenia, 'AT' => :austria,
@@ -19,20 +23,21 @@ class GenderDetector
19
23
  'GR' => :greece, 'HK' => :china, 'HR' => :croatia, 'HU' => :hungary,
20
24
  'IE' => :ireland, 'IL' => :israel, 'IN' => :india, 'IS' => :iceland,
21
25
  'IT' => :italy, 'JP' => :japan, 'KP' => :korea, 'KR' => :korea,
22
- 'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg, 'LV' => :latvia,
23
- 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia, 'MT' => :malta,
24
- 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland, 'PT' => :portugal,
25
- 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia, 'RU' => :russia,
26
- 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia, 'SK' => :slovakia,
27
- 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine, 'US' => :usa,
28
- 'UZ' => :the_stans, 'VN' => :vietnam
29
- }
26
+ 'KZ' => :the_stans, 'LT' => :lithuania, 'LU' => :luxembourg,
27
+ 'LV' => :latvia, 'MD' => :moldova, 'ME' => :montenegro, 'MK' => :macedonia,
28
+ 'MT' => :malta, 'NL' => :the_netherlands, 'NO' => :norway, 'PL' => :poland,
29
+ 'PT' => :portugal, 'QA' => :arabia, 'RO' => :romania, 'RS' => :serbia,
30
+ 'RU' => :russia, 'SA' => :arabia, 'SE' => :sweden, 'SI' => :slovenia,
31
+ 'SK' => :slovakia, 'TR' => :turkey, 'TW' => :china, 'UA' => :ukraine,
32
+ 'US' => :usa, 'UZ' => :the_stans, 'VN' => :vietnam
33
+ }.freeze
30
34
 
31
35
  def initialize(opts = {})
36
+ relpath = '../gender_detector/data/nam_dict.txt'
32
37
  opts = {
33
- :filename => File.expand_path('../gender_detector/data/nam_dict.txt', __FILE__),
34
- :case_sensitive => true,
35
- :unknown_value => :andy
38
+ filename: File.expand_path(relpath, __FILE__),
39
+ case_sensitive: true,
40
+ unknown_value: :andy
36
41
  }.merge(opts)
37
42
  @filename = opts[:filename]
38
43
  @case_sensitive = opts[:case_sensitive]
@@ -42,31 +47,31 @@ class GenderDetector
42
47
 
43
48
  def parse(fname)
44
49
  @names = {}
45
- open(fname, "r:iso8859-1:utf-8") { |f|
46
- f.each_line { |line|
50
+ open(fname, 'r:iso8859-1:utf-8') do |f|
51
+ f.each_line do |line|
47
52
  eat_name_line line
48
- }
49
- }
53
+ end
54
+ end
50
55
  end
51
56
 
52
57
  def knows_country?(country)
53
- COUNTRIES.include?(country) or ISO_3166_MAPPING.include?(country)
58
+ COUNTRIES.include?(country) || ISO_3166_MAPPING.include?(country)
54
59
  end
55
60
 
56
61
  def name_exists?(name)
57
- name = UnicodeUtils.downcase(name) unless @case_sensitive
58
- @names.has_key?(name) ? name : false
62
+ name = downcase(name) unless @case_sensitive
63
+ @names.key?(name) ? name : false
59
64
  end
60
65
 
61
66
  def get_gender(name, country = nil)
62
- name = UnicodeUtils.downcase(name) unless @case_sensitive
67
+ name = downcase(name) unless @case_sensitive
63
68
 
64
- if not name_exists?(name)
69
+ if !name_exists?(name)
65
70
  @unknown_value
66
71
  elsif country.nil?
67
- most_popular_gender(name) { |country_values|
68
- country_values.split("").select { |l| l.strip != "" }.length
69
- }
72
+ most_popular_gender(name) do |country_values|
73
+ country_values.split('').select { |l| l.strip != '' }.length
74
+ end
70
75
  elsif COUNTRIES.include?(country)
71
76
  most_popular_gender_in_country(name, country)
72
77
  elsif ISO_3166_MAPPING.include?(country)
@@ -77,57 +82,69 @@ class GenderDetector
77
82
  end
78
83
 
79
84
  def inspect
80
- "#<#{self.class.name} filename=\"#{@filename}\" case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
85
+ "#<#{self.class.name} filename=\"#{@filename}\" " \
86
+ " case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
81
87
  end
82
88
 
83
89
  private
90
+
84
91
  def most_popular_gender_in_country(name, country)
85
92
  index = COUNTRIES.index(country)
86
- most_popular_gender(name) { |country_values|
93
+ most_popular_gender(name) do |country_values|
87
94
  country_values[index].ord
88
- }
95
+ end
89
96
  end
90
97
 
91
98
  def eat_name_line(line)
92
- return if line.start_with?("#") or line.start_with?("=")
99
+ return if line.start_with?('#', '=')
93
100
 
94
- parts = line.split(" ").select { |p| p.strip != "" }
101
+ parts = line.split(' ').select { |p| p.strip != '' }
95
102
  country_values = line.slice(30, line.length)
96
- name = @case_sensitive ? parts[1] : UnicodeUtils.downcase(parts[1])
103
+ name = @case_sensitive ? parts[1] : downcase(parts[1])
97
104
 
98
105
  case parts[0]
99
- when "M" then set(name, :male, country_values)
100
- when "1M", "?M" then set(name, :mostly_male, country_values)
101
- when "F" then set(name, :female, country_values)
102
- when "1F", "?F" then set(name, :mostly_female, country_values)
103
- when "?" then set(name, :andy, country_values)
106
+ when 'M' then set(name, :male, country_values)
107
+ when '1M', '?M' then set(name, :mostly_male, country_values)
108
+ when 'F' then set(name, :female, country_values)
109
+ when '1F', '?F' then set(name, :mostly_female, country_values)
110
+ when '?' then set(name, :andy, country_values)
104
111
  else raise "Not sure what to do with a gender of #{parts[0]}"
105
112
  end
106
113
  end
107
114
 
108
115
  def most_popular_gender(name)
109
- return @unknown_value unless @names.has_key?(name)
116
+ return @unknown_value unless @names.key?(name)
110
117
 
111
118
  max = 0
112
119
  best = @names[name].keys.first
113
- @names[name].each { |gender, country_values|
120
+ @names[name].each do |gender, country_values|
114
121
  count = yield country_values
115
122
  if count > max
116
123
  max = count
117
124
  best = gender
118
125
  end
119
- }
126
+ end
120
127
  best
121
128
  end
122
129
 
123
130
  def set(name, gender, country_values)
124
- if name.include? "+"
125
- [ '', '-', ' ' ].each { |replacement|
126
- set name.gsub("+", replacement), gender, country_values
127
- }
131
+ if name.include? '+'
132
+ ['', '-', ' '].each do |replacement|
133
+ set name.gsub('+', replacement), gender, country_values
134
+ end
128
135
  else
129
136
  @names[name] ||= {}
130
137
  @names[name][gender] = country_values
131
138
  end
132
139
  end
140
+
141
+ def downcase(name)
142
+ if defined?(UnicodeUtils)
143
+ UnicodeUtils.downcase(name)
144
+ elsif defined?(ActiveSupport::Multibyte::Chars)
145
+ name.mb_chars.downcase.to_s
146
+ else
147
+ name.downcase
148
+ end
149
+ end
133
150
  end
@@ -1,13 +1,3 @@
1
1
  class GenderDetector
2
- class Version
3
- MAJOR = 0
4
- MINOR = 1
5
- PATCH = 2
6
-
7
- def self.to_s
8
- [MAJOR, MINOR, PATCH].compact.join('.')
9
- end
10
- end
11
-
12
- VERSION = Version.to_s
2
+ VERSION = '1.0.0'.freeze
13
3
  end
metadata CHANGED
@@ -1,112 +1,132 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gender_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
5
- prerelease:
4
+ version: 1.0.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Brian Muller
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-11-17 00:00:00.000000000 Z
11
+ date: 2016-08-25 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: unicode_utils
14
+ name: rubocop
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: 1.3.0
22
- type: :runtime
19
+ version: '0.42'
20
+ type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: 1.3.0
26
+ version: '0.42'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: minitest
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
- version: '0'
33
+ version: '5.9'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
- version: '0'
40
+ version: '5.9'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rake
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: '0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ! '>='
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  - !ruby/object:Gem::Dependency
63
- name: rdoc
56
+ name: minitest-stub-const
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ! '>='
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
- version: '0'
61
+ version: '0.5'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ! '>='
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
- version: '0'
78
- description: Get gender from first name.
68
+ version: '0.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: unicode_utils
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: activesupport
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '5.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '5.0'
97
+ description: Guess gender from first name, with multilingual support.
79
98
  email:
80
99
  - bamuller@gmail.com
81
100
  executables: []
82
101
  extensions: []
83
102
  extra_rdoc_files: []
84
103
  files:
85
- - lib/gender_detector/version.rb
86
104
  - lib/gender_detector.rb
87
105
  - lib/gender_detector/data/nam_dict.txt
106
+ - lib/gender_detector/version.rb
88
107
  homepage: https://github.com/bmuller/gender_detector
89
- licenses: []
90
- post_install_message:
108
+ licenses:
109
+ - MIT
110
+ metadata: {}
111
+ post_install_message: For unicode support you'll need to also install the unicode_utils
112
+ or activesupport gem
91
113
  rdoc_options: []
92
114
  require_paths:
93
115
  - lib
94
116
  required_ruby_version: !ruby/object:Gem::Requirement
95
- none: false
96
117
  requirements:
97
- - - ! '>='
118
+ - - ">="
98
119
  - !ruby/object:Gem::Version
99
120
  version: 1.9.0
100
121
  required_rubygems_version: !ruby/object:Gem::Requirement
101
- none: false
102
122
  requirements:
103
- - - ! '>='
123
+ - - ">="
104
124
  - !ruby/object:Gem::Version
105
125
  version: '0'
106
126
  requirements: []
107
127
  rubyforge_project:
108
- rubygems_version: 1.8.25
128
+ rubygems_version: 2.5.1
109
129
  signing_key:
110
- specification_version: 3
130
+ specification_version: 4
111
131
  summary: Get gender from first name.
112
132
  test_files: []