sexmachine 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sexmachine (0.0.3)
4
+ sexmachine (0.0.4)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -8,13 +8,22 @@ This gem uses the underlying data from the program "gender" by Jorg Michael (des
8
8
  >> d.get_gender("Sally")
9
9
  :female
10
10
  >> d.get_gender("Pauley") # should be androgynous
11
- :andy
11
+ :andy
12
+
13
+ The result will be one of andy (androgynous), male, female, mostly_male, or mostly_female. Any unknown names are considered andies.
12
14
 
13
15
  I18N is fully supported:
14
16
 
15
17
  >> d.get_gender("Álfrún")
16
18
  :female
17
19
 
20
+ Additionally, you can give preference to specific countries:
21
+
22
+ >> d.get_gender("Jamie")
23
+ => :female
24
+ >> d.get_gender("Jamie", :great_britain)
25
+ => :mostly_male
26
+
18
27
  If you have an alternative data file, you can pass that in as an optional argument to the Detector.
19
28
 
20
29
  Try to avoid creating many Detectors, as each creation means reading in the data file.
@@ -1,6 +1,12 @@
1
1
  module SexMachine
2
2
 
3
3
  class Detector
4
+ COUNTRIES = [ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
5
+ :germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
6
+ :slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
7
+ :greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
8
+ :india, :japan, :korea, :vietnam, :other_countries ]
9
+
4
10
  def initialize(fname=nil)
5
11
  fname ||= File.expand_path('../data/nam_dict.txt', __FILE__)
6
12
  parse fname
@@ -10,40 +16,68 @@ module SexMachine
10
16
  @names = {}
11
17
  open(fname, "r:iso8859-1:utf-8") { |f|
12
18
  f.each_line { |line|
13
- eatNameLine line
19
+ eat_name_line line
14
20
  }
15
21
  }
16
22
  end
17
23
 
18
- def get_gender(name)
19
- @names.fetch(name, :andy)
24
+ def get_gender(name, country = nil)
25
+ if not @names.has_key?(name)
26
+ :andy
27
+ elsif country.nil?
28
+ most_popular_gender(name) { |country_values|
29
+ country_values.split("").select { |l| l.strip != "" }.length
30
+ }
31
+ elsif COUNTRIES.include?(country)
32
+ index = COUNTRIES.index(country)
33
+ most_popular_gender(name) { |country_values|
34
+ country_values[index].ord
35
+ }
36
+ else
37
+ raise "No such country: #{country}"
38
+ end
20
39
  end
21
40
 
22
41
  private
23
- def eatNameLine(line)
42
+ def eat_name_line(line)
24
43
  return if line.start_with?("#") or line.start_with?("=")
25
44
 
26
45
  parts = line.split(" ").select { |p| p.strip != "" }
27
-
28
- if parts[0].include? "F"
29
- set parts[1], :female
30
- elsif parts[0].include? "M"
31
- set parts[1], :male
32
- else
33
- set parts[1], :andy
46
+ country_values = line.slice(30, line.length)
47
+
48
+ case parts[0]
49
+ when "M" then set(parts[1], :male, country_values)
50
+ when "1M", "?M" then set(parts[1], :mostly_male, country_values)
51
+ when "F" then set(parts[1], :female, country_values)
52
+ when "1F", "?F" then set(parts[1], :mostly_female, country_values)
53
+ when "?" then set(parts[1], :andy, country_values)
54
+ else raise "Not sure what to do with a sex of #{parts[0]}"
34
55
  end
35
56
  end
36
57
 
37
- def set(name, gender)
38
- # go w/ first option, don't reset
39
- return if @names.has_key? name
58
+ def most_popular_gender(name)
59
+ return :andy unless @names.has_key?(name)
40
60
 
61
+ max = 0
62
+ best = @names[name].keys.first
63
+ @names[name].each { |gender, country_values|
64
+ count = yield country_values
65
+ if count > max
66
+ max = count
67
+ best = gender
68
+ end
69
+ }
70
+ best
71
+ end
72
+
73
+ def set(name, gender, country_values)
41
74
  if name.include? "+"
42
75
  [ '', '-', ' ' ].each { |replacement|
43
- set name.gsub("+", replacement), gender
76
+ set name.gsub("+", replacement), gender, country_values
44
77
  }
45
78
  else
46
- @names[name] = gender
79
+ @names[name] ||= {}
80
+ @names[name][gender] = country_values
47
81
  end
48
82
  end
49
83
  end
@@ -1,3 +1,3 @@
1
1
  module SexMachine
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sexmachine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-24 00:00:00.000000000Z
12
+ date: 2012-07-25 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: Get gender from first name.
15
15
  email: brian.muller@livingsocial.com