sexmachine 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.rdoc +10 -1
- data/lib/sexmachine/detector.rb +50 -16
- data/lib/sexmachine/version.rb +1 -1
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/README.rdoc
CHANGED
@@ -8,13 +8,22 @@ This gem uses the underlying data from the program "gender" by Jorg Michael (des
|
|
8
8
|
>> d.get_gender("Sally")
|
9
9
|
:female
|
10
10
|
>> d.get_gender("Pauley") # should be androgynous
|
11
|
-
:andy
|
11
|
+
:andy
|
12
|
+
|
13
|
+
The result will be one of andy (androgynous), male, female, mostly_male, or mostly_female. Any unknown names are considered andies.
|
12
14
|
|
13
15
|
I18N is fully supported:
|
14
16
|
|
15
17
|
>> d.get_gender("Álfrún")
|
16
18
|
:female
|
17
19
|
|
20
|
+
Additionally, you can give preference to specific countries:
|
21
|
+
|
22
|
+
>> d.get_gender("Jamie")
|
23
|
+
=> :female
|
24
|
+
>> d.get_gender("Jamie", :great_britain)
|
25
|
+
=> :mostly_male
|
26
|
+
|
18
27
|
If you have an alternative data file, you can pass that in as an optional argument to the Detector.
|
19
28
|
|
20
29
|
Try to avoid creating many Detectors, as each creation means reading in the data file.
|
data/lib/sexmachine/detector.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
module SexMachine
|
2
2
|
|
3
3
|
class Detector
|
4
|
+
COUNTRIES = [ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
|
5
|
+
:germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
|
6
|
+
:slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
|
7
|
+
:greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
|
8
|
+
:india, :japan, :korea, :vietnam, :other_countries ]
|
9
|
+
|
4
10
|
def initialize(fname=nil)
|
5
11
|
fname ||= File.expand_path('../data/nam_dict.txt', __FILE__)
|
6
12
|
parse fname
|
@@ -10,40 +16,68 @@ module SexMachine
|
|
10
16
|
@names = {}
|
11
17
|
open(fname, "r:iso8859-1:utf-8") { |f|
|
12
18
|
f.each_line { |line|
|
13
|
-
|
19
|
+
eat_name_line line
|
14
20
|
}
|
15
21
|
}
|
16
22
|
end
|
17
23
|
|
18
|
-
def get_gender(name)
|
19
|
-
@names.
|
24
|
+
def get_gender(name, country = nil)
|
25
|
+
if not @names.has_key?(name)
|
26
|
+
:andy
|
27
|
+
elsif country.nil?
|
28
|
+
most_popular_gender(name) { |country_values|
|
29
|
+
country_values.split("").select { |l| l.strip != "" }.length
|
30
|
+
}
|
31
|
+
elsif COUNTRIES.include?(country)
|
32
|
+
index = COUNTRIES.index(country)
|
33
|
+
most_popular_gender(name) { |country_values|
|
34
|
+
country_values[index].ord
|
35
|
+
}
|
36
|
+
else
|
37
|
+
raise "No such country: #{country}"
|
38
|
+
end
|
20
39
|
end
|
21
40
|
|
22
41
|
private
|
23
|
-
def
|
42
|
+
def eat_name_line(line)
|
24
43
|
return if line.start_with?("#") or line.start_with?("=")
|
25
44
|
|
26
45
|
parts = line.split(" ").select { |p| p.strip != "" }
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
set
|
32
|
-
|
33
|
-
set
|
46
|
+
country_values = line.slice(30, line.length)
|
47
|
+
|
48
|
+
case parts[0]
|
49
|
+
when "M" then set(parts[1], :male, country_values)
|
50
|
+
when "1M", "?M" then set(parts[1], :mostly_male, country_values)
|
51
|
+
when "F" then set(parts[1], :female, country_values)
|
52
|
+
when "1F", "?F" then set(parts[1], :mostly_female, country_values)
|
53
|
+
when "?" then set(parts[1], :andy, country_values)
|
54
|
+
else raise "Not sure what to do with a sex of #{parts[0]}"
|
34
55
|
end
|
35
56
|
end
|
36
57
|
|
37
|
-
def
|
38
|
-
|
39
|
-
return if @names.has_key? name
|
58
|
+
def most_popular_gender(name)
|
59
|
+
return :andy unless @names.has_key?(name)
|
40
60
|
|
61
|
+
max = 0
|
62
|
+
best = @names[name].keys.first
|
63
|
+
@names[name].each { |gender, country_values|
|
64
|
+
count = yield country_values
|
65
|
+
if count > max
|
66
|
+
max = count
|
67
|
+
best = gender
|
68
|
+
end
|
69
|
+
}
|
70
|
+
best
|
71
|
+
end
|
72
|
+
|
73
|
+
def set(name, gender, country_values)
|
41
74
|
if name.include? "+"
|
42
75
|
[ '', '-', ' ' ].each { |replacement|
|
43
|
-
set name.gsub("+", replacement), gender
|
76
|
+
set name.gsub("+", replacement), gender, country_values
|
44
77
|
}
|
45
78
|
else
|
46
|
-
@names[name]
|
79
|
+
@names[name] ||= {}
|
80
|
+
@names[name][gender] = country_values
|
47
81
|
end
|
48
82
|
end
|
49
83
|
end
|
data/lib/sexmachine/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sexmachine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-25 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: Get gender from first name.
|
15
15
|
email: brian.muller@livingsocial.com
|