sexmachine 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.rdoc +10 -1
- data/lib/sexmachine/detector.rb +50 -16
- data/lib/sexmachine/version.rb +1 -1
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/README.rdoc
CHANGED
@@ -8,13 +8,22 @@ This gem uses the underlying data from the program "gender" by Jorg Michael (des
|
|
8
8
|
>> d.get_gender("Sally")
|
9
9
|
:female
|
10
10
|
>> d.get_gender("Pauley") # should be androgynous
|
11
|
-
:andy
|
11
|
+
:andy
|
12
|
+
|
13
|
+
The result will be one of andy (androgynous), male, female, mostly_male, or mostly_female. Any unknown names are considered andies.
|
12
14
|
|
13
15
|
I18N is fully supported:
|
14
16
|
|
15
17
|
>> d.get_gender("Álfrún")
|
16
18
|
:female
|
17
19
|
|
20
|
+
Additionally, you can give preference to specific countries:
|
21
|
+
|
22
|
+
>> d.get_gender("Jamie")
|
23
|
+
=> :female
|
24
|
+
>> d.get_gender("Jamie", :great_britain)
|
25
|
+
=> :mostly_male
|
26
|
+
|
18
27
|
If you have an alternative data file, you can pass that in as an optional argument to the Detector.
|
19
28
|
|
20
29
|
Try to avoid creating many Detectors, as each creation means reading in the data file.
|
data/lib/sexmachine/detector.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
module SexMachine
|
2
2
|
|
3
3
|
class Detector
|
4
|
+
COUNTRIES = [ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
|
5
|
+
:germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
|
6
|
+
:slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
|
7
|
+
:greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
|
8
|
+
:india, :japan, :korea, :vietnam, :other_countries ]
|
9
|
+
|
4
10
|
def initialize(fname=nil)
|
5
11
|
fname ||= File.expand_path('../data/nam_dict.txt', __FILE__)
|
6
12
|
parse fname
|
@@ -10,40 +16,68 @@ module SexMachine
|
|
10
16
|
@names = {}
|
11
17
|
open(fname, "r:iso8859-1:utf-8") { |f|
|
12
18
|
f.each_line { |line|
|
13
|
-
|
19
|
+
eat_name_line line
|
14
20
|
}
|
15
21
|
}
|
16
22
|
end
|
17
23
|
|
18
|
-
def get_gender(name)
|
19
|
-
@names.
|
24
|
+
def get_gender(name, country = nil)
|
25
|
+
if not @names.has_key?(name)
|
26
|
+
:andy
|
27
|
+
elsif country.nil?
|
28
|
+
most_popular_gender(name) { |country_values|
|
29
|
+
country_values.split("").select { |l| l.strip != "" }.length
|
30
|
+
}
|
31
|
+
elsif COUNTRIES.include?(country)
|
32
|
+
index = COUNTRIES.index(country)
|
33
|
+
most_popular_gender(name) { |country_values|
|
34
|
+
country_values[index].ord
|
35
|
+
}
|
36
|
+
else
|
37
|
+
raise "No such country: #{country}"
|
38
|
+
end
|
20
39
|
end
|
21
40
|
|
22
41
|
private
|
23
|
-
def
|
42
|
+
def eat_name_line(line)
|
24
43
|
return if line.start_with?("#") or line.start_with?("=")
|
25
44
|
|
26
45
|
parts = line.split(" ").select { |p| p.strip != "" }
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
set
|
32
|
-
|
33
|
-
set
|
46
|
+
country_values = line.slice(30, line.length)
|
47
|
+
|
48
|
+
case parts[0]
|
49
|
+
when "M" then set(parts[1], :male, country_values)
|
50
|
+
when "1M", "?M" then set(parts[1], :mostly_male, country_values)
|
51
|
+
when "F" then set(parts[1], :female, country_values)
|
52
|
+
when "1F", "?F" then set(parts[1], :mostly_female, country_values)
|
53
|
+
when "?" then set(parts[1], :andy, country_values)
|
54
|
+
else raise "Not sure what to do with a sex of #{parts[0]}"
|
34
55
|
end
|
35
56
|
end
|
36
57
|
|
37
|
-
def
|
38
|
-
|
39
|
-
return if @names.has_key? name
|
58
|
+
def most_popular_gender(name)
|
59
|
+
return :andy unless @names.has_key?(name)
|
40
60
|
|
61
|
+
max = 0
|
62
|
+
best = @names[name].keys.first
|
63
|
+
@names[name].each { |gender, country_values|
|
64
|
+
count = yield country_values
|
65
|
+
if count > max
|
66
|
+
max = count
|
67
|
+
best = gender
|
68
|
+
end
|
69
|
+
}
|
70
|
+
best
|
71
|
+
end
|
72
|
+
|
73
|
+
def set(name, gender, country_values)
|
41
74
|
if name.include? "+"
|
42
75
|
[ '', '-', ' ' ].each { |replacement|
|
43
|
-
set name.gsub("+", replacement), gender
|
76
|
+
set name.gsub("+", replacement), gender, country_values
|
44
77
|
}
|
45
78
|
else
|
46
|
-
@names[name]
|
79
|
+
@names[name] ||= {}
|
80
|
+
@names[name][gender] = country_values
|
47
81
|
end
|
48
82
|
end
|
49
83
|
end
|
data/lib/sexmachine/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sexmachine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-25 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: Get gender from first name.
|
15
15
|
email: brian.muller@livingsocial.com
|