name_gender_classifier 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 787939c630e3a02792909f4e987711b5ce2e336b515db92f349bed2ebe696309
|
4
|
+
data.tar.gz: 1ad33afd9ce5b28cdb1737345b7ba4d4b104b58fd0a85aa69d88cea2339f4403
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 54975df68d17a91d44292eac77e6ab4372c64f922ccd9ade1a2dee3ebe9b1a967f3a992d42b5b6367218a5cddbfce1559630b7013607b9189e377b46bc082afa
|
7
|
+
data.tar.gz: 52f02d8ae418868dc3f529cf5eaa357470192fb1b549bf8602797576268d7128e60fb99eafc61abc0c163e8b58ba7b87f925875048b517ae3675e3e9bec7af97
|
Binary file
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'gdbm'
|
4
|
+
|
5
|
+
module NameGenderClassifier
  # Reads gender probabilities from the GDBM database located at {DB_NAME}.
  #
  # A single handle is memoized in a module-level instance variable and is
  # released after every {find} call and after every block passed to {gdbm}.
  module DatabaseManager
    # Resolved relative to this file (the gem ships the database in the same
    # directory), so the library also loads when it was not activated through
    # RubyGems and `Gem.loaded_specs` has no entry for it.
    #
    # @return [String] absolute path of the database holding the classified names
    DB_NAME = File.expand_path('classified_names_pt-br.db', __dir__)

    # Find in the database the value for a previously saved key. The key holds
    # the first name and the value the gender probability.
    #
    # The database handle is always released, even when the lookup raises.
    #
    # @param key [String, Symbol] a key to be searched in the database
    #
    # @return [Float, nil] the gender probability (value between 0 and 1, where
    #   0 <= male < 0.5 <= female <= 1), or nil when the key is not stored
    def self.find(key)
      value = gdbm[key.to_s]
      value&.to_f
    ensure
      close_connection
    end

    # With a block, yields the database so several records can be read with a
    # single open request; the handle is closed when the block finishes or
    # raises. Without a block, returns the (memoized) open database instance and
    # leaves closing it to the caller.
    #
    # @yield [db] gives the database instance to the block
    # @yieldparam db [GDBM] the open database
    #
    # @return [GDBM, nil] the GDBM database instance or nil if used with a block
    def self.gdbm
      @gdbm ||= GDBM.new(DB_NAME)
      return @gdbm unless block_given?

      begin
        yield(@gdbm)
      ensure
        close_connection
      end

      nil
    end

    # Closes the memoized handle (if any) and forgets it so that the next call
    # to {gdbm} opens a fresh one. The reference is dropped even if closing fails.
    #
    # @return [void]
    def self.close_connection
      @gdbm&.close
    ensure
      @gdbm = nil
    end
    private_class_method :close_connection
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NameGenderClassifier
  # If no match is found in the database, this module is called to predict the
  # gender based on the first name suffix.
  module FallbackGenderDetector
    # Prefix of the suffix-list constants consulted by {guess_gender}.
    #
    # @return [String] the locale
    LOCALE = 'PT_BR'

    # @return [Array<String>] male suffix terminations for pt-br
    PT_BR_MALE_SUFFIXES = %w[ard as el eu ex iz is o on or os ur us rge me pe se re vi].freeze

    # @return [Array<String>] female suffix terminations for pt-br
    PT_BR_FEMALE_SUFFIXES = %w[a ais are ari eis eme ere ese iko ime ire yse ise isse
                               oko uko ume quel bel cao ce de dis le li lis liz lse ne
                               nis nge ris riz sse].freeze

    # Try to guess the gender based on first name suffix.
    #
    # The name is coerced to a lower-case String first, so Symbols, nil and
    # upper-case input are accepted. Female suffixes are tested before male
    # ones, which lets a longer female suffix (e.g. "quel") win over a shorter
    # male suffix it contains (e.g. "el").
    #
    # @param name [String, Symbol, nil] first name
    #
    # @return [String, nil] 'female', 'male', or nil when no suffix matches
    def self.guess_gender(name)
      normalized = name.to_s.downcase

      return 'female' if normalized.end_with?(*const_get("#{LOCALE}_FEMALE_SUFFIXES"))
      return 'male' if normalized.end_with?(*const_get("#{LOCALE}_MALE_SUFFIXES"))

      nil
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
# Gender detector for first names.
|
4
|
+
module NameGenderClassifier
  # Return the gender(s) (probability) for the informed name(s). The result type will vary depending
  # on the parameter type:
  #
  # [String, Symbol] the gender (String) is returned.
  # [Array<String>] an array (Array<String>) with the genders is returned.
  # [Array<Object>] an array (Array<Object>) with the same objects and the newly assigned genders is returned.
  #
  # Arrays are dispatched on their first non-nil element; all other elements are assumed to be of
  # the same type. Any other argument type yields nil.
  #
  # @param arg [String, Symbol, Array<String>, Array<Symbol>, Array<Object>] argument holding first
  #   name(s) information(s).
  # @param options [Hash] first_name_attribute: name of the method that returns the first name,
  #   gender_attribute: name of the method which will receive the gender assignment.
  #
  # @return [String, Array<String>, Array<Object>, nil] the gender classification for the passed first names
  def self.classify(arg, options = {})
    case arg
    when String, Symbol
      most_probable_gender(arg)
    when Array
      # Skip leading nils when sniffing the element type: classify_array tolerates nil entries,
      # so an array such as [nil, 'maria'] must still be treated as an array of names.
      case arg.compact.first
      when String, Symbol then classify_array(arg)
      else classify_objects(arg, options)
      end
    end
  end

  # Return the genders (probabilistically) for the informed names. All lookups share a single
  # database open request.
  #
  # Falsy entries (nil/false) are skipped, so the result can be shorter than the input.
  #
  # @param array [Array<String>, Array<Symbol>] see {NameGenderClassifier.classify}
  #
  # @return [Array<String>] see {NameGenderClassifier.classify}
  def self.classify_array(array)
    genders = []

    DatabaseManager.gdbm do |db|
      array.each { |name| genders << most_probable_gender(name, db) if name }
    end

    genders
  end

  # For each object in the array, it tries to classify the gender for object.first_name or
  # object.name (or equivalent method) and save it on object.gender (or equivalent method).
  #
  # The reader method is taken from options[:first_name_attribute] or detected on the first
  # object; when none is available a diagnostic is written to standard error and the objects are
  # returned untouched. Objects whose first name is nil/false are left unchanged.
  #
  # @param objects [Array<Object>] see {NameGenderClassifier.classify}
  # @param options see {NameGenderClassifier.classify}
  #
  # @return [Array<Object>] the objects with the assigned genders
  def self.classify_objects(objects, options = {})
    # Nothing to classify and nothing to inspect: avoid the misleading "missing methods" warning.
    return objects if objects.empty?

    sample = objects.first
    first_name_attribute = options.fetch(:first_name_attribute, nil) ||
                           (:first_name if sample.respond_to?(:first_name)) ||
                           (:name if sample.respond_to?(:name))

    if first_name_attribute.nil?
      # Diagnostics belong on stderr so they do not pollute the host program's output.
      warn "The object doesn't have the methods 'name' nor 'first_name'. " \
           'Use #classify(arg, first_name_attribute: nil, gender_attribute: nil) ' \
           'to inform which methods to lookup.'

      return objects
    end

    gender_writer = "#{options.fetch(:gender_attribute, 'gender')}="

    DatabaseManager.gdbm do |db|
      objects.each do |object|
        first_name = object.public_send(first_name_attribute)
        next unless first_name

        object.public_send(gender_writer, most_probable_gender(first_name, db))
      end
    end

    objects
  end

  # Remove whitespaces, secondary names, accents, digits and transform to string and lower case.
  #
  # @param name [#to_s] raw name, possibly holding several words
  #
  # @return [String, nil] the first word reduced to the letters a-z, or nil when nothing is left
  def self.remove_unwanted_chars(name)
    # Enforce the string format, remove white spaces and discard secondary names
    first_name = name.to_s.strip.split(' ').first
    return unless first_name

    # Transform to lower case and transliterate to ASCII
    ascii = Iconv.iconv('ascii//translit//ignore', 'utf-8', first_name.downcase).first

    # Keep letters only. /\W/ would let digits and underscores through, which never match a
    # database key.
    letters = ascii.gsub(/[^a-z]+/, '')
    letters unless letters.empty?
  end
  private_class_method :remove_unwanted_chars

  # Looks the cleaned-up name up in the database (through the given open handle, or through
  # DatabaseManager.find when none is given) and falls back to FallbackGenderDetector when the
  # name is not stored.
  #
  # @param name [#to_s] the first name to classify
  # @param db [#[], nil] an already open database handle, if any
  #
  # @return [String, nil] the gender of the informed name
  def self.most_probable_gender(name, db = nil)
    clean_name = remove_unwanted_chars(name)
    return unless clean_name

    fem_probability = db ? db[clean_name]&.to_f : DatabaseManager.find(clean_name)
    return FallbackGenderDetector.guess_gender(clean_name) unless fem_probability

    fem_probability >= 0.5 ? 'female' : 'male'
  end
  private_class_method :most_probable_gender
end
|
106
|
+
|
107
|
+
require 'name_gender_classifier/database_manager'
|
108
|
+
require 'name_gender_classifier/fallback_gender_detector'
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: name_gender_classifier
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Avantsoft
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-09-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: gdbm
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: iconv
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.0.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.0.0
|
41
|
+
description: Using primarily IBGE census data [2010], this gem classifies brazilian
|
42
|
+
first names as 'male' or 'female'.
|
43
|
+
email: hello@avantsoft.com.br
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- lib/name_gender_classifier.rb
|
49
|
+
- lib/name_gender_classifier/classified_names_pt-br.db
|
50
|
+
- lib/name_gender_classifier/database_manager.rb
|
51
|
+
- lib/name_gender_classifier/fallback_gender_detector.rb
|
52
|
+
homepage: https://rubygems.org/gems/name_gender_classifier
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubygems_version: 3.0.9
|
72
|
+
signing_key:
|
73
|
+
specification_version: 4
|
74
|
+
summary: Gender detection for brazilian first names.
|
75
|
+
test_files: []
|