petrovich 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/petrovich/gender/rule.rb +3 -4
- data/lib/petrovich/rule_set.rb +18 -20
- data/lib/petrovich/version.rb +1 -1
- data/lib/tasks/evaluate.rake +101 -53
- data/rules/gender.yml +477 -122
- data/rules/rules.yml +38 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 641bd09d48d3c8a48ea3bc6ecdbbf569d93c5084
|
4
|
+
data.tar.gz: 5ded3884cc68b5018fdd02dea609110949680440
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd46a801786008a0b968c2fff2d00312c585fec23648eb4df49f115a6e79de95d8c8e801f78011727cbd2b4421c03a509c5167cd36454de3845e18d2fbf05886
|
7
|
+
data.tar.gz: 9dfe2bbc5cda78d60dafb9d468f5f87954699dad7dfd0a58dbe377c9185a1a7268e9ed73c1e3804079d56aeccea65d30f26e0baeac2dc58baca7926384ab260c
|
@@ -2,18 +2,17 @@ module Petrovich
|
|
2
2
|
module Gender
|
3
3
|
# A gender rule from the set of rules
|
4
4
|
class Rule
|
5
|
-
attr_reader :gender, :as, :suffix
|
5
|
+
attr_reader :gender, :as, :suffix, :accuracy
|
6
6
|
|
7
7
|
# TODO: check options (see Case::Rule)
|
8
8
|
def initialize(opts)
|
9
9
|
@gender = opts[:gender]
|
10
10
|
@as = opts[:as]
|
11
11
|
@suffix = /#{opts[:suffix]}$/i
|
12
|
+
@accuracy = opts[:suffix].length
|
12
13
|
end
|
13
14
|
|
14
|
-
def match?(name
|
15
|
-
return false unless match_as == as
|
16
|
-
|
15
|
+
def match?(name)
|
17
16
|
!!name.match(suffix)
|
18
17
|
end
|
19
18
|
end
|
data/lib/petrovich/rule_set.rb
CHANGED
@@ -15,14 +15,6 @@ module Petrovich
|
|
15
15
|
@case_rules << rule
|
16
16
|
end
|
17
17
|
|
18
|
-
def add_gender_rule(rule)
|
19
|
-
unless rule.is_a?(Gender::Rule)
|
20
|
-
fail ArgumentError, 'Expecting rule of type Petrovich::Gender::Rule'.freeze
|
21
|
-
end
|
22
|
-
|
23
|
-
@gender_rules << rule
|
24
|
-
end
|
25
|
-
|
26
18
|
def find_all_case_rules(name, gender, as, known_gender = false)
|
27
19
|
parts = name.split('-')
|
28
20
|
parts.map.with_index { |part, index| find_case_rule(part, gender, as, (index == parts.count-1) && known_gender) }
|
@@ -34,7 +26,8 @@ module Petrovich
|
|
34
26
|
|
35
27
|
def clear!
|
36
28
|
@case_rules = []
|
37
|
-
@gender_rules =
|
29
|
+
@gender_rules = {}
|
30
|
+
@gender_exceptions = {}
|
38
31
|
end
|
39
32
|
|
40
33
|
def load!
|
@@ -70,16 +63,23 @@ module Petrovich
|
|
70
63
|
# Load rules for genders
|
71
64
|
def load_gender_rules!(rules)
|
72
65
|
[:lastname, :firstname, :middlename].each do |name_part|
|
73
|
-
# First, add androgynous rules. Order is matters.
|
74
66
|
Petrovich::GENDERS.each do |section|
|
75
|
-
entries = rules['gender'][name_part.to_s][section.to_s]
|
76
|
-
|
77
|
-
|
78
|
-
entries.each do |entry|
|
67
|
+
entries = rules['gender'][name_part.to_s]['suffixes'][section.to_s]
|
68
|
+
Array(entries).each do |entry|
|
79
69
|
load_gender_entry(name_part, section, entry)
|
80
70
|
end
|
71
|
+
|
72
|
+
exceptions = rules['gender'][name_part.to_s]['exceptions']
|
73
|
+
@gender_exceptions[name_part] ||= {}
|
74
|
+
next if exceptions.nil?
|
75
|
+
Array(exceptions[section.to_s]).each do |exception|
|
76
|
+
@gender_exceptions[name_part][exception] = Gender::Rule.new(as: name_part, gender: section, suffix: exception)
|
77
|
+
end
|
81
78
|
end
|
82
79
|
end
|
80
|
+
@gender_rules.each do |_, gender_rules|
|
81
|
+
gender_rules.sort_by!{ |rule| -rule.accuracy }
|
82
|
+
end
|
83
83
|
end
|
84
84
|
|
85
85
|
def find_case_rule(name, gender, as, known_gender = false)
|
@@ -88,7 +88,7 @@ module Petrovich
|
|
88
88
|
end
|
89
89
|
|
90
90
|
def find_gender_rule(name, as)
|
91
|
-
@gender_rules.find
|
91
|
+
@gender_exceptions[as][Unicode.downcase(name)] || @gender_rules[as].find{ |rule| rule.match?(name) }
|
92
92
|
end
|
93
93
|
|
94
94
|
def load_case_entry(as, section, entry)
|
@@ -99,6 +99,7 @@ module Petrovich
|
|
99
99
|
end
|
100
100
|
|
101
101
|
tests = entry['test'].map do |suffix|
|
102
|
+
suffix = "^#{suffix}" if section == :exceptions
|
102
103
|
Petrovich::Case::Rule::Test.new(suffix)
|
103
104
|
end
|
104
105
|
|
@@ -113,11 +114,8 @@ module Petrovich
|
|
113
114
|
end
|
114
115
|
|
115
116
|
def load_gender_entry(as, section, entry)
|
116
|
-
|
117
|
-
|
118
|
-
gender: section,
|
119
|
-
suffix: entry
|
120
|
-
)
|
117
|
+
@gender_rules[as] ||= []
|
118
|
+
@gender_rules[as] << Gender::Rule.new(as: as, gender: section, suffix: entry)
|
121
119
|
end
|
122
120
|
end
|
123
121
|
end
|
data/lib/petrovich/version.rb
CHANGED
data/lib/tasks/evaluate.rake
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
require 'csv'
|
4
4
|
|
5
|
-
def check!(errors, correct, total,
|
6
|
-
petrovich = Petrovich(
|
7
|
-
|
5
|
+
def check!(errors, correct, total, name, gender, gcase, expected)
|
6
|
+
petrovich = Petrovich(name.merge(gender: gender))
|
7
|
+
lemma = name.values.join(' ')
|
8
|
+
actual = Petrovich::Unicode.upcase(petrovich.public_send(gcase).to_s)
|
8
9
|
total[[gender, gcase]] += 1
|
9
10
|
if actual == expected
|
10
11
|
correct[[gender, gcase]] += 1
|
@@ -15,18 +16,36 @@ def check!(errors, correct, total, lemma, gender, gcase, expected)
|
|
15
16
|
end
|
16
17
|
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
def figure_namepart(args)
|
20
|
+
namepart_filename = args[:namepart] || "surnames"
|
21
|
+
namepart_filename += 's' unless namepart_filename.end_with?('s')
|
22
|
+
namepart_symbol = namepart_filename.chop.to_sym
|
23
|
+
namepart_symbol = :lastname if namepart_symbol == :surname
|
24
|
+
namepart_symbol = :middlename if namepart_symbol == :midname
|
25
|
+
namepart_filename += ".#{args[:subset]}" if args[:subset]
|
26
|
+
[namepart_filename, namepart_symbol]
|
27
|
+
end
|
28
|
+
|
29
|
+
desc 'Evaluate Petrovich'
|
30
|
+
task :evaluate, [:namepart, :subset] => [:'evaluate:rules', :'evaluate:gender']
|
31
|
+
|
32
|
+
namespace :evaluate do
|
33
|
+
desc 'Evaluate the inflector on lastnames'
|
34
|
+
task :rules, [:namepart, :subset] => :petrovich do |_, args|
|
35
|
+
namepart_filename, namepart_symbol = figure_namepart(args)
|
36
|
+
filename = File.expand_path("../../../eval/#{namepart_filename}.tsv", __FILE__)
|
37
|
+
unless File.file?(filename)
|
38
|
+
warn "File #{filename} not found, skipping task"
|
39
|
+
next
|
40
|
+
end
|
41
|
+
errors_filename = ENV['errors'] || 'errors.tsv'
|
22
42
|
|
23
|
-
|
43
|
+
correct, total = Hash.new(0), Hash.new(0)
|
24
44
|
|
25
|
-
|
26
|
-
|
45
|
+
puts 'I will evaluate the inflector on "%s" ' \
|
46
|
+
'and store errors to "%s".' % [filename, errors_filename]
|
27
47
|
|
28
|
-
|
29
|
-
errors << %w(lemma expected actual params)
|
48
|
+
errors = []
|
30
49
|
|
31
50
|
CSV.open(filename, "r:BINARY", col_sep: "\t", headers: true).each do |row|
|
32
51
|
word = row['word'].force_encoding('UTF-8')
|
@@ -43,81 +62,110 @@ task :evaluate => :petrovich do
|
|
43
62
|
if grammemes.include? '0'
|
44
63
|
# some words are aptotic so we have to ensure that
|
45
64
|
Petrovich::CASES.each do |gcase|
|
46
|
-
check!(errors, correct, total, lemma, gender, gcase, word)
|
65
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, gcase, word)
|
47
66
|
end
|
48
67
|
elsif grammemes.include? 'им'
|
49
|
-
check!(errors, correct, total, lemma, gender, :nominative, word)
|
68
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :nominative, word)
|
50
69
|
elsif grammemes.include? 'рд'
|
51
|
-
check!(errors, correct, total, lemma, gender, :genitive, word)
|
70
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :genitive, word)
|
52
71
|
elsif grammemes.include? 'дт'
|
53
|
-
check!(errors, correct, total, lemma, gender, :dative, word)
|
72
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :dative, word)
|
54
73
|
elsif grammemes.include? 'вн'
|
55
|
-
check!(errors, correct, total, lemma, gender, :accusative, word)
|
74
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :accusative, word)
|
56
75
|
elsif grammemes.include? 'тв'
|
57
|
-
check!(errors, correct, total, lemma, gender, :instrumental, word)
|
76
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :instrumental, word)
|
58
77
|
elsif grammemes.include? 'пр'
|
59
|
-
check!(errors, correct, total, lemma, gender, :prepositional, word)
|
78
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :prepositional, word)
|
60
79
|
end
|
61
80
|
end
|
62
|
-
end
|
63
81
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
82
|
+
errors.sort_by!{ |array| array.first.reverse + array.last.first.to_s }
|
83
|
+
|
84
|
+
CSV.open(errors_filename, 'w', col_sep: "\t") do |errors_file|
|
85
|
+
errors_file << %w(lemma expected actual params)
|
86
|
+
errors.each do |array|
|
87
|
+
errors_file << array
|
88
|
+
end
|
89
|
+
end
|
68
90
|
|
69
|
-
|
70
|
-
|
91
|
+
total.each do |(gender, gcase), correct_count|
|
92
|
+
accuracy = correct[[gender, gcase]] / correct_count.to_f * 100
|
93
|
+
puts "\tAc(%s|%s) = %.4f%%" % [gcase, gender, accuracy]
|
94
|
+
end
|
71
95
|
|
72
|
-
|
73
|
-
|
96
|
+
correct_size = correct.values.inject(&:+).to_i
|
97
|
+
total_size = total.values.inject(&:+).to_i
|
74
98
|
|
75
|
-
|
76
|
-
|
77
|
-
end
|
99
|
+
puts 'Well, the accuracy on %d examples is about %.4f%%.' %
|
100
|
+
[total_size, (correct_size / total_size.to_f * 100)]
|
78
101
|
|
79
|
-
|
80
|
-
|
81
|
-
|
102
|
+
puts 'Sum of the %d correct examples and %d mistakes is %d.' %
|
103
|
+
[correct_size, total_size - correct_size, total_size]
|
104
|
+
end
|
82
105
|
|
83
|
-
|
84
|
-
|
106
|
+
desc 'Evaluate the gender detector'
|
107
|
+
task :gender, [:namepart, :subset] => :petrovich do |_, args|
|
108
|
+
GENDER_MAP = { 'мр' => :male, 'жр' => :female, 'мр-жр' => :androgynous }
|
85
109
|
|
86
|
-
|
110
|
+
namepart_filename, namepart_symbol = figure_namepart(args)
|
111
|
+
filename = File.expand_path("../../../eval/#{namepart_filename}.gender.tsv", __FILE__)
|
112
|
+
unless File.file?(filename)
|
113
|
+
warn "File #{filename} not found, skipping task"
|
114
|
+
next
|
115
|
+
end
|
116
|
+
errors_filename = ENV['errors'] || 'errors.gender.tsv'
|
87
117
|
|
88
|
-
|
89
|
-
'and store errors to "%s".' % [filename, errors_filename]
|
118
|
+
correct, total = Hash.new(0), Hash.new(0)
|
90
119
|
|
91
|
-
|
92
|
-
|
120
|
+
puts 'I will evaluate gender detector on "%s" ' \
|
121
|
+
'and store errors to "%s".' % [filename, errors_filename]
|
122
|
+
|
123
|
+
errors = []
|
124
|
+
hard_error_count = 0
|
93
125
|
|
94
126
|
CSV.open(filename, "r:BINARY", col_sep: "\t", headers: true).each do |row|
|
95
127
|
lemma = row['lemma'].force_encoding('UTF-8')
|
96
128
|
gender_name = row['gender'].force_encoding('UTF-8')
|
97
129
|
expected_gender = GENDER_MAP[gender_name]
|
98
130
|
|
99
|
-
detected_gender = Petrovich(
|
131
|
+
detected_gender = Petrovich(namepart_symbol => lemma).gender
|
100
132
|
|
101
133
|
total[expected_gender] += 1
|
102
134
|
if detected_gender == expected_gender
|
103
135
|
correct[expected_gender] += 1
|
104
136
|
else
|
105
137
|
errors << [lemma, expected_gender, detected_gender]
|
138
|
+
if detected_gender != :androgynous
|
139
|
+
hard_error_count += 1
|
140
|
+
warn " - #{Petrovich::Unicode.downcase(lemma)}"
|
141
|
+
end
|
106
142
|
end
|
107
143
|
end
|
108
|
-
end
|
109
144
|
|
110
|
-
|
111
|
-
accuracy = correct[gender] / correct_count.to_f * 100
|
112
|
-
puts "\tAc(%s) = %.4f%%" % [gender, accuracy]
|
113
|
-
end
|
145
|
+
puts 'Hard error count: %d.' % [hard_error_count]
|
114
146
|
|
115
|
-
|
116
|
-
|
147
|
+
PART_INDEX = {:female => 0, :male => 1, :androgynous => 3}
|
148
|
+
errors.sort_by!{ |array| array.first.reverse + PART_INDEX[array[1]].to_s }
|
117
149
|
|
118
|
-
|
119
|
-
|
150
|
+
CSV.open(errors_filename, 'w', col_sep: "\t") do |errors_file|
|
151
|
+
errors_file << %w(lemma expected actual)
|
152
|
+
errors.each do |array|
|
153
|
+
errors_file << array
|
154
|
+
end
|
155
|
+
end
|
120
156
|
|
121
|
-
|
122
|
-
|
157
|
+
total.each do |gender, correct_count|
|
158
|
+
accuracy = correct[gender] / correct_count.to_f * 100
|
159
|
+
puts "\tAc(%s) = %.4f%%" % [gender, accuracy]
|
160
|
+
end
|
161
|
+
|
162
|
+
correct_size = correct.values.inject(&:+).to_i
|
163
|
+
total_size = total.values.inject(&:+).to_i
|
164
|
+
|
165
|
+
puts 'Well, the accuracy on %d examples is about %.4f%%.' %
|
166
|
+
[total_size, (correct_size / total_size.to_f * 100)]
|
167
|
+
|
168
|
+
puts 'Sum of the %d correct examples and %d mistakes is %d.' %
|
169
|
+
[correct_size, total_size - correct_size, total_size]
|
170
|
+
end
|
123
171
|
end
|
data/rules/gender.yml
CHANGED
@@ -3,127 +3,482 @@ gender:
|
|
3
3
|
lastname:
|
4
4
|
# Здесь андрогинные фамилии не выделены в отдельную группу. Если в группе female и male
|
5
5
|
# не будет найдено совпадений, то фамилия будет считаться андрогинной.
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
6
|
+
exceptions:
|
7
|
+
androgynous:
|
8
|
+
- бова
|
9
|
+
- регин
|
10
|
+
- дарвин
|
11
|
+
- пэйлин
|
12
|
+
- грин
|
13
|
+
- цин
|
14
|
+
- шенгелая
|
15
|
+
suffixes:
|
16
|
+
female:
|
17
|
+
- ова
|
18
|
+
- ая
|
19
|
+
- ына
|
20
|
+
- ина
|
21
|
+
- ева
|
22
|
+
- ска
|
23
|
+
- ёва
|
24
|
+
male:
|
25
|
+
- кий
|
26
|
+
- ов
|
27
|
+
- ын
|
28
|
+
- ев
|
29
|
+
- ин
|
30
|
+
- ёв
|
31
|
+
- хий
|
32
|
+
- ний
|
33
|
+
- ый
|
34
|
+
- ой
|
25
35
|
firstname:
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
36
|
+
# сортировка по окончанию
|
37
|
+
exceptions:
|
38
|
+
androgynous:
|
39
|
+
- сева
|
40
|
+
- иона
|
41
|
+
- муса
|
42
|
+
- саша
|
43
|
+
- алвард
|
44
|
+
- валери
|
45
|
+
- кири
|
46
|
+
- анри
|
47
|
+
- ким
|
48
|
+
- райхон
|
49
|
+
- закия
|
50
|
+
- захария
|
51
|
+
- женя
|
52
|
+
male:
|
53
|
+
- абиба
|
54
|
+
- савва
|
55
|
+
- лёва
|
56
|
+
- вова
|
57
|
+
- ага
|
58
|
+
- ахмедага
|
59
|
+
- алиага
|
60
|
+
- амирага
|
61
|
+
- агга
|
62
|
+
- серега
|
63
|
+
- фейга
|
64
|
+
- гога
|
65
|
+
- алиада
|
66
|
+
- муктада
|
67
|
+
- абида
|
68
|
+
- алда
|
69
|
+
- маджуда
|
70
|
+
- нурлыхуда
|
71
|
+
- гиа
|
72
|
+
- элиа
|
73
|
+
- гарсиа
|
74
|
+
- вавила
|
75
|
+
- гавриила
|
76
|
+
- генка
|
77
|
+
- лука
|
78
|
+
- дима
|
79
|
+
- зосима
|
80
|
+
- тима
|
81
|
+
- фима
|
82
|
+
- фома
|
83
|
+
- кузьма
|
84
|
+
- жора
|
85
|
+
- миша
|
86
|
+
- ермила
|
87
|
+
- данила
|
88
|
+
- гаврила
|
89
|
+
- абдалла
|
90
|
+
- аталла
|
91
|
+
- абдилла
|
92
|
+
- атилла
|
93
|
+
- кайролла
|
94
|
+
- абулла
|
95
|
+
- абула
|
96
|
+
- свитлана
|
97
|
+
- бена
|
98
|
+
- гена
|
99
|
+
- агелина
|
100
|
+
- джанна
|
101
|
+
- кришна
|
102
|
+
- степа
|
103
|
+
- дра
|
104
|
+
- назера
|
105
|
+
- валера
|
106
|
+
- эстера
|
107
|
+
- двойра
|
108
|
+
- калистра
|
109
|
+
- заратустра
|
110
|
+
- юра
|
111
|
+
- иса
|
112
|
+
- аиса
|
113
|
+
- халиса
|
114
|
+
- холиса
|
115
|
+
- валенса
|
116
|
+
- мусса
|
117
|
+
- ата
|
118
|
+
- паата
|
119
|
+
- алета
|
120
|
+
- никита
|
121
|
+
- мота
|
122
|
+
- шота
|
123
|
+
- фаста
|
124
|
+
- коста
|
125
|
+
- маритта
|
126
|
+
- малюта
|
127
|
+
- васюта
|
128
|
+
- вафа
|
129
|
+
- мустафа
|
130
|
+
- ганифа
|
131
|
+
- лев
|
132
|
+
- яков
|
133
|
+
- шелли
|
134
|
+
- константин
|
135
|
+
- марсель
|
136
|
+
- рамиль
|
137
|
+
- эмиль
|
138
|
+
- бактыгуль
|
139
|
+
- даниэль
|
140
|
+
- игорь
|
141
|
+
- арминэ
|
142
|
+
- изя
|
143
|
+
- кузя
|
144
|
+
- гия
|
145
|
+
- мазия
|
146
|
+
- кирикия
|
147
|
+
- ркия
|
148
|
+
- еркия
|
149
|
+
- эркия
|
150
|
+
- гулия
|
151
|
+
- аксания
|
152
|
+
- закария
|
153
|
+
- зекерия
|
154
|
+
- гарсия
|
155
|
+
- шендля
|
156
|
+
- филя
|
157
|
+
- вилля
|
158
|
+
- толя
|
159
|
+
- ваня
|
160
|
+
- саня
|
161
|
+
- загиря
|
162
|
+
- боря
|
163
|
+
- цайся
|
164
|
+
- вася
|
165
|
+
- ося
|
166
|
+
- петя
|
167
|
+
- витя
|
168
|
+
- митя
|
169
|
+
- костя
|
170
|
+
- алья
|
171
|
+
- илья
|
172
|
+
- ларья
|
173
|
+
female:
|
174
|
+
- судаба
|
175
|
+
- сураба
|
176
|
+
- любава
|
177
|
+
- джанлука
|
178
|
+
- варвара
|
179
|
+
- наташа
|
180
|
+
- зайнаб
|
181
|
+
- любов
|
182
|
+
- сольвейг
|
183
|
+
- шакед
|
184
|
+
- аннаид
|
185
|
+
- ингрид
|
186
|
+
- синди
|
187
|
+
- аллаберди
|
188
|
+
- сандали
|
189
|
+
- лали
|
190
|
+
- натали
|
191
|
+
- гулькай
|
192
|
+
- алтынай
|
193
|
+
- гюнай
|
194
|
+
- гюльчитай
|
195
|
+
- нурангиз
|
196
|
+
- лиз
|
197
|
+
- элиз
|
198
|
+
- ботагоз
|
199
|
+
- юлдуз
|
200
|
+
- диляфруз
|
201
|
+
- габи
|
202
|
+
- сажи
|
203
|
+
- фанни
|
204
|
+
- мери
|
205
|
+
- элдари
|
206
|
+
- эльдари
|
207
|
+
- хилари
|
208
|
+
- хиллари
|
209
|
+
- аннемари
|
210
|
+
- розмари
|
211
|
+
- товсари
|
212
|
+
- ансари
|
213
|
+
- одри
|
214
|
+
- тери
|
215
|
+
- ири
|
216
|
+
- катри
|
217
|
+
- мэри
|
218
|
+
- сатаней
|
219
|
+
- ефтений
|
220
|
+
- верунчик
|
221
|
+
- гюзел
|
222
|
+
- этел
|
223
|
+
- рэйчел
|
224
|
+
- джил
|
225
|
+
- мерил
|
226
|
+
- нинелл
|
227
|
+
- бурул
|
228
|
+
- ахлам
|
229
|
+
- майрам
|
230
|
+
- махаррам
|
231
|
+
- мириам
|
232
|
+
- дилярам
|
233
|
+
- асем
|
234
|
+
- мерьем
|
235
|
+
- мирьем
|
236
|
+
- эркаим
|
237
|
+
- гулаим
|
238
|
+
- айгерим
|
239
|
+
- марьям
|
240
|
+
- мирьям
|
241
|
+
- эван
|
242
|
+
- гульжиган
|
243
|
+
- айдан
|
244
|
+
- айжан
|
245
|
+
- вивиан
|
246
|
+
- гульжиан
|
247
|
+
- лилиан
|
248
|
+
- мариан
|
249
|
+
- саиман
|
250
|
+
- джоан
|
251
|
+
- чулпан
|
252
|
+
- лоран
|
253
|
+
- моран
|
254
|
+
- джохан
|
255
|
+
- гульшан
|
256
|
+
- аделин
|
257
|
+
- жаклин
|
258
|
+
- карин
|
259
|
+
- каролин
|
260
|
+
- каталин
|
261
|
+
- катрин
|
262
|
+
- керстин
|
263
|
+
- кэтрин
|
264
|
+
- мэрилин
|
265
|
+
- рузалин
|
266
|
+
- хелин
|
267
|
+
- цеткин
|
268
|
+
- ширин
|
269
|
+
- элисон
|
270
|
+
- дурсун
|
271
|
+
- кристин
|
272
|
+
- гульжиян
|
273
|
+
- марьян
|
274
|
+
- ренато
|
275
|
+
- зейнеп
|
276
|
+
- санабар
|
277
|
+
- дильбар
|
278
|
+
- гулизар
|
279
|
+
- гульзар
|
280
|
+
- пилар
|
281
|
+
- дагмар
|
282
|
+
- элинар
|
283
|
+
- нилуфар
|
284
|
+
- анхар
|
285
|
+
- гаухар
|
286
|
+
- естер
|
287
|
+
- эстер
|
288
|
+
- дженнифер
|
289
|
+
- линор
|
290
|
+
- элинор
|
291
|
+
- элеонор
|
292
|
+
- айнур
|
293
|
+
- гульнур
|
294
|
+
- шамсинур
|
295
|
+
- элнур
|
296
|
+
- ильсияр
|
297
|
+
- нигяр
|
298
|
+
- сигитас
|
299
|
+
- агнес
|
300
|
+
- анес
|
301
|
+
- долорес
|
302
|
+
- инес
|
303
|
+
- анаис
|
304
|
+
- таис
|
305
|
+
- эллис
|
306
|
+
- элис
|
307
|
+
- кларис
|
308
|
+
- амнерис
|
309
|
+
- айрис
|
310
|
+
- дорис
|
311
|
+
- беатрис
|
312
|
+
- грейс
|
313
|
+
- грэйс
|
314
|
+
- ботагос
|
315
|
+
- маргос
|
316
|
+
- джулианс
|
317
|
+
- арус
|
318
|
+
- диляфрус
|
319
|
+
- саодат
|
320
|
+
- зулхижат
|
321
|
+
- хамат
|
322
|
+
- патимат
|
323
|
+
- хатимат
|
324
|
+
- альжанат
|
325
|
+
- маймунат
|
326
|
+
- гульшат
|
327
|
+
- биргит
|
328
|
+
- рут
|
329
|
+
- иргаш
|
330
|
+
- айнаш
|
331
|
+
- агнеш
|
332
|
+
- зауреш
|
333
|
+
- тэрбиш
|
334
|
+
- ануш
|
335
|
+
- азгануш
|
336
|
+
- гаруш
|
337
|
+
- николь
|
338
|
+
- адась
|
339
|
+
- афиля
|
340
|
+
- тафиля
|
341
|
+
- фаня
|
342
|
+
- аня
|
343
|
+
suffixes:
|
344
|
+
androgynous:
|
345
|
+
- улла
|
346
|
+
male:
|
347
|
+
- аба
|
348
|
+
- б
|
349
|
+
- ав
|
350
|
+
- ев
|
351
|
+
- ов
|
352
|
+
- г
|
353
|
+
- д
|
354
|
+
- ж
|
355
|
+
- з
|
356
|
+
- би
|
357
|
+
- ди
|
358
|
+
- жи
|
359
|
+
- али
|
360
|
+
- ри
|
361
|
+
- ай
|
362
|
+
- ей
|
363
|
+
- ий
|
364
|
+
- ой
|
365
|
+
- ый
|
366
|
+
- к
|
367
|
+
- л
|
368
|
+
- ам
|
369
|
+
- ем
|
370
|
+
- им
|
371
|
+
- ом
|
372
|
+
- ум
|
373
|
+
- ым
|
374
|
+
- ям
|
375
|
+
- ан
|
376
|
+
- бен
|
377
|
+
- вен
|
378
|
+
- ген
|
379
|
+
- ден
|
380
|
+
- ин
|
381
|
+
- сейн
|
382
|
+
- он
|
383
|
+
- ун
|
384
|
+
- ян
|
385
|
+
- ио
|
386
|
+
- ло
|
387
|
+
- ро
|
388
|
+
- то
|
389
|
+
- шо
|
390
|
+
- п
|
391
|
+
- ар
|
392
|
+
- др
|
393
|
+
- ер
|
394
|
+
- ир
|
395
|
+
- ор
|
396
|
+
- тр
|
397
|
+
- ур
|
398
|
+
- ыр
|
399
|
+
- яр
|
400
|
+
- ас
|
401
|
+
- ес
|
402
|
+
- ис
|
403
|
+
- йс
|
404
|
+
- кс
|
405
|
+
- мс
|
406
|
+
- ос
|
407
|
+
- нс
|
408
|
+
- рс
|
409
|
+
- ус
|
410
|
+
- юс
|
411
|
+
- яс
|
412
|
+
- ат
|
413
|
+
- мет
|
414
|
+
- кт
|
415
|
+
- нт
|
416
|
+
- рт
|
417
|
+
- ст
|
418
|
+
- ут
|
419
|
+
- ф
|
420
|
+
- х
|
421
|
+
- ш
|
422
|
+
- ы
|
423
|
+
- сь
|
424
|
+
- емеля
|
425
|
+
- коля
|
426
|
+
female:
|
427
|
+
- иба
|
428
|
+
- люба
|
429
|
+
- лава
|
430
|
+
- ева
|
431
|
+
- га
|
432
|
+
- да
|
433
|
+
- еа
|
434
|
+
- иза
|
435
|
+
- иа
|
436
|
+
- ика
|
437
|
+
- нка
|
438
|
+
- ска
|
439
|
+
- ела
|
440
|
+
- ила
|
441
|
+
- лла
|
442
|
+
- эла
|
443
|
+
- има
|
444
|
+
- на
|
445
|
+
- ра
|
446
|
+
- са
|
447
|
+
- та
|
448
|
+
- фа
|
449
|
+
- елли
|
450
|
+
- еса
|
451
|
+
- сса
|
452
|
+
- гуль
|
453
|
+
- нуэль
|
454
|
+
- гюль
|
455
|
+
- нэ
|
456
|
+
- ая
|
457
|
+
- ея
|
458
|
+
- ия
|
459
|
+
- йя
|
460
|
+
- ля
|
461
|
+
- мя
|
462
|
+
- оя
|
463
|
+
- ря
|
464
|
+
- ся
|
465
|
+
- вья
|
466
|
+
- лья
|
467
|
+
- мья
|
468
|
+
- нья
|
469
|
+
- рья
|
470
|
+
- сья
|
471
|
+
- тья
|
472
|
+
- фья
|
473
|
+
- зя
|
120
474
|
middlename:
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
475
|
+
suffixes:
|
476
|
+
female:
|
477
|
+
- на
|
478
|
+
- кызы
|
479
|
+
- гызы
|
480
|
+
male:
|
481
|
+
- ич
|
482
|
+
- оглы
|
483
|
+
- улы
|
484
|
+
- уулу
|
data/rules/rules.yml
CHANGED
@@ -186,6 +186,11 @@ lastname:
|
|
186
186
|
test: [ок]
|
187
187
|
mods: [--ка, --ку, --ка, --ком, --ке]
|
188
188
|
|
189
|
+
# Коломиец
|
190
|
+
- gender: male
|
191
|
+
test: [иец, еец]
|
192
|
+
mods: [--йца, --йцу, --йца, --йцом, --йце]
|
193
|
+
|
189
194
|
# Кравец
|
190
195
|
- gender: male
|
191
196
|
test: [ец]
|
@@ -230,16 +235,20 @@ firstname:
|
|
230
235
|
mods: [., ., ., ., .]
|
231
236
|
|
232
237
|
- gender: female
|
233
|
-
test: [
|
234
|
-
mods: [
|
238
|
+
test: [агидель, жизель, нинель, рашель, рахиль]
|
239
|
+
mods: [-и, -и, ., ю, -и]
|
235
240
|
|
236
241
|
suffixes:
|
237
242
|
- gender: androgynous
|
238
243
|
test: [е, ё, и, о, у, ы, э, ю]
|
239
244
|
mods: [., ., ., ., .]
|
240
245
|
|
246
|
+
- gender: male
|
247
|
+
test: [уа, иа]
|
248
|
+
mods: [., ., ., ., .]
|
249
|
+
|
241
250
|
- gender: female
|
242
|
-
test: [б, в, г, д, ж, з, й, к, л, м, н, п, р, с, т, ф, х, ц, ч, ш, щ,
|
251
|
+
test: [б, в, г, д, ж, з, й, к, л, м, н, п, р, с, т, ф, х, ц, ч, ш, щ, ъ, иа, ль]
|
243
252
|
mods: [., ., ., ., .]
|
244
253
|
|
245
254
|
- gender: female
|
@@ -260,6 +269,11 @@ firstname:
|
|
260
269
|
test: [ша]
|
261
270
|
mods: [-и, -е, -у, -ей, -е]
|
262
271
|
|
272
|
+
# Даша, Саша
|
273
|
+
- gender: male
|
274
|
+
test: [ша, ча, жа]
|
275
|
+
mods: [-и, -е, -у, -ей, -е]
|
276
|
+
|
263
277
|
- gender: androgynous
|
264
278
|
test: [а]
|
265
279
|
mods: [-ы, -е, -у, -ой, -е]
|
@@ -269,6 +283,16 @@ firstname:
|
|
269
283
|
test: [ия]
|
270
284
|
mods: [-и, -и, -ю, -ей, -и]
|
271
285
|
|
286
|
+
# Майка
|
287
|
+
- gender: female
|
288
|
+
test: [ка, га, ха]
|
289
|
+
mods: [-и, -е, -у, -ой, -е]
|
290
|
+
|
291
|
+
# Марица
|
292
|
+
- gender: female
|
293
|
+
test: [ца]
|
294
|
+
mods: [-ы, -е, -у, -ей, -е]
|
295
|
+
|
272
296
|
# Светлана
|
273
297
|
- gender: female
|
274
298
|
test: [а]
|
@@ -287,15 +311,19 @@ firstname:
|
|
287
311
|
test: [я]
|
288
312
|
mods: [-и, -е, -ю, -ей, -е]
|
289
313
|
|
314
|
+
# Афанасий
|
315
|
+
- gender: male
|
316
|
+
test: [ий]
|
317
|
+
mods: [-я, -ю, -я, -ем, -и]
|
318
|
+
|
290
319
|
# Андрей, *
|
291
320
|
- gender: male
|
292
321
|
test: [ей, й]
|
293
322
|
mods: [-я, -ю, -я, -ем, -е]
|
294
323
|
|
295
|
-
# Афанасий
|
296
324
|
- gender: male
|
297
|
-
test: [
|
298
|
-
mods: [
|
325
|
+
test: [ш, ж]
|
326
|
+
mods: [а, у, а, ем, е]
|
299
327
|
|
300
328
|
- gender: male
|
301
329
|
test: [б, в, г, д, ж, з, к, л, м, н, п, р, с, т, ф, х, ц, ч]
|
@@ -314,6 +342,10 @@ middlename:
|
|
314
342
|
tags: [first_word]
|
315
343
|
|
316
344
|
suffixes:
|
345
|
+
- gender: male
|
346
|
+
test: [мич, ьич, кич]
|
347
|
+
mods: [а, у, а, ом, е]
|
348
|
+
|
317
349
|
- gender: male
|
318
350
|
test: [ич]
|
319
351
|
mods: [а, у, а, ем, е]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: petrovich
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kozlov
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-07-
|
12
|
+
date: 2016-07-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|