petrovich 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/petrovich/gender/rule.rb +3 -4
- data/lib/petrovich/rule_set.rb +18 -20
- data/lib/petrovich/version.rb +1 -1
- data/lib/tasks/evaluate.rake +101 -53
- data/rules/gender.yml +477 -122
- data/rules/rules.yml +38 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 641bd09d48d3c8a48ea3bc6ecdbbf569d93c5084
|
4
|
+
data.tar.gz: 5ded3884cc68b5018fdd02dea609110949680440
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd46a801786008a0b968c2fff2d00312c585fec23648eb4df49f115a6e79de95d8c8e801f78011727cbd2b4421c03a509c5167cd36454de3845e18d2fbf05886
|
7
|
+
data.tar.gz: 9dfe2bbc5cda78d60dafb9d468f5f87954699dad7dfd0a58dbe377c9185a1a7268e9ed73c1e3804079d56aeccea65d30f26e0baeac2dc58baca7926384ab260c
|
@@ -2,18 +2,17 @@ module Petrovich
|
|
2
2
|
module Gender
|
3
3
|
# A gender rule from the set of rules
|
4
4
|
class Rule
|
5
|
-
attr_reader :gender, :as, :suffix
|
5
|
+
attr_reader :gender, :as, :suffix, :accuracy
|
6
6
|
|
7
7
|
# TODO: check options (see Case::Rule)
|
8
8
|
def initialize(opts)
|
9
9
|
@gender = opts[:gender]
|
10
10
|
@as = opts[:as]
|
11
11
|
@suffix = /#{opts[:suffix]}$/i
|
12
|
+
@accuracy = opts[:suffix].length
|
12
13
|
end
|
13
14
|
|
14
|
-
def match?(name, match_as)
|
15
|
-
return false unless match_as == as
|
16
|
-
|
15
|
+
def match?(name)
|
17
16
|
!!name.match(suffix)
|
18
17
|
end
|
19
18
|
end
|
data/lib/petrovich/rule_set.rb
CHANGED
@@ -15,14 +15,6 @@ module Petrovich
|
|
15
15
|
@case_rules << rule
|
16
16
|
end
|
17
17
|
|
18
|
-
def add_gender_rule(rule)
|
19
|
-
unless rule.is_a?(Gender::Rule)
|
20
|
-
fail ArgumentError, 'Expecting rule of type Petrovich::Gender::Rule'.freeze
|
21
|
-
end
|
22
|
-
|
23
|
-
@gender_rules << rule
|
24
|
-
end
|
25
|
-
|
26
18
|
def find_all_case_rules(name, gender, as, known_gender = false)
|
27
19
|
parts = name.split('-')
|
28
20
|
parts.map.with_index { |part, index| find_case_rule(part, gender, as, (index == parts.count-1) && known_gender) }
|
@@ -34,7 +26,8 @@ module Petrovich
|
|
34
26
|
|
35
27
|
def clear!
|
36
28
|
@case_rules = []
|
37
|
-
@gender_rules = []
|
29
|
+
@gender_rules = {}
|
30
|
+
@gender_exceptions = {}
|
38
31
|
end
|
39
32
|
|
40
33
|
def load!
|
@@ -70,16 +63,23 @@ module Petrovich
|
|
70
63
|
# Load rules for genders
|
71
64
|
def load_gender_rules!(rules)
|
72
65
|
[:lastname, :firstname, :middlename].each do |name_part|
|
73
|
-
# First, add androgynous rules. Order is matters.
|
74
66
|
Petrovich::GENDERS.each do |section|
|
75
|
-
entries = rules['gender'][name_part.to_s][section.to_s]
|
76
|
-
|
77
|
-
|
78
|
-
entries.each do |entry|
|
67
|
+
entries = rules['gender'][name_part.to_s]['suffixes'][section.to_s]
|
68
|
+
Array(entries).each do |entry|
|
79
69
|
load_gender_entry(name_part, section, entry)
|
80
70
|
end
|
71
|
+
|
72
|
+
exceptions = rules['gender'][name_part.to_s]['exceptions']
|
73
|
+
@gender_exceptions[name_part] ||= {}
|
74
|
+
next if exceptions.nil?
|
75
|
+
Array(exceptions[section.to_s]).each do |exception|
|
76
|
+
@gender_exceptions[name_part][exception] = Gender::Rule.new(as: name_part, gender: section, suffix: exception)
|
77
|
+
end
|
81
78
|
end
|
82
79
|
end
|
80
|
+
@gender_rules.each do |_, gender_rules|
|
81
|
+
gender_rules.sort_by!{ |rule| -rule.accuracy }
|
82
|
+
end
|
83
83
|
end
|
84
84
|
|
85
85
|
def find_case_rule(name, gender, as, known_gender = false)
|
@@ -88,7 +88,7 @@ module Petrovich
|
|
88
88
|
end
|
89
89
|
|
90
90
|
def find_gender_rule(name, as)
|
91
|
-
@gender_rules.find
|
91
|
+
@gender_exceptions[as][Unicode.downcase(name)] || @gender_rules[as].find{ |rule| rule.match?(name) }
|
92
92
|
end
|
93
93
|
|
94
94
|
def load_case_entry(as, section, entry)
|
@@ -99,6 +99,7 @@ module Petrovich
|
|
99
99
|
end
|
100
100
|
|
101
101
|
tests = entry['test'].map do |suffix|
|
102
|
+
suffix = "^#{suffix}" if section == :exceptions
|
102
103
|
Petrovich::Case::Rule::Test.new(suffix)
|
103
104
|
end
|
104
105
|
|
@@ -113,11 +114,8 @@ module Petrovich
|
|
113
114
|
end
|
114
115
|
|
115
116
|
def load_gender_entry(as, section, entry)
|
116
|
-
|
117
|
-
|
118
|
-
gender: section,
|
119
|
-
suffix: entry
|
120
|
-
)
|
117
|
+
@gender_rules[as] ||= []
|
118
|
+
@gender_rules[as] << Gender::Rule.new(as: as, gender: section, suffix: entry)
|
121
119
|
end
|
122
120
|
end
|
123
121
|
end
|
data/lib/petrovich/version.rb
CHANGED
data/lib/tasks/evaluate.rake
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
require 'csv'
|
4
4
|
|
5
|
-
def check!(errors, correct, total, lemma, gender, gcase, expected)
|
6
|
-
petrovich = Petrovich(
|
7
|
-
|
5
|
+
def check!(errors, correct, total, name, gender, gcase, expected)
|
6
|
+
petrovich = Petrovich(name.merge(gender: gender))
|
7
|
+
lemma = name.values.join(' ')
|
8
|
+
actual = Petrovich::Unicode.upcase(petrovich.public_send(gcase).to_s)
|
8
9
|
total[[gender, gcase]] += 1
|
9
10
|
if actual == expected
|
10
11
|
correct[[gender, gcase]] += 1
|
@@ -15,18 +16,36 @@ def check!(errors, correct, total, lemma, gender, gcase, expected)
|
|
15
16
|
end
|
16
17
|
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
def figure_namepart(args)
|
20
|
+
namepart_filename = args[:namepart] || "surnames"
|
21
|
+
namepart_filename += 's' unless namepart_filename.end_with?('s')
|
22
|
+
namepart_symbol = namepart_filename.chop.to_sym
|
23
|
+
namepart_symbol = :lastname if namepart_symbol == :surname
|
24
|
+
namepart_symbol = :middlename if namepart_symbol == :midname
|
25
|
+
namepart_filename += ".#{args[:subset]}" if args[:subset]
|
26
|
+
[namepart_filename, namepart_symbol]
|
27
|
+
end
|
28
|
+
|
29
|
+
desc 'Evaluate Petrovich'
|
30
|
+
task :evaluate, [:namepart, :subset] => [:'evaluate:rules', :'evaluate:gender']
|
31
|
+
|
32
|
+
namespace :evaluate do
|
33
|
+
desc 'Evaluate the inflector on lastnames'
|
34
|
+
task :rules, [:namepart, :subset] => :petrovich do |_, args|
|
35
|
+
namepart_filename, namepart_symbol = figure_namepart(args)
|
36
|
+
filename = File.expand_path("../../../eval/#{namepart_filename}.tsv", __FILE__)
|
37
|
+
unless File.file?(filename)
|
38
|
+
warn "File #{filename} not found, skipping task"
|
39
|
+
next
|
40
|
+
end
|
41
|
+
errors_filename = ENV['errors'] || 'errors.tsv'
|
22
42
|
|
23
|
-
|
43
|
+
correct, total = Hash.new(0), Hash.new(0)
|
24
44
|
|
25
|
-
|
26
|
-
|
45
|
+
puts 'I will evaluate the inflector on "%s" ' \
|
46
|
+
'and store errors to "%s".' % [filename, errors_filename]
|
27
47
|
|
28
|
-
|
29
|
-
errors << %w(lemma expected actual params)
|
48
|
+
errors = []
|
30
49
|
|
31
50
|
CSV.open(filename, "r:BINARY", col_sep: "\t", headers: true).each do |row|
|
32
51
|
word = row['word'].force_encoding('UTF-8')
|
@@ -43,81 +62,110 @@ task :evaluate => :petrovich do
|
|
43
62
|
if grammemes.include? '0'
|
44
63
|
# some words are aptotic so we have to ensure that
|
45
64
|
Petrovich::CASES.each do |gcase|
|
46
|
-
check!(errors, correct, total, lemma, gender, gcase, word)
|
65
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, gcase, word)
|
47
66
|
end
|
48
67
|
elsif grammemes.include? 'им'
|
49
|
-
check!(errors, correct, total, lemma, gender, :nominative, word)
|
68
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :nominative, word)
|
50
69
|
elsif grammemes.include? 'рд'
|
51
|
-
check!(errors, correct, total, lemma, gender, :genitive, word)
|
70
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :genitive, word)
|
52
71
|
elsif grammemes.include? 'дт'
|
53
|
-
check!(errors, correct, total, lemma, gender, :dative, word)
|
72
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :dative, word)
|
54
73
|
elsif grammemes.include? 'вн'
|
55
|
-
check!(errors, correct, total, lemma, gender, :accusative, word)
|
74
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :accusative, word)
|
56
75
|
elsif grammemes.include? 'тв'
|
57
|
-
check!(errors, correct, total, lemma, gender, :instrumental, word)
|
76
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :instrumental, word)
|
58
77
|
elsif grammemes.include? 'пр'
|
59
|
-
check!(errors, correct, total, lemma, gender, :prepositional, word)
|
78
|
+
check!(errors, correct, total, { namepart_symbol => lemma }, gender, :prepositional, word)
|
60
79
|
end
|
61
80
|
end
|
62
|
-
end
|
63
81
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
82
|
+
errors.sort_by!{ |array| array.first.reverse + array.last.first.to_s }
|
83
|
+
|
84
|
+
CSV.open(errors_filename, 'w', col_sep: "\t") do |errors_file|
|
85
|
+
errors_file << %w(lemma expected actual params)
|
86
|
+
errors.each do |array|
|
87
|
+
errors_file << array
|
88
|
+
end
|
89
|
+
end
|
68
90
|
|
69
|
-
|
70
|
-
|
91
|
+
total.each do |(gender, gcase), correct_count|
|
92
|
+
accuracy = correct[[gender, gcase]] / correct_count.to_f * 100
|
93
|
+
puts "\tAc(%s|%s) = %.4f%%" % [gcase, gender, accuracy]
|
94
|
+
end
|
71
95
|
|
72
|
-
|
73
|
-
|
96
|
+
correct_size = correct.values.inject(&:+).to_i
|
97
|
+
total_size = total.values.inject(&:+).to_i
|
74
98
|
|
75
|
-
|
76
|
-
|
77
|
-
end
|
99
|
+
puts 'Well, the accuracy on %d examples is about %.4f%%.' %
|
100
|
+
[total_size, (correct_size / total_size.to_f * 100)]
|
78
101
|
|
79
|
-
|
80
|
-
|
81
|
-
|
102
|
+
puts 'Sum of the %d correct examples and %d mistakes is %d.' %
|
103
|
+
[correct_size, total_size - correct_size, total_size]
|
104
|
+
end
|
82
105
|
|
83
|
-
|
84
|
-
|
106
|
+
desc 'Evaluate the gender detector'
|
107
|
+
task :gender, [:namepart, :subset] => :petrovich do |_, args|
|
108
|
+
GENDER_MAP = { 'мр' => :male, 'жр' => :female, 'мр-жр' => :androgynous }
|
85
109
|
|
86
|
-
|
110
|
+
namepart_filename, namepart_symbol = figure_namepart(args)
|
111
|
+
filename = File.expand_path("../../../eval/#{namepart_filename}.gender.tsv", __FILE__)
|
112
|
+
unless File.file?(filename)
|
113
|
+
warn "File #{filename} not found, skipping task"
|
114
|
+
next
|
115
|
+
end
|
116
|
+
errors_filename = ENV['errors'] || 'errors.gender.tsv'
|
87
117
|
|
88
|
-
|
89
|
-
'and store errors to "%s".' % [filename, errors_filename]
|
118
|
+
correct, total = Hash.new(0), Hash.new(0)
|
90
119
|
|
91
|
-
|
92
|
-
|
120
|
+
puts 'I will evaluate gender detector on "%s" ' \
|
121
|
+
'and store errors to "%s".' % [filename, errors_filename]
|
122
|
+
|
123
|
+
errors = []
|
124
|
+
hard_error_count = 0
|
93
125
|
|
94
126
|
CSV.open(filename, "r:BINARY", col_sep: "\t", headers: true).each do |row|
|
95
127
|
lemma = row['lemma'].force_encoding('UTF-8')
|
96
128
|
gender_name = row['gender'].force_encoding('UTF-8')
|
97
129
|
expected_gender = GENDER_MAP[gender_name]
|
98
130
|
|
99
|
-
detected_gender = Petrovich(
|
131
|
+
detected_gender = Petrovich(namepart_symbol => lemma).gender
|
100
132
|
|
101
133
|
total[expected_gender] += 1
|
102
134
|
if detected_gender == expected_gender
|
103
135
|
correct[expected_gender] += 1
|
104
136
|
else
|
105
137
|
errors << [lemma, expected_gender, detected_gender]
|
138
|
+
if detected_gender != :androgynous
|
139
|
+
hard_error_count += 1
|
140
|
+
warn " - #{Petrovich::Unicode.downcase(lemma)}"
|
141
|
+
end
|
106
142
|
end
|
107
143
|
end
|
108
|
-
end
|
109
144
|
|
110
|
-
|
111
|
-
accuracy = correct[gender] / correct_count.to_f * 100
|
112
|
-
puts "\tAc(%s) = %.4f%%" % [gender, accuracy]
|
113
|
-
end
|
145
|
+
puts 'Hard error count: %d.' % [hard_error_count]
|
114
146
|
|
115
|
-
|
116
|
-
|
147
|
+
PART_INDEX = {:female => 0, :male => 1, :androgynous => 3}
|
148
|
+
errors.sort_by!{ |array| array.first.reverse + PART_INDEX[array[1]].to_s }
|
117
149
|
|
118
|
-
|
119
|
-
|
150
|
+
CSV.open(errors_filename, 'w', col_sep: "\t") do |errors_file|
|
151
|
+
errors_file << %w(lemma expected actual)
|
152
|
+
errors.each do |array|
|
153
|
+
errors_file << array
|
154
|
+
end
|
155
|
+
end
|
120
156
|
|
121
|
-
|
122
|
-
|
157
|
+
total.each do |gender, correct_count|
|
158
|
+
accuracy = correct[gender] / correct_count.to_f * 100
|
159
|
+
puts "\tAc(%s) = %.4f%%" % [gender, accuracy]
|
160
|
+
end
|
161
|
+
|
162
|
+
correct_size = correct.values.inject(&:+).to_i
|
163
|
+
total_size = total.values.inject(&:+).to_i
|
164
|
+
|
165
|
+
puts 'Well, the accuracy on %d examples is about %.4f%%.' %
|
166
|
+
[total_size, (correct_size / total_size.to_f * 100)]
|
167
|
+
|
168
|
+
puts 'Sum of the %d correct examples and %d mistakes is %d.' %
|
169
|
+
[correct_size, total_size - correct_size, total_size]
|
170
|
+
end
|
123
171
|
end
|
data/rules/gender.yml
CHANGED
@@ -3,127 +3,482 @@ gender:
|
|
3
3
|
lastname:
|
4
4
|
# Здесь андрогинные фамилии не выделены в отдельную группу. Если в группе female и male
|
5
5
|
# не будет найдено совпадений, то фамилия будет считаться андрогинной.
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
6
|
+
exceptions:
|
7
|
+
androgynous:
|
8
|
+
- бова
|
9
|
+
- регин
|
10
|
+
- дарвин
|
11
|
+
- пэйлин
|
12
|
+
- грин
|
13
|
+
- цин
|
14
|
+
- шенгелая
|
15
|
+
suffixes:
|
16
|
+
female:
|
17
|
+
- ова
|
18
|
+
- ая
|
19
|
+
- ына
|
20
|
+
- ина
|
21
|
+
- ева
|
22
|
+
- ска
|
23
|
+
- ёва
|
24
|
+
male:
|
25
|
+
- кий
|
26
|
+
- ов
|
27
|
+
- ын
|
28
|
+
- ев
|
29
|
+
- ин
|
30
|
+
- ёв
|
31
|
+
- хий
|
32
|
+
- ний
|
33
|
+
- ый
|
34
|
+
- ой
|
25
35
|
firstname:
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
36
|
+
# сортировка по окончанию
|
37
|
+
exceptions:
|
38
|
+
androgynous:
|
39
|
+
- сева
|
40
|
+
- иона
|
41
|
+
- муса
|
42
|
+
- саша
|
43
|
+
- алвард
|
44
|
+
- валери
|
45
|
+
- кири
|
46
|
+
- анри
|
47
|
+
- ким
|
48
|
+
- райхон
|
49
|
+
- закия
|
50
|
+
- захария
|
51
|
+
- женя
|
52
|
+
male:
|
53
|
+
- абиба
|
54
|
+
- савва
|
55
|
+
- лёва
|
56
|
+
- вова
|
57
|
+
- ага
|
58
|
+
- ахмедага
|
59
|
+
- алиага
|
60
|
+
- амирага
|
61
|
+
- агга
|
62
|
+
- серега
|
63
|
+
- фейга
|
64
|
+
- гога
|
65
|
+
- алиада
|
66
|
+
- муктада
|
67
|
+
- абида
|
68
|
+
- алда
|
69
|
+
- маджуда
|
70
|
+
- нурлыхуда
|
71
|
+
- гиа
|
72
|
+
- элиа
|
73
|
+
- гарсиа
|
74
|
+
- вавила
|
75
|
+
- гавриила
|
76
|
+
- генка
|
77
|
+
- лука
|
78
|
+
- дима
|
79
|
+
- зосима
|
80
|
+
- тима
|
81
|
+
- фима
|
82
|
+
- фома
|
83
|
+
- кузьма
|
84
|
+
- жора
|
85
|
+
- миша
|
86
|
+
- ермила
|
87
|
+
- данила
|
88
|
+
- гаврила
|
89
|
+
- абдалла
|
90
|
+
- аталла
|
91
|
+
- абдилла
|
92
|
+
- атилла
|
93
|
+
- кайролла
|
94
|
+
- абулла
|
95
|
+
- абула
|
96
|
+
- свитлана
|
97
|
+
- бена
|
98
|
+
- гена
|
99
|
+
- агелина
|
100
|
+
- джанна
|
101
|
+
- кришна
|
102
|
+
- степа
|
103
|
+
- дра
|
104
|
+
- назера
|
105
|
+
- валера
|
106
|
+
- эстера
|
107
|
+
- двойра
|
108
|
+
- калистра
|
109
|
+
- заратустра
|
110
|
+
- юра
|
111
|
+
- иса
|
112
|
+
- аиса
|
113
|
+
- халиса
|
114
|
+
- холиса
|
115
|
+
- валенса
|
116
|
+
- мусса
|
117
|
+
- ата
|
118
|
+
- паата
|
119
|
+
- алета
|
120
|
+
- никита
|
121
|
+
- мота
|
122
|
+
- шота
|
123
|
+
- фаста
|
124
|
+
- коста
|
125
|
+
- маритта
|
126
|
+
- малюта
|
127
|
+
- васюта
|
128
|
+
- вафа
|
129
|
+
- мустафа
|
130
|
+
- ганифа
|
131
|
+
- лев
|
132
|
+
- яков
|
133
|
+
- шелли
|
134
|
+
- константин
|
135
|
+
- марсель
|
136
|
+
- рамиль
|
137
|
+
- эмиль
|
138
|
+
- бактыгуль
|
139
|
+
- даниэль
|
140
|
+
- игорь
|
141
|
+
- арминэ
|
142
|
+
- изя
|
143
|
+
- кузя
|
144
|
+
- гия
|
145
|
+
- мазия
|
146
|
+
- кирикия
|
147
|
+
- ркия
|
148
|
+
- еркия
|
149
|
+
- эркия
|
150
|
+
- гулия
|
151
|
+
- аксания
|
152
|
+
- закария
|
153
|
+
- зекерия
|
154
|
+
- гарсия
|
155
|
+
- шендля
|
156
|
+
- филя
|
157
|
+
- вилля
|
158
|
+
- толя
|
159
|
+
- ваня
|
160
|
+
- саня
|
161
|
+
- загиря
|
162
|
+
- боря
|
163
|
+
- цайся
|
164
|
+
- вася
|
165
|
+
- ося
|
166
|
+
- петя
|
167
|
+
- витя
|
168
|
+
- митя
|
169
|
+
- костя
|
170
|
+
- алья
|
171
|
+
- илья
|
172
|
+
- ларья
|
173
|
+
female:
|
174
|
+
- судаба
|
175
|
+
- сураба
|
176
|
+
- любава
|
177
|
+
- джанлука
|
178
|
+
- варвара
|
179
|
+
- наташа
|
180
|
+
- зайнаб
|
181
|
+
- любов
|
182
|
+
- сольвейг
|
183
|
+
- шакед
|
184
|
+
- аннаид
|
185
|
+
- ингрид
|
186
|
+
- синди
|
187
|
+
- аллаберди
|
188
|
+
- сандали
|
189
|
+
- лали
|
190
|
+
- натали
|
191
|
+
- гулькай
|
192
|
+
- алтынай
|
193
|
+
- гюнай
|
194
|
+
- гюльчитай
|
195
|
+
- нурангиз
|
196
|
+
- лиз
|
197
|
+
- элиз
|
198
|
+
- ботагоз
|
199
|
+
- юлдуз
|
200
|
+
- диляфруз
|
201
|
+
- габи
|
202
|
+
- сажи
|
203
|
+
- фанни
|
204
|
+
- мери
|
205
|
+
- элдари
|
206
|
+
- эльдари
|
207
|
+
- хилари
|
208
|
+
- хиллари
|
209
|
+
- аннемари
|
210
|
+
- розмари
|
211
|
+
- товсари
|
212
|
+
- ансари
|
213
|
+
- одри
|
214
|
+
- тери
|
215
|
+
- ири
|
216
|
+
- катри
|
217
|
+
- мэри
|
218
|
+
- сатаней
|
219
|
+
- ефтений
|
220
|
+
- верунчик
|
221
|
+
- гюзел
|
222
|
+
- этел
|
223
|
+
- рэйчел
|
224
|
+
- джил
|
225
|
+
- мерил
|
226
|
+
- нинелл
|
227
|
+
- бурул
|
228
|
+
- ахлам
|
229
|
+
- майрам
|
230
|
+
- махаррам
|
231
|
+
- мириам
|
232
|
+
- дилярам
|
233
|
+
- асем
|
234
|
+
- мерьем
|
235
|
+
- мирьем
|
236
|
+
- эркаим
|
237
|
+
- гулаим
|
238
|
+
- айгерим
|
239
|
+
- марьям
|
240
|
+
- мирьям
|
241
|
+
- эван
|
242
|
+
- гульжиган
|
243
|
+
- айдан
|
244
|
+
- айжан
|
245
|
+
- вивиан
|
246
|
+
- гульжиан
|
247
|
+
- лилиан
|
248
|
+
- мариан
|
249
|
+
- саиман
|
250
|
+
- джоан
|
251
|
+
- чулпан
|
252
|
+
- лоран
|
253
|
+
- моран
|
254
|
+
- джохан
|
255
|
+
- гульшан
|
256
|
+
- аделин
|
257
|
+
- жаклин
|
258
|
+
- карин
|
259
|
+
- каролин
|
260
|
+
- каталин
|
261
|
+
- катрин
|
262
|
+
- керстин
|
263
|
+
- кэтрин
|
264
|
+
- мэрилин
|
265
|
+
- рузалин
|
266
|
+
- хелин
|
267
|
+
- цеткин
|
268
|
+
- ширин
|
269
|
+
- элисон
|
270
|
+
- дурсун
|
271
|
+
- кристин
|
272
|
+
- гульжиян
|
273
|
+
- марьян
|
274
|
+
- ренато
|
275
|
+
- зейнеп
|
276
|
+
- санабар
|
277
|
+
- дильбар
|
278
|
+
- гулизар
|
279
|
+
- гульзар
|
280
|
+
- пилар
|
281
|
+
- дагмар
|
282
|
+
- элинар
|
283
|
+
- нилуфар
|
284
|
+
- анхар
|
285
|
+
- гаухар
|
286
|
+
- естер
|
287
|
+
- эстер
|
288
|
+
- дженнифер
|
289
|
+
- линор
|
290
|
+
- элинор
|
291
|
+
- элеонор
|
292
|
+
- айнур
|
293
|
+
- гульнур
|
294
|
+
- шамсинур
|
295
|
+
- элнур
|
296
|
+
- ильсияр
|
297
|
+
- нигяр
|
298
|
+
- сигитас
|
299
|
+
- агнес
|
300
|
+
- анес
|
301
|
+
- долорес
|
302
|
+
- инес
|
303
|
+
- анаис
|
304
|
+
- таис
|
305
|
+
- эллис
|
306
|
+
- элис
|
307
|
+
- кларис
|
308
|
+
- амнерис
|
309
|
+
- айрис
|
310
|
+
- дорис
|
311
|
+
- беатрис
|
312
|
+
- грейс
|
313
|
+
- грэйс
|
314
|
+
- ботагос
|
315
|
+
- маргос
|
316
|
+
- джулианс
|
317
|
+
- арус
|
318
|
+
- диляфрус
|
319
|
+
- саодат
|
320
|
+
- зулхижат
|
321
|
+
- хамат
|
322
|
+
- патимат
|
323
|
+
- хатимат
|
324
|
+
- альжанат
|
325
|
+
- маймунат
|
326
|
+
- гульшат
|
327
|
+
- биргит
|
328
|
+
- рут
|
329
|
+
- иргаш
|
330
|
+
- айнаш
|
331
|
+
- агнеш
|
332
|
+
- зауреш
|
333
|
+
- тэрбиш
|
334
|
+
- ануш
|
335
|
+
- азгануш
|
336
|
+
- гаруш
|
337
|
+
- николь
|
338
|
+
- адась
|
339
|
+
- афиля
|
340
|
+
- тафиля
|
341
|
+
- фаня
|
342
|
+
- аня
|
343
|
+
suffixes:
|
344
|
+
androgynous:
|
345
|
+
- улла
|
346
|
+
male:
|
347
|
+
- аба
|
348
|
+
- б
|
349
|
+
- ав
|
350
|
+
- ев
|
351
|
+
- ов
|
352
|
+
- г
|
353
|
+
- д
|
354
|
+
- ж
|
355
|
+
- з
|
356
|
+
- би
|
357
|
+
- ди
|
358
|
+
- жи
|
359
|
+
- али
|
360
|
+
- ри
|
361
|
+
- ай
|
362
|
+
- ей
|
363
|
+
- ий
|
364
|
+
- ой
|
365
|
+
- ый
|
366
|
+
- к
|
367
|
+
- л
|
368
|
+
- ам
|
369
|
+
- ем
|
370
|
+
- им
|
371
|
+
- ом
|
372
|
+
- ум
|
373
|
+
- ым
|
374
|
+
- ям
|
375
|
+
- ан
|
376
|
+
- бен
|
377
|
+
- вен
|
378
|
+
- ген
|
379
|
+
- ден
|
380
|
+
- ин
|
381
|
+
- сейн
|
382
|
+
- он
|
383
|
+
- ун
|
384
|
+
- ян
|
385
|
+
- ио
|
386
|
+
- ло
|
387
|
+
- ро
|
388
|
+
- то
|
389
|
+
- шо
|
390
|
+
- п
|
391
|
+
- ар
|
392
|
+
- др
|
393
|
+
- ер
|
394
|
+
- ир
|
395
|
+
- ор
|
396
|
+
- тр
|
397
|
+
- ур
|
398
|
+
- ыр
|
399
|
+
- яр
|
400
|
+
- ас
|
401
|
+
- ес
|
402
|
+
- ис
|
403
|
+
- йс
|
404
|
+
- кс
|
405
|
+
- мс
|
406
|
+
- ос
|
407
|
+
- нс
|
408
|
+
- рс
|
409
|
+
- ус
|
410
|
+
- юс
|
411
|
+
- яс
|
412
|
+
- ат
|
413
|
+
- мет
|
414
|
+
- кт
|
415
|
+
- нт
|
416
|
+
- рт
|
417
|
+
- ст
|
418
|
+
- ут
|
419
|
+
- ф
|
420
|
+
- х
|
421
|
+
- ш
|
422
|
+
- ы
|
423
|
+
- сь
|
424
|
+
- емеля
|
425
|
+
- коля
|
426
|
+
female:
|
427
|
+
- иба
|
428
|
+
- люба
|
429
|
+
- лава
|
430
|
+
- ева
|
431
|
+
- га
|
432
|
+
- да
|
433
|
+
- еа
|
434
|
+
- иза
|
435
|
+
- иа
|
436
|
+
- ика
|
437
|
+
- нка
|
438
|
+
- ска
|
439
|
+
- ела
|
440
|
+
- ила
|
441
|
+
- лла
|
442
|
+
- эла
|
443
|
+
- има
|
444
|
+
- на
|
445
|
+
- ра
|
446
|
+
- са
|
447
|
+
- та
|
448
|
+
- фа
|
449
|
+
- елли
|
450
|
+
- еса
|
451
|
+
- сса
|
452
|
+
- гуль
|
453
|
+
- нуэль
|
454
|
+
- гюль
|
455
|
+
- нэ
|
456
|
+
- ая
|
457
|
+
- ея
|
458
|
+
- ия
|
459
|
+
- йя
|
460
|
+
- ля
|
461
|
+
- мя
|
462
|
+
- оя
|
463
|
+
- ря
|
464
|
+
- ся
|
465
|
+
- вья
|
466
|
+
- лья
|
467
|
+
- мья
|
468
|
+
- нья
|
469
|
+
- рья
|
470
|
+
- сья
|
471
|
+
- тья
|
472
|
+
- фья
|
473
|
+
- зя
|
120
474
|
middlename:
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
475
|
+
suffixes:
|
476
|
+
female:
|
477
|
+
- на
|
478
|
+
- кызы
|
479
|
+
- гызы
|
480
|
+
male:
|
481
|
+
- ич
|
482
|
+
- оглы
|
483
|
+
- улы
|
484
|
+
- уулу
|
data/rules/rules.yml
CHANGED
@@ -186,6 +186,11 @@ lastname:
|
|
186
186
|
test: [ок]
|
187
187
|
mods: [--ка, --ку, --ка, --ком, --ке]
|
188
188
|
|
189
|
+
# Коломиец
|
190
|
+
- gender: male
|
191
|
+
test: [иец, еец]
|
192
|
+
mods: [--йца, --йцу, --йца, --йцом, --йце]
|
193
|
+
|
189
194
|
# Кравец
|
190
195
|
- gender: male
|
191
196
|
test: [ец]
|
@@ -230,16 +235,20 @@ firstname:
|
|
230
235
|
mods: [., ., ., ., .]
|
231
236
|
|
232
237
|
- gender: female
|
233
|
-
test: [
|
234
|
-
mods: [
|
238
|
+
test: [агидель, жизель, нинель, рашель, рахиль]
|
239
|
+
mods: [-и, -и, ., ю, -и]
|
235
240
|
|
236
241
|
suffixes:
|
237
242
|
- gender: androgynous
|
238
243
|
test: [е, ё, и, о, у, ы, э, ю]
|
239
244
|
mods: [., ., ., ., .]
|
240
245
|
|
246
|
+
- gender: male
|
247
|
+
test: [уа, иа]
|
248
|
+
mods: [., ., ., ., .]
|
249
|
+
|
241
250
|
- gender: female
|
242
|
-
test: [б, в, г, д, ж, з, й, к, л, м, н, п, р, с, т, ф, х, ц, ч, ш, щ,
|
251
|
+
test: [б, в, г, д, ж, з, й, к, л, м, н, п, р, с, т, ф, х, ц, ч, ш, щ, ъ, иа, ль]
|
243
252
|
mods: [., ., ., ., .]
|
244
253
|
|
245
254
|
- gender: female
|
@@ -260,6 +269,11 @@ firstname:
|
|
260
269
|
test: [ша]
|
261
270
|
mods: [-и, -е, -у, -ей, -е]
|
262
271
|
|
272
|
+
# Даша, Саша
|
273
|
+
- gender: male
|
274
|
+
test: [ша, ча, жа]
|
275
|
+
mods: [-и, -е, -у, -ей, -е]
|
276
|
+
|
263
277
|
- gender: androgynous
|
264
278
|
test: [а]
|
265
279
|
mods: [-ы, -е, -у, -ой, -е]
|
@@ -269,6 +283,16 @@ firstname:
|
|
269
283
|
test: [ия]
|
270
284
|
mods: [-и, -и, -ю, -ей, -и]
|
271
285
|
|
286
|
+
# Майка
|
287
|
+
- gender: female
|
288
|
+
test: [ка, га, ха]
|
289
|
+
mods: [-и, -е, -у, -ой, -е]
|
290
|
+
|
291
|
+
# Марица
|
292
|
+
- gender: female
|
293
|
+
test: [ца]
|
294
|
+
mods: [-ы, -е, -у, -ей, -е]
|
295
|
+
|
272
296
|
# Светлана
|
273
297
|
- gender: female
|
274
298
|
test: [а]
|
@@ -287,15 +311,19 @@ firstname:
|
|
287
311
|
test: [я]
|
288
312
|
mods: [-и, -е, -ю, -ей, -е]
|
289
313
|
|
314
|
+
# Афанасий
|
315
|
+
- gender: male
|
316
|
+
test: [ий]
|
317
|
+
mods: [-я, -ю, -я, -ем, -и]
|
318
|
+
|
290
319
|
# Андрей, *
|
291
320
|
- gender: male
|
292
321
|
test: [ей, й]
|
293
322
|
mods: [-я, -ю, -я, -ем, -е]
|
294
323
|
|
295
|
-
# Афанасий
|
296
324
|
- gender: male
|
297
|
-
test: [
|
298
|
-
mods: [
|
325
|
+
test: [ш, ж]
|
326
|
+
mods: [а, у, а, ем, е]
|
299
327
|
|
300
328
|
- gender: male
|
301
329
|
test: [б, в, г, д, ж, з, к, л, м, н, п, р, с, т, ф, х, ц, ч]
|
@@ -314,6 +342,10 @@ middlename:
|
|
314
342
|
tags: [first_word]
|
315
343
|
|
316
344
|
suffixes:
|
345
|
+
- gender: male
|
346
|
+
test: [мич, ьич, кич]
|
347
|
+
mods: [а, у, а, ом, е]
|
348
|
+
|
317
349
|
- gender: male
|
318
350
|
test: [ич]
|
319
351
|
mods: [а, у, а, ем, е]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: petrovich
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kozlov
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-07-
|
12
|
+
date: 2016-07-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|