petrovich 0.2.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +79 -177
- data/Rakefile +7 -9
- data/lib/petrovich.rb +78 -77
- data/lib/petrovich/case/rule.rb +63 -0
- data/lib/petrovich/case/rule/modifier.rb +19 -0
- data/lib/petrovich/case/rule/test.rb +23 -0
- data/lib/petrovich/gender.rb +39 -0
- data/lib/petrovich/gender/rule.rb +22 -0
- data/lib/petrovich/inflected.rb +18 -0
- data/lib/petrovich/inflector.rb +42 -0
- data/lib/petrovich/name.rb +75 -0
- data/lib/petrovich/rule_set.rb +118 -0
- data/lib/petrovich/unicode.rb +4 -3
- data/lib/petrovich/value.rb +12 -0
- data/lib/tasks/evaluate.rake +14 -32
- data/rules/rules.yml +186 -44
- metadata +57 -7
- data/lib/petrovich/extension.rb +0 -140
- data/lib/petrovich/rules.rb +0 -209
metadata
CHANGED
@@ -1,16 +1,44 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: petrovich
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Andrew
|
7
|
+
- Andrew Kozlov
|
8
8
|
- Dmitry Ustalov
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2016-03-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: commander
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - '='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 4.3.5
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - '='
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 4.3.5
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '10.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '10.0'
|
14
42
|
- !ruby/object:Gem::Dependency
|
15
43
|
name: minitest
|
16
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -25,8 +53,22 @@ dependencies:
|
|
25
53
|
- - ">="
|
26
54
|
- !ruby/object:Gem::Version
|
27
55
|
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: minitest-reporters
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
28
70
|
description: A library to inflect Russian anthroponyms such as first names, last names,
|
29
|
-
and middle names.
|
71
|
+
and middle names. Also it has gender detection functionality.
|
30
72
|
email:
|
31
73
|
- demerest@gmail.com
|
32
74
|
- dmitry@eveel.ru
|
@@ -38,9 +80,17 @@ files:
|
|
38
80
|
- README.md
|
39
81
|
- Rakefile
|
40
82
|
- lib/petrovich.rb
|
41
|
-
- lib/petrovich/extension.rb
|
42
|
-
- lib/petrovich/rules.rb
|
83
|
+
- lib/petrovich/case/rule.rb
|
84
|
+
- lib/petrovich/case/rule/modifier.rb
|
85
|
+
- lib/petrovich/case/rule/test.rb
|
86
|
+
- lib/petrovich/gender.rb
|
87
|
+
- lib/petrovich/gender/rule.rb
|
88
|
+
- lib/petrovich/inflected.rb
|
89
|
+
- lib/petrovich/inflector.rb
|
90
|
+
- lib/petrovich/name.rb
|
91
|
+
- lib/petrovich/rule_set.rb
|
43
92
|
- lib/petrovich/unicode.rb
|
93
|
+
- lib/petrovich/value.rb
|
44
94
|
- lib/tasks/evaluate.rake
|
45
95
|
- rules/rules.yml
|
46
96
|
homepage: https://github.com/petrovich/petrovich-ruby
|
@@ -55,7 +105,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
55
105
|
requirements:
|
56
106
|
- - ">="
|
57
107
|
- !ruby/object:Gem::Version
|
58
|
-
version: 1.9.
|
108
|
+
version: 1.9.3
|
59
109
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
110
|
requirements:
|
61
111
|
- - ">="
|
data/lib/petrovich/extension.rb
DELETED
@@ -1,140 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
class Petrovich
|
4
|
-
# Этот модуль разработан для возможности его подмешивания в класс Ruby.
|
5
|
-
# Его можно подмешать в любой класс, например, в модель ActiveRecord.
|
6
|
-
#
|
7
|
-
# При помощи вызова метода +petrovich+ вы указываете, какие аттрибуты или методы класса
|
8
|
-
# будут возвращать фамилию, имя и отчество.
|
9
|
-
#
|
10
|
-
# Опции:
|
11
|
-
#
|
12
|
-
# [:+firstname+]
|
13
|
-
# Указывает метод, возвращающий имя
|
14
|
-
#
|
15
|
-
# [:+middlename+]
|
16
|
-
# Указывает метод, возвращающий отчество
|
17
|
-
#
|
18
|
-
# [:+lastname+]
|
19
|
-
# Указывает метод, возвращающий фамилию
|
20
|
-
#
|
21
|
-
# [:+gender+]
|
22
|
-
# Указывает метод, возвращающий пол. Если пол не был указан, используется автоматическое определение
|
23
|
-
# пола на основе отчества. Если отчество также не было указано, пытаемся определить правильное склонение
|
24
|
-
# на основе файла правил.
|
25
|
-
#
|
26
|
-
# Пример использования
|
27
|
-
#
|
28
|
-
# class User
|
29
|
-
# include Petrovich::Extension
|
30
|
-
#
|
31
|
-
# petrovich :firstname => :my_firstname,
|
32
|
-
# :middlename => :my_middlename,
|
33
|
-
# :lastname => :my_lastname,
|
34
|
-
# :gender => :my_gender
|
35
|
-
#
|
36
|
-
# def my_firstname
|
37
|
-
# 'Пётр'
|
38
|
-
# end
|
39
|
-
#
|
40
|
-
# def my_middlename
|
41
|
-
# 'Александрович'
|
42
|
-
# end
|
43
|
-
#
|
44
|
-
# def my_lastname
|
45
|
-
# 'Ларин'
|
46
|
-
# end
|
47
|
-
#
|
48
|
-
# def my_gender
|
49
|
-
# :male # :male, :female или :androgynous
|
50
|
-
# end
|
51
|
-
#
|
52
|
-
# end
|
53
|
-
#
|
54
|
-
# Вы получите следующие методы
|
55
|
-
#
|
56
|
-
# user = User.new
|
57
|
-
# user.my_lastname_dative # => Ларину
|
58
|
-
# user.my_firstname_dative # => Петру
|
59
|
-
# user.my_middlename_dative # => Александровичу
|
60
|
-
#
|
61
|
-
# Вышеперечисленные методы доступны и внутри класса User.
|
62
|
-
#
|
63
|
-
module Extension
|
64
|
-
def self.included(base)
|
65
|
-
base.extend ClassMethods
|
66
|
-
end
|
67
|
-
|
68
|
-
module ClassMethods
|
69
|
-
def petrovich_configuration
|
70
|
-
@petrovich_configuration ||= {
|
71
|
-
:lastname => nil,
|
72
|
-
:firstname => nil,
|
73
|
-
:middlename => nil,
|
74
|
-
:gender => nil
|
75
|
-
}
|
76
|
-
end
|
77
|
-
|
78
|
-
def petrovich(options)
|
79
|
-
self.petrovich_configuration.update(options)
|
80
|
-
end
|
81
|
-
|
82
|
-
def inherited(subclass)
|
83
|
-
subclass.petrovich_configuration.update(self.petrovich_configuration)
|
84
|
-
super
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def petrovich_create_getter(method_name, attribute, gcase)
|
89
|
-
options = self.class.petrovich_configuration
|
90
|
-
reflection = options.key(attribute.to_sym) or
|
91
|
-
raise "No reflection for attribute '#{attribute}'!"
|
92
|
-
|
93
|
-
self.class.send(:define_method, method_name) do
|
94
|
-
# detect by gender attr if defined
|
95
|
-
gender = options[:gender] && send(options[:gender])
|
96
|
-
# detect by middlename attr if defined
|
97
|
-
gender ||= begin
|
98
|
-
middlename = options[:middlename] && send(options[:middlename])
|
99
|
-
middlename && Petrovich.detect_gender(middlename)
|
100
|
-
end
|
101
|
-
|
102
|
-
rn = Petrovich.new gender
|
103
|
-
rn.send reflection, send(attribute), gcase
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def method_missing(method_name, *args, &block)
|
108
|
-
if match = method_name.to_s.match(petrovich_method_regex)
|
109
|
-
attribute = match[1]
|
110
|
-
gcase = match[2]
|
111
|
-
|
112
|
-
petrovich_create_getter(method_name, attribute, gcase)
|
113
|
-
|
114
|
-
if respond_to_without_petrovich?(method_name)
|
115
|
-
send method_name
|
116
|
-
else
|
117
|
-
super
|
118
|
-
end
|
119
|
-
else
|
120
|
-
super
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
alias :respond_to_without_petrovich? :respond_to?
|
125
|
-
|
126
|
-
def respond_to?(method_name, include_private = false)
|
127
|
-
if match = method_name.to_s.match(petrovich_method_regex)
|
128
|
-
true
|
129
|
-
else
|
130
|
-
super
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
def petrovich_method_regex
|
135
|
-
%r{(.+)_(#{Petrovich::CASES.join('|')})$}
|
136
|
-
end
|
137
|
-
|
138
|
-
protected :petrovich_method_regex
|
139
|
-
end
|
140
|
-
end
|
data/lib/petrovich/rules.rb
DELETED
@@ -1,209 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
class Petrovich
|
4
|
-
# Загрузка правил происходит один раз
|
5
|
-
RULES = YAML.load_file(File.expand_path('../../../rules/rules.yml', __FILE__))
|
6
|
-
|
7
|
-
class UnknownCaseException < Exception;;end
|
8
|
-
class UnknownRuleException < Exception;;end
|
9
|
-
class CantApplyRuleException < Exception;;end
|
10
|
-
|
11
|
-
# Набор методов для нахождения и применения правил к имени, фамилии и отчеству.
|
12
|
-
class Rules
|
13
|
-
include Petrovich::Unicode
|
14
|
-
|
15
|
-
attr_reader :gender
|
16
|
-
|
17
|
-
Matchers = [
|
18
|
-
proc {| x, y, i | y[ 0 ].size <=> x[ 0 ].size },
|
19
|
-
proc {| x, y, i | x[ 1 ][ 'gender' ] != i.gender && 1 ||
|
20
|
-
y[ 1 ][ 'gender' ] != i.gender && -1 || 0 },
|
21
|
-
proc {| x, y, i | y[ 1 ][ 'test' ][ 0 ].size <=>
|
22
|
-
x[ 1 ][ 'test' ][ 0 ].size },
|
23
|
-
proc {| x, y, i | x[ 1 ][ 'test' ][ 0 ] <=> y[ 1 ][ 'test' ][ 0 ] } ]
|
24
|
-
|
25
|
-
def initialize(gender = nil)
|
26
|
-
@gender = gender
|
27
|
-
end
|
28
|
-
|
29
|
-
# Определяет методы +lastname_<i>case</i>+, +firstname_<i>case</i>+ и +middlename_<i>case</i>+
|
30
|
-
# для получения имени, фамилии и отчества в нужном падеже.
|
31
|
-
#
|
32
|
-
# Использование:
|
33
|
-
#
|
34
|
-
# # Дательный падеж
|
35
|
-
# lastname_dative('Комаров') # => Комарову
|
36
|
-
#
|
37
|
-
# # Винительный падеж
|
38
|
-
# lastname_accusative('Комаров') # => Комарова
|
39
|
-
#
|
40
|
-
[:lastname, :firstname, :middlename].each do |method_name|
|
41
|
-
define_method(method_name) do |name, gcase, scase|
|
42
|
-
inflect(name, gcase, scase, Petrovich::RULES[method_name.to_s])
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
protected
|
47
|
-
# Известно несколько типов признаков, которые влияют на процесс поиска.
|
48
|
-
#
|
49
|
-
# Признак +first_word+ указывает, что данное слово является первым словом
|
50
|
-
# в составном слове. Например, в двойной русской фамилии Иванов-Сидоров.
|
51
|
-
#
|
52
|
-
def match?(name, gcase, scase, rule, match_whole_word, tags)
|
53
|
-
return false unless tags_allow? tags, rule['tags']
|
54
|
-
return false if rule['gender'] == 'male' && female? ||
|
55
|
-
rule['gender'] == 'female' && !female?
|
56
|
-
|
57
|
-
rule['test'].each do |chars|
|
58
|
-
begin
|
59
|
-
chars = apply(chars, rule, scase, gcase) if scase != NOMINATIVE
|
60
|
-
rescue CantApplyRuleException
|
61
|
-
next
|
62
|
-
end
|
63
|
-
|
64
|
-
test = match_whole_word ? name : name.slice([name.size - chars.size, 0].max .. -1)
|
65
|
-
return chars if test == chars
|
66
|
-
end
|
67
|
-
|
68
|
-
false
|
69
|
-
end
|
70
|
-
|
71
|
-
def male?
|
72
|
-
@gender == 'male'
|
73
|
-
end
|
74
|
-
|
75
|
-
def female?
|
76
|
-
@gender == 'female'
|
77
|
-
end
|
78
|
-
|
79
|
-
def inflect(name, gcase, scase, rules)
|
80
|
-
i = 0
|
81
|
-
|
82
|
-
parts = name.split('-')
|
83
|
-
|
84
|
-
parts.map! do |part|
|
85
|
-
first_word = (i += 1) == 1 && parts.size > 1
|
86
|
-
find_and_apply(part, gcase, scase, rules, first_word: first_word)
|
87
|
-
end
|
88
|
-
|
89
|
-
parts.join('-')
|
90
|
-
end
|
91
|
-
|
92
|
-
# Применить правило
|
93
|
-
def apply(name, rule, gcase, scase)
|
94
|
-
mod = modificator_from(scase, rule) + modificator_for(gcase, rule)
|
95
|
-
skip = 0
|
96
|
-
mod.each_char do |char|
|
97
|
-
case char
|
98
|
-
when '.'
|
99
|
-
when '-'
|
100
|
-
raise CantApplyRuleException if name.empty?
|
101
|
-
name = name.slice(0, name.size - 1)
|
102
|
-
else
|
103
|
-
name += char
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
name
|
108
|
-
end
|
109
|
-
|
110
|
-
# Найти правило и применить к имени с учетом склонения
|
111
|
-
def find_and_apply(name, gcase, scase, rules, features = {})
|
112
|
-
rule = find_for(name, gcase, scase, rules, features)
|
113
|
-
apply(name, rule, gcase, scase)
|
114
|
-
rescue UnknownRuleException, CantApplyRuleException
|
115
|
-
# Если не найдено правило для имени, или случилась ошибка применения
|
116
|
-
# правила, возвращаем неизмененное имя.
|
117
|
-
name
|
118
|
-
end
|
119
|
-
|
120
|
-
# Найти подходящее правило в исключениях или суффиксах
|
121
|
-
def find_for(name, gcase, scase, rules, features = {})
|
122
|
-
tags = extract_tags(features)
|
123
|
-
|
124
|
-
# Сначала пытаемся найти исключения
|
125
|
-
if rules.has_key?('exceptions')
|
126
|
-
p = find(name, gcase, scase, rules['exceptions'], true, tags)
|
127
|
-
return p if p
|
128
|
-
end
|
129
|
-
|
130
|
-
# Не получилось, ищем в суффиксах. Если не получилось найти и в них,
|
131
|
-
# возвращаем неизмененное имя.
|
132
|
-
find(name, gcase, scase, rules['suffixes'], false, tags) ||
|
133
|
-
raise( UnknownRuleException, "Cannot find rule for #{name}" )
|
134
|
-
end
|
135
|
-
|
136
|
-
# Найти подходящее правило в конкретном списке правил
|
137
|
-
def find(name, gcase, scase, rules, match_whole_word, tags)
|
138
|
-
name = downcase(name)
|
139
|
-
first =
|
140
|
-
rules.map do| rule |
|
141
|
-
score = match?(name, gcase, scase, rule, match_whole_word, tags)
|
142
|
-
score && [ score, rule ] || nil
|
143
|
-
end.compact.sort do| x, y |
|
144
|
-
Matchers.reduce( 0 ) do| c, m |
|
145
|
-
c = m.call( x, y, self )
|
146
|
-
break c if c != 0
|
147
|
-
end
|
148
|
-
end.first
|
149
|
-
|
150
|
-
first && first[ 1 ]
|
151
|
-
end
|
152
|
-
|
153
|
-
# Получить модификатор из указанного правиля для указанного склонения
|
154
|
-
def modificator_for(gcase, rule)
|
155
|
-
case gcase.to_sym
|
156
|
-
when NOMINATIVE
|
157
|
-
'.'
|
158
|
-
when GENITIVE
|
159
|
-
rule['mods'][0]
|
160
|
-
when DATIVE
|
161
|
-
rule['mods'][1]
|
162
|
-
when ACCUSATIVE
|
163
|
-
rule['mods'][2]
|
164
|
-
when INSTRUMENTAL
|
165
|
-
rule['mods'][3]
|
166
|
-
when PREPOSITIONAL
|
167
|
-
rule['mods'][4]
|
168
|
-
else
|
169
|
-
raise UnknownCaseException, "Unknown grammatic case: #{gcase}"
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
# Получить модификатор из указанного правила для преобразования
|
174
|
-
# из указанного склонения
|
175
|
-
def modificator_from(scase, rule)
|
176
|
-
return '.' if scase.to_sym == NOMINATIVE
|
177
|
-
|
178
|
-
# TODO т.к. именительный падеж не может быть восстановлен
|
179
|
-
# в некоторых случаях верно, используется первый попавшийся вариант
|
180
|
-
# видимо нужно менять формат таблицы, или развязывать варианты,
|
181
|
-
# находящиеся в поле test
|
182
|
-
base = rule['test'][0].unpack('U*')
|
183
|
-
mod = modificator_for(scase, rule).unpack('U*')
|
184
|
-
mod.map do | char |
|
185
|
-
case char
|
186
|
-
when 46 # '.'
|
187
|
-
46
|
188
|
-
when 45 # '-'
|
189
|
-
base.pop
|
190
|
-
else
|
191
|
-
45
|
192
|
-
end
|
193
|
-
end.reverse.pack('U*')
|
194
|
-
end
|
195
|
-
|
196
|
-
# Преобразование +{a: true, b: false, c: true}+ в +%w(a c)+.
|
197
|
-
def extract_tags(features = {})
|
198
|
-
features.keys.select { |k| features[k] == true }.map(&:to_s)
|
199
|
-
end
|
200
|
-
|
201
|
-
# Правило не подходит только в том случае, если оно содержит больше
|
202
|
-
# тегов, чем требуется для данного слова.
|
203
|
-
#
|
204
|
-
def tags_allow?(tags, rule_tags)
|
205
|
-
rule_tags ||= []
|
206
|
-
(rule_tags - tags).empty?
|
207
|
-
end
|
208
|
-
end
|
209
|
-
end
|