petrovich 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/petrovich.rb +6 -4
- data/lib/petrovich/rules.rb +3 -1
- data/lib/petrovich/unicode.rb +27 -0
- data/lib/tasks/evaluate.rake +6 -1
- metadata +4 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 472235efdec99b791c56797b53fbbc8c8685a8f5
|
4
|
+
data.tar.gz: 84ebcaa80006b7939fdb426adda30dd3102ac22e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c32dd5188b1345645e6ab1bdc2f068176c609d42ec8fd802d48b41b914dac96059004f8a34ae612c186f5741fdd46d11d2899fe790cbae9aa1e5cc59c51f196
|
7
|
+
data.tar.gz: 7e3c8db37df8c7c8216b77a30ada9068a1dd4e1920924e4a763da27c765d5b3ae7300328933150ce59ce3d0f2ca49781ba3502794d09d351135155716408758d
|
data/README.md
CHANGED
@@ -50,7 +50,7 @@ p.middlename('Сергеевич', :dative) # => Сергеевичу
|
|
50
50
|
|
51
51
|
Важно понимать, что явное указание пола повышает аккуратность обработки слов.
|
52
52
|
Если пол неизвестен, однако известно отчество, то гем постарается
|
53
|
-
определить по
|
53
|
+
определить пол по отчеству на основе простой эвристики.
|
54
54
|
|
55
55
|
### Продвинутое использование
|
56
56
|
|
data/lib/petrovich.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'yaml'
|
3
|
-
require '
|
3
|
+
require 'petrovich/unicode'
|
4
4
|
require 'petrovich/rules'
|
5
5
|
require 'petrovich/extension'
|
6
6
|
|
7
7
|
# Склонение падежей русских имён фамилий и отчеств. Вы задаёте начальное имя в именительном падеже,
|
8
8
|
# а получаете в нужном вам.
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# Использование
|
11
|
-
#
|
11
|
+
#
|
12
12
|
# # Склонение в дательном падеже
|
13
13
|
# rn = Petrovich.new
|
14
14
|
# puts rn.firstname('Иван', :dative) # => Ивану
|
@@ -67,6 +67,8 @@ class Petrovich
|
|
67
67
|
end
|
68
68
|
|
69
69
|
class << self
|
70
|
+
include Petrovich::Unicode
|
71
|
+
|
70
72
|
# Определение пола по отчеству
|
71
73
|
#
|
72
74
|
# detect_gender('Алексеевич') # => male
|
@@ -76,7 +78,7 @@ class Petrovich
|
|
76
78
|
# detect_gender('блаблабла') # => androgynous
|
77
79
|
#
|
78
80
|
def detect_gender(midname)
|
79
|
-
case
|
81
|
+
case downcase(midname[-2, 2])
|
80
82
|
when /ич|ыч/
|
81
83
|
'male'
|
82
84
|
when 'на'
|
data/lib/petrovich/rules.rb
CHANGED
@@ -10,6 +10,8 @@ class Petrovich
|
|
10
10
|
|
11
11
|
# Набор методов для нахождения и применения правил к имени, фамилии и отчеству.
|
12
12
|
class Rules
|
13
|
+
include Petrovich::Unicode
|
14
|
+
|
13
15
|
attr_reader :gender
|
14
16
|
|
15
17
|
Matchers = [
|
@@ -133,7 +135,7 @@ class Petrovich
|
|
133
135
|
|
134
136
|
# Найти подходящее правило в конкретном списке правил
|
135
137
|
def find(name, gcase, scase, rules, match_whole_word, tags)
|
136
|
-
name =
|
138
|
+
name = downcase(name)
|
137
139
|
first =
|
138
140
|
rules.map do| rule |
|
139
141
|
score = match?(name, gcase, scase, rule, match_whole_word, tags)
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class Petrovich
|
2
|
+
module Unicode
|
3
|
+
RU_UPPER = [
|
4
|
+
"\u0410", "\u0411", "\u0412", "\u0413", "\u0414", "\u0415", "\u0416", "\u0417",
|
5
|
+
"\u0418", "\u0419", "\u041A", "\u041B", "\u041C", "\u041D", "\u041E", "\u041F",
|
6
|
+
"\u0420", "\u0421", "\u0422", "\u0423", "\u0424", "\u0425", "\u0426", "\u0427",
|
7
|
+
"\u0428", "\u0429", "\u042A", "\u042B", "\u042C", "\u042D", "\u042E", "\u042F",
|
8
|
+
"\u0401" # Ё
|
9
|
+
].join
|
10
|
+
|
11
|
+
RU_LOWER = [
|
12
|
+
"\u0430", "\u0431", "\u0432", "\u0433", "\u0434", "\u0435", "\u0436", "\u0437",
|
13
|
+
"\u0438", "\u0439", "\u043A", "\u043B", "\u043C", "\u043D", "\u043E", "\u043F",
|
14
|
+
"\u0440", "\u0441", "\u0442", "\u0443", "\u0444", "\u0445", "\u0446", "\u0447",
|
15
|
+
"\u0448", "\u0449", "\u044A", "\u044B", "\u044C", "\u044D", "\u044E", "\u044F",
|
16
|
+
"\u0451" # ё
|
17
|
+
].join
|
18
|
+
|
19
|
+
def downcase(entry)
|
20
|
+
entry.to_s.tr(RU_UPPER, RU_LOWER)
|
21
|
+
end
|
22
|
+
|
23
|
+
def upcase(entry)
|
24
|
+
entry.to_s.tr(RU_LOWER, RU_UPPER)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/tasks/evaluate.rake
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'csv'
|
4
|
+
require 'petrovich/unicode'
|
5
|
+
|
6
|
+
class Object
|
7
|
+
include Petrovich::Unicode
|
8
|
+
end
|
4
9
|
|
5
10
|
CASES = [
|
6
11
|
:nominative,
|
@@ -13,7 +18,7 @@ CASES = [
|
|
13
18
|
|
14
19
|
def check!(errors, correct, total, lemma, gender, gcase, expected)
|
15
20
|
inflector = Petrovich.new(gender)
|
16
|
-
inflection =
|
21
|
+
inflection = upcase(inflector.lastname(lemma, gcase))
|
17
22
|
|
18
23
|
total[[gender, gcase]] += 1
|
19
24
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: petrovich
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kozloff
|
@@ -9,22 +9,8 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-11-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: unicode_utils
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
requirements:
|
18
|
-
- - "~>"
|
19
|
-
- !ruby/object:Gem::Version
|
20
|
-
version: '1.4'
|
21
|
-
type: :runtime
|
22
|
-
prerelease: false
|
23
|
-
version_requirements: !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - "~>"
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: '1.4'
|
28
14
|
- !ruby/object:Gem::Dependency
|
29
15
|
name: minitest
|
30
16
|
requirement: !ruby/object:Gem::Requirement
|
@@ -54,6 +40,7 @@ files:
|
|
54
40
|
- lib/petrovich.rb
|
55
41
|
- lib/petrovich/extension.rb
|
56
42
|
- lib/petrovich/rules.rb
|
43
|
+
- lib/petrovich/unicode.rb
|
57
44
|
- lib/tasks/evaluate.rake
|
58
45
|
- rules/rules.yml
|
59
46
|
homepage: https://github.com/petrovich/petrovich-ruby
|
@@ -76,7 +63,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
63
|
version: '0'
|
77
64
|
requirements: []
|
78
65
|
rubyforge_project:
|
79
|
-
rubygems_version: 2.
|
66
|
+
rubygems_version: 2.4.2
|
80
67
|
signing_key:
|
81
68
|
specification_version: 4
|
82
69
|
summary: Automatic inflection of Russian anthroponyms
|