petrovich 0.2.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +79 -177
- data/Rakefile +7 -9
- data/lib/petrovich.rb +78 -77
- data/lib/petrovich/case/rule.rb +63 -0
- data/lib/petrovich/case/rule/modifier.rb +19 -0
- data/lib/petrovich/case/rule/test.rb +23 -0
- data/lib/petrovich/gender.rb +39 -0
- data/lib/petrovich/gender/rule.rb +22 -0
- data/lib/petrovich/inflected.rb +18 -0
- data/lib/petrovich/inflector.rb +42 -0
- data/lib/petrovich/name.rb +75 -0
- data/lib/petrovich/rule_set.rb +118 -0
- data/lib/petrovich/unicode.rb +4 -3
- data/lib/petrovich/value.rb +12 -0
- data/lib/tasks/evaluate.rake +14 -32
- data/rules/rules.yml +186 -44
- metadata +57 -7
- data/lib/petrovich/extension.rb +0 -140
- data/lib/petrovich/rules.rb +0 -209
@@ -0,0 +1,63 @@
|
|
1
|
+
module Petrovich
  module Case
    # A case (inflection) rule from the set of rules.
    #
    # A rule applies to one part of a name (+as+), is restricted by +gender+,
    # carries a list of +tests+ (objects responding to +match?(name)+) and a
    # list of +modifiers+, one per oblique grammatical case.
    class Rule
      # Name parts a rule may be attached to.
      NAME_PARTS = [:lastname, :firstname, :middlename].freeze

      # Position of each oblique case's modifier inside +modifiers+.
      # The nominative case has no modifier.
      MODIFIER_INDEX = {
        genitive: 0,
        dative: 1,
        accusative: 2,
        instrumental: 3,
        prepositional: 4
      }.freeze

      attr_reader :gender, :modifiers, :tests, :tags, :as, :an_exception

      # @param opts [Hash] rule options:
      #   :gender    - gender the rule applies to (String or Symbol, any case)
      #   :as        - name part, one of NAME_PARTS
      #   :section   - rule section; :exceptions marks the rule as an exception
      #   :modifiers - modifiers ordered as in MODIFIER_INDEX
      #   :tests     - objects responding to +match?(name)+
      #   :tags      - optional list of tags attached to the rule
      # @raise [ArgumentError] if :as is not a known name part
      def initialize(opts)
        @gender = opts[:gender].to_sym.downcase
        @as = opts[:as]
        @an_exception = opts[:section] == :exceptions
        @modifiers = opts[:modifiers]
        @tests = opts[:tests]
        # Keep the tags supplied by the caller instead of discarding them.
        @tags = Array(opts[:tags])

        assert_name_part!(@as)
      end

      # Checks whether this rule applies to the given name part.
      #
      # @param name [String] a single (non-hyphenated) name part
      # @param match_gender [Symbol, String, nil] gender of the name; nil is
      #   treated as :androgynous so an undetermined gender does not raise
      # @param match_as [Symbol] which name part +name+ is
      # @return [Boolean]
      # @raise [ArgumentError] if +match_as+ is not a known name part
      def match?(name, match_gender, match_as)
        assert_name_part!(match_as)

        return false unless match_as == as

        match_gender = (match_gender || :androgynous).to_sym.downcase

        # Male rules reject female names; female rules accept female names only.
        return false if gender == :male && match_gender == :female
        return false if gender == :female && match_gender != :female

        tests.any? { |test| test.match?(name) }
      end

      # Is this an exceptional rule (loaded from the :exceptions section)?
      #
      # @return [Boolean]
      def an_exception?
        an_exception
      end

      # Returns the modifier for the requested grammatical case.
      #
      # @param name_case [Symbol, String] grammatical case name
      # @return [Object, nil] the modifier, or nil for the nominative case
      # @raise [UnknownCaseError] if the case is not recognised
      def get_modifier(name_case)
        case_name = name_case.to_sym
        return nil if case_name == :nominative

        index = MODIFIER_INDEX.fetch(case_name) do
          fail UnknownCaseError, "Unknown grammatic case: #{name_case}".freeze
        end

        modifiers[index]
      end

      private

      # Raises unless +name_part+ is one of NAME_PARTS.
      def assert_name_part!(name_part)
        return if NAME_PARTS.include?(name_part)
        fail ArgumentError, "Unknown 'as' option #{name_part}".freeze
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Petrovich
  module Case
    class Rule
      # A modifier attached to a case rule: a +suffix+ string together with
      # an +offset+ number (the inflector drops +offset+ trailing characters
      # from a word and then appends +suffix+).
      class Modifier
        attr_reader :suffix, :offset

        # @param suffix [#to_s] ending text; stored as a String
        # @param offset [Integer] stored as given, 0 by default
        def initialize(suffix, offset = 0)
          @offset = offset
          @suffix = suffix.to_s
        end

        # Compact representation: the suffix/offset pair as an array.
        def inspect
          pair = [suffix, offset]
          pair.inspect
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Petrovich
  module Case
    class Rule
      # A test for the case rule: checks whether a name ends with a given
      # suffix, ignoring letter case (via Unicode.downcase).
      class Test
        attr_reader :suffix

        # @param suffix [#to_s] ending to look for; stored downcased
        def initialize(suffix)
          @suffix = Unicode.downcase(suffix)
        end

        # @param name [#to_s] name part to check
        # @return [Boolean] true when the downcased name ends with the suffix
        def match?(name)
          Unicode.downcase(name).end_with?(suffix)
        end

        # Shows the test as its (downcased) suffix.
        def inspect
          suffix.inspect
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Petrovich
  # Methods of determining gender by the name
  module Gender
    # Detects the gender of a name from its parts.
    #
    # Each present part (lastname, firstname, middlename) is looked up in the
    # rule set; a part with no matching rule counts as :androgynous. For a
    # hyphenated part the result of its last segment is the one kept.
    #
    # Resolution order:
    # 1. a middlename with a definite (non-androgynous) gender wins;
    # 2. if the parts disagree and exactly one of firstname/lastname is
    #    androgynous, the other one decides;
    # 3. if all recognised parts agree, that gender is returned.
    #
    # @param name [Object] anything accepted by Petrovich.normalize_name
    # @return [Symbol, nil] the detected gender, or nil when no part is
    #   present or the parts conflict
    def self.detect(name)
      name = Petrovich.normalize_name(name)
      rule_set = Petrovich.rule_set
      genders = {}

      Petrovich.assert_name!(name)

      [:lastname, :firstname, :middlename].each do |name_part|
        next unless name.respond_to?(name_part)

        value = name.public_send(name_part)
        next unless value

        rule_set.find_all_gender_rules(value, name_part).each do |rule|
          genders[name_part] = rule ? rule.gender : :androgynous
        end
      end

      # Return gender if middlename is specified and gender is determined.
      middlename_gender = genders[:middlename]
      return middlename_gender if middlename_gender && middlename_gender != :androgynous

      detected = genders.values.uniq

      if detected.size > 1
        firstname_gender = genders[:firstname]
        lastname_gender = genders[:lastname]

        return firstname_gender if firstname_gender != :androgynous && lastname_gender == :androgynous
        return lastname_gender if lastname_gender != :androgynous && firstname_gender == :androgynous
      end

      # Otherwise, return what was recognised (nil when ambiguous or empty).
      detected.first if detected.size == 1
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Petrovich
  module Gender
    # A gender rule from the set of rules: names of kind +as+ that end with
    # +suffix+ have the given +gender+.
    class Rule
      attr_reader :gender, :as, :suffix

      # TODO: check options (see Case::Rule)
      #
      # @param opts [Hash] :gender, :as (name part) and :suffix (String)
      def initialize(opts)
        @gender = opts[:gender]
        @as = opts[:as]
        @suffix = opts[:suffix]
      end

      # @param name [#to_s] name part to check; downcased before comparison
      # @param match_as [Symbol] which name part +name+ is
      # @return [Boolean] true when +match_as+ equals this rule's +as+ and
      #   the downcased name ends with the rule's suffix (the suffix itself
      #   is compared as stored, without downcasing)
      def match?(name, match_as)
        return false unless match_as == as

        Unicode.downcase(name).end_with?(suffix)
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'forwardable'

module Petrovich
  # Keeps an inflected name and exposes its parts.
  #
  # The wrapped object must respond to +lastname+, +firstname+ and
  # +middlename+.
  class Inflected
    extend Forwardable

    def_delegators :@name, :lastname, :firstname, :middlename

    # @param name [Object] object holding the inflected name parts
    def initialize(name)
      @name = name
    end

    # @return [String] the present (non-nil) parts in the order lastname,
    #   firstname, middlename, separated by single spaces
    def to_s
      [lastname, firstname, middlename].compact.join(' ')
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Petrovich
  # Applies case rules to the parts of a name.
  #
  # The rules passed to the +inflect_*+ methods are matched positionally
  # against the hyphen-separated segments of the name part; each rule must
  # respond to +get_modifier(name_case)+ and may be nil.
  class Inflector
    # @param name [Object] name accepted by Petrovich.assert_name! and
    #   Petrovich.normalize_name
    # @param gender [Symbol, nil] stored gender of the name
    # @param name_case [Symbol, String] grammatical case to inflect to
    def initialize(name, gender, name_case)
      Petrovich.assert_name!(name)

      @name = Petrovich.normalize_name(name)
      @gender = gender
      @name_case = name_case
    end

    # @param rules [Array] one rule (or nil) per hyphen-separated segment
    # @return [String] the inflected lastname
    def inflect_lastname(rules)
      inflect(@name.lastname, rules)
    end

    # @param rules [Array] one rule (or nil) per hyphen-separated segment
    # @return [String] the inflected firstname
    def inflect_firstname(rules)
      inflect(@name.firstname, rules)
    end

    # @param rules [Array] one rule (or nil) per hyphen-separated segment
    # @return [String] the inflected middlename
    def inflect_middlename(rules)
      inflect(@name.middlename, rules)
    end

    private

    # Inflects every hyphen-separated segment of +name+ with the rule at the
    # same position. Segments without a rule or modifier are left unchanged.
    # The +rules+ array is only read, never modified.
    def inflect(name, rules)
      return name if rules.empty?

      inflected = name.split('-').each_with_index.map do |part, index|
        rule = rules[index]
        modifier = rule && rule.get_modifier(@name_case)
        modifier ? apply_modifier(part, modifier) : part
      end

      inflected.join('-')
    end

    # Drops +modifier.offset+ trailing characters and appends the suffix.
    # An offset longer than the word removes the whole word instead of
    # failing on a negative slice length.
    def apply_modifier(part, modifier)
      keep = [part.size - modifier.offset, 0].max
      part.slice(0, keep) + modifier.suffix
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'forwardable'

module Petrovich
  # A person's name (lastname, firstname, middlename) with an optional
  # explicit gender, which can be inflected into any of Petrovich::CASES.
  class Name
    extend Forwardable

    def_delegators :@name, :lastname, :firstname, :middlename

    # @param opts [Hash] :lastname, :firstname, :middlename and an optional
    #   :gender (:male, :female or :androgynous)
    def initialize(opts)
      @rule_set = Petrovich.rule_set
      @gender = opts[:gender]
      @name = Petrovich.normalize_name(
        lastname: opts[:lastname],
        firstname: opts[:firstname],
        middlename: opts[:middlename]
      )
    end

    # @return [Symbol, nil] the explicitly given gender when it is one of
    #   :male, :female, :androgynous; otherwise whatever Gender.detect
    #   returns for the name
    def gender
      explicit = @gender && @gender.to_sym
      return explicit if [:male, :female, :androgynous].include?(explicit)

      Gender.detect(@name)
    end

    # The predicates below go through #gender, so an explicitly supplied
    # gender is honoured instead of being re-detected from the name.

    # @return [Boolean]
    def male?
      gender == :male
    end

    # @return [Boolean]
    def female?
      gender == :female
    end

    # @return [Boolean]
    def androgynous?
      gender == :androgynous
    end

    # Inflects the name to the given grammatical case.
    #
    # @param name_case [Symbol] validated by Petrovich.assert_case!
    # @return [Inflected] inflected copy; this object is left untouched
    def to(name_case)
      Petrovich.assert_case!(name_case)
      Inflected.new(inflect(@name.dup, gender, name_case))
    end

    # @return [String] present (non-nil) parts joined by single spaces,
    #   matching Inflected#to_s
    def to_s
      [lastname, firstname, middlename].compact.join(' ')
    end

    # Defines one shortcut method per case, each equivalent to to(case).
    Petrovich::CASES.each do |name_case|
      define_method name_case do
        to(name_case)
      end
    end

    private

    # Inflects every present part of +name+ in place and returns +name+.
    # Parts are processed in the order lastname, firstname, middlename.
    def inflect(name, gender, name_case)
      inflector = Inflector.new(name, gender, name_case)

      [:lastname, :firstname, :middlename].each do |part|
        value = name.public_send(part)
        next if value.nil?

        rules = @rule_set.find_all_case_rules(value, gender, part)
        next unless rules

        name.public_send("#{part}=", inflector.public_send("inflect_#{part}", rules))
      end

      name
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Petrovich
  # A set of case and gender rules, normally loaded from the bundled
  # YAML file.
  class RuleSet
    def initialize
      clear!
    end

    # Appends a case rule to the set.
    #
    # @param rule [Petrovich::Case::Rule]
    # @raise [ArgumentError] if +rule+ is not a Case::Rule
    def add_case_rule(rule)
      unless rule.is_a?(Case::Rule)
        fail ArgumentError, 'Expecting rule of type Petrovich::Case::Rule'.freeze
      end

      @case_rules << rule
    end

    # Appends a gender rule to the set.
    #
    # @param rule [Petrovich::Gender::Rule]
    # @raise [ArgumentError] if +rule+ is not a Gender::Rule
    def add_gender_rule(rule)
      unless rule.is_a?(Gender::Rule)
        fail ArgumentError, 'Expecting rule of type Petrovich::Gender::Rule'.freeze
      end

      @gender_rules << rule
    end

    # Finds the first matching case rule for every hyphen-separated segment.
    #
    # @return [Array] one rule or nil per segment of +name+
    def find_all_case_rules(name, gender, as)
      name.split('-').map { |part| find_case_rule(part, gender, as) }
    end

    # Finds the first matching gender rule for every hyphen-separated segment.
    #
    # @return [Array] one rule or nil per segment of +name+
    def find_all_gender_rules(name, as)
      name.split('-').map { |part| find_gender_rule(part, as) }
    end

    # Removes all rules from the set.
    def clear!
      @case_rules = []
      @gender_rules = []
    end

    # Loads the rules from rules/rules.yml unless case rules are already
    # present.
    #
    # @return [Boolean] false when rules were already loaded, true after a
    #   successful load
    def load!
      return false unless @case_rules.empty?

      rules = YAML.load_file(
        File.expand_path('../../../rules/rules.yml', __FILE__)
      )

      load_case_rules!(rules)
      load_gender_rules!(rules)
      true
    end

    private

    # Load rules for names. Exceptions are added before suffix rules, so
    # they are found first. Missing name parts or sections are skipped.
    def load_case_rules!(rules)
      [:lastname, :firstname, :middlename].each do |name_part|
        sections = rules[name_part.to_s] || {}

        [:exceptions, :suffixes].each do |section|
          entries = sections[section.to_s]
          next if entries.nil?

          entries.each { |entry| load_case_entry(name_part, section, entry) }
        end
      end
    end

    # Load rules for genders. Missing keys are skipped.
    def load_gender_rules!(rules)
      gender_rules = rules['gender'] || {}

      [:lastname, :firstname, :middlename].each do |name_part|
        sections = gender_rules[name_part.to_s] || {}

        # First, add androgynous rules. Order matters.
        [:androgynous, :male, :female].each do |section|
          entries = sections[section.to_s]
          next if entries.nil?

          entries.each { |entry| load_gender_entry(name_part, section, entry) }
        end
      end
    end

    # First case rule (in insertion order) matching the segment, or nil.
    def find_case_rule(name, gender, as)
      @case_rules.find { |rule| rule.match?(name, gender, as) }
    end

    # First gender rule (in insertion order) matching the segment, or nil.
    def find_gender_rule(name, as)
      @gender_rules.find { |rule| rule.match?(name, as) }
    end

    # Builds a Case::Rule from one YAML entry and adds it to the set.
    def load_case_entry(as, section, entry)
      modifiers = entry['mods'].map do |mod|
        # The first run of characters other than '.' and '-' is the suffix;
        # the number of '-' characters is the offset.
        suffix = mod.scan(/[^.-]+/).first
        offset = mod.count('-')
        Petrovich::Case::Rule::Modifier.new(suffix, offset)
      end

      tests = entry['test'].map do |suffix|
        Petrovich::Case::Rule::Test.new(suffix)
      end

      add_case_rule Petrovich::Case::Rule.new(
        gender: entry['gender'],
        as: as,
        section: section,
        modifiers: modifiers,
        tests: tests,
        tags: entry['tags']
      )
    end

    # Builds a Gender::Rule from one YAML entry (a suffix) and adds it.
    def load_gender_entry(as, section, entry)
      add_gender_rule Gender::Rule.new(
        as: as,
        gender: section,
        suffix: entry
      )
    end
  end
end
|
data/lib/petrovich/unicode.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
module Petrovich
|
2
|
+
# Custom downcase and upcase methods for russian language.
|
2
3
|
module Unicode
|
3
4
|
RU_UPPER = [
|
4
5
|
"\u0410", "\u0411", "\u0412", "\u0413", "\u0414", "\u0415", "\u0416", "\u0417",
|
@@ -16,11 +17,11 @@ class Petrovich
|
|
16
17
|
"\u0451" # Ё
|
17
18
|
].join
|
18
19
|
|
19
|
-
def downcase(entry)
|
20
|
+
def self.downcase(entry)
|
20
21
|
entry.to_s.tr(RU_UPPER, RU_LOWER)
|
21
22
|
end
|
22
23
|
|
23
|
-
def upcase(entry)
|
24
|
+
def self.upcase(entry)
|
24
25
|
entry.to_s.tr(RU_LOWER, RU_UPPER)
|
25
26
|
end
|
26
27
|
end
|