petrovich 0.2.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +79 -177
- data/Rakefile +7 -9
- data/lib/petrovich.rb +78 -77
- data/lib/petrovich/case/rule.rb +63 -0
- data/lib/petrovich/case/rule/modifier.rb +19 -0
- data/lib/petrovich/case/rule/test.rb +23 -0
- data/lib/petrovich/gender.rb +39 -0
- data/lib/petrovich/gender/rule.rb +22 -0
- data/lib/petrovich/inflected.rb +18 -0
- data/lib/petrovich/inflector.rb +42 -0
- data/lib/petrovich/name.rb +75 -0
- data/lib/petrovich/rule_set.rb +118 -0
- data/lib/petrovich/unicode.rb +4 -3
- data/lib/petrovich/value.rb +12 -0
- data/lib/tasks/evaluate.rake +14 -32
- data/rules/rules.yml +186 -44
- metadata +57 -7
- data/lib/petrovich/extension.rb +0 -140
- data/lib/petrovich/rules.rb +0 -209
@@ -0,0 +1,63 @@
|
|
1
|
+
module Petrovich
  module Case
    # A single inflection rule from the rule set.
    #
    # A rule applies to one part of a name (+as+) and one gender, carries a
    # list of tests that decide whether a name matches, and a list of
    # modifiers indexed by grammatical case (genitive, dative, accusative,
    # instrumental, prepositional - in that order).
    class Rule
      attr_reader :gender, :modifiers, :tests, :tags, :as, :an_exception

      # @param opts [Hash] rule attributes:
      #   :gender    - anything responding to #to_sym (stored downcased)
      #   :as        - :lastname, :firstname or :middlename
      #   :section   - :exceptions marks the rule as an exception
      #   :modifiers - modifiers, one per non-nominative case
      #   :tests     - objects responding to #match?(name)
      #   :tags      - optional list of tags (defaults to an empty list)
      # @raise [ArgumentError] when :as is not a known name part
      def initialize(opts)
        @gender = opts[:gender].to_sym.downcase
        @as = opts[:as]
        @an_exception = opts[:section] == :exceptions
        @modifiers = opts[:modifiers]
        @tests = opts[:tests]
        # Keep the tags handed over by the caller instead of discarding them.
        @tags = opts[:tags] || []

        assert_name_part!(@as)
      end

      # Tells whether this rule applies to the given name.
      #
      # @param name [String] a single name part
      # @param match_gender [#to_sym, nil] gender of the name; nil is treated
      #   as :androgynous so an undetermined gender does not raise
      # @param match_as [Symbol] which part of the name is being matched
      # @return [Boolean]
      # @raise [ArgumentError] when match_as is not a known name part
      def match?(name, match_gender, match_as)
        assert_name_part!(match_as)

        return false unless match_as == as

        match_gender = (match_gender || :androgynous).to_sym.downcase

        # Male rules never apply to female names; female rules apply to
        # female names only.
        return false if gender == :male && match_gender == :female
        return false if gender == :female && match_gender != :female

        tests.any? { |test| test.match?(name) }
      end

      # Is this rule an exception?
      #
      # @return [Boolean]
      def an_exception?
        an_exception == true
      end

      # Returns the modifier for the given grammatical case.
      #
      # @param name_case [#to_sym] grammatical case name
      # @return [Object, nil] nil for the nominative case
      # @raise [UnknownCaseError] for an unrecognised case
      def get_modifier(name_case)
        case name_case.to_sym
        when :nominative
          nil
        when :genitive
          modifiers[0]
        when :dative
          modifiers[1]
        when :accusative
          modifiers[2]
        when :instrumental
          modifiers[3]
        when :prepositional
          modifiers[4]
        else
          fail UnknownCaseError, "Unknown grammatic case: #{name_case}"
        end
      end

      private

      # Raises ArgumentError unless name_part is one of the known name parts.
      def assert_name_part!(name_part)
        return if [:lastname, :firstname, :middlename].include?(name_part)
        fail ArgumentError, "Unknown 'as' option #{name_part}"
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Petrovich
  module Case
    class Rule
      # One modification attached to a case rule: a suffix together with an
      # offset (an integer that defaults to zero).
      class Modifier
        attr_reader :suffix, :offset

        # @param suffix [#to_s] stored as a string (nil becomes "")
        # @param offset [Integer] stored as given
        def initialize(suffix, offset = 0)
          @offset = offset
          @suffix = suffix.to_s
        end

        # Renders the modifier the way a two-element array
        # [suffix, offset] would be inspected.
        def inspect
          "[#{suffix.inspect}, #{offset.inspect}]"
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Petrovich
  module Case
    class Rule
      # A suffix test belonging to a case rule: a name passes the test when
      # it ends with the stored suffix, ignoring letter case.
      class Test
        attr_reader :suffix

        # @param suffix [#to_s] suffix to look for; stored downcased
        def initialize(suffix)
          @suffix = Unicode.downcase(suffix)
        end

        # @param name [#to_s] name part to check
        # @return [Boolean] true when the downcased name ends with the suffix
        def match?(name)
          # String#end_with? replaces the manual tail slice and comparison.
          Unicode.downcase(name).end_with?(suffix)
        end

        # Shows just the suffix, which keeps rule dumps compact.
        def inspect
          suffix.inspect
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Petrovich
  # Gender detection based on the parts of a name.
  module Gender
    # Guesses the gender of a name from its last, first and middle parts.
    #
    # Each present part is looked up in the gender rules; a part with no
    # matching rule counts as :androgynous. For a hyphenated part the last
    # component decides. A non-androgynous middle name wins outright; failing
    # that, a definite first or last name wins over an androgynous
    # counterpart; failing that, the single value shared by all parts is
    # returned.
    #
    # @param name [Object] whatever Petrovich.normalize_name accepts
    # @return [Symbol, nil] the detected gender, or nil when the parts
    #   disagree or nothing could be looked up
    def self.detect(name)
      # Normalising first lets callers pass a hash as well as a name object.
      name = Petrovich.normalize_name(name)
      rule_set = Petrovich.rule_set
      detected = {}

      Petrovich.assert_name!(name)

      [:lastname, :firstname, :middlename].each do |part|
        next unless name.respond_to?(part) && name.send(part)

        rule_set.find_all_gender_rules(name.send(part), part).each do |rule|
          detected[part] = rule.nil? ? :androgynous : rule.gender
        end
      end

      # A middle name with a definite gender settles the question.
      by_middlename = detected[:middlename]
      return by_middlename if by_middlename && by_middlename != :androgynous

      distinct = detected.values.uniq
      # Every inspected part agrees.
      return distinct.first if distinct.size == 1
      return nil if distinct.empty?

      # The parts disagree: trust a definite first or last name only when the
      # other one is androgynous.
      by_firstname = detected[:firstname]
      by_lastname = detected[:lastname]

      if by_firstname != :androgynous && by_lastname == :androgynous
        by_firstname
      elsif by_lastname != :androgynous && by_firstname == :androgynous
        by_lastname
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Petrovich
  module Gender
    # A gender rule from the rule set: names of the given part (+as+) that
    # end with +suffix+ are assigned +gender+.
    class Rule
      attr_reader :gender, :as, :suffix

      # TODO: check options (see Case::Rule)
      #
      # @param opts [Hash] :gender, :as and :suffix, stored as given
      def initialize(opts)
        @gender = opts[:gender]
        @as = opts[:as]
        @suffix = opts[:suffix]
      end

      # Tells whether this rule applies to the given name part.
      #
      # @param name [#to_s] name part to check; downcased before comparing
      # @param match_as [Symbol] which part of the name is being matched
      # @return [Boolean]
      def match?(name, match_as)
        return false unless match_as == as

        # String#end_with? replaces the manual tail slice and comparison.
        Unicode.downcase(name).end_with?(@suffix)
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Petrovich
  # Wraps an already inflected name and exposes its three parts.
  class Inflected
    extend Forwardable

    # The readers are forwarded to the wrapped name object.
    def_delegators :@name, :lastname, :firstname, :middlename

    # @param name [Object] object responding to lastname, firstname and
    #   middlename
    def initialize(name)
      @name = name
    end

    # Joins the parts that are present with single spaces, last name first.
    def to_s
      present_parts = [lastname, firstname, middlename].reject(&:nil?)
      present_parts.join(' ')
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Petrovich
  # Applies case rules to the parts of a name for one grammatical case.
  class Inflector
    # @param name [Object] name accepted by Petrovich.assert_name! and
    #   Petrovich.normalize_name
    # @param gender [Object] stored as given
    # @param name_case [Object] grammatical case handed to each rule's
    #   get_modifier
    def initialize(name, gender, name_case)
      Petrovich.assert_name!(name)

      @name = Petrovich.normalize_name(name)
      @gender = gender
      @name_case = name_case
    end

    # Inflects the last name. +rules+ holds one rule (or nil) per
    # hyphen-separated component, in order.
    def inflect_lastname(rules)
      inflect(@name.lastname, rules)
    end

    # Inflects the first name; see #inflect_lastname for +rules+.
    def inflect_firstname(rules)
      inflect(@name.firstname, rules)
    end

    # Inflects the middle name; see #inflect_lastname for +rules+.
    def inflect_middlename(rules)
      inflect(@name.middlename, rules)
    end

    private

    # Rewrites every hyphen-separated component of +name+ with the rule at
    # the same position in +rules+. Components without a rule, or whose rule
    # has no modifier for the current case, are left untouched.
    #
    # The +rules+ array is only read, never modified, so callers can reuse it.
    def inflect(name, rules)
      return name if rules.empty?

      inflected = name.split('-').zip(rules).map do |part, rule|
        modifier = rule && rule.get_modifier(@name_case)
        next part unless modifier

        # Never keep a negative number of characters: an offset longer than
        # the component simply removes the whole component.
        kept = [part.size - modifier.offset, 0].max
        part.slice(0, kept) + modifier.suffix
      end

      inflected.join('-')
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Petrovich
  # A person's name (last, first and middle parts) that reports its gender
  # and can be inflected into each grammatical case in Petrovich::CASES.
  class Name
    extend Forwardable

    def_delegator :@name, :lastname, :lastname
    def_delegator :@name, :firstname, :firstname
    def_delegator :@name, :middlename, :middlename

    # @param opts [Hash] :lastname, :firstname, :middlename and an optional
    #   :gender (:male, :female or :androgynous)
    def initialize(opts)
      @rule_set = Petrovich.rule_set
      @gender = opts[:gender]
      @name = Petrovich.normalize_name(
        lastname: opts[:lastname],
        firstname: opts[:firstname],
        middlename: opts[:middlename]
      )
    end

    # Gender of the name: the explicitly supplied one when it is a known
    # value, otherwise whatever Gender.detect works out from the name.
    def gender
      if !@gender.nil? && [:male, :female, :androgynous].include?(@gender.to_sym)
        @gender.to_sym
      else
        Gender.detect(@name)
      end
    end

    # The predicates go through #gender so that an explicitly supplied
    # gender is honoured instead of being re-detected from the name.
    def male?
      gender == :male
    end

    def female?
      gender == :female
    end

    def androgynous?
      gender == :androgynous
    end

    # Inflects the name into the given grammatical case.
    #
    # @param name_case [Object] case checked by Petrovich.assert_case!
    # @return [Petrovich::Inflected]
    def to(name_case)
      Petrovich.assert_case!(name_case)
      # Work on a copy so the original name stays in the nominative.
      Inflected.new(inflect(@name.dup, gender, name_case))
    end

    # Joins the parts that are present with single spaces, last name first.
    # Missing parts are skipped, matching Inflected#to_s.
    def to_s
      [lastname, firstname, middlename].compact.join(' ')
    end

    # One shortcut method per grammatical case, each delegating to #to.
    Petrovich::CASES.each do |name_case|
      define_method name_case do
        to(name_case)
      end
    end

    private

    # Replaces every present part of +name+ with its inflected form and
    # returns +name+.
    def inflect(name, gender, name_case)
      inflector = Inflector.new(name, gender, name_case)

      [:lastname, :firstname, :middlename].each do |part|
        value = name.public_send(part)
        next if value.nil?

        rules = @rule_set.find_all_case_rules(value, gender, part)
        name.public_send("#{part}=", inflector.public_send("inflect_#{part}", rules))
      end

      name
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Petrovich
  # Holds the case rules and gender rules read from the bundled YAML file
  # and answers lookups against them.
  class RuleSet
    def initialize
      clear!
    end

    # Registers a case rule.
    #
    # @raise [ArgumentError] unless rule is a Petrovich::Case::Rule
    def add_case_rule(rule)
      fail ArgumentError, 'Expecting rule of type Petrovich::Case::Rule'.freeze unless rule.is_a?(Case::Rule)

      @case_rules.push(rule)
    end

    # Registers a gender rule.
    #
    # @raise [ArgumentError] unless rule is a Petrovich::Gender::Rule
    def add_gender_rule(rule)
      fail ArgumentError, 'Expecting rule of type Petrovich::Gender::Rule'.freeze unless rule.is_a?(Gender::Rule)

      @gender_rules.push(rule)
    end

    # Looks up the first matching case rule for every hyphen-separated
    # component of +name+; components without a match yield nil.
    def find_all_case_rules(name, gender, as)
      name.split('-').map do |component|
        @case_rules.find { |candidate| candidate.match?(component, gender, as) }
      end
    end

    # Looks up the first matching gender rule for every hyphen-separated
    # component of +name+; components without a match yield nil.
    def find_all_gender_rules(name, as)
      name.split('-').map do |component|
        @gender_rules.find { |candidate| candidate.match?(component, as) }
      end
    end

    # Forgets every rule registered so far.
    def clear!
      @case_rules = []
      @gender_rules = []
    end

    # Reads the rules from rules/rules.yml. Does nothing and returns false
    # when case rules are already present.
    def load!
      return false unless @case_rules.empty?

      yaml_path = File.expand_path('../../../rules/rules.yml', __FILE__)
      parsed = YAML.load_file(yaml_path)

      load_case_rules!(parsed)
      load_gender_rules!(parsed)
    end

    private

    # Registers the case rules of every name part, exceptions first.
    def load_case_rules!(rules)
      [:lastname, :firstname, :middlename].each do |name_part|
        sections = rules[name_part.to_s]

        [:exceptions, :suffixes].each do |section|
          entries = sections[section.to_s]
          entries.each { |entry| load_case_entry(name_part, section, entry) } unless entries.nil?
        end
      end
    end

    # Registers the gender rules of every name part.
    def load_gender_rules!(rules)
      [:lastname, :firstname, :middlename].each do |name_part|
        sections = rules['gender'][name_part.to_s]

        # Androgynous rules go in first: lookups return the first match, so
        # the order matters.
        [:androgynous, :male, :female].each do |section|
          entries = sections[section.to_s]
          entries.each { |entry| load_gender_entry(name_part, section, entry) } unless entries.nil?
        end
      end
    end

    # Builds a case rule from one YAML entry and registers it.
    def load_case_entry(as, section, entry)
      modifiers = entry['mods'].map do |mod|
        # The suffix is the first run of characters other than "." and "-";
        # the offset is the number of "-" characters.
        Petrovich::Case::Rule::Modifier.new(mod[/[^.-]+/], mod.count('-'))
      end
      tests = entry['test'].map { |suffix| Petrovich::Case::Rule::Test.new(suffix) }

      rule = Petrovich::Case::Rule.new(
        gender: entry['gender'],
        as: as,
        section: section,
        modifiers: modifiers,
        tests: tests,
        tags: entry['tags']
      )
      add_case_rule(rule)
    end

    # Builds a gender rule from one YAML entry (a suffix) and registers it.
    def load_gender_entry(as, section, entry)
      add_gender_rule(Gender::Rule.new(as: as, gender: section, suffix: entry))
    end
  end
end
|
data/lib/petrovich/unicode.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
module Petrovich
|
2
|
+
# Custom downcase and upcase methods for russian language.
|
2
3
|
module Unicode
|
3
4
|
RU_UPPER = [
|
4
5
|
"\u0410", "\u0411", "\u0412", "\u0413", "\u0414", "\u0415", "\u0416", "\u0417",
|
@@ -16,11 +17,11 @@ class Petrovich
|
|
16
17
|
"\u0451" # Ё
|
17
18
|
].join
|
18
19
|
|
19
|
-
def downcase(entry)
|
20
|
+
def self.downcase(entry)
|
20
21
|
entry.to_s.tr(RU_UPPER, RU_LOWER)
|
21
22
|
end
|
22
23
|
|
23
|
-
def upcase(entry)
|
24
|
+
def self.upcase(entry)
|
24
25
|
entry.to_s.tr(RU_LOWER, RU_UPPER)
|
25
26
|
end
|
26
27
|
end
|