zenlish 0.2.02 → 0.2.06
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +455 -0
- data/CHANGELOG.md +58 -1
- data/Gemfile +5 -3
- data/README.md +27 -0
- data/Rakefile +5 -3
- data/lib/zenlish/feature/boolean_domain.rb +9 -3
- data/lib/zenlish/feature/boolean_value.rb +3 -2
- data/lib/zenlish/feature/enumeration_domain.rb +8 -1
- data/lib/zenlish/feature/feature.rb +9 -7
- data/lib/zenlish/feature/feature_def.rb +10 -6
- data/lib/zenlish/feature/feature_domain.rb +9 -3
- data/lib/zenlish/feature/feature_struct.rb +3 -1
- data/lib/zenlish/feature/feature_struct_def.rb +5 -1
- data/lib/zenlish/feature/feature_struct_def_bearer.rb +6 -4
- data/lib/zenlish/feature/feature_value.rb +4 -2
- data/lib/zenlish/feature/identifier_domain.rb +5 -3
- data/lib/zenlish/feature/identifier_value.rb +3 -2
- data/lib/zenlish/feature/symbol_value.rb +3 -1
- data/lib/zenlish/inflect/atomic_o_expression.rb +2 -1
- data/lib/zenlish/inflect/composite_o_expression.rb +3 -1
- data/lib/zenlish/inflect/concatenation.rb +3 -2
- data/lib/zenlish/inflect/equals_literal.rb +7 -1
- data/lib/zenlish/inflect/feature_heading.rb +11 -5
- data/lib/zenlish/inflect/formal_argument.rb +3 -1
- data/lib/zenlish/inflect/function_call.rb +4 -1
- data/lib/zenlish/inflect/heading.rb +5 -0
- data/lib/zenlish/inflect/inflection_rule.rb +2 -0
- data/lib/zenlish/inflect/inflection_table.rb +29 -4
- data/lib/zenlish/inflect/inflection_table_builder.rb +27 -20
- data/lib/zenlish/inflect/input_asis.rb +3 -0
- data/lib/zenlish/inflect/input_expression.rb +4 -2
- data/lib/zenlish/inflect/literal_asis.rb +4 -1
- data/lib/zenlish/inflect/matches_pattern.rb +2 -0
- data/lib/zenlish/inflect/membership.rb +2 -0
- data/lib/zenlish/inflect/method_heading.rb +7 -6
- data/lib/zenlish/inflect/not_equals_literal.rb +2 -0
- data/lib/zenlish/inflect/nullary_input_expression.rb +3 -1
- data/lib/zenlish/inflect/output_expression.rb +2 -0
- data/lib/zenlish/inflect/substitution.rb +3 -1
- data/lib/zenlish/inflect/unary_input_expression.rb +4 -1
- data/lib/zenlish/inflect/unconditionally_true.rb +4 -2
- data/lib/zenlish/lang/dictionary.rb +35 -29
- data/lib/zenlish/lang/lemmatizer.rb +40 -0
- data/lib/zenlish/lang/zenlish_grammar.rb +9 -6
- data/lib/zenlish/lex/empty_lexicon_factory.rb +4 -3
- data/lib/zenlish/lex/lexeme.rb +9 -2
- data/lib/zenlish/lex/lexical_entry.rb +6 -4
- data/lib/zenlish/lex/lexicon.rb +10 -8
- data/lib/zenlish/lex/literal.rb +3 -1
- data/lib/zenlish/lexer/lexer.rb +144 -0
- data/lib/zenlish/trie/base_trie_node.rb +27 -0
- data/lib/zenlish/trie/trie.rb +132 -0
- data/lib/zenlish/trie/trie_node.rb +21 -0
- data/lib/zenlish/trie/trie_root.rb +10 -0
- data/lib/zenlish/version.rb +3 -1
- data/lib/zenlish/wclasses/adjective.rb +5 -2
- data/lib/zenlish/wclasses/adverb.rb +3 -1
- data/lib/zenlish/wclasses/adverb_maybe.rb +3 -1
- data/lib/zenlish/wclasses/adverb_not.rb +2 -0
- data/lib/zenlish/wclasses/all_word_classes.rb +4 -2
- data/lib/zenlish/wclasses/article.rb +2 -0
- data/lib/zenlish/wclasses/auxiliary.rb +5 -2
- data/lib/zenlish/wclasses/auxiliary_be.rb +34 -1
- data/lib/zenlish/wclasses/auxiliary_do.rb +31 -0
- data/lib/zenlish/wclasses/cardinal.rb +3 -1
- data/lib/zenlish/wclasses/common_noun.rb +2 -0
- data/lib/zenlish/wclasses/comparative_particle.rb +5 -3
- data/lib/zenlish/wclasses/conjunction.rb +2 -0
- data/lib/zenlish/wclasses/conjunctive_pronoun.rb +2 -0
- data/lib/zenlish/wclasses/coordinator.rb +4 -2
- data/lib/zenlish/wclasses/definite_article.rb +2 -0
- data/lib/zenlish/wclasses/degree_adverb.rb +2 -0
- data/lib/zenlish/wclasses/demonstrative_determiner.rb +40 -1
- data/lib/zenlish/wclasses/demonstrative_pronoun.rb +2 -0
- data/lib/zenlish/wclasses/determiner.rb +3 -1
- data/lib/zenlish/wclasses/distributive_determiner.rb +2 -0
- data/lib/zenlish/wclasses/existential_there.rb +2 -0
- data/lib/zenlish/wclasses/fronting_quantifier.rb +3 -1
- data/lib/zenlish/wclasses/indefinite_article.rb +2 -0
- data/lib/zenlish/wclasses/indefinite_pronoun.rb +3 -1
- data/lib/zenlish/wclasses/irregular_linking_verb.rb +3 -1
- data/lib/zenlish/wclasses/irregular_verb.rb +14 -22
- data/lib/zenlish/wclasses/irregular_verb_be.rb +41 -1
- data/lib/zenlish/wclasses/irregular_verb_can.rb +35 -0
- data/lib/zenlish/wclasses/irregular_verb_do.rb +38 -1
- data/lib/zenlish/wclasses/irregular_verb_extension.rb +8 -6
- data/lib/zenlish/wclasses/irregular_verb_have.rb +38 -1
- data/lib/zenlish/wclasses/irregular_verb_know.rb +3 -1
- data/lib/zenlish/wclasses/irregular_verb_say.rb +3 -1
- data/lib/zenlish/wclasses/irregular_verb_think.rb +3 -1
- data/lib/zenlish/wclasses/lexical_verb.rb +3 -4
- data/lib/zenlish/wclasses/linking_adverb.rb +2 -0
- data/lib/zenlish/wclasses/modal_verb_can.rb +27 -0
- data/lib/zenlish/wclasses/noun.rb +4 -2
- data/lib/zenlish/wclasses/numeral.rb +2 -0
- data/lib/zenlish/wclasses/personal_pronoun.rb +70 -1
- data/lib/zenlish/wclasses/possessive_determiner.rb +60 -0
- data/lib/zenlish/wclasses/preposition.rb +3 -1
- data/lib/zenlish/wclasses/preposition_of.rb +2 -0
- data/lib/zenlish/wclasses/preposition_than.rb +2 -0
- data/lib/zenlish/wclasses/pronoun.rb +3 -1
- data/lib/zenlish/wclasses/proper_noun.rb +10 -8
- data/lib/zenlish/wclasses/quantifier.rb +3 -1
- data/lib/zenlish/wclasses/regular_verb.rb +13 -18
- data/lib/zenlish/wclasses/regular_verb_want.rb +4 -2
- data/lib/zenlish/wclasses/relative_pronoun.rb +4 -2
- data/lib/zenlish/wclasses/subordinating_conjunction.rb +7 -5
- data/lib/zenlish/wclasses/verb.rb +16 -8
- data/lib/zenlish/wclasses/word_class.rb +11 -3
- data/spec/spec_helper.rb +3 -1
- data/spec/zenlish/feature/boolean_domain_spec.rb +6 -1
- data/spec/zenlish/feature/boolean_value_spec.rb +1 -1
- data/spec/zenlish/feature/enumeration_domain_spec.rb +9 -5
- data/spec/zenlish/feature/feature_spec.rb +3 -3
- data/spec/zenlish/feature/feature_struct_def_bearer_spec.rb +10 -8
- data/spec/zenlish/feature/feature_struct_def_spec.rb +3 -2
- data/spec/zenlish/feature/identifier_domain_spec.rb +3 -3
- data/spec/zenlish/feature/identifier_value_spec.rb +4 -4
- data/spec/zenlish/feature/symbol_value_spec.rb +3 -3
- data/spec/zenlish/inflect/concatenation_spec.rb +7 -7
- data/spec/zenlish/inflect/feature_heading_spec.rb +23 -3
- data/spec/zenlish/inflect/formal_argument_spec.rb +2 -2
- data/spec/zenlish/inflect/function_call_spec.rb +4 -4
- data/spec/zenlish/inflect/inflection_rule_spec.rb +9 -5
- data/spec/zenlish/inflect/inflection_table_builder_spec.rb +26 -21
- data/spec/zenlish/inflect/inflection_table_spec.rb +39 -22
- data/spec/zenlish/inflect/input_asis_spec.rb +2 -6
- data/spec/zenlish/inflect/literal_asis_spec.rb +3 -3
- data/spec/zenlish/inflect/matches_pattern_spec.rb +1 -1
- data/spec/zenlish/inflect/membership_spec.rb +2 -2
- data/spec/zenlish/inflect/method_heading_spec.rb +7 -1
- data/spec/zenlish/inflect/substitution_spec.rb +1 -1
- data/spec/zenlish/inflect/unconditionally_true_spec.rb +2 -2
- data/spec/zenlish/lang/dictionary_spec.rb +51 -16
- data/spec/zenlish/lang/lemmatizer_spec.rb +43 -0
- data/spec/zenlish/lang/zenlish_grammar_spec.rb +2 -2
- data/spec/zenlish/lex/lexeme_spec.rb +16 -11
- data/spec/zenlish/lex/lexical_entry_spec.rb +1 -2
- data/spec/zenlish/lex/lexicon_spec.rb +9 -8
- data/spec/zenlish/lex/literal_spec.rb +2 -2
- data/spec/zenlish/lexer/lexer_spec.rb +52 -0
- data/spec/zenlish/parser/lesson1_spec.rb +2 -2
- data/spec/zenlish/parser/lesson2_spec.rb +31 -36
- data/spec/zenlish/parser/lesson3_spec.rb +98 -138
- data/spec/zenlish/parser/zparser_spec.rb +2 -1
- data/spec/zenlish/support/minimal_lexicon.rb +2 -0
- data/spec/zenlish/support/var2word.rb +27 -18
- data/spec/zenlish/trie/base_trie_node_spec.rb +33 -0
- data/spec/zenlish/trie/trie_spec.rb +108 -0
- data/spec/zenlish/wclasses/common_noun_spec.rb +19 -7
- data/spec/zenlish/wclasses/demonstrative_determiner_spec.rb +47 -0
- data/spec/zenlish/wclasses/irregular_verb_can_spec.rb +60 -0
- data/spec/zenlish/wclasses/irregular_verb_spec.rb +14 -4
- data/spec/zenlish/wclasses/lexical_verb_spec.rb +9 -2
- data/spec/zenlish/wclasses/modal_verb_can_spec.rb +50 -0
- data/spec/zenlish/wclasses/personal_pronoun_spec.rb +66 -0
- data/spec/zenlish/wclasses/possessive_determiner_spec.rb +69 -0
- data/spec/zenlish/wclasses/regular_verb_spec.rb +35 -15
- data/spec/zenlish/wclasses/regular_verb_want_spec.rb +30 -0
- data/spec/zenlish/wclasses/verb_spec.rb +9 -2
- data/spec/zenlish_spec.rb +3 -1
- data/zenlish.gemspec +17 -14
- metadata +37 -10
- data/lib/zenlish/wclasses/modal_verb_could.rb +0 -9
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'nullary_input_expression'
|
2
4
|
|
3
5
|
module Zenlish
|
@@ -5,7 +7,7 @@ module Zenlish
|
|
5
7
|
class UnconditionallyTrue < NullaryInputExpression
|
6
8
|
def success?(_headings, _lexeme, _heading_values)
|
7
9
|
true
|
8
|
-
end
|
10
|
+
end
|
9
11
|
end # class
|
10
12
|
end # module
|
11
|
-
end # module
|
13
|
+
end # module
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
unless defined?(Zenlish::Lang::Dictionary)
|
2
4
|
require_relative '../feature/feature_struct_def_bearer'
|
3
5
|
module Zenlish
|
@@ -9,15 +11,16 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
9
11
|
sandbox = Object.new
|
10
12
|
sandbox.extend(Zenlish::Lex::EmptyLexiconFactory)
|
11
13
|
Dictionary = sandbox.create_empty_lexicon
|
12
|
-
|
14
|
+
extend(Feature::FeatureStructDefBearer)
|
13
15
|
|
14
|
-
# @param aLemma [String] is the canonical form, dictionary form,
|
16
|
+
# @param aLemma [String] is the canonical form, dictionary form,
|
15
17
|
# or citation form of a headword.
|
16
18
|
# @param aWClassName [String] the name of a word class.
|
17
19
|
def self.add_entry(aLemma, aWClassName, aFeatureHash = nil, &aBlock)
|
18
20
|
entry = Zenlish::Lex::LexicalEntry.new(aLemma)
|
19
21
|
wclass = Dictionary.name2terminal[aWClassName]
|
20
22
|
raise StandardError, "Undefined word class for '#{aLemma}'" unless wclass
|
23
|
+
|
21
24
|
lexeme = Zenlish::Lex::Lexeme.new(wclass, entry, aFeatureHash)
|
22
25
|
lexeme.instance_exec(&aBlock) if block_given?
|
23
26
|
lexeme.freeze
|
@@ -56,7 +59,7 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
56
59
|
add_entry('big', 'Adjective')
|
57
60
|
add_entry('body', 'CommonNoun')
|
58
61
|
add_entry('but', 'Coordinator')
|
59
|
-
add_entry('can', '
|
62
|
+
add_entry('can', 'IrregularVerbCan')
|
60
63
|
add_entry('can', 'ModalVerbCan')
|
61
64
|
add_entry('cause', 'RegularVerb')
|
62
65
|
add_entry('change', 'RegularVerb')
|
@@ -65,15 +68,12 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
65
68
|
end
|
66
69
|
add_entry('contain', 'RegularVerb')
|
67
70
|
add_entry('container', 'CommonNoun')
|
68
|
-
add_entry('
|
69
|
-
add_entry('damage', 'RegularVerb')
|
71
|
+
add_entry('damage', 'RegularVerb')
|
70
72
|
add_entry('die', 'RegularVerb')
|
71
|
-
add_entry('difficult', 'Adjective')
|
73
|
+
add_entry('difficult', 'Adjective')
|
72
74
|
add_entry('different', 'Adjective')
|
73
75
|
add_entry('do', 'AuxiliaryDo')
|
74
|
-
add_entry('do', 'IrregularVerbDo')
|
75
|
-
forms past_simple: 'did', past_participle: 'done'
|
76
|
-
end
|
76
|
+
add_entry('do', 'IrregularVerbDo')
|
77
77
|
add_entry('each', 'DistributiveDeterminer')
|
78
78
|
add_entry('each', 'Pronoun')
|
79
79
|
add_entry('exist', 'RegularVerb')
|
@@ -88,17 +88,22 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
88
88
|
add_entry('good', 'Adjective')
|
89
89
|
add_entry('have', 'IrregularVerbHave')
|
90
90
|
add_entry('happen', 'RegularVerb')
|
91
|
-
add_entry('hear', 'IrregularLinkingVerb')
|
91
|
+
add_entry('hear', 'IrregularLinkingVerb') do
|
92
|
+
forms past_simple: 'heard', past_participle: 'heard'
|
93
|
+
end
|
92
94
|
add_entry('here', 'Adverb')
|
93
95
|
# example: ...from here (works as a pronoun of a place)
|
94
|
-
add_entry('here', 'CommonNoun', {'NUMBER' => enumeration(:singular),
|
95
|
-
'PARADIGM' => [identifier, 'Singular_only']})
|
96
|
-
add_entry('I', 'PersonalPronoun')
|
96
|
+
add_entry('here', 'CommonNoun', { 'NUMBER' => enumeration(:singular),
|
97
|
+
'PARADIGM' => [identifier, 'Singular_only'] })
|
98
|
+
add_entry('I', 'PersonalPronoun', { 'PERSON' => enumeration(:first),
|
99
|
+
'GENDER' => enumeration(:feminine, :masculine) })
|
97
100
|
add_entry('if', 'SubordinatingConjunction')
|
98
101
|
add_entry('in', 'Preposition')
|
99
102
|
add_entry('inside', 'Preposition')
|
100
|
-
add_entry('it', 'PersonalPronoun')
|
101
|
-
|
103
|
+
add_entry('it', 'PersonalPronoun', { 'PERSON' => enumeration(:third),
|
104
|
+
'PARADIGM' => [identifier, 'ppn_3rd_paradigm'] })
|
105
|
+
add_entry('its', 'PossessiveDeterminer', { 'PERSON' => enumeration(:third),
|
106
|
+
'PARADIGM' => [identifier, 'possdet_3rd_paradigm'] })
|
102
107
|
add_entry('kind', 'CommonNoun')
|
103
108
|
add_entry('know', 'IrregularVerbKnow') do
|
104
109
|
forms past_simple: 'knew', past_participle: 'known'
|
@@ -113,18 +118,18 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
113
118
|
end
|
114
119
|
add_entry('many', 'Quantifier')
|
115
120
|
add_entry('maybe', 'AdverbMaybe')
|
116
|
-
add_entry('me', 'PersonalPronoun')
|
117
121
|
add_entry('moment', 'CommonNoun')
|
118
122
|
add_entry('more', 'Adjective')
|
119
123
|
add_entry('more', 'Adverb')
|
120
124
|
add_entry('move', 'RegularVerb')
|
121
125
|
add_entry('much', 'Adverb')
|
122
|
-
add_entry('my', 'PossessiveDeterminer')
|
126
|
+
add_entry('my', 'PossessiveDeterminer', { 'PERSON' => enumeration(:first),
|
127
|
+
'GENDER' => enumeration(:feminine, :masculine) })
|
123
128
|
add_entry('near', 'Preposition')
|
124
129
|
add_entry('near to', 'Preposition')
|
125
130
|
add_entry('now', 'Adverb')
|
126
|
-
add_entry('now', 'CommonNoun', {'NUMBER' => enumeration(:singular),
|
127
|
-
'PARADIGM' => [identifier, 'Singular_only']})
|
131
|
+
add_entry('now', 'CommonNoun', { 'NUMBER' => enumeration(:singular),
|
132
|
+
'PARADIGM' => [identifier, 'Singular_only'] })
|
128
133
|
add_entry('not', 'AdverbNot')
|
129
134
|
add_entry('of', 'PrepositionOf')
|
130
135
|
add_entry('on', 'Preposition')
|
@@ -134,10 +139,10 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
134
139
|
add_entry('or', 'Coordinator')
|
135
140
|
add_entry('other', 'Adjective')
|
136
141
|
add_entry('part', 'CommonNoun')
|
137
|
-
add_entry('people', 'CommonNoun', {'NUMBER' => enumeration(:plural),
|
138
|
-
'PARADIGM' => [identifier, 'Plural_only']})
|
139
|
-
add_entry('person', 'CommonNoun', {'NUMBER' => enumeration(:singular),
|
140
|
-
'PARADIGM' => [identifier, 'Singular_only']})
|
142
|
+
add_entry('people', 'CommonNoun', { 'NUMBER' => enumeration(:plural),
|
143
|
+
'PARADIGM' => [identifier, 'Plural_only'] })
|
144
|
+
add_entry('person', 'CommonNoun', { 'NUMBER' => enumeration(:singular),
|
145
|
+
'PARADIGM' => [identifier, 'Singular_only'] })
|
141
146
|
add_entry('place', 'CommonNoun')
|
142
147
|
add_entry('same', 'Adjective')
|
143
148
|
add_entry('same', 'Pronoun')
|
@@ -158,11 +163,8 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
158
163
|
add_entry('than', 'PrepositionThan')
|
159
164
|
add_entry('that', 'RelativePronoun')
|
160
165
|
add_entry('the', 'DefiniteArticle')
|
161
|
-
add_entry('them', 'PersonalPronoun')
|
162
166
|
add_entry('then', 'LinkingAdverb')
|
163
|
-
add_entry('their', 'PossessiveDeterminer')
|
164
167
|
add_entry('there', 'ExistentialThere')
|
165
|
-
add_entry('they', 'PersonalPronoun')
|
166
168
|
add_entry('thing', 'CommonNoun')
|
167
169
|
add_entry('think', 'IrregularVerbThink') do
|
168
170
|
forms past_simple: 'thought', past_participle: 'thought'
|
@@ -187,8 +189,12 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
187
189
|
add_entry('who', 'RelativePronoun')
|
188
190
|
add_entry('with', 'Preposition')
|
189
191
|
add_entry('word', 'CommonNoun')
|
190
|
-
add_entry('you', 'PersonalPronoun')
|
191
|
-
|
192
|
+
add_entry('you', 'PersonalPronoun', { 'PERSON' => enumeration(:second),
|
193
|
+
'GENDER' => enumeration(:feminine, :masculine),
|
194
|
+
'PARADIGM' => [identifier, 'ppn_2nd_paradigm'] })
|
195
|
+
add_entry('your', 'PossessiveDeterminer', { 'PERSON' => enumeration(:second),
|
196
|
+
'GENDER' => enumeration(:feminine, :masculine),
|
197
|
+
'PARADIGM' => [identifier, 'possdet_2nd_paradigm'] })
|
192
198
|
|
193
199
|
# Punctuation signs...
|
194
200
|
add_entry(':', 'Colon')
|
@@ -197,4 +203,4 @@ unless defined?(Zenlish::Lang::Dictionary)
|
|
197
203
|
add_entry('"', 'Quote')
|
198
204
|
end # module
|
199
205
|
end # module
|
200
|
-
end # defined?
|
206
|
+
end # defined?
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../trie/trie'
|
4
|
+
|
5
|
+
module Zenlish
|
6
|
+
module Lang
|
7
|
+
class Lemmatizer
|
8
|
+
# @return [Trie::Trie] Trie (aka prefix tree) with all word forms from dictionary.
|
9
|
+
attr_reader :trie
|
10
|
+
|
11
|
+
def initialize(aLexicon)
|
12
|
+
@trie = Trie::Trie.new
|
13
|
+
initialize_trie(aLexicon)
|
14
|
+
end
|
15
|
+
|
16
|
+
def lemmatize(aWordForm, _hints = nil)
|
17
|
+
node = trie.search(aWordForm)
|
18
|
+
node&.value
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def initialize_trie(aLexicon)
|
24
|
+
aLexicon.entries.each do |ent|
|
25
|
+
citation_form = ent.lemma
|
26
|
+
ent.lexemes.each do |lxm|
|
27
|
+
if lxm.wclass.kind_of?(Zenlish::WClasses::WordClass)
|
28
|
+
if lxm.wclass.invariable?
|
29
|
+
trie.add(citation_form, lxm)
|
30
|
+
else
|
31
|
+
w_forms = lxm.all_inflections
|
32
|
+
w_forms.each { |form| trie.add(form, lxm) }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end # class
|
39
|
+
end # module
|
40
|
+
end # module
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Grammar for a simple subset of English language
|
2
4
|
# It is called Zenlish
|
3
5
|
|
@@ -6,7 +8,7 @@ require_relative 'dictionary'
|
|
6
8
|
|
7
9
|
########################################
|
8
10
|
# Define a grammar for a highly English-like language
|
9
|
-
builder = Rley::Syntax::GrammarBuilder.new do
|
11
|
+
builder = Rley::grammar_builder do
|
10
12
|
add_terminals(*Zenlish::Lang::Dictionary.terminals)
|
11
13
|
|
12
14
|
rule 'zenlish' => 'prose'
|
@@ -19,7 +21,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
19
21
|
#################
|
20
22
|
# Simple sentence
|
21
23
|
#################
|
22
|
-
rule 'simple_sentence' =>
|
24
|
+
rule 'simple_sentence' => 'front_adverb simple_sentence'
|
23
25
|
rule 'front_adverb' => 'AdverbMaybe'
|
24
26
|
rule 'front_adverb' => 'Adverb'
|
25
27
|
rule 'simple_sentence' => 'declarative_simple_sentence'
|
@@ -107,7 +109,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
107
109
|
rule 'comparative_start' => 'ComparativeParticle'
|
108
110
|
rule 'conjunctive_prefix' => 'ConjunctivePronoun noun_phrase verb_phrase'
|
109
111
|
rule 'identifying_clause' => 'RelativePronoun tense_verb_phrase'
|
110
|
-
rule 'relative_clause_opt' =>
|
112
|
+
rule 'relative_clause_opt' => 'relative_clause'
|
111
113
|
rule 'relative_clause_opt' => []
|
112
114
|
rule 'relative_clause' => 'RelativePronoun tense_phrase'
|
113
115
|
# Sentence 3-Bxa 'Lisa sees a living thing that is very big.
|
@@ -137,7 +139,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
137
139
|
rule 'tense' => 'AuxiliaryBe'
|
138
140
|
rule 'tense' => 'AuxiliaryDo'
|
139
141
|
rule 'tense' => 'ModalVerbCan'
|
140
|
-
|
142
|
+
|
141
143
|
|
142
144
|
#############
|
143
145
|
# NOUN PHRASE
|
@@ -232,12 +234,13 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
232
234
|
rule 'lexical_verb' => 'IrregularVerb'
|
233
235
|
rule 'lexical_verb' => 'IrregularLinkingVerb'
|
234
236
|
rule 'lexical_verb' => 'IrregularVerbBe'
|
237
|
+
rule 'lexical_verb' => 'IrregularVerbCan'
|
235
238
|
rule 'lexical_verb' => 'IrregularVerbDo'
|
236
239
|
rule 'lexical_verb' => 'IrregularVerbHave'
|
237
240
|
rule 'lexical_verb' => 'IrregularVerbKnow'
|
238
241
|
rule 'lexical_verb' => 'IrregularVerbSay'
|
239
242
|
rule 'lexical_verb' => 'IrregularVerbThink'
|
240
|
-
|
243
|
+
|
241
244
|
|
242
245
|
rule 'linking_verb' => 'IrregularLinkingVerb'
|
243
246
|
|
@@ -293,7 +296,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
293
296
|
# complementation by a verb: gerund -ing form...
|
294
297
|
rule 'preposition_object' => 'noun_phrase_opt lexical_verb post_head_vp'
|
295
298
|
# preposition_object => "a gerund (a verb form ending in "-ing") that acts as a noun # Example: He beat Lee without overly trying.
|
296
|
-
rule 'preposition_object' => 'conjunctive_prefix'
|
299
|
+
rule 'preposition_object' => 'conjunctive_prefix' # It's obvious from _what he said_.
|
297
300
|
rule 'preposition_object' => []
|
298
301
|
|
299
302
|
######################
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../wclasses/all_word_classes'
|
2
4
|
require_relative 'lexicon'
|
3
5
|
|
@@ -7,7 +9,7 @@ module Zenlish
|
|
7
9
|
# Factory method. Helps in creating an "empty" lexicon.
|
8
10
|
# It just contains the word classes of Zenlish but no headwords.
|
9
11
|
# @return [Lexicon] the created lexicon object
|
10
|
-
def create_empty_lexicon
|
12
|
+
def create_empty_lexicon
|
11
13
|
lexicon = Lexicon.new
|
12
14
|
|
13
15
|
add_word_classes(lexicon)
|
@@ -41,6 +43,7 @@ module Zenlish
|
|
41
43
|
add_wclass(aLexicon, WClasses::IrregularLinkingVerb)
|
42
44
|
add_wclass(aLexicon, WClasses::IrregularVerb)
|
43
45
|
add_wclass(aLexicon, WClasses::IrregularVerbBe)
|
46
|
+
add_wclass(aLexicon, WClasses::IrregularVerbCan)
|
44
47
|
add_wclass(aLexicon, WClasses::IrregularVerbDo)
|
45
48
|
add_wclass(aLexicon, WClasses::IrregularVerbHave)
|
46
49
|
add_wclass(aLexicon, WClasses::IrregularVerbKnow)
|
@@ -48,7 +51,6 @@ module Zenlish
|
|
48
51
|
add_wclass(aLexicon, WClasses::IrregularVerbThink)
|
49
52
|
add_wclass(aLexicon, WClasses::LinkingAdverb)
|
50
53
|
add_wclass(aLexicon, WClasses::ModalVerbCan)
|
51
|
-
add_wclass(aLexicon, WClasses::ModalVerbCould)
|
52
54
|
add_wclass(aLexicon, WClasses::PersonalPronoun)
|
53
55
|
add_wclass(aLexicon, WClasses::PossessiveDeterminer)
|
54
56
|
add_wclass(aLexicon, WClasses::Preposition)
|
@@ -73,7 +75,6 @@ module Zenlish
|
|
73
75
|
def add_wclass(aLexicon, aClass)
|
74
76
|
aLexicon.add_terminal(aClass.new.freeze)
|
75
77
|
end
|
76
|
-
|
77
78
|
end # module
|
78
79
|
end # module
|
79
80
|
end # module
|
data/lib/zenlish/lex/lexeme.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../feature/feature_struct_def_bearer'
|
2
4
|
module Zenlish
|
3
5
|
module Lex
|
@@ -20,7 +22,7 @@ module Zenlish
|
|
20
22
|
anEntry.add_lexeme(self)
|
21
23
|
if aWClass.kind_of?(WClasses::WordClass)
|
22
24
|
unless wclass.extension.nil?
|
23
|
-
|
25
|
+
extend(wclass.extension)
|
24
26
|
init_extension(self)
|
25
27
|
end
|
26
28
|
p_struct = aWClass.kind_of?(WClasses::WordClass) ? aWClass.struct : nil
|
@@ -42,6 +44,11 @@ module Zenlish
|
|
42
44
|
table.inflect(self, constraints)
|
43
45
|
end
|
44
46
|
|
47
|
+
def all_inflections
|
48
|
+
table = paradigm
|
49
|
+
table.all_inflections(self)
|
50
|
+
end
|
51
|
+
|
45
52
|
# @return [String] the base (dictionary) form.
|
46
53
|
def lemma
|
47
54
|
entry.lemma
|
@@ -57,4 +64,4 @@ module Zenlish
|
|
57
64
|
alias base_form lemma
|
58
65
|
end # class
|
59
66
|
end # module
|
60
|
-
end # module
|
67
|
+
end # module
|
@@ -1,14 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Zenlish
|
2
4
|
module Lex
|
3
5
|
# TODO: document
|
4
6
|
class LexicalEntry
|
5
|
-
|
7
|
+
# @return [String] the lemma (dictionary) form of a word.
|
6
8
|
attr_reader :lemma
|
7
9
|
|
8
10
|
# @return [Array<Lexeme, Rley::Syntax::Terminal>]
|
9
11
|
attr_reader :lexemes
|
10
12
|
|
11
|
-
# @param theLemma [String] lemma (= citation form), a word form used
|
13
|
+
# @param theLemma [String] lemma (= citation form), a word form used
|
12
14
|
# conventionally to represent a lexeme.
|
13
15
|
# @param aLexeme [Lexeme, NilClass] the lexeme to link with lexical entry.
|
14
16
|
def initialize(theLemma, aLexeme = nil)
|
@@ -17,10 +19,10 @@ module Zenlish
|
|
17
19
|
add_lexeme(aLexeme)
|
18
20
|
end
|
19
21
|
|
20
|
-
# @param aLexeme [Lexeme, NilClass] the lexeme to link with lexical entry.
|
22
|
+
# @param aLexeme [Lexeme, NilClass] the lexeme to link with lexical entry.
|
21
23
|
def add_lexeme(aLexeme)
|
22
24
|
lexemes << aLexeme if aLexeme
|
23
25
|
end
|
24
26
|
end # class
|
25
27
|
end # module
|
26
|
-
end # module
|
28
|
+
end # module
|
data/lib/zenlish/lex/lexicon.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Zenlish
|
2
4
|
module Lex
|
3
5
|
# A lexicon is a collection of lexical entries.
|
@@ -5,7 +7,7 @@ module Zenlish
|
|
5
7
|
class Lexicon
|
6
8
|
# @return [Array<Lex::LexicalEntry>] entries in the lexicon
|
7
9
|
attr_reader :entries
|
8
|
-
|
10
|
+
|
9
11
|
# @return [Hash{String => Lex::LexicalEntry}] the lexical entry for the given lemma.
|
10
12
|
attr_reader :lemma2entry
|
11
13
|
|
@@ -23,7 +25,7 @@ module Zenlish
|
|
23
25
|
end
|
24
26
|
|
25
27
|
# @param aLemma [String] retrieve the lexeme from the given "head word".
|
26
|
-
# @param aWordClass [WordClasses::WordClass, NilClass] the word class of
|
28
|
+
# @param aWordClass [WordClasses::WordClass, NilClass] the word class of
|
27
29
|
# the lexeme.
|
28
30
|
# @return [Lex::Lexeme]
|
29
31
|
def get_lexeme(aLemma, aWordClass = nil)
|
@@ -45,11 +47,12 @@ module Zenlish
|
|
45
47
|
|
46
48
|
lexeme
|
47
49
|
else
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
50
|
+
entry = lemma2entry.fetch(aLemma)
|
51
|
+
if entry.kind_of?(Array)
|
52
|
+
err_msg = "Multiple lexemes for #{aLemma}"
|
53
|
+
raise StandardError, err_msg
|
54
|
+
else
|
55
|
+
entry.lexemes.first
|
53
56
|
end
|
54
57
|
end
|
55
58
|
end
|
@@ -82,7 +85,6 @@ module Zenlish
|
|
82
85
|
aHash[aKey] = aValue
|
83
86
|
end
|
84
87
|
end
|
85
|
-
|
86
88
|
end # class
|
87
89
|
end # module
|
88
90
|
end # module
|
data/lib/zenlish/lex/literal.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rley'
|
2
4
|
|
3
5
|
module Zenlish
|
@@ -7,7 +9,7 @@ module Zenlish
|
|
7
9
|
attr_reader :zlexeme
|
8
10
|
|
9
11
|
# initialize(theLexeme, aTerminal, aPosition) ⇒ Token
|
10
|
-
# @param literalText [String] the portion of input text that represents
|
12
|
+
# @param literalText [String] the portion of input text that represents
|
11
13
|
# an occurrence of the lexeme.
|
12
14
|
# @param aLexeme [Lex::Lexeme] the lexeme matched by the literal text.
|
13
15
|
# @param aPosition [Integer] the position of the literal in the input.
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../lex/literal'
|
4
|
+
|
5
|
+
module Zenlish
|
6
|
+
module Lexer
|
7
|
+
class Lexer
|
8
|
+
# @return [StringScanner] Low-level scanner object.
|
9
|
+
attr_reader(:scanner)
|
10
|
+
|
11
|
+
# @return [Boolean] true if the lexer is currently scanning Zenlish.
|
12
|
+
attr_reader(:zenlish_mode)
|
13
|
+
|
14
|
+
# @return [Integer] Current line number (one-based)
|
15
|
+
attr_reader(:lineno)
|
16
|
+
|
17
|
+
# @return [Integer] Offset of start of current line within the input (zero-based).
|
18
|
+
attr_reader(:line_start)
|
19
|
+
|
20
|
+
@@punct2name = {
|
21
|
+
':' => 'Colon',
|
22
|
+
',' => 'Comma',
|
23
|
+
'.' => 'Period',
|
24
|
+
'"' => 'Quote'
|
25
|
+
}.freeze
|
26
|
+
|
27
|
+
class ScanError < StandardError; end
|
28
|
+
|
29
|
+
# Constructor. Initialize a tokenizer for Zenlish.
|
30
|
+
# @param source [String] Zenlish text to tokenize.
|
31
|
+
def initialize(source)
|
32
|
+
@scanner = StringScanner.new('')
|
33
|
+
@zenlish_mode = true
|
34
|
+
reinitialize(source)
|
35
|
+
end
|
36
|
+
|
37
|
+
# @param source [String] Zenlish text to tokenize.
|
38
|
+
def reinitialize(source)
|
39
|
+
@scanner.string = source
|
40
|
+
@lineno = 1
|
41
|
+
@line_start = 0
|
42
|
+
end
|
43
|
+
|
44
|
+
# @return [Array<Token>] the sequence of tokens found in the input
|
45
|
+
def tokens
|
46
|
+
tok_sequence = []
|
47
|
+
until @scanner.eos?
|
48
|
+
token = _next_token
|
49
|
+
tok_sequence << token unless token.nil?
|
50
|
+
end
|
51
|
+
|
52
|
+
return tok_sequence
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def _next_token
|
58
|
+
skip_intertoken_spaces
|
59
|
+
if zenlish_mode
|
60
|
+
next_zenlish_token
|
61
|
+
else
|
62
|
+
next_json_token
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def next_zenlish_token
|
67
|
+
curr_ch = scanner.peek(1)
|
68
|
+
return nil if curr_ch.nil? || curr_ch.empty?
|
69
|
+
|
70
|
+
token = nil
|
71
|
+
|
72
|
+
if ':,."'.include? curr_ch
|
73
|
+
# Delimiters, separators => single character token
|
74
|
+
token = build_token(@@punct2name[curr_ch], scanner.getch)
|
75
|
+
elsif (literal = scanner.scan(/[^\s:;,."]+/))
|
76
|
+
token = build_token('WORD', literal)
|
77
|
+
else # Unknown token
|
78
|
+
erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
79
|
+
sequel = scanner.scan(/.{1,20}/)
|
80
|
+
erroneous += sequel unless sequel.nil?
|
81
|
+
raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
|
82
|
+
end
|
83
|
+
|
84
|
+
return token
|
85
|
+
end
|
86
|
+
|
87
|
+
def build_token(aSymbolName, aLiteral, _format = :default)
|
88
|
+
begin
|
89
|
+
col = scanner.pos - aLiteral.size - @line_start + 1
|
90
|
+
pos = Rley::Lexical::Position.new(@lineno, col)
|
91
|
+
token = Rley::Lexical::Token.new(aLiteral, aSymbolName, pos)
|
92
|
+
rescue StandardError => e
|
93
|
+
puts "Failing with '#{aSymbolName}' and '#{aLiteral}'"
|
94
|
+
raise e
|
95
|
+
end
|
96
|
+
|
97
|
+
return token
|
98
|
+
end
|
99
|
+
|
100
|
+
def next_json_token
|
101
|
+
curr_ch = scanner.peek(1)
|
102
|
+
return nil if curr_ch.nil? || curr_ch.empty?
|
103
|
+
|
104
|
+
token = nil
|
105
|
+
|
106
|
+
if ':,."'.include? curr_ch
|
107
|
+
# Delimiters, separators => single character token
|
108
|
+
token = build_token(@@punct2name[curr_ch], scanner.getch)
|
109
|
+
elsif (literal = scanner.scan(/[^\s:;,."]+/))
|
110
|
+
token = build_token('WORD', literal)
|
111
|
+
else # Unknown token
|
112
|
+
erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
113
|
+
sequel = scanner.scan(/.{1,20}/)
|
114
|
+
erroneous += sequel unless sequel.nil?
|
115
|
+
raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
|
116
|
+
end
|
117
|
+
|
118
|
+
return token
|
119
|
+
end
|
120
|
+
|
121
|
+
def skip_intertoken_spaces
|
122
|
+
pre_pos = scanner.pos
|
123
|
+
|
124
|
+
loop do
|
125
|
+
ws_found = scanner.skip(/[ \t\f]+/) ? true : false
|
126
|
+
nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
|
127
|
+
if nl_found
|
128
|
+
ws_found = true
|
129
|
+
next_line
|
130
|
+
end
|
131
|
+
break unless ws_found
|
132
|
+
end
|
133
|
+
|
134
|
+
curr_pos = scanner.pos
|
135
|
+
return if curr_pos == pre_pos
|
136
|
+
end
|
137
|
+
|
138
|
+
def next_line
|
139
|
+
@lineno += 1
|
140
|
+
@line_start = scanner.pos
|
141
|
+
end
|
142
|
+
end # class
|
143
|
+
end # module
|
144
|
+
end # module
|