zenlish 0.2.02 → 0.2.06

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +455 -0
  3. data/CHANGELOG.md +58 -1
  4. data/Gemfile +5 -3
  5. data/README.md +27 -0
  6. data/Rakefile +5 -3
  7. data/lib/zenlish/feature/boolean_domain.rb +9 -3
  8. data/lib/zenlish/feature/boolean_value.rb +3 -2
  9. data/lib/zenlish/feature/enumeration_domain.rb +8 -1
  10. data/lib/zenlish/feature/feature.rb +9 -7
  11. data/lib/zenlish/feature/feature_def.rb +10 -6
  12. data/lib/zenlish/feature/feature_domain.rb +9 -3
  13. data/lib/zenlish/feature/feature_struct.rb +3 -1
  14. data/lib/zenlish/feature/feature_struct_def.rb +5 -1
  15. data/lib/zenlish/feature/feature_struct_def_bearer.rb +6 -4
  16. data/lib/zenlish/feature/feature_value.rb +4 -2
  17. data/lib/zenlish/feature/identifier_domain.rb +5 -3
  18. data/lib/zenlish/feature/identifier_value.rb +3 -2
  19. data/lib/zenlish/feature/symbol_value.rb +3 -1
  20. data/lib/zenlish/inflect/atomic_o_expression.rb +2 -1
  21. data/lib/zenlish/inflect/composite_o_expression.rb +3 -1
  22. data/lib/zenlish/inflect/concatenation.rb +3 -2
  23. data/lib/zenlish/inflect/equals_literal.rb +7 -1
  24. data/lib/zenlish/inflect/feature_heading.rb +11 -5
  25. data/lib/zenlish/inflect/formal_argument.rb +3 -1
  26. data/lib/zenlish/inflect/function_call.rb +4 -1
  27. data/lib/zenlish/inflect/heading.rb +5 -0
  28. data/lib/zenlish/inflect/inflection_rule.rb +2 -0
  29. data/lib/zenlish/inflect/inflection_table.rb +29 -4
  30. data/lib/zenlish/inflect/inflection_table_builder.rb +27 -20
  31. data/lib/zenlish/inflect/input_asis.rb +3 -0
  32. data/lib/zenlish/inflect/input_expression.rb +4 -2
  33. data/lib/zenlish/inflect/literal_asis.rb +4 -1
  34. data/lib/zenlish/inflect/matches_pattern.rb +2 -0
  35. data/lib/zenlish/inflect/membership.rb +2 -0
  36. data/lib/zenlish/inflect/method_heading.rb +7 -6
  37. data/lib/zenlish/inflect/not_equals_literal.rb +2 -0
  38. data/lib/zenlish/inflect/nullary_input_expression.rb +3 -1
  39. data/lib/zenlish/inflect/output_expression.rb +2 -0
  40. data/lib/zenlish/inflect/substitution.rb +3 -1
  41. data/lib/zenlish/inflect/unary_input_expression.rb +4 -1
  42. data/lib/zenlish/inflect/unconditionally_true.rb +4 -2
  43. data/lib/zenlish/lang/dictionary.rb +35 -29
  44. data/lib/zenlish/lang/lemmatizer.rb +40 -0
  45. data/lib/zenlish/lang/zenlish_grammar.rb +9 -6
  46. data/lib/zenlish/lex/empty_lexicon_factory.rb +4 -3
  47. data/lib/zenlish/lex/lexeme.rb +9 -2
  48. data/lib/zenlish/lex/lexical_entry.rb +6 -4
  49. data/lib/zenlish/lex/lexicon.rb +10 -8
  50. data/lib/zenlish/lex/literal.rb +3 -1
  51. data/lib/zenlish/lexer/lexer.rb +144 -0
  52. data/lib/zenlish/trie/base_trie_node.rb +27 -0
  53. data/lib/zenlish/trie/trie.rb +132 -0
  54. data/lib/zenlish/trie/trie_node.rb +21 -0
  55. data/lib/zenlish/trie/trie_root.rb +10 -0
  56. data/lib/zenlish/version.rb +3 -1
  57. data/lib/zenlish/wclasses/adjective.rb +5 -2
  58. data/lib/zenlish/wclasses/adverb.rb +3 -1
  59. data/lib/zenlish/wclasses/adverb_maybe.rb +3 -1
  60. data/lib/zenlish/wclasses/adverb_not.rb +2 -0
  61. data/lib/zenlish/wclasses/all_word_classes.rb +4 -2
  62. data/lib/zenlish/wclasses/article.rb +2 -0
  63. data/lib/zenlish/wclasses/auxiliary.rb +5 -2
  64. data/lib/zenlish/wclasses/auxiliary_be.rb +34 -1
  65. data/lib/zenlish/wclasses/auxiliary_do.rb +31 -0
  66. data/lib/zenlish/wclasses/cardinal.rb +3 -1
  67. data/lib/zenlish/wclasses/common_noun.rb +2 -0
  68. data/lib/zenlish/wclasses/comparative_particle.rb +5 -3
  69. data/lib/zenlish/wclasses/conjunction.rb +2 -0
  70. data/lib/zenlish/wclasses/conjunctive_pronoun.rb +2 -0
  71. data/lib/zenlish/wclasses/coordinator.rb +4 -2
  72. data/lib/zenlish/wclasses/definite_article.rb +2 -0
  73. data/lib/zenlish/wclasses/degree_adverb.rb +2 -0
  74. data/lib/zenlish/wclasses/demonstrative_determiner.rb +40 -1
  75. data/lib/zenlish/wclasses/demonstrative_pronoun.rb +2 -0
  76. data/lib/zenlish/wclasses/determiner.rb +3 -1
  77. data/lib/zenlish/wclasses/distributive_determiner.rb +2 -0
  78. data/lib/zenlish/wclasses/existential_there.rb +2 -0
  79. data/lib/zenlish/wclasses/fronting_quantifier.rb +3 -1
  80. data/lib/zenlish/wclasses/indefinite_article.rb +2 -0
  81. data/lib/zenlish/wclasses/indefinite_pronoun.rb +3 -1
  82. data/lib/zenlish/wclasses/irregular_linking_verb.rb +3 -1
  83. data/lib/zenlish/wclasses/irregular_verb.rb +14 -22
  84. data/lib/zenlish/wclasses/irregular_verb_be.rb +41 -1
  85. data/lib/zenlish/wclasses/irregular_verb_can.rb +35 -0
  86. data/lib/zenlish/wclasses/irregular_verb_do.rb +38 -1
  87. data/lib/zenlish/wclasses/irregular_verb_extension.rb +8 -6
  88. data/lib/zenlish/wclasses/irregular_verb_have.rb +38 -1
  89. data/lib/zenlish/wclasses/irregular_verb_know.rb +3 -1
  90. data/lib/zenlish/wclasses/irregular_verb_say.rb +3 -1
  91. data/lib/zenlish/wclasses/irregular_verb_think.rb +3 -1
  92. data/lib/zenlish/wclasses/lexical_verb.rb +3 -4
  93. data/lib/zenlish/wclasses/linking_adverb.rb +2 -0
  94. data/lib/zenlish/wclasses/modal_verb_can.rb +27 -0
  95. data/lib/zenlish/wclasses/noun.rb +4 -2
  96. data/lib/zenlish/wclasses/numeral.rb +2 -0
  97. data/lib/zenlish/wclasses/personal_pronoun.rb +70 -1
  98. data/lib/zenlish/wclasses/possessive_determiner.rb +60 -0
  99. data/lib/zenlish/wclasses/preposition.rb +3 -1
  100. data/lib/zenlish/wclasses/preposition_of.rb +2 -0
  101. data/lib/zenlish/wclasses/preposition_than.rb +2 -0
  102. data/lib/zenlish/wclasses/pronoun.rb +3 -1
  103. data/lib/zenlish/wclasses/proper_noun.rb +10 -8
  104. data/lib/zenlish/wclasses/quantifier.rb +3 -1
  105. data/lib/zenlish/wclasses/regular_verb.rb +13 -18
  106. data/lib/zenlish/wclasses/regular_verb_want.rb +4 -2
  107. data/lib/zenlish/wclasses/relative_pronoun.rb +4 -2
  108. data/lib/zenlish/wclasses/subordinating_conjunction.rb +7 -5
  109. data/lib/zenlish/wclasses/verb.rb +16 -8
  110. data/lib/zenlish/wclasses/word_class.rb +11 -3
  111. data/spec/spec_helper.rb +3 -1
  112. data/spec/zenlish/feature/boolean_domain_spec.rb +6 -1
  113. data/spec/zenlish/feature/boolean_value_spec.rb +1 -1
  114. data/spec/zenlish/feature/enumeration_domain_spec.rb +9 -5
  115. data/spec/zenlish/feature/feature_spec.rb +3 -3
  116. data/spec/zenlish/feature/feature_struct_def_bearer_spec.rb +10 -8
  117. data/spec/zenlish/feature/feature_struct_def_spec.rb +3 -2
  118. data/spec/zenlish/feature/identifier_domain_spec.rb +3 -3
  119. data/spec/zenlish/feature/identifier_value_spec.rb +4 -4
  120. data/spec/zenlish/feature/symbol_value_spec.rb +3 -3
  121. data/spec/zenlish/inflect/concatenation_spec.rb +7 -7
  122. data/spec/zenlish/inflect/feature_heading_spec.rb +23 -3
  123. data/spec/zenlish/inflect/formal_argument_spec.rb +2 -2
  124. data/spec/zenlish/inflect/function_call_spec.rb +4 -4
  125. data/spec/zenlish/inflect/inflection_rule_spec.rb +9 -5
  126. data/spec/zenlish/inflect/inflection_table_builder_spec.rb +26 -21
  127. data/spec/zenlish/inflect/inflection_table_spec.rb +39 -22
  128. data/spec/zenlish/inflect/input_asis_spec.rb +2 -6
  129. data/spec/zenlish/inflect/literal_asis_spec.rb +3 -3
  130. data/spec/zenlish/inflect/matches_pattern_spec.rb +1 -1
  131. data/spec/zenlish/inflect/membership_spec.rb +2 -2
  132. data/spec/zenlish/inflect/method_heading_spec.rb +7 -1
  133. data/spec/zenlish/inflect/substitution_spec.rb +1 -1
  134. data/spec/zenlish/inflect/unconditionally_true_spec.rb +2 -2
  135. data/spec/zenlish/lang/dictionary_spec.rb +51 -16
  136. data/spec/zenlish/lang/lemmatizer_spec.rb +43 -0
  137. data/spec/zenlish/lang/zenlish_grammar_spec.rb +2 -2
  138. data/spec/zenlish/lex/lexeme_spec.rb +16 -11
  139. data/spec/zenlish/lex/lexical_entry_spec.rb +1 -2
  140. data/spec/zenlish/lex/lexicon_spec.rb +9 -8
  141. data/spec/zenlish/lex/literal_spec.rb +2 -2
  142. data/spec/zenlish/lexer/lexer_spec.rb +52 -0
  143. data/spec/zenlish/parser/lesson1_spec.rb +2 -2
  144. data/spec/zenlish/parser/lesson2_spec.rb +31 -36
  145. data/spec/zenlish/parser/lesson3_spec.rb +98 -138
  146. data/spec/zenlish/parser/zparser_spec.rb +2 -1
  147. data/spec/zenlish/support/minimal_lexicon.rb +2 -0
  148. data/spec/zenlish/support/var2word.rb +27 -18
  149. data/spec/zenlish/trie/base_trie_node_spec.rb +33 -0
  150. data/spec/zenlish/trie/trie_spec.rb +108 -0
  151. data/spec/zenlish/wclasses/common_noun_spec.rb +19 -7
  152. data/spec/zenlish/wclasses/demonstrative_determiner_spec.rb +47 -0
  153. data/spec/zenlish/wclasses/irregular_verb_can_spec.rb +60 -0
  154. data/spec/zenlish/wclasses/irregular_verb_spec.rb +14 -4
  155. data/spec/zenlish/wclasses/lexical_verb_spec.rb +9 -2
  156. data/spec/zenlish/wclasses/modal_verb_can_spec.rb +50 -0
  157. data/spec/zenlish/wclasses/personal_pronoun_spec.rb +66 -0
  158. data/spec/zenlish/wclasses/possessive_determiner_spec.rb +69 -0
  159. data/spec/zenlish/wclasses/regular_verb_spec.rb +35 -15
  160. data/spec/zenlish/wclasses/regular_verb_want_spec.rb +30 -0
  161. data/spec/zenlish/wclasses/verb_spec.rb +9 -2
  162. data/spec/zenlish_spec.rb +3 -1
  163. data/zenlish.gemspec +17 -14
  164. metadata +37 -10
  165. data/lib/zenlish/wclasses/modal_verb_could.rb +0 -9
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'nullary_input_expression'
2
4
 
3
5
  module Zenlish
@@ -5,7 +7,7 @@ module Zenlish
5
7
  class UnconditionallyTrue < NullaryInputExpression
6
8
  def success?(_headings, _lexeme, _heading_values)
7
9
  true
8
- end
10
+ end
9
11
  end # class
10
12
  end # module
11
- end # module
13
+ end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  unless defined?(Zenlish::Lang::Dictionary)
2
4
  require_relative '../feature/feature_struct_def_bearer'
3
5
  module Zenlish
@@ -9,15 +11,16 @@ unless defined?(Zenlish::Lang::Dictionary)
9
11
  sandbox = Object.new
10
12
  sandbox.extend(Zenlish::Lex::EmptyLexiconFactory)
11
13
  Dictionary = sandbox.create_empty_lexicon
12
- self.extend(Feature::FeatureStructDefBearer)
14
+ extend(Feature::FeatureStructDefBearer)
13
15
 
14
- # @param aLemma [String] is the canonical form, dictionary form,
16
+ # @param aLemma [String] is the canonical form, dictionary form,
15
17
  # or citation form of a headword.
16
18
  # @param aWClassName [String] the name of a word class.
17
19
  def self.add_entry(aLemma, aWClassName, aFeatureHash = nil, &aBlock)
18
20
  entry = Zenlish::Lex::LexicalEntry.new(aLemma)
19
21
  wclass = Dictionary.name2terminal[aWClassName]
20
22
  raise StandardError, "Undefined word class for '#{aLemma}'" unless wclass
23
+
21
24
  lexeme = Zenlish::Lex::Lexeme.new(wclass, entry, aFeatureHash)
22
25
  lexeme.instance_exec(&aBlock) if block_given?
23
26
  lexeme.freeze
@@ -56,7 +59,7 @@ unless defined?(Zenlish::Lang::Dictionary)
56
59
  add_entry('big', 'Adjective')
57
60
  add_entry('body', 'CommonNoun')
58
61
  add_entry('but', 'Coordinator')
59
- add_entry('can', 'IrregularVerb')
62
+ add_entry('can', 'IrregularVerbCan')
60
63
  add_entry('can', 'ModalVerbCan')
61
64
  add_entry('cause', 'RegularVerb')
62
65
  add_entry('change', 'RegularVerb')
@@ -65,15 +68,12 @@ unless defined?(Zenlish::Lang::Dictionary)
65
68
  end
66
69
  add_entry('contain', 'RegularVerb')
67
70
  add_entry('container', 'CommonNoun')
68
- add_entry('could', 'ModalVerbCould')
69
- add_entry('damage', 'RegularVerb')
71
+ add_entry('damage', 'RegularVerb')
70
72
  add_entry('die', 'RegularVerb')
71
- add_entry('difficult', 'Adjective')
73
+ add_entry('difficult', 'Adjective')
72
74
  add_entry('different', 'Adjective')
73
75
  add_entry('do', 'AuxiliaryDo')
74
- add_entry('do', 'IrregularVerbDo') do
75
- forms past_simple: 'did', past_participle: 'done'
76
- end
76
+ add_entry('do', 'IrregularVerbDo')
77
77
  add_entry('each', 'DistributiveDeterminer')
78
78
  add_entry('each', 'Pronoun')
79
79
  add_entry('exist', 'RegularVerb')
@@ -88,17 +88,22 @@ unless defined?(Zenlish::Lang::Dictionary)
88
88
  add_entry('good', 'Adjective')
89
89
  add_entry('have', 'IrregularVerbHave')
90
90
  add_entry('happen', 'RegularVerb')
91
- add_entry('hear', 'IrregularLinkingVerb')
91
+ add_entry('hear', 'IrregularLinkingVerb') do
92
+ forms past_simple: 'heard', past_participle: 'heard'
93
+ end
92
94
  add_entry('here', 'Adverb')
93
95
  # example: ...from here (works as a pronoun of a place)
94
- add_entry('here', 'CommonNoun', {'NUMBER' => enumeration(:singular),
95
- 'PARADIGM' => [identifier, 'Singular_only']})
96
- add_entry('I', 'PersonalPronoun')
96
+ add_entry('here', 'CommonNoun', { 'NUMBER' => enumeration(:singular),
97
+ 'PARADIGM' => [identifier, 'Singular_only'] })
98
+ add_entry('I', 'PersonalPronoun', { 'PERSON' => enumeration(:first),
99
+ 'GENDER' => enumeration(:feminine, :masculine) })
97
100
  add_entry('if', 'SubordinatingConjunction')
98
101
  add_entry('in', 'Preposition')
99
102
  add_entry('inside', 'Preposition')
100
- add_entry('it', 'PersonalPronoun')
101
- add_entry('its', 'PossessiveDeterminer')
103
+ add_entry('it', 'PersonalPronoun', { 'PERSON' => enumeration(:third),
104
+ 'PARADIGM' => [identifier, 'ppn_3rd_paradigm'] })
105
+ add_entry('its', 'PossessiveDeterminer', { 'PERSON' => enumeration(:third),
106
+ 'PARADIGM' => [identifier, 'possdet_3rd_paradigm'] })
102
107
  add_entry('kind', 'CommonNoun')
103
108
  add_entry('know', 'IrregularVerbKnow') do
104
109
  forms past_simple: 'knew', past_participle: 'known'
@@ -113,18 +118,18 @@ unless defined?(Zenlish::Lang::Dictionary)
113
118
  end
114
119
  add_entry('many', 'Quantifier')
115
120
  add_entry('maybe', 'AdverbMaybe')
116
- add_entry('me', 'PersonalPronoun')
117
121
  add_entry('moment', 'CommonNoun')
118
122
  add_entry('more', 'Adjective')
119
123
  add_entry('more', 'Adverb')
120
124
  add_entry('move', 'RegularVerb')
121
125
  add_entry('much', 'Adverb')
122
- add_entry('my', 'PossessiveDeterminer')
126
+ add_entry('my', 'PossessiveDeterminer', { 'PERSON' => enumeration(:first),
127
+ 'GENDER' => enumeration(:feminine, :masculine) })
123
128
  add_entry('near', 'Preposition')
124
129
  add_entry('near to', 'Preposition')
125
130
  add_entry('now', 'Adverb')
126
- add_entry('now', 'CommonNoun', {'NUMBER' => enumeration(:singular),
127
- 'PARADIGM' => [identifier, 'Singular_only']})
131
+ add_entry('now', 'CommonNoun', { 'NUMBER' => enumeration(:singular),
132
+ 'PARADIGM' => [identifier, 'Singular_only'] })
128
133
  add_entry('not', 'AdverbNot')
129
134
  add_entry('of', 'PrepositionOf')
130
135
  add_entry('on', 'Preposition')
@@ -134,10 +139,10 @@ unless defined?(Zenlish::Lang::Dictionary)
134
139
  add_entry('or', 'Coordinator')
135
140
  add_entry('other', 'Adjective')
136
141
  add_entry('part', 'CommonNoun')
137
- add_entry('people', 'CommonNoun', {'NUMBER' => enumeration(:plural),
138
- 'PARADIGM' => [identifier, 'Plural_only']})
139
- add_entry('person', 'CommonNoun', {'NUMBER' => enumeration(:singular),
140
- 'PARADIGM' => [identifier, 'Singular_only']})
142
+ add_entry('people', 'CommonNoun', { 'NUMBER' => enumeration(:plural),
143
+ 'PARADIGM' => [identifier, 'Plural_only'] })
144
+ add_entry('person', 'CommonNoun', { 'NUMBER' => enumeration(:singular),
145
+ 'PARADIGM' => [identifier, 'Singular_only'] })
141
146
  add_entry('place', 'CommonNoun')
142
147
  add_entry('same', 'Adjective')
143
148
  add_entry('same', 'Pronoun')
@@ -158,11 +163,8 @@ unless defined?(Zenlish::Lang::Dictionary)
158
163
  add_entry('than', 'PrepositionThan')
159
164
  add_entry('that', 'RelativePronoun')
160
165
  add_entry('the', 'DefiniteArticle')
161
- add_entry('them', 'PersonalPronoun')
162
166
  add_entry('then', 'LinkingAdverb')
163
- add_entry('their', 'PossessiveDeterminer')
164
167
  add_entry('there', 'ExistentialThere')
165
- add_entry('they', 'PersonalPronoun')
166
168
  add_entry('thing', 'CommonNoun')
167
169
  add_entry('think', 'IrregularVerbThink') do
168
170
  forms past_simple: 'thought', past_participle: 'thought'
@@ -187,8 +189,12 @@ unless defined?(Zenlish::Lang::Dictionary)
187
189
  add_entry('who', 'RelativePronoun')
188
190
  add_entry('with', 'Preposition')
189
191
  add_entry('word', 'CommonNoun')
190
- add_entry('you', 'PersonalPronoun')
191
- add_entry('your', 'PossessiveDeterminer')
192
+ add_entry('you', 'PersonalPronoun', { 'PERSON' => enumeration(:second),
193
+ 'GENDER' => enumeration(:feminine, :masculine),
194
+ 'PARADIGM' => [identifier, 'ppn_2nd_paradigm'] })
195
+ add_entry('your', 'PossessiveDeterminer', { 'PERSON' => enumeration(:second),
196
+ 'GENDER' => enumeration(:feminine, :masculine),
197
+ 'PARADIGM' => [identifier, 'possdet_2nd_paradigm'] })
192
198
 
193
199
  # Punctuation signs...
194
200
  add_entry(':', 'Colon')
@@ -197,4 +203,4 @@ unless defined?(Zenlish::Lang::Dictionary)
197
203
  add_entry('"', 'Quote')
198
204
  end # module
199
205
  end # module
200
- end # defined?
206
+ end # defined?
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../trie/trie'
4
+
5
+ module Zenlish
6
+ module Lang
7
+ class Lemmatizer
8
+ # @return [Trie::Trie] Trie (aka prefix tree) with all word forms from the dictionary.
9
+ attr_reader :trie
10
+
11
+ def initialize(aLexicon)
12
+ @trie = Trie::Trie.new
13
+ initialize_trie(aLexicon)
14
+ end
15
+
16
+ def lemmatize(aWordForm, _hints = nil)
17
+ node = trie.search(aWordForm)
18
+ node&.value
19
+ end
20
+
21
+ private
22
+
23
+ def initialize_trie(aLexicon)
24
+ aLexicon.entries.each do |ent|
25
+ citation_form = ent.lemma
26
+ ent.lexemes.each do |lxm|
27
+ if lxm.wclass.kind_of?(Zenlish::WClasses::WordClass)
28
+ if lxm.wclass.invariable?
29
+ trie.add(citation_form, lxm)
30
+ else
31
+ w_forms = lxm.all_inflections
32
+ w_forms.each { |form| trie.add(form, lxm) }
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end # class
39
+ end # module
40
+ end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Grammar for a simple subset of English language
2
4
  # It is called Zenlish
3
5
 
@@ -6,7 +8,7 @@ require_relative 'dictionary'
6
8
 
7
9
  ########################################
8
10
  # Define a grammar for a highly English-like language
9
- builder = Rley::Syntax::GrammarBuilder.new do
11
+ builder = Rley::grammar_builder do
10
12
  add_terminals(*Zenlish::Lang::Dictionary.terminals)
11
13
 
12
14
  rule 'zenlish' => 'prose'
@@ -19,7 +21,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
19
21
  #################
20
22
  # Simple sentence
21
23
  #################
22
- rule 'simple_sentence' => 'front_adverb simple_sentence'
24
+ rule 'simple_sentence' => 'front_adverb simple_sentence'
23
25
  rule 'front_adverb' => 'AdverbMaybe'
24
26
  rule 'front_adverb' => 'Adverb'
25
27
  rule 'simple_sentence' => 'declarative_simple_sentence'
@@ -107,7 +109,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
107
109
  rule 'comparative_start' => 'ComparativeParticle'
108
110
  rule 'conjunctive_prefix' => 'ConjunctivePronoun noun_phrase verb_phrase'
109
111
  rule 'identifying_clause' => 'RelativePronoun tense_verb_phrase'
110
- rule 'relative_clause_opt' => 'relative_clause'
112
+ rule 'relative_clause_opt' => 'relative_clause'
111
113
  rule 'relative_clause_opt' => []
112
114
  rule 'relative_clause' => 'RelativePronoun tense_phrase'
113
115
  # Sentence 3-Bxa 'Lisa sees a living thing that is very big.
@@ -137,7 +139,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
137
139
  rule 'tense' => 'AuxiliaryBe'
138
140
  rule 'tense' => 'AuxiliaryDo'
139
141
  rule 'tense' => 'ModalVerbCan'
140
- rule 'tense' => 'ModalVerbCould'
142
+
141
143
 
142
144
  #############
143
145
  # NOUN PHRASE
@@ -232,12 +234,13 @@ builder = Rley::Syntax::GrammarBuilder.new do
232
234
  rule 'lexical_verb' => 'IrregularVerb'
233
235
  rule 'lexical_verb' => 'IrregularLinkingVerb'
234
236
  rule 'lexical_verb' => 'IrregularVerbBe'
237
+ rule 'lexical_verb' => 'IrregularVerbCan'
235
238
  rule 'lexical_verb' => 'IrregularVerbDo'
236
239
  rule 'lexical_verb' => 'IrregularVerbHave'
237
240
  rule 'lexical_verb' => 'IrregularVerbKnow'
238
241
  rule 'lexical_verb' => 'IrregularVerbSay'
239
242
  rule 'lexical_verb' => 'IrregularVerbThink'
240
-
243
+
241
244
 
242
245
  rule 'linking_verb' => 'IrregularLinkingVerb'
243
246
 
@@ -293,7 +296,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
293
296
  # complementation by a verb: gerund -ing form...
294
297
  rule 'preposition_object' => 'noun_phrase_opt lexical_verb post_head_vp'
295
298
  # preposition_object => "a gerund (a verb form ending in "-ing") that acts as a noun # Example: He beat Lee without overly trying.
296
- rule 'preposition_object' => 'conjunctive_prefix' # It's obvious from _what he said_.
299
+ rule 'preposition_object' => 'conjunctive_prefix' # It's obvious from _what he said_.
297
300
  rule 'preposition_object' => []
298
301
 
299
302
  ######################
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../wclasses/all_word_classes'
2
4
  require_relative 'lexicon'
3
5
 
@@ -7,7 +9,7 @@ module Zenlish
7
9
  # Factory method. Helps in creating an "empty" lexicon.
8
10
  # It just contains the word classes of Zenlish but no headwords.
9
11
  # @return [Lexicon] the created lexicon object
10
- def create_empty_lexicon()
12
+ def create_empty_lexicon
11
13
  lexicon = Lexicon.new
12
14
 
13
15
  add_word_classes(lexicon)
@@ -41,6 +43,7 @@ module Zenlish
41
43
  add_wclass(aLexicon, WClasses::IrregularLinkingVerb)
42
44
  add_wclass(aLexicon, WClasses::IrregularVerb)
43
45
  add_wclass(aLexicon, WClasses::IrregularVerbBe)
46
+ add_wclass(aLexicon, WClasses::IrregularVerbCan)
44
47
  add_wclass(aLexicon, WClasses::IrregularVerbDo)
45
48
  add_wclass(aLexicon, WClasses::IrregularVerbHave)
46
49
  add_wclass(aLexicon, WClasses::IrregularVerbKnow)
@@ -48,7 +51,6 @@ module Zenlish
48
51
  add_wclass(aLexicon, WClasses::IrregularVerbThink)
49
52
  add_wclass(aLexicon, WClasses::LinkingAdverb)
50
53
  add_wclass(aLexicon, WClasses::ModalVerbCan)
51
- add_wclass(aLexicon, WClasses::ModalVerbCould)
52
54
  add_wclass(aLexicon, WClasses::PersonalPronoun)
53
55
  add_wclass(aLexicon, WClasses::PossessiveDeterminer)
54
56
  add_wclass(aLexicon, WClasses::Preposition)
@@ -73,7 +75,6 @@ module Zenlish
73
75
  def add_wclass(aLexicon, aClass)
74
76
  aLexicon.add_terminal(aClass.new.freeze)
75
77
  end
76
-
77
78
  end # module
78
79
  end # module
79
80
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../feature/feature_struct_def_bearer'
2
4
  module Zenlish
3
5
  module Lex
@@ -20,7 +22,7 @@ module Zenlish
20
22
  anEntry.add_lexeme(self)
21
23
  if aWClass.kind_of?(WClasses::WordClass)
22
24
  unless wclass.extension.nil?
23
- self.extend(wclass.extension)
25
+ extend(wclass.extension)
24
26
  init_extension(self)
25
27
  end
26
28
  p_struct = aWClass.kind_of?(WClasses::WordClass) ? aWClass.struct : nil
@@ -42,6 +44,11 @@ module Zenlish
42
44
  table.inflect(self, constraints)
43
45
  end
44
46
 
47
+ def all_inflections
48
+ table = paradigm
49
+ table.all_inflections(self)
50
+ end
51
+
45
52
  # @return [String] the base (dictionary) form.
46
53
  def lemma
47
54
  entry.lemma
@@ -57,4 +64,4 @@ module Zenlish
57
64
  alias base_form lemma
58
65
  end # class
59
66
  end # module
60
- end # module
67
+ end # module
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Zenlish
2
4
  module Lex
3
5
  # TODO: document
4
6
  class LexicalEntry
5
- #@return [String] the lemma (dictionary) form of a word.
7
+ # @return [String] the lemma (dictionary) form of a word.
6
8
  attr_reader :lemma
7
9
 
8
10
  # @return [Array<Lexeme, Rley::Syntax::Terminal>]
9
11
  attr_reader :lexemes
10
12
 
11
- # @param theLemma [String] lemma (= citation form), a word form used
13
+ # @param theLemma [String] lemma (= citation form), a word form used
12
14
  # conventionnaly to represent a lexeme.
13
15
  # @param aLexeme [Lexeme, NilClass] the lexeme to link with lexical entry.
14
16
  def initialize(theLemma, aLexeme = nil)
@@ -17,10 +19,10 @@ module Zenlish
17
19
  add_lexeme(aLexeme)
18
20
  end
19
21
 
20
- # @param aLexeme [Lexeme, NilClass] the lexeme to link with lexical entry.
22
+ # @param aLexeme [Lexeme, NilClass] the lexeme to link with lexical entry.
21
23
  def add_lexeme(aLexeme)
22
24
  lexemes << aLexeme if aLexeme
23
25
  end
24
26
  end # class
25
27
  end # module
26
- end # module
28
+ end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Zenlish
2
4
  module Lex
3
5
  # A lexicon is a collection of lexical entries.
@@ -5,7 +7,7 @@ module Zenlish
5
7
  class Lexicon
6
8
  # @return [Array<Lex::LexicalEntry>] entries in the lexicon
7
9
  attr_reader :entries
8
-
10
+
9
11
  # @return [Hash{String => Lex::LexicalEntry}] the lexical entry for the given lemma.
10
12
  attr_reader :lemma2entry
11
13
 
@@ -23,7 +25,7 @@ module Zenlish
23
25
  end
24
26
 
25
27
  # @param aLemma [String] retrieve the lexeme for the given "head word".
26
- # @param aWordClass [WordClasses::WordClass, NilClass] the word class of
28
+ # @param aWordClass [WordClasses::WordClass, NilClass] the word class of
27
29
  # the lexeme.
28
30
  # @return [Lex::Lexeme]
29
31
  def get_lexeme(aLemma, aWordClass = nil)
@@ -45,11 +47,12 @@ module Zenlish
45
47
 
46
48
  lexeme
47
49
  else
48
- begin
49
- lemma2entry.fetch(aLemma).lexemes.first
50
- rescue NoMethodError => exc
51
- $stderr.puts "Multiple lexemes for #{aLemma}"
52
- raise exc
50
+ entry = lemma2entry.fetch(aLemma)
51
+ if entry.kind_of?(Array)
52
+ err_msg = "Multiple lexemes for #{aLemma}"
53
+ raise StandardError, err_msg
54
+ else
55
+ entry.lexemes.first
53
56
  end
54
57
  end
55
58
  end
@@ -82,7 +85,6 @@ module Zenlish
82
85
  aHash[aKey] = aValue
83
86
  end
84
87
  end
85
-
86
88
  end # class
87
89
  end # module
88
90
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rley'
2
4
 
3
5
  module Zenlish
@@ -7,7 +9,7 @@ module Zenlish
7
9
  attr_reader :zlexeme
8
10
 
9
11
  # initialize(theLexeme, aTerminal, aPosition) ⇒ Token
10
- # @param literalText [String] the portion of input text that represents
12
+ # @param literalText [String] the portion of input text that represents
11
13
  # an occurrence of the lexeme.
12
14
  # @param aLexeme [Lex::Lexeme] the lexeme matched by the literal text.
13
15
  # @param aPosition [Integer] the position of the literal in the input.
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../lex/literal'
4
+
5
+ module Zenlish
6
+ module Lexer
7
+ class Lexer
8
+ # @return [StringScanner] Low-level scanner object.
9
+ attr_reader(:scanner)
10
+
11
+ # @return [Boolean] true if the lexer is currently scanning Zenlish.
12
+ attr_reader(:zenlish_mode)
13
+
14
+ # @return [Integer] Current line number (one-based)
15
+ attr_reader(:lineno)
16
+
17
+ # @return [Integer] Offset of start of current line within the input (zero-based).
18
+ attr_reader(:line_start)
19
+
20
+ @@punct2name = {
21
+ ':' => 'Colon',
22
+ ',' => 'Comma',
23
+ '.' => 'Period',
24
+ '"' => 'Quote'
25
+ }.freeze
26
+
27
+ class ScanError < StandardError; end
28
+
29
+ # Constructor. Initialize a tokenizer for Zenlish.
30
+ # @param source [String] Zenlish text to tokenize.
31
+ def initialize(source)
32
+ @scanner = StringScanner.new('')
33
+ @zenlish_mode = true
34
+ reinitialize(source)
35
+ end
36
+
37
+ # @param source [String] Zenlish text to tokenize.
38
+ def reinitialize(source)
39
+ @scanner.string = source
40
+ @lineno = 1
41
+ @line_start = 0
42
+ end
43
+
44
+ # @return [Array<Token>] the sequence of tokens scanned from the source.
45
+ def tokens
46
+ tok_sequence = []
47
+ until @scanner.eos?
48
+ token = _next_token
49
+ tok_sequence << token unless token.nil?
50
+ end
51
+
52
+ return tok_sequence
53
+ end
54
+
55
+ private
56
+
57
+ def _next_token
58
+ skip_intertoken_spaces
59
+ if zenlish_mode
60
+ next_zenlish_token
61
+ else
62
+ next_json_token
63
+ end
64
+ end
65
+
66
+ def next_zenlish_token
67
+ curr_ch = scanner.peek(1)
68
+ return nil if curr_ch.nil? || curr_ch.empty?
69
+
70
+ token = nil
71
+
72
+ if ':,."'.include? curr_ch
73
+ # Delimiters, separators => single character token
74
+ token = build_token(@@punct2name[curr_ch], scanner.getch)
75
+ elsif (literal = scanner.scan(/[^\s:;,."]+/))
76
+ token = build_token('WORD', literal)
77
+ else # Unknown token
78
+ erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
79
+ sequel = scanner.scan(/.{1,20}/)
80
+ erroneous += sequel unless sequel.nil?
81
+ raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
82
+ end
83
+
84
+ return token
85
+ end
86
+
87
+ def build_token(aSymbolName, aLiteral, _format = :default)
88
+ begin
89
+ col = scanner.pos - aLiteral.size - @line_start + 1
90
+ pos = Rley::Lexical::Position.new(@lineno, col)
91
+ token = Rley::Lexical::Token.new(aLiteral, aSymbolName, pos)
92
+ rescue StandardError => e
93
+ puts "Failing with '#{aSymbolName}' and '#{aLiteral}'"
94
+ raise e
95
+ end
96
+
97
+ return token
98
+ end
99
+
100
+ def next_json_token
101
+ curr_ch = scanner.peek(1)
102
+ return nil if curr_ch.nil? || curr_ch.empty?
103
+
104
+ token = nil
105
+
106
+ if ':,."'.include? curr_ch
107
+ # Delimiters, separators => single character token
108
+ token = build_token(@@punct2name[curr_ch], scanner.getch)
109
+ elsif (literal = scanner.scan(/[^\s:;,."]+/))
110
+ token = build_token('WORD', literal)
111
+ else # Unknown token
112
+ erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
113
+ sequel = scanner.scan(/.{1,20}/)
114
+ erroneous += sequel unless sequel.nil?
115
+ raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
116
+ end
117
+
118
+ return token
119
+ end
120
+
121
+ def skip_intertoken_spaces
122
+ pre_pos = scanner.pos
123
+
124
+ loop do
125
+ ws_found = scanner.skip(/[ \t\f]+/) ? true : false
126
+ nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
127
+ if nl_found
128
+ ws_found = true
129
+ next_line
130
+ end
131
+ break unless ws_found
132
+ end
133
+
134
+ curr_pos = scanner.pos
135
+ return if curr_pos == pre_pos
136
+ end
137
+
138
+ def next_line
139
+ @lineno += 1
140
+ @line_start = scanner.pos
141
+ end
142
+ end # class
143
+ end # module
144
+ end # module