gullah 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,451 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+
5
+ require 'gullah'
6
+ require 'byebug'
7
+
8
+ # :stopdoc:
9
+
10
# Behavioural tests for the Gullah grammar DSL.
#
# Each nested class below is a small grammar declared with +rule+, +leaf+ and
# (sometimes) +ignore+. The test method that follows each grammar parses some
# sample text with it and checks the shape of the resulting parses.
#
# The order of declarations inside the grammars is deliberately left exactly
# as written; see the order-dependence TODO on Cat.
class BasicTest < Minitest::Test
  # A single rule gathering one or more whitespace-delimited tokens.
  class Simple
    extend Gullah

    rule :a, 'a+'

    leaf :a, /\S+/
  end

  # Three tokens should yield exactly one parse with a single :a root.
  def test_basic
    parses = Simple.parse 'foo bar baz'
    assert_equal 1, parses.length, 'only one optimal parse'
    parse = parses.first
    assert_equal parse.size, parse.nodes.count, 'nodes iterator works'
    assert_equal 1, parse.roots.length, 'parse has a root node'
    root = parse.roots.first
    assert_equal :a, root.name, 'root node has the right label'
    assert_equal 6, root.subtree.count, 'parse has the expected number of nodes'
    assert_equal 4, root.subtree.count(&:significant?),
                 'parse has the expected balance of significant and ignorable nodes'
  end

  # A rule that must consume exactly two :a nodes at a time.
  class FixedCount
    extend Gullah

    rule :a, 'a{2}'

    leaf :a, /\S+/
  end

  # Three tokens can be paired up in two ways, so two parses are expected,
  # each with two nonterminal nodes.
  def test_fixed_count
    parses = FixedCount.parse 'foo bar baz'
    assert_equal 2, parses.length, '2 optimal parses'
    parses.each do |p|
      assert_equal 1, p.roots.length, 'parse has a root node'
      nonterminals = p.roots.first.subtree.select(&:nonterminal?)
      assert_equal 2, nonterminals.count, 'parse has 2 nonterminal nodes'
      # Iterate directly rather than wrapping the +each+ call in +assert+:
      # a do/end block attached to `assert x.each do ... end` is handed to
      # +assert+, so the per-node check would never execute.
      nonterminals.each do |n|
        # Only non-ignorable children are counted; test_basic shows that
        # ignorable (whitespace) nodes are part of the subtree as well.
        assert_equal 2, n.children.reject(&:ignorable?).length,
                     'nonterminal nodes each have 2 children'
      end
    end
  end

  # Like FixedCount, but each :a node must also pass the +balanced+ test.
  class Balanced
    extend Gullah

    rule :a, 'a{2}', tests: %i[balanced]

    leaf :a, /\S+/

    # we only want a perfectly symmetrical tree
    def balanced(n)
      if n.children.first.size == n.children.last.size
        :pass
      else
        :fail
      end
    end
  end

  # Four tokens admit exactly one symmetrical tree; every nonterminal :a in
  # it should record the passed test under attributes[:satisfied].
  def test_test
    parses = Balanced.parse 'foo bar baz plugh'
    assert_equal 1, parses.length, '1 optimal parse'
    parse = parses.first
    assert_equal 1, parse.roots.length, 'parse has a root node'
    root = parse.roots.first
    all_marked = root.subtree
                     .select(&:nonterminal?)
                     .select { |n| n.name == :a }
                     .all? { |n| n.attributes[:satisfied]&.include?([:balanced]) }
    assert all_marked, 'the balanced nodes are marked as such'
  end

  # Leaves only, no rules. Whitespace is matched by an explicit :ws ignore
  # rule; +keep_whitespace+ presumably turns off Gullah's default whitespace
  # handling -- confirm against the library.
  class Trash
    extend Gullah

    keep_whitespace

    leaf :word, /\w+/
    ignore :ws, /\s+/
  end

  # The trailing '.' matches no leaf or ignore rule, so it should surface as
  # a single trash node at the end of the root list.
  def test_trash
    parses = Trash.parse 'There may be punctuation.'
    assert_equal 1, parses.length, 'only one parse'
    parse = parses.first
    assert_equal 8, parse.roots.length, 'there are 8 nodes in the parse'
    assert parse.roots.all?(&:leaf?), 'all nodes are leaf nodes'
    assert_equal 3, parse.roots.count { |n| n.name == :ws }, 'there are 3 whitespace nodes'
    assert_equal 3, parse.roots.count(&:ignorable?), 'there are 3 ignorable nodes'
    assert_equal 4, parse.roots.count { |n| n.name == :word }, 'there are 4 word nodes'
    assert_equal 1, parse.roots.count(&:trash?), 'there is 1 trash node'
    last_node = parse.roots.last
    assert last_node.trash?, 'the last node is the trash node'
  end

  # A toy phrase-structure grammar for English sentences.
  # TODO: order dependence problem
  class Cat
    extend Gullah

    rule :S, 'NP VP'
    rule :NP, 'D NB'
    rule :NB, 'A* N'
    rule :VP, 'VP PP'
    rule :VP, 'V'
    rule :PP, 'P NP'
    rule :P, 'prepositions'
    rule :V, 'verbs'
    rule :D, 'determiners'
    rule :N, 'nouns'
    rule :A, 'adjectives'

    leaf :determiners, /\b(the|a)\b/i
    leaf :nouns, /\b(cat|mat)\b/i
    leaf :prepositions, /\b(on|in|around|above|beside)\b/i
    leaf :verbs, /\b(sat|slept|moped)\b/
    leaf :adjectives, /\b(big|small|hairy|bald)\b/i
    ignore :whatever, /[^\w\s]+/
  end

  # The sentence should reduce to a single :S root (ignorable roots aside)
  # whose :VP contains the :V node for 'sat'.
  def test_cat
    parses = Cat.parse 'The cat sat on the mat.'
    assert_equal 1, parses.length, 'there is only one parse of this sentence'
    parse = parses.first
    assert_equal 1, parse.roots.reject(&:ignorable?).count, 'there is a root node for this parse'
    root = parse.roots.first
    assert_equal :S, root.name, 'the root node is a sentence'
    # Safe navigation keeps a missing :VP or :V from raising, so the
    # assertion below reports the failure with its own message instead.
    verb = root.descendants.find { |d| d.name == :VP }&.descendants&.find { |d| d.name == :V }
    assert_equal 'sat', verb&.text, 'we have the expected verb'
  end

  # A rule whose body is an alternation between two leaf types.
  class SubRules
    extend Gullah

    rule :s, 'thing+'
    rule :thing, 'word | integer'

    leaf :word, /[a-z]+/i
    leaf :integer, /\d+/
  end

  # Each alternative of 'word | integer' should produce a :thing node.
  def test_subrules
    parses = SubRules.parse '123 word'
    assert_equal 1, parses.length, 'there is only one parse of this sentence'
    parse = parses.first
    assert_equal 1, parse.roots.reject(&:ignorable?).count, 'there is a root node for this parse'
    root = parse.roots.first
    assert_equal :s, root.name, 'found expected root'
    assert_equal 2, root.subtree.count { |n| n.name == :thing }, 'two things'
    assert_equal 1, root.subtree.count { |n| n.name == :word }, 'one word'
    assert_equal 1, root.subtree.count { |n| n.name == :integer }, 'one integer'
  end

  # SubRules plus a one-argument node test on the alternation rule.
  class SubRulesWithTest
    extend Gullah

    rule :s, 'thing+'
    rule :thing, 'word | integer', tests: %i[foo]

    leaf :word, /[a-z]+/i
    leaf :integer, /\d+/

    # Always passes, returning an extra element after the :pass verdict.
    def foo(_n)
      %i[pass etc]
    end
  end

  # The extra element returned by the test is expected to be stored next to
  # the test name in attributes[:satisfied].
  def test_subrules_with_test
    parses = SubRulesWithTest.parse '123 word'
    assert_equal 1, parses.length, 'there is only one parse of this sentence'
    parse = parses.first
    assert_equal 1, parse.roots.reject(&:ignorable?).count, 'there is a root node for this parse'
    root = parse.roots.first
    assert_equal :s, root.name, 'found expected root'
    things = root.subtree.select { |n| n.name == :thing }
    assert_equal 2, things.count, 'two things'
    assert things.all? { |n| n.attributes[:satisfied].include?(%i[foo etc]) }, 'passing tests stuff in extra bits'
    assert_equal 1, root.subtree.count { |n| n.name == :word }, 'one word'
    assert_equal 1, root.subtree.count { |n| n.name == :integer }, 'one integer'
  end

  # SubRules plus a two-argument (root, node) test on the alternation rule.
  class SubRulesWithAncestorTest
    extend Gullah

    rule :s, 'thing+'
    rule :thing, 'word | integer', tests: %i[foo]

    leaf :word, /[a-z]+/i
    leaf :integer, /\d+/

    # Always passes, whatever the root and node are.
    def foo(_root, _n)
      :pass
    end
  end

  # A passing two-argument test is expected to be recorded on both ends: the
  # root under :satisfied_ancestor and the tested node under
  # :satisfied_descendant, each paired with the other node's position.
  def test_subrules_with_ancestor_test
    parses = SubRulesWithAncestorTest.parse '123 word'
    assert_equal 1, parses.length, 'there is only one parse of this sentence'
    parse = parses.first
    assert_equal 1, parse.roots.reject(&:ignorable?).count, 'there is a root node for this parse'
    root = parse.roots.first
    assert_equal :s, root.name, 'found expected root'
    things = root.subtree.select { |n| n.name == :thing }
    assert_equal 2, things.count, 'two things'
    things.each do |thing|
      assert root.attributes[:satisfied_ancestor].include?([:foo, thing.position]),
             'ancestor is marked when ancestor test passes'
      assert thing.attributes[:satisfied_descendant].include?([:foo, root.position]),
             'descendant is marked when ancestor test passes'
    end
    assert_equal 1, root.subtree.count { |n| n.name == :word }, 'one word'
    assert_equal 1, root.subtree.count { |n| n.name == :integer }, 'one integer'
  end

  # :foo leaves carry a test requiring a :bar somewhere before them.
  class LeftAncestor
    extend Gullah

    rule :s, 'word+'
    rule :word, 'foo | bar'

    leaf :foo, /foo/, tests: %i[preceded_by_bar]
    leaf :bar, /bar/

    # Passes once some prior node is a :bar; fails if the :s root is reached
    # without one. Otherwise returns nil (presumably leaving the test
    # undecided -- confirm against the library).
    def preceded_by_bar(root, n)
      if n.prior.any? { |other| other.name == :bar }
        :pass
      elsif root.name == :s
        :fail
      end
    end
  end

  # 'bar foo' satisfies the test; 'foo bar' must not yield a good parse.
  def test_left_ancestor
    parses = LeftAncestor.parse 'bar foo'
    assert_equal 1, parses.length, 'one parse'
    parse = parses.first
    assert_equal 1, parse.roots.length, 'one root for parse'
    root = parse.roots.first
    assert_equal 1, root.subtree.count { |n| n.name == :foo }, 'one foo'
    parses = LeftAncestor.parse 'foo bar'
    assert_equal 0, good(parses).length, "no good parses of 'foo bar'"
  end

  # Mirror image of LeftAncestor: :foo must be followed by a :bar.
  class RightAncestor
    extend Gullah

    rule :s, 'word+'
    rule :word, 'foo | bar'

    leaf :foo, /foo/, tests: %i[followed_by_bar]
    leaf :bar, /bar/

    # Passes once some later node is a :bar; fails if the :s root is reached
    # without one. Otherwise returns nil (presumably leaving the test
    # undecided -- confirm against the library).
    def followed_by_bar(root, n)
      if n.later.any? { |other| other.name == :bar }
        :pass
      elsif root.name == :s
        :fail
      end
    end
  end

  # 'foo bar' satisfies the test; 'bar foo' must not yield a good parse.
  def test_right_ancestor
    parses = RightAncestor.parse 'foo bar'
    assert_equal 1, parses.length, 'one parse'
    parse = parses.first
    assert_equal 1, parse.roots.length, 'one root for parse'
    root = parse.roots.first
    assert_equal 1, root.subtree.count { |n| n.name == :foo }, 'one foo'
    parses = RightAncestor.parse 'bar foo'
    assert_equal 0, good(parses).length, "no good parses of 'bar foo'"
  end

  # Repetition with a lower bound only: two or more :a leaves.
  class LowerLimit
    extend Gullah

    rule :s, 'a{2,}'
    leaf :a, /a/
  end

  # One 'a' is below the bound; two or more are accepted.
  def test_lower_limit
    parses = LowerLimit.parse 'a'
    assert_equal 0, good(parses).length, 'we need at least two'
    parses = LowerLimit.parse 'a a'
    assert_equal 1, good(parses).length, 'two is enough'
    parses = LowerLimit.parse 'a a a'
    assert_equal 1, good(parses).length, 'more than two is fine'
  end

  # Repetition with both bounds: two or three :a leaves.
  class TwoLimits
    extend Gullah

    rule :s, 'a{2,3}'
    leaf :a, /a/
  end

  # Exercises the TwoLimits grammar at, inside and outside its bounds.
  def test_two_limits
    parses = TwoLimits.parse 'a'
    assert_equal 0, good(parses).length, 'we need at least two'
    parses = TwoLimits.parse 'a a'
    assert_equal 1, good(parses).length, 'two is enough'
    parses = TwoLimits.parse 'a a a'
    assert_equal 1, good(parses).length, 'three is also good'
    parses = TwoLimits.parse 'a a a a'
    # NOTE(review): the expected count of 0 follows from the assertion message
    # and from :s being unable to span four leaves; it assumes that a parse
    # left with several roots is not a success -- confirm against the library.
    assert_equal 0, good(parses).length, 'four is too many'
  end

  # A required :b followed by zero or more :a leaves.
  class HoweverMany
    extend Gullah

    rule :s, 'b a*'
    leaf :a, /a/
    leaf :b, /b/
  end

  # Zero, one and two trailing 'a's are all acceptable.
  def test_however_many
    parses = HoweverMany.parse 'b'
    assert_equal 1, good(parses).length, "we don't even need one"
    parses = HoweverMany.parse 'b a'
    assert_equal 1, good(parses).length, 'but we can take one'
    parses = HoweverMany.parse 'b a a'
    assert_equal 1, good(parses).length, 'and we can take more than one'
  end

  # A required :b followed by at most one :a leaf.
  class OneOrNone
    extend Gullah

    rule :s, 'b a?'
    leaf :a, /a/
    leaf :b, /b/
  end

  # Zero or one trailing 'a' is acceptable; two are not.
  def test_one_or_none
    parses = OneOrNone.parse 'b'
    assert_equal 1, good(parses).length, "we don't even need one"
    parses = OneOrNone.parse 'b a'
    assert_equal 1, good(parses).length, 'but we can take one'
    parses = OneOrNone.parse 'b a a'
    assert_equal 0, good(parses).length, "and we can't take more than one"
  end

  # A rule mixing a quoted literal with a named leaf.
  class Literal
    extend Gullah

    rule :money, '"$" digits'
    leaf :digits, /\d+/
  end

  # The quoted "$" in the rule should show up as a leaf of its own.
  def test_literal
    parses = Literal.parse '$12'
    assert_equal 1, good(parses).length, 'it parses'
    parse = parses.first
    assert_equal 1, parse.length, "there's a root node"
    root = parse.roots.first
    assert_equal 2, root.leaves.count, 'there are two leaves'
    assert_equal '$', root.leaves.first.text, "the first leaf is '$'"
    assert_equal '12', root.leaves.last.text, "the last leaf is '12'"
  end

  # Every number is offered to two mutually exclusive tested rules, so one of
  # the two tests fails for any given number.
  class FiltersAndFailures
    extend Gullah

    rule :numbers, 'integer+'
    rule :integer, 'prime | nonprime'
    rule :prime, 'number', tests: [:prime]
    rule :nonprime, 'number', tests: [:nonprime]

    leaf :number, /\d+/

    # Passes only for the single-digit primes.
    def prime(n)
      if [2, 3, 5, 7].include?(n.text.to_i)
        :pass
      else
        :fail
      end
    end

    # The exact complement of +prime+.
    def nonprime(n)
      prime(n) == :pass ? :fail : :pass
    end
  end

  # With an empty filter list the parser is expected to return failed and
  # erroneous parses alongside the successful one.
  def test_filters_and_failures
    parses = FiltersAndFailures.parse '1 2', filters: []
    assert parses.length > 1, 'removing the filters gives us many parses'
    assert parses.any?(&:success?), 'there is at least one correct parse'
    assert parses.any?(&:failure?), 'there is at least one failure'
    assert parses.any?(&:errors?), 'there is at least one erroneous parse'
    n = parses.find(&:errors?).roots.find(&:error?)
    refute_nil n, 'found a node that failed its test'
    assert n.attributes[:failures].any? { |ar| [[:prime], [:nonprime]].include? ar }, 'nature of failure is marked'
  end

  # 'run', 'walk' and 'talk' match both the :verb and the :noun leaf rule.
  class Ambiguous
    extend Gullah

    rule :S, 'verb NP'
    rule :NP, 'determiner noun'

    leaf :determiner, /\b(?:a|an|the)\b/i
    leaf :verb, /\b(?:run|walk|talk)\b/i
    leaf :noun, /\b(?:run|walk|talk)\b/i
  end

  # The rules should settle the ambiguity: first 'walk' verb, last 'walk' noun.
  def test_ambiguous
    parses = Ambiguous.parse 'walk the walk'
    assert_equal 1, parses.length, 'got one good parse'
    parse = parses.first
    assert_equal 1, parse.length, 'got one root node'
    root = parse.roots.first
    noun = root.subtree.find { |n| n.name == :noun }
    refute_nil noun, 'found a noun'
    assert_equal 'walk', noun.text, "the noun is 'walk'"
    assert_equal root.leaves.last, noun, 'the noun is the last leaf'
    verb = root.subtree.find { |n| n.name == :verb }
    refute_nil verb, 'found a verb'
    assert_equal 'walk', verb.text, "the verb is 'walk'"
    assert_equal root.leaves.first, verb, 'the verb is the first leaf'
  end

  # Backslash escapes in a rule body: a rule name ending in '?' and a quoted
  # literal that itself contains a double quote.
  class Escapes
    extend Gullah

    rule :stuff, %(name\\?+ "literal\\"")
    leaf :"name?", /\w+/
  end

  # Three words followed by the literal should all hang off a single root.
  def test_escapes
    parse = Escapes.first 'some words then literal"'
    assert_equal 1, parse.length
    root = parse.roots.first
    assert_equal 8, root.size
    assert_equal 3, (root.descendants.count { |n| n.name == :"name?" })
    assert_equal :'literal"', root.leaves.last.name
  end

  private

  # The subset of +parses+ that report success.
  def good(parses)
    parses.select(&:success?)
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+
5
+ require 'gullah'
6
+ require 'byebug'
7
+
8
+ # :stopdoc:
9
+
10
+ # to verify that we can make one parse quickly even for a big tree
11
class BigTreeTest < Minitest::Test
  # Strictly binary grammar: every nonterminal :a joins exactly two :a nodes.
  class Binary
    extend Gullah

    rule(:a, 'a{2}')

    leaf(:a, /\S+/)
  end

  # Sixteen tokens under a strictly binary rule give 16 leaves plus 15
  # nonterminals, i.e. 31 nodes once ignorable ones are left out.
  def test_this
    tree = Binary.first('1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16')
    assert_equal(1, tree.roots.length, 'the parse has a root')
    counted_nodes = tree.roots.first.subtree.reject(&:ignorable?)
    assert_equal(31, counted_nodes.count, 'the tree has the expected number of nodes')
  end
end