slaw 0.17.2 → 1.0.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/bin/slaw +2 -19
- data/lib/slaw/generator.rb +21 -6
- data/lib/slaw/grammars/core_nodes.rb +17 -0
- data/lib/slaw/grammars/inlines.treetop +45 -0
- data/lib/slaw/grammars/inlines_nodes.rb +58 -0
- data/lib/slaw/grammars/pl/act.treetop +246 -0
- data/lib/slaw/grammars/pl/act_nodes.rb +469 -0
- data/lib/slaw/grammars/schedules.treetop +33 -0
- data/lib/slaw/grammars/schedules_nodes.rb +107 -0
- data/lib/slaw/grammars/tables.treetop +59 -0
- data/lib/slaw/grammars/tables_nodes.rb +74 -0
- data/lib/slaw/grammars/terminals.treetop +84 -0
- data/lib/slaw/grammars/za/act.treetop +222 -0
- data/lib/slaw/grammars/za/act_nodes.rb +307 -0
- data/lib/slaw/{za → grammars/za}/act_text.xsl +0 -0
- data/lib/slaw/parse/builder.rb +6 -202
- data/lib/slaw/version.rb +1 -1
- data/spec/generator_spec.rb +2 -0
- data/spec/parse/builder_spec.rb +0 -48
- data/spec/pl/act_block_spec.rb +449 -0
- data/spec/za/act_block_spec.rb +5 -3
- data/spec/za/act_inline_spec.rb +2 -0
- data/spec/za/act_schedules_spec.rb +2 -0
- data/spec/za/act_table_spec.rb +2 -0
- metadata +19 -7
- data/lib/slaw/za/act.treetop +0 -393
- data/lib/slaw/za/act_nodes.rb +0 -532
data/spec/za/act_block_spec.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
require 'slaw'
|
4
4
|
|
5
5
|
describe Slaw::ActGenerator do
|
6
|
+
subject { Slaw::ActGenerator.new('za') }
|
7
|
+
|
6
8
|
def parse(rule, s)
|
7
9
|
subject.builder.text_to_syntax_tree(s, {root: rule})
|
8
10
|
end
|
@@ -1897,12 +1899,12 @@ EOS
|
|
1897
1899
|
it 'should handle a clause with a remark' do
|
1898
1900
|
node = parse :clauses, "simple [[remark]]. text"
|
1899
1901
|
node.text_value.should == "simple [[remark]]. text"
|
1900
|
-
node.elements[7].is_a?(Slaw::ZA::Act::Remark).should be_true
|
1902
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
|
1901
1903
|
|
1902
1904
|
node = parse :clauses, "simple [[remark]][[another]] text"
|
1903
1905
|
node.text_value.should == "simple [[remark]][[another]] text"
|
1904
|
-
node.elements[7].is_a?(Slaw::ZA::Act::Remark).should be_true
|
1905
|
-
node.elements[7].is_a?(Slaw::ZA::Act::Remark).should be_true
|
1906
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
|
1907
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
|
1906
1908
|
end
|
1907
1909
|
end
|
1908
1910
|
end
|
data/spec/za/act_inline_spec.rb
CHANGED
data/spec/za/act_table_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.2
|
4
|
+
version: 1.0.0.alpha.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -174,6 +174,19 @@ files:
|
|
174
174
|
- lib/slaw/extract/html_to_akn_text.xsl
|
175
175
|
- lib/slaw/extract/yomu_patch.rb
|
176
176
|
- lib/slaw/generator.rb
|
177
|
+
- lib/slaw/grammars/core_nodes.rb
|
178
|
+
- lib/slaw/grammars/inlines.treetop
|
179
|
+
- lib/slaw/grammars/inlines_nodes.rb
|
180
|
+
- lib/slaw/grammars/pl/act.treetop
|
181
|
+
- lib/slaw/grammars/pl/act_nodes.rb
|
182
|
+
- lib/slaw/grammars/schedules.treetop
|
183
|
+
- lib/slaw/grammars/schedules_nodes.rb
|
184
|
+
- lib/slaw/grammars/tables.treetop
|
185
|
+
- lib/slaw/grammars/tables_nodes.rb
|
186
|
+
- lib/slaw/grammars/terminals.treetop
|
187
|
+
- lib/slaw/grammars/za/act.treetop
|
188
|
+
- lib/slaw/grammars/za/act_nodes.rb
|
189
|
+
- lib/slaw/grammars/za/act_text.xsl
|
177
190
|
- lib/slaw/lifecycle_event.rb
|
178
191
|
- lib/slaw/logging.rb
|
179
192
|
- lib/slaw/namespace.rb
|
@@ -190,9 +203,6 @@ files:
|
|
190
203
|
- lib/slaw/schemas/xml.xsd
|
191
204
|
- lib/slaw/version.rb
|
192
205
|
- lib/slaw/xml_support.rb
|
193
|
-
- lib/slaw/za/act.treetop
|
194
|
-
- lib/slaw/za/act_nodes.rb
|
195
|
-
- lib/slaw/za/act_text.xsl
|
196
206
|
- slaw.gemspec
|
197
207
|
- spec/act_spec.rb
|
198
208
|
- spec/bylaw_spec.rb
|
@@ -201,6 +211,7 @@ files:
|
|
201
211
|
- spec/generator_spec.rb
|
202
212
|
- spec/parse/builder_spec.rb
|
203
213
|
- spec/parse/cleanser_spec.rb
|
214
|
+
- spec/pl/act_block_spec.rb
|
204
215
|
- spec/spec_helper.rb
|
205
216
|
- spec/xml_helpers.rb
|
206
217
|
- spec/za/act_block_spec.rb
|
@@ -222,9 +233,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
222
233
|
version: '0'
|
223
234
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
224
235
|
requirements:
|
225
|
-
- - ">="
|
236
|
+
- - ">"
|
226
237
|
- !ruby/object:Gem::Version
|
227
|
-
version: '0'
|
238
|
+
version: 1.3.1
|
228
239
|
requirements: []
|
229
240
|
rubyforge_project:
|
230
241
|
rubygems_version: 2.6.12
|
@@ -239,6 +250,7 @@ test_files:
|
|
239
250
|
- spec/generator_spec.rb
|
240
251
|
- spec/parse/builder_spec.rb
|
241
252
|
- spec/parse/cleanser_spec.rb
|
253
|
+
- spec/pl/act_block_spec.rb
|
242
254
|
- spec/spec_helper.rb
|
243
255
|
- spec/xml_helpers.rb
|
244
256
|
- spec/za/act_block_spec.rb
|
data/lib/slaw/za/act.treetop
DELETED
@@ -1,393 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'slaw/parse/grammar_helpers'
|
4
|
-
require 'slaw/za/act_nodes'
|
5
|
-
|
6
|
-
module Slaw
|
7
|
-
module ZA
|
8
|
-
grammar Act
|
9
|
-
include Slaw::Parse::GrammarHelpers
|
10
|
-
|
11
|
-
########
|
12
|
-
# major containers
|
13
|
-
|
14
|
-
rule act
|
15
|
-
empty_line*
|
16
|
-
preface:preface?
|
17
|
-
preamble:preamble?
|
18
|
-
body
|
19
|
-
schedules:schedules_container? <Act>
|
20
|
-
end
|
21
|
-
|
22
|
-
rule preface
|
23
|
-
!'PREAMBLE'
|
24
|
-
('PREFACE'i space? eol)?
|
25
|
-
statements:(!'PREAMBLE' pre_body_statement)* <Preface>
|
26
|
-
end
|
27
|
-
|
28
|
-
rule preamble
|
29
|
-
'PREAMBLE'i space? eol
|
30
|
-
statements:pre_body_statement* <Preamble>
|
31
|
-
end
|
32
|
-
|
33
|
-
rule body
|
34
|
-
children:(chapter / part / section / subsection / block_paragraphs)+ <Body>
|
35
|
-
end
|
36
|
-
|
37
|
-
rule chapter
|
38
|
-
heading:chapter_heading
|
39
|
-
children:(part / section / subsection / block_paragraphs)*
|
40
|
-
<Chapter>
|
41
|
-
end
|
42
|
-
|
43
|
-
rule part
|
44
|
-
heading:part_heading
|
45
|
-
children:(section / subsection / block_paragraphs)*
|
46
|
-
<Part>
|
47
|
-
end
|
48
|
-
|
49
|
-
rule section
|
50
|
-
section_title
|
51
|
-
children:(subsection / block_paragraphs)* <Section>
|
52
|
-
end
|
53
|
-
|
54
|
-
rule subsection
|
55
|
-
space? subsection_prefix space?
|
56
|
-
# eg: (2) (a) foo
|
57
|
-
first_child:inline_block_element?
|
58
|
-
# eg: (2)
|
59
|
-
eol?
|
60
|
-
children:block_element* <Subsection>
|
61
|
-
end
|
62
|
-
|
63
|
-
rule schedules_container
|
64
|
-
schedules:schedules <ScheduleContainer>
|
65
|
-
end
|
66
|
-
|
67
|
-
rule schedules
|
68
|
-
children:schedule+ <GroupNode>
|
69
|
-
end
|
70
|
-
|
71
|
-
rule schedule
|
72
|
-
schedule_title
|
73
|
-
body:body?
|
74
|
-
<Schedule>
|
75
|
-
end
|
76
|
-
|
77
|
-
##########
|
78
|
-
# group elements
|
79
|
-
#
|
80
|
-
# these are used externally and provide support when parsing just
|
81
|
-
# a particular portion of a document
|
82
|
-
|
83
|
-
rule chapters
|
84
|
-
children:chapter+ <GroupNode>
|
85
|
-
end
|
86
|
-
|
87
|
-
rule parts
|
88
|
-
children:part+ <GroupNode>
|
89
|
-
end
|
90
|
-
|
91
|
-
rule sections
|
92
|
-
children:section+ <GroupNode>
|
93
|
-
end
|
94
|
-
|
95
|
-
##########
|
96
|
-
# headings
|
97
|
-
|
98
|
-
rule chapter_heading
|
99
|
-
space? chapter_heading_prefix heading:(newline? content)? eol
|
100
|
-
<ChapterHeading>
|
101
|
-
end
|
102
|
-
|
103
|
-
rule part_heading
|
104
|
-
space? part_heading_prefix heading:(newline? content)? eol
|
105
|
-
<PartHeading>
|
106
|
-
end
|
107
|
-
|
108
|
-
rule section_title
|
109
|
-
section_title_1 / section_1_title
|
110
|
-
end
|
111
|
-
|
112
|
-
rule section_title_1
|
113
|
-
&{ |s| options[:section_number_after_title] }
|
114
|
-
# Section title
|
115
|
-
# 1. Section content
|
116
|
-
content eol
|
117
|
-
section_title_prefix whitespace <SectionTitleType1>
|
118
|
-
end
|
119
|
-
|
120
|
-
rule section_1_title
|
121
|
-
# 1. Section title
|
122
|
-
# Section content
|
123
|
-
#
|
124
|
-
# Additionally, the section title is optional.
|
125
|
-
!{ |s| options[:section_number_after_title] }
|
126
|
-
space? section_title_prefix section_title:section_title_content? eol?
|
127
|
-
<SectionTitleType2>
|
128
|
-
end
|
129
|
-
|
130
|
-
rule section_title_content
|
131
|
-
# don't match subsections, eg.
|
132
|
-
#
|
133
|
-
# 10. (1) subsection content...
|
134
|
-
space !subsection_prefix content eol
|
135
|
-
end
|
136
|
-
|
137
|
-
rule schedule_title
|
138
|
-
space? schedule_title_prefix space? "\""? num:alphanums? "\""? [ \t:.-]* title:(content)?
|
139
|
-
heading:(newline space? content)?
|
140
|
-
eol
|
141
|
-
end
|
142
|
-
|
143
|
-
##########
|
144
|
-
# blocks of content inside containers
|
145
|
-
|
146
|
-
rule block_paragraphs
|
147
|
-
block_element+ <BlockParagraph>
|
148
|
-
end
|
149
|
-
|
150
|
-
rule block_element
|
151
|
-
(table / blocklist / naked_statement)
|
152
|
-
end
|
153
|
-
|
154
|
-
# Block elements that don't have to appear at the start of a line.
|
155
|
-
# ie. we don't need to guard against the start of a chapter, section, etc.
|
156
|
-
rule inline_block_element
|
157
|
-
(table / blocklist / inline_statement)
|
158
|
-
end
|
159
|
-
|
160
|
-
rule blocklist
|
161
|
-
blocklist_item+ <Blocklist>
|
162
|
-
end
|
163
|
-
|
164
|
-
rule blocklist_item
|
165
|
-
# TODO: this whitespace should probably be space, to allow empty blocklist items followed by plain text
|
166
|
-
space? blocklist_item_prefix whitespace item_content:(!blocklist_item_prefix clauses:clauses? eol)? eol?
|
167
|
-
<BlocklistItem>
|
168
|
-
end
|
169
|
-
|
170
|
-
rule blocklist_item_prefix
|
171
|
-
('(' letter_ordinal ')') / dotted_number_3
|
172
|
-
end
|
173
|
-
|
174
|
-
##########
|
175
|
-
# wikimedia-style tables
|
176
|
-
#
|
177
|
-
# this grammar doesn't support inline table cells (eg: | col1 || col2 || col3)
|
178
|
-
# instead, the builder preprocesses tables to break inline cells onto their own
|
179
|
-
# lines, which we do support.
|
180
|
-
|
181
|
-
rule table
|
182
|
-
space? '{|' eol
|
183
|
-
table_body
|
184
|
-
'|}' eol
|
185
|
-
<Table>
|
186
|
-
end
|
187
|
-
|
188
|
-
rule table_body
|
189
|
-
(table_row / table_cell)*
|
190
|
-
end
|
191
|
-
|
192
|
-
rule table_row
|
193
|
-
'|-' space? eol
|
194
|
-
end
|
195
|
-
|
196
|
-
rule table_cell
|
197
|
-
# don't match end-of-table
|
198
|
-
!'|}'
|
199
|
-
[!|] attribs:table_attribs? space?
|
200
|
-
# first content line, then multiple lines
|
201
|
-
content:(line:table_line (![!|] space? line:table_line)*)
|
202
|
-
<TableCell>
|
203
|
-
end
|
204
|
-
|
205
|
-
rule table_line
|
206
|
-
clauses:clauses? eol
|
207
|
-
<TableLine>
|
208
|
-
end
|
209
|
-
|
210
|
-
rule table_attribs
|
211
|
-
space? attribs:(table_attrib+) '|'
|
212
|
-
end
|
213
|
-
|
214
|
-
rule table_attrib
|
215
|
-
name:([a-z_-]+) '=' value:(
|
216
|
-
('"' (!'"' .)* '"') /
|
217
|
-
("'" (!"'" .)* "'"))
|
218
|
-
space?
|
219
|
-
end
|
220
|
-
|
221
|
-
##########
|
222
|
-
# statements - single lines of content
|
223
|
-
#
|
224
|
-
# If a statement starts with a backslash, it's considered to have escaped the subsequent word,
|
225
|
-
# and is ignored. This allows escaping of section headings, etc.
|
226
|
-
|
227
|
-
rule naked_statement
|
228
|
-
space? !(chapter_heading / part_heading / section_title / schedule_title / subsection_prefix) '\\'? clauses eol
|
229
|
-
<NakedStatement>
|
230
|
-
end
|
231
|
-
|
232
|
-
rule pre_body_statement
|
233
|
-
space? !(chapter_heading / part_heading / section_title / schedule_title) '\\'? clauses eol
|
234
|
-
<NakedStatement>
|
235
|
-
end
|
236
|
-
|
237
|
-
rule inline_statement
|
238
|
-
space? '\\'? clauses eol
|
239
|
-
<NakedStatement>
|
240
|
-
end
|
241
|
-
|
242
|
-
##########
|
243
|
-
# inline content
|
244
|
-
|
245
|
-
# one or more words, allowing inline elements
|
246
|
-
rule clauses
|
247
|
-
(remark / image / ref / [^\n])+
|
248
|
-
<Clauses>
|
249
|
-
end
|
250
|
-
|
251
|
-
rule remark
|
252
|
-
'[[' content:(ref / (!']]' .))+ ']]'
|
253
|
-
<Remark>
|
254
|
-
end
|
255
|
-
|
256
|
-
rule image
|
257
|
-
# images like markdown
|
258
|
-
# eg. ![title text](image url)
|
259
|
-
#
|
260
|
-
# the title text is optional, but the enclosing square brackets aren't
|
261
|
-
'![' content:(!'](' [^\n])* '](' href:([^)\n]+) ')'
|
262
|
-
<Image>
|
263
|
-
end
|
264
|
-
|
265
|
-
rule ref
|
266
|
-
# links like markdown
|
267
|
-
# eg. [link text](link url)
|
268
|
-
'[' content:(!'](' [^\n])+ '](' href:([^)\n]+) ')'
|
269
|
-
<Ref>
|
270
|
-
end
|
271
|
-
|
272
|
-
##########
|
273
|
-
# prefixes
|
274
|
-
|
275
|
-
rule part_heading_prefix
|
276
|
-
'part'i space alphanums [ :-]*
|
277
|
-
end
|
278
|
-
|
279
|
-
rule chapter_heading_prefix
|
280
|
-
'chapter'i space alphanums [ :-]*
|
281
|
-
end
|
282
|
-
|
283
|
-
rule schedule_title_prefix
|
284
|
-
'schedule'i 's'i?
|
285
|
-
end
|
286
|
-
|
287
|
-
rule section_title_prefix
|
288
|
-
number_letter '.'?
|
289
|
-
end
|
290
|
-
|
291
|
-
rule subsection_prefix
|
292
|
-
# there are two subsection handling syntaxes:
|
293
|
-
#
|
294
|
-
# (1) foo
|
295
|
-
# (2A) foo
|
296
|
-
#
|
297
|
-
# and
|
298
|
-
#
|
299
|
-
# 8.2 foo
|
300
|
-
# 8.3 bar
|
301
|
-
#
|
302
|
-
# The second is less common, but this allows us to handle it.
|
303
|
-
# Note that it is usually accompanied by a similar list number format:
|
304
|
-
#
|
305
|
-
# 8.2.1 item 1
|
306
|
-
# 8.2.2 item 2
|
307
|
-
#
|
308
|
-
# which aren't subsections, but lists, so force the space at the end
|
309
|
-
# of the number to catch this case.
|
310
|
-
num:('(' number_letter ')')
|
311
|
-
/
|
312
|
-
num:dotted_number_2 '.'? space
|
313
|
-
end
|
314
|
-
|
315
|
-
#########
|
316
|
-
## one line of basic content
|
317
|
-
|
318
|
-
rule content
|
319
|
-
# anything but a newline, followed by a
|
320
|
-
# newline or end of file (without consuming the newline)
|
321
|
-
[^\n]+ &eol
|
322
|
-
end
|
323
|
-
|
324
|
-
##########
|
325
|
-
# terminals
|
326
|
-
|
327
|
-
# eg. 2, 2A, 2b
|
328
|
-
rule number_letter
|
329
|
-
number letter*
|
330
|
-
end
|
331
|
-
|
332
|
-
rule letter_ordinal
|
333
|
-
letter (letter / digit)*
|
334
|
-
end
|
335
|
-
|
336
|
-
rule dotted_number_3
|
337
|
-
number '.' number ('.' number)+
|
338
|
-
end
|
339
|
-
|
340
|
-
rule dotted_number_2
|
341
|
-
number '.' number
|
342
|
-
end
|
343
|
-
|
344
|
-
rule number
|
345
|
-
digit+
|
346
|
-
end
|
347
|
-
|
348
|
-
rule digit
|
349
|
-
[0-9]
|
350
|
-
end
|
351
|
-
|
352
|
-
rule letter
|
353
|
-
[a-zA-Z]
|
354
|
-
end
|
355
|
-
|
356
|
-
rule alphanums
|
357
|
-
[a-zA-Z0-9]+
|
358
|
-
end
|
359
|
-
|
360
|
-
rule quotes
|
361
|
-
["“”]
|
362
|
-
end
|
363
|
-
|
364
|
-
rule non_quotes
|
365
|
-
[^"“”]
|
366
|
-
end
|
367
|
-
|
368
|
-
##########
|
369
|
-
# whitespace
|
370
|
-
|
371
|
-
rule space
|
372
|
-
[ \t]+
|
373
|
-
end
|
374
|
-
|
375
|
-
rule whitespace
|
376
|
-
[ \t\n]*
|
377
|
-
end
|
378
|
-
|
379
|
-
rule empty_line
|
380
|
-
space? newline
|
381
|
-
end
|
382
|
-
|
383
|
-
rule eol
|
384
|
-
newline
|
385
|
-
empty_line*
|
386
|
-
end
|
387
|
-
|
388
|
-
rule newline
|
389
|
-
"\n"
|
390
|
-
end
|
391
|
-
end
|
392
|
-
end
|
393
|
-
end
|