slaw 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/slaw +7 -1
- data/lib/slaw/parse/builder.rb +3 -1
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act_nodes.rb +70 -59
- data/spec/parse/builder_spec.rb +38 -0
- data/spec/za/act_spec.rb +161 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5ab33b04df46f9d2d087f7a22ce9c0e7d12278e
|
4
|
+
data.tar.gz: 276c7b704a30c435d0dfbce2e74c34cd2e902b51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7632de6a68c70b2ce44a4854ec4bcb35867f269447b8c339286056301ef57d73548edabddd5a0ea963d4bc19c66b37d571beb1f8f635d7dc533ca32e1f57bd35
|
7
|
+
data.tar.gz: 6e4a1d0778b6616f6956fba51a1b604514a036f188d89232984d7f57dd5f75f3bad2b031507d9d2eeeb1f470d9b85fef063d055719e97853df7d8665f4715a1a
|
data/bin/slaw
CHANGED
@@ -8,7 +8,7 @@ class SlawCLI < Thor
|
|
8
8
|
|
9
9
|
class_option :verbose, type: :boolean, desc: "Display log output on stderr"
|
10
10
|
|
11
|
-
desc "parse FILE", "
|
11
|
+
desc "parse FILE", "Parse FILE into Akoma Ntoso XML"
|
12
12
|
option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
|
13
13
|
option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
|
14
14
|
option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
|
@@ -36,6 +36,12 @@ class SlawCLI < Thor
|
|
36
36
|
puts act.to_xml(indent: 2)
|
37
37
|
end
|
38
38
|
|
39
|
+
map %w(--version) => :__print_version
|
40
|
+
desc "--version", "Print slaw version info"
|
41
|
+
def __print_version
|
42
|
+
say "slaw #{Slaw::VERSION}"
|
43
|
+
end
|
44
|
+
|
39
45
|
no_commands do
|
40
46
|
def logging
|
41
47
|
logger = Log4r::Logger.new('Slaw')
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -154,7 +154,9 @@ module Slaw
|
|
154
154
|
doc.xpath('//a:component/a:doc[@name="schedules"]//a:heading/text()', a: NS)
|
155
155
|
|
156
156
|
nodes.each do |heading|
|
157
|
-
heading.content
|
157
|
+
if !(heading.content =~ /[a-z]/)
|
158
|
+
heading.content = heading.content.downcase.gsub(/^\w/) { $&.upcase }
|
159
|
+
end
|
158
160
|
end
|
159
161
|
end
|
160
162
|
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -3,14 +3,17 @@ module Slaw
|
|
3
3
|
module Act
|
4
4
|
class Act < Treetop::Runtime::SyntaxNode
|
5
5
|
FRBR_URI = '/za/act/1980/01'
|
6
|
+
WORK_URI = FRBR_URI
|
7
|
+
EXPRESSION_URI = "#{FRBR_URI}/eng@"
|
8
|
+
MANIFESTATION_URI = EXPRESSION_URI
|
6
9
|
|
7
10
|
def to_xml(b)
|
8
11
|
b.act(contains: "originalVersion") { |b|
|
9
12
|
write_meta(b)
|
10
13
|
write_preamble(b)
|
11
14
|
write_body(b)
|
12
|
-
write_schedules(b)
|
13
15
|
}
|
16
|
+
write_schedules(b)
|
14
17
|
end
|
15
18
|
|
16
19
|
def write_meta(b)
|
@@ -34,23 +37,23 @@ module Slaw
|
|
34
37
|
b.identification(source: "#slaw") { |b|
|
35
38
|
# use stub values so that we can generate a validating document
|
36
39
|
b.FRBRWork { |b|
|
37
|
-
b.FRBRthis(value: "#{
|
38
|
-
b.FRBRuri(value:
|
40
|
+
b.FRBRthis(value: "#{WORK_URI}/main")
|
41
|
+
b.FRBRuri(value: WORK_URI)
|
39
42
|
b.FRBRalias(value: 'Short Title')
|
40
43
|
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
41
44
|
b.FRBRauthor(href: '#council', as: '#author')
|
42
45
|
b.FRBRcountry(value: 'za')
|
43
46
|
}
|
44
47
|
b.FRBRExpression { |b|
|
45
|
-
b.FRBRthis(value:
|
46
|
-
b.FRBRuri(value:
|
48
|
+
b.FRBRthis(value: "#{EXPRESSION_URI}/main")
|
49
|
+
b.FRBRuri(value: EXPRESSION_URI)
|
47
50
|
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
48
51
|
b.FRBRauthor(href: '#council', as: '#author')
|
49
52
|
b.FRBRlanguage(language: 'eng')
|
50
53
|
}
|
51
54
|
b.FRBRManifestation { |b|
|
52
|
-
b.FRBRthis(value:
|
53
|
-
b.FRBRuri(value:
|
55
|
+
b.FRBRthis(value: "#{MANIFESTATION_URI}/main")
|
56
|
+
b.FRBRuri(value: MANIFESTATION_URI)
|
54
57
|
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
55
58
|
b.FRBRauthor(href: '#slaw', as: '#author')
|
56
59
|
}
|
@@ -58,11 +61,7 @@ module Slaw
|
|
58
61
|
end
|
59
62
|
|
60
63
|
def write_preamble(b)
|
61
|
-
|
62
|
-
b.preamble { |b|
|
63
|
-
preamble.to_xml(b)
|
64
|
-
}
|
65
|
-
end
|
64
|
+
preamble.to_xml(b)
|
66
65
|
end
|
67
66
|
|
68
67
|
def write_body(b)
|
@@ -78,11 +77,15 @@ module Slaw
|
|
78
77
|
|
79
78
|
class Preamble < Treetop::Runtime::SyntaxNode
|
80
79
|
def to_xml(b)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
80
|
+
if text_value != ""
|
81
|
+
b.preamble { |b|
|
82
|
+
statements.elements.each { |e|
|
83
|
+
if not (e.content.text_value =~ /^preamble/i)
|
84
|
+
b.p(e.content.text_value)
|
85
|
+
end
|
86
|
+
}
|
87
|
+
}
|
88
|
+
end
|
86
89
|
end
|
87
90
|
end
|
88
91
|
|
@@ -315,37 +318,43 @@ module Slaw
|
|
315
318
|
return if schedules.elements.empty?
|
316
319
|
|
317
320
|
b.components { |b|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
b.identification(source: "#slaw") { |b|
|
322
|
-
b.FRBRWork { |b|
|
323
|
-
b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules')
|
324
|
-
b.FRBRuri(value: '/za/act/locale/1980/name/schedules')
|
325
|
-
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
326
|
-
b.FRBRauthor(href: '#council', as: '#author')
|
327
|
-
b.FRBRcountry(value: 'za')
|
328
|
-
}
|
329
|
-
b.FRBRExpression { |b|
|
330
|
-
b.FRBRthis(value: '/za/act/locale/1980/name/main//schedules/eng@')
|
331
|
-
b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
|
332
|
-
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
333
|
-
b.FRBRauthor(href: '#council', as: '#author')
|
334
|
-
b.FRBRlanguage(language: 'eng')
|
335
|
-
}
|
336
|
-
b.FRBRManifestation { |b|
|
337
|
-
b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules/eng@')
|
338
|
-
b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
|
339
|
-
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
340
|
-
b.FRBRauthor(href: '#slaw', as: '#author')
|
341
|
-
}
|
342
|
-
}
|
343
|
-
}
|
321
|
+
schedules.elements.each_with_index { |e, i| write_schedule(e, i+1, b) }
|
322
|
+
}
|
323
|
+
end
|
344
324
|
|
345
|
-
|
346
|
-
|
325
|
+
def write_schedule(element, i, b)
|
326
|
+
# component name
|
327
|
+
comp = "schedule#{i}"
|
328
|
+
|
329
|
+
b.component(id: "component-#{i}") { |b|
|
330
|
+
b.doc(name: "schedule#{i}") { |b|
|
331
|
+
b.meta { |b|
|
332
|
+
b.identification(source: "#slaw") { |b|
|
333
|
+
b.FRBRWork { |b|
|
334
|
+
b.FRBRthis(value: "#{Act::WORK_URI}/#{comp}")
|
335
|
+
b.FRBRuri(value: Act::WORK_URI)
|
336
|
+
b.FRBRalias(value: element.alias)
|
337
|
+
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
338
|
+
b.FRBRauthor(href: '#council', as: '#author')
|
339
|
+
b.FRBRcountry(value: 'za')
|
340
|
+
}
|
341
|
+
b.FRBRExpression { |b|
|
342
|
+
b.FRBRthis(value: "#{Act::EXPRESSION_URI}/#{comp}")
|
343
|
+
b.FRBRuri(value: Act::EXPRESSION_URI)
|
344
|
+
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
345
|
+
b.FRBRauthor(href: '#council', as: '#author')
|
346
|
+
b.FRBRlanguage(language: 'eng')
|
347
|
+
}
|
348
|
+
b.FRBRManifestation { |b|
|
349
|
+
b.FRBRthis(value: "#{Act::MANIFESTATION_URI}/#{comp}")
|
350
|
+
b.FRBRuri(value: Act::MANIFESTATION_URI)
|
351
|
+
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
352
|
+
b.FRBRauthor(href: '#slaw', as: '#author')
|
353
|
+
}
|
347
354
|
}
|
348
355
|
}
|
356
|
+
|
357
|
+
b.mainBody { |b| element.to_xml(b, i) }
|
349
358
|
}
|
350
359
|
}
|
351
360
|
end
|
@@ -357,6 +366,14 @@ module Slaw
|
|
357
366
|
return (n && !n.empty?) ? n : nil
|
358
367
|
end
|
359
368
|
|
369
|
+
def alias
|
370
|
+
if num
|
371
|
+
"Schedule #{num}"
|
372
|
+
else
|
373
|
+
"Schedule"
|
374
|
+
end
|
375
|
+
end
|
376
|
+
|
360
377
|
def heading
|
361
378
|
if schedule_heading.schedule_title.respond_to? :content
|
362
379
|
schedule_heading.schedule_title.content.text_value
|
@@ -366,21 +383,15 @@ module Slaw
|
|
366
383
|
end
|
367
384
|
|
368
385
|
def to_xml(b, i)
|
369
|
-
n = num
|
370
|
-
id =
|
371
|
-
"schedule-#{n}"
|
372
|
-
else
|
373
|
-
"schedules"
|
374
|
-
end
|
375
|
-
|
376
|
-
b.chapter(id: id) { |b|
|
377
|
-
b.num(num) if num
|
378
|
-
b.heading(heading) if heading
|
386
|
+
n = num.nil? ? i : num
|
387
|
+
id = "schedule-#{n}"
|
379
388
|
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
389
|
+
# there is no good AKN hierarchy container for schedules, so we
|
390
|
+
# just use article because we don't use it anywhere else.
|
391
|
+
b.article(id: id) { |b|
|
392
|
+
b.heading(heading) if heading
|
393
|
+
b.content { |b|
|
394
|
+
statements.elements.each { |e| b.p(e.content.text_value) }
|
384
395
|
}
|
385
396
|
}
|
386
397
|
end
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -541,6 +541,44 @@ XML
|
|
541
541
|
</blockList>
|
542
542
|
</content>
|
543
543
|
</subsection>
|
544
|
+
XML
|
545
|
+
)
|
546
|
+
end
|
547
|
+
end
|
548
|
+
|
549
|
+
describe '#normalise_headings' do
|
550
|
+
it 'should normalise ALL CAPS headings' do
|
551
|
+
doc = xml2doc(section(<<XML
|
552
|
+
<heading>DEFINITIONS FOR A.B.C.</heading>
|
553
|
+
<content>
|
554
|
+
<p></p>
|
555
|
+
</content>
|
556
|
+
XML
|
557
|
+
))
|
558
|
+
subject.normalise_headings(doc)
|
559
|
+
doc.to_s.should == section(<<XML
|
560
|
+
<heading>Definitions for a.b.c.</heading>
|
561
|
+
<content>
|
562
|
+
<p/>
|
563
|
+
</content>
|
564
|
+
XML
|
565
|
+
)
|
566
|
+
end
|
567
|
+
|
568
|
+
it 'should not normalise normal headings' do
|
569
|
+
doc = xml2doc(section(<<XML
|
570
|
+
<heading>Definitions for A.B.C.</heading>
|
571
|
+
<content>
|
572
|
+
<p></p>
|
573
|
+
</content>
|
574
|
+
XML
|
575
|
+
))
|
576
|
+
subject.normalise_headings(doc)
|
577
|
+
doc.to_s.should == section(<<XML
|
578
|
+
<heading>Definitions for A.B.C.</heading>
|
579
|
+
<content>
|
580
|
+
<p/>
|
581
|
+
</content>
|
544
582
|
XML
|
545
583
|
)
|
546
584
|
end
|
data/spec/za/act_spec.rb
CHANGED
@@ -364,5 +364,166 @@ EOS
|
|
364
364
|
sched.statements.elements[0].content.text_value.should == "Baz"
|
365
365
|
sched.statements.elements[1].content.text_value.should == "Boom"
|
366
366
|
end
|
367
|
+
|
368
|
+
it 'should serialise many schedules correctly' do
|
369
|
+
node = parse :schedules, <<EOS
|
370
|
+
Schedule "1"
|
371
|
+
A Title
|
372
|
+
1. Foo
|
373
|
+
2. Bar
|
374
|
+
Schedule 2
|
375
|
+
Another Title
|
376
|
+
Baz
|
377
|
+
Boom
|
378
|
+
EOS
|
379
|
+
|
380
|
+
s = ""
|
381
|
+
builder = ::Builder::XmlMarkup.new(indent: 2, target: s)
|
382
|
+
|
383
|
+
node.to_xml(builder)
|
384
|
+
|
385
|
+
today = Time.now.strftime('%Y-%m-%d')
|
386
|
+
|
387
|
+
s.should == <<EOS
|
388
|
+
<components>
|
389
|
+
<component id="component-1">
|
390
|
+
<doc name="schedule1">
|
391
|
+
<meta>
|
392
|
+
<identification source="#slaw">
|
393
|
+
<FRBRWork>
|
394
|
+
<FRBRthis value="/za/act/1980/01/schedule1"/>
|
395
|
+
<FRBRuri value="/za/act/1980/01"/>
|
396
|
+
<FRBRalias value="Schedule 1"/>
|
397
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
398
|
+
<FRBRauthor href="#council" as="#author"/>
|
399
|
+
<FRBRcountry value="za"/>
|
400
|
+
</FRBRWork>
|
401
|
+
<FRBRExpression>
|
402
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
403
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
404
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
405
|
+
<FRBRauthor href="#council" as="#author"/>
|
406
|
+
<FRBRlanguage language="eng"/>
|
407
|
+
</FRBRExpression>
|
408
|
+
<FRBRManifestation>
|
409
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
410
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
411
|
+
<FRBRdate date="#{today}" name="Generation"/>
|
412
|
+
<FRBRauthor href="#slaw" as="#author"/>
|
413
|
+
</FRBRManifestation>
|
414
|
+
</identification>
|
415
|
+
</meta>
|
416
|
+
<mainBody>
|
417
|
+
<article id="schedule-1">
|
418
|
+
<heading>A Title</heading>
|
419
|
+
<content>
|
420
|
+
<p>1. Foo</p>
|
421
|
+
<p>2. Bar</p>
|
422
|
+
</content>
|
423
|
+
</article>
|
424
|
+
</mainBody>
|
425
|
+
</doc>
|
426
|
+
</component>
|
427
|
+
<component id="component-2">
|
428
|
+
<doc name="schedule2">
|
429
|
+
<meta>
|
430
|
+
<identification source="#slaw">
|
431
|
+
<FRBRWork>
|
432
|
+
<FRBRthis value="/za/act/1980/01/schedule2"/>
|
433
|
+
<FRBRuri value="/za/act/1980/01"/>
|
434
|
+
<FRBRalias value="Schedule 2"/>
|
435
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
436
|
+
<FRBRauthor href="#council" as="#author"/>
|
437
|
+
<FRBRcountry value="za"/>
|
438
|
+
</FRBRWork>
|
439
|
+
<FRBRExpression>
|
440
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule2"/>
|
441
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
442
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
443
|
+
<FRBRauthor href="#council" as="#author"/>
|
444
|
+
<FRBRlanguage language="eng"/>
|
445
|
+
</FRBRExpression>
|
446
|
+
<FRBRManifestation>
|
447
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule2"/>
|
448
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
449
|
+
<FRBRdate date="#{today}" name="Generation"/>
|
450
|
+
<FRBRauthor href="#slaw" as="#author"/>
|
451
|
+
</FRBRManifestation>
|
452
|
+
</identification>
|
453
|
+
</meta>
|
454
|
+
<mainBody>
|
455
|
+
<article id="schedule-2">
|
456
|
+
<heading>Another Title</heading>
|
457
|
+
<content>
|
458
|
+
<p>Baz</p>
|
459
|
+
<p>Boom</p>
|
460
|
+
</content>
|
461
|
+
</article>
|
462
|
+
</mainBody>
|
463
|
+
</doc>
|
464
|
+
</component>
|
465
|
+
</components>
|
466
|
+
EOS
|
467
|
+
|
468
|
+
end
|
469
|
+
|
470
|
+
it 'should serialise a single schedule without a heading' do
|
471
|
+
node = parse :schedules, <<EOS
|
472
|
+
Schedule "1"
|
473
|
+
Other than as is set out hereinbelow, no signs other than locality bound signs, temporary signs including loose portable sign, estate agents signs, newspaper headline posters and posters (the erection of which must comply with the appropriate schedules pertinent thereto) shall be erected on Municipal owned land.
|
474
|
+
1. Foo
|
475
|
+
2. Bar
|
476
|
+
EOS
|
477
|
+
|
478
|
+
s = ""
|
479
|
+
builder = ::Builder::XmlMarkup.new(indent: 2, target: s)
|
480
|
+
|
481
|
+
node.to_xml(builder)
|
482
|
+
|
483
|
+
today = Time.now.strftime('%Y-%m-%d')
|
484
|
+
|
485
|
+
s.should == <<EOS
|
486
|
+
<components>
|
487
|
+
<component id="component-1">
|
488
|
+
<doc name="schedule1">
|
489
|
+
<meta>
|
490
|
+
<identification source="#slaw">
|
491
|
+
<FRBRWork>
|
492
|
+
<FRBRthis value="/za/act/1980/01/schedule1"/>
|
493
|
+
<FRBRuri value="/za/act/1980/01"/>
|
494
|
+
<FRBRalias value="Schedule 1"/>
|
495
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
496
|
+
<FRBRauthor href="#council" as="#author"/>
|
497
|
+
<FRBRcountry value="za"/>
|
498
|
+
</FRBRWork>
|
499
|
+
<FRBRExpression>
|
500
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
501
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
502
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
503
|
+
<FRBRauthor href="#council" as="#author"/>
|
504
|
+
<FRBRlanguage language="eng"/>
|
505
|
+
</FRBRExpression>
|
506
|
+
<FRBRManifestation>
|
507
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
508
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
509
|
+
<FRBRdate date="#{today}" name="Generation"/>
|
510
|
+
<FRBRauthor href="#slaw" as="#author"/>
|
511
|
+
</FRBRManifestation>
|
512
|
+
</identification>
|
513
|
+
</meta>
|
514
|
+
<mainBody>
|
515
|
+
<article id="schedule-1">
|
516
|
+
<content>
|
517
|
+
<p>Other than as is set out hereinbelow, no signs other than locality bound signs, temporary signs including loose portable sign, estate agents signs, newspaper headline posters and posters (the erection of which must comply with the appropriate schedules pertinent thereto) shall be erected on Municipal owned land.</p>
|
518
|
+
<p>1. Foo</p>
|
519
|
+
<p>2. Bar</p>
|
520
|
+
</content>
|
521
|
+
</article>
|
522
|
+
</mainBody>
|
523
|
+
</doc>
|
524
|
+
</component>
|
525
|
+
</components>
|
526
|
+
EOS
|
527
|
+
end
|
367
528
|
end
|
368
529
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|