slaw 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26e7c12c3e421410a6be3b18c19f589915e920fb
4
- data.tar.gz: e8c1e77994e30c194b42bcca11bbc3b4eace6f76
3
+ metadata.gz: e5ab33b04df46f9d2d087f7a22ce9c0e7d12278e
4
+ data.tar.gz: 276c7b704a30c435d0dfbce2e74c34cd2e902b51
5
5
  SHA512:
6
- metadata.gz: 3e229100f80879b9646135b9c9f6fab2c543a20cd89ed6b021a74164fb92874b876451571bc9552d3150a2953a00be8d927b63c3e7ad780dec800751bff086c3
7
- data.tar.gz: c8d2d73f08b67a2535816d9b5ae8f8abfb7cd2627f3aef1688a986bd4e6ba1e71a01cb1c53c0a5b66d9f836191abef6c89e8f43767a12d8b949d329e635c1043
6
+ metadata.gz: 7632de6a68c70b2ce44a4854ec4bcb35867f269447b8c339286056301ef57d73548edabddd5a0ea963d4bc19c66b37d571beb1f8f635d7dc533ca32e1f57bd35
7
+ data.tar.gz: 6e4a1d0778b6616f6956fba51a1b604514a036f188d89232984d7f57dd5f75f3bad2b031507d9d2eeeb1f470d9b85fef063d055719e97853df7d8665f4715a1a
data/bin/slaw CHANGED
@@ -8,7 +8,7 @@ class SlawCLI < Thor
8
8
 
9
9
  class_option :verbose, type: :boolean, desc: "Display log output on stderr"
10
10
 
11
- desc "parse FILE", "parse FILE into Akoma Ntoso XML"
11
+ desc "parse FILE", "Parse FILE into Akoma Ntoso XML"
12
12
  option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
13
13
  option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
14
14
  option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
@@ -36,6 +36,12 @@ class SlawCLI < Thor
36
36
  puts act.to_xml(indent: 2)
37
37
  end
38
38
 
39
+ map %w(--version) => :__print_version
40
+ desc "--version", "Print slaw version info"
41
+ def __print_version
42
+ say "slaw #{Slaw::VERSION}"
43
+ end
44
+
39
45
  no_commands do
40
46
  def logging
41
47
  logger = Log4r::Logger.new('Slaw')
@@ -154,7 +154,9 @@ module Slaw
154
154
  doc.xpath('//a:component/a:doc[@name="schedules"]//a:heading/text()', a: NS)
155
155
 
156
156
  nodes.each do |heading|
157
- heading.content = heading.content.downcase.gsub(/^\w/) { $&.upcase }
157
+ if !(heading.content =~ /[a-z]/)
158
+ heading.content = heading.content.downcase.gsub(/^\w/) { $&.upcase }
159
+ end
158
160
  end
159
161
  end
160
162
 
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
@@ -3,14 +3,17 @@ module Slaw
3
3
  module Act
4
4
  class Act < Treetop::Runtime::SyntaxNode
5
5
  FRBR_URI = '/za/act/1980/01'
6
+ WORK_URI = FRBR_URI
7
+ EXPRESSION_URI = "#{FRBR_URI}/eng@"
8
+ MANIFESTATION_URI = EXPRESSION_URI
6
9
 
7
10
  def to_xml(b)
8
11
  b.act(contains: "originalVersion") { |b|
9
12
  write_meta(b)
10
13
  write_preamble(b)
11
14
  write_body(b)
12
- write_schedules(b)
13
15
  }
16
+ write_schedules(b)
14
17
  end
15
18
 
16
19
  def write_meta(b)
@@ -34,23 +37,23 @@ module Slaw
34
37
  b.identification(source: "#slaw") { |b|
35
38
  # use stub values so that we can generate a validating document
36
39
  b.FRBRWork { |b|
37
- b.FRBRthis(value: "#{FRBR_URI}/main")
38
- b.FRBRuri(value: '/za/act/locale/1980/name')
40
+ b.FRBRthis(value: "#{WORK_URI}/main")
41
+ b.FRBRuri(value: WORK_URI)
39
42
  b.FRBRalias(value: 'Short Title')
40
43
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
41
44
  b.FRBRauthor(href: '#council', as: '#author')
42
45
  b.FRBRcountry(value: 'za')
43
46
  }
44
47
  b.FRBRExpression { |b|
45
- b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
46
- b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
48
+ b.FRBRthis(value: "#{EXPRESSION_URI}/main")
49
+ b.FRBRuri(value: EXPRESSION_URI)
47
50
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
48
51
  b.FRBRauthor(href: '#council', as: '#author')
49
52
  b.FRBRlanguage(language: 'eng')
50
53
  }
51
54
  b.FRBRManifestation { |b|
52
- b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
53
- b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
55
+ b.FRBRthis(value: "#{MANIFESTATION_URI}/main")
56
+ b.FRBRuri(value: MANIFESTATION_URI)
54
57
  b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
55
58
  b.FRBRauthor(href: '#slaw', as: '#author')
56
59
  }
@@ -58,11 +61,7 @@ module Slaw
58
61
  end
59
62
 
60
63
  def write_preamble(b)
61
- if preamble.text_value != ""
62
- b.preamble { |b|
63
- preamble.to_xml(b)
64
- }
65
- end
64
+ preamble.to_xml(b)
66
65
  end
67
66
 
68
67
  def write_body(b)
@@ -78,11 +77,15 @@ module Slaw
78
77
 
79
78
  class Preamble < Treetop::Runtime::SyntaxNode
80
79
  def to_xml(b)
81
- statements.elements.each { |e|
82
- if not (e.content.text_value =~ /^preamble/i)
83
- b.p(e.content.text_value)
84
- end
85
- }
80
+ if text_value != ""
81
+ b.preamble { |b|
82
+ statements.elements.each { |e|
83
+ if not (e.content.text_value =~ /^preamble/i)
84
+ b.p(e.content.text_value)
85
+ end
86
+ }
87
+ }
88
+ end
86
89
  end
87
90
  end
88
91
 
@@ -315,37 +318,43 @@ module Slaw
315
318
  return if schedules.elements.empty?
316
319
 
317
320
  b.components { |b|
318
- b.component(id: 'component-0') { |b|
319
- b.doc(name: 'schedules') { |b|
320
- b.meta { |b|
321
- b.identification(source: "#slaw") { |b|
322
- b.FRBRWork { |b|
323
- b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules')
324
- b.FRBRuri(value: '/za/act/locale/1980/name/schedules')
325
- b.FRBRdate(date: '1980-01-01', name: 'Generation')
326
- b.FRBRauthor(href: '#council', as: '#author')
327
- b.FRBRcountry(value: 'za')
328
- }
329
- b.FRBRExpression { |b|
330
- b.FRBRthis(value: '/za/act/locale/1980/name/main//schedules/eng@')
331
- b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
332
- b.FRBRdate(date: '1980-01-01', name: 'Generation')
333
- b.FRBRauthor(href: '#council', as: '#author')
334
- b.FRBRlanguage(language: 'eng')
335
- }
336
- b.FRBRManifestation { |b|
337
- b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules/eng@')
338
- b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
339
- b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
340
- b.FRBRauthor(href: '#slaw', as: '#author')
341
- }
342
- }
343
- }
321
+ schedules.elements.each_with_index { |e, i| write_schedule(e, i+1, b) }
322
+ }
323
+ end
344
324
 
345
- b.mainBody { |b|
346
- schedules.elements.each_with_index { |e, i| e.to_xml(b, i) }
325
+ def write_schedule(element, i, b)
326
+ # component name
327
+ comp = "schedule#{i}"
328
+
329
+ b.component(id: "component-#{i}") { |b|
330
+ b.doc(name: "schedule#{i}") { |b|
331
+ b.meta { |b|
332
+ b.identification(source: "#slaw") { |b|
333
+ b.FRBRWork { |b|
334
+ b.FRBRthis(value: "#{Act::WORK_URI}/#{comp}")
335
+ b.FRBRuri(value: Act::WORK_URI)
336
+ b.FRBRalias(value: element.alias)
337
+ b.FRBRdate(date: '1980-01-01', name: 'Generation')
338
+ b.FRBRauthor(href: '#council', as: '#author')
339
+ b.FRBRcountry(value: 'za')
340
+ }
341
+ b.FRBRExpression { |b|
342
+ b.FRBRthis(value: "#{Act::EXPRESSION_URI}/#{comp}")
343
+ b.FRBRuri(value: Act::EXPRESSION_URI)
344
+ b.FRBRdate(date: '1980-01-01', name: 'Generation')
345
+ b.FRBRauthor(href: '#council', as: '#author')
346
+ b.FRBRlanguage(language: 'eng')
347
+ }
348
+ b.FRBRManifestation { |b|
349
+ b.FRBRthis(value: "#{Act::MANIFESTATION_URI}/#{comp}")
350
+ b.FRBRuri(value: Act::MANIFESTATION_URI)
351
+ b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
352
+ b.FRBRauthor(href: '#slaw', as: '#author')
353
+ }
347
354
  }
348
355
  }
356
+
357
+ b.mainBody { |b| element.to_xml(b, i) }
349
358
  }
350
359
  }
351
360
  end
@@ -357,6 +366,14 @@ module Slaw
357
366
  return (n && !n.empty?) ? n : nil
358
367
  end
359
368
 
369
+ def alias
370
+ if num
371
+ "Schedule #{num}"
372
+ else
373
+ "Schedule"
374
+ end
375
+ end
376
+
360
377
  def heading
361
378
  if schedule_heading.schedule_title.respond_to? :content
362
379
  schedule_heading.schedule_title.content.text_value
@@ -366,21 +383,15 @@ module Slaw
366
383
  end
367
384
 
368
385
  def to_xml(b, i)
369
- n = num
370
- id = if n
371
- "schedule-#{n}"
372
- else
373
- "schedules"
374
- end
375
-
376
- b.chapter(id: id) { |b|
377
- b.num(num) if num
378
- b.heading(heading) if heading
386
+ n = num.nil? ? i : num
387
+ id = "schedule-#{n}"
379
388
 
380
- b.section(id: id + ".section-0") { |b|
381
- b.content { |b|
382
- statements.elements.each { |e| b.p(e.content.text_value) }
383
- }
389
+ # there is no good AKN hierarchy container for schedules, so we
390
+ # just use article because we don't use it anywhere else.
391
+ b.article(id: id) { |b|
392
+ b.heading(heading) if heading
393
+ b.content { |b|
394
+ statements.elements.each { |e| b.p(e.content.text_value) }
384
395
  }
385
396
  }
386
397
  end
@@ -541,6 +541,44 @@ XML
541
541
  </blockList>
542
542
  </content>
543
543
  </subsection>
544
+ XML
545
+ )
546
+ end
547
+ end
548
+
549
+ describe '#normalise_headings' do
550
+ it 'should normalise ALL CAPS headings' do
551
+ doc = xml2doc(section(<<XML
552
+ <heading>DEFINITIONS FOR A.B.C.</heading>
553
+ <content>
554
+ <p></p>
555
+ </content>
556
+ XML
557
+ ))
558
+ subject.normalise_headings(doc)
559
+ doc.to_s.should == section(<<XML
560
+ <heading>Definitions for a.b.c.</heading>
561
+ <content>
562
+ <p/>
563
+ </content>
564
+ XML
565
+ )
566
+ end
567
+
568
+ it 'should not normalise normal headings' do
569
+ doc = xml2doc(section(<<XML
570
+ <heading>Definitions for A.B.C.</heading>
571
+ <content>
572
+ <p></p>
573
+ </content>
574
+ XML
575
+ ))
576
+ subject.normalise_headings(doc)
577
+ doc.to_s.should == section(<<XML
578
+ <heading>Definitions for A.B.C.</heading>
579
+ <content>
580
+ <p/>
581
+ </content>
544
582
  XML
545
583
  )
546
584
  end
data/spec/za/act_spec.rb CHANGED
@@ -364,5 +364,166 @@ EOS
364
364
  sched.statements.elements[0].content.text_value.should == "Baz"
365
365
  sched.statements.elements[1].content.text_value.should == "Boom"
366
366
  end
367
+
368
+ it 'should serialise many schedules correctly' do
369
+ node = parse :schedules, <<EOS
370
+ Schedule "1"
371
+ A Title
372
+ 1. Foo
373
+ 2. Bar
374
+ Schedule 2
375
+ Another Title
376
+ Baz
377
+ Boom
378
+ EOS
379
+
380
+ s = ""
381
+ builder = ::Builder::XmlMarkup.new(indent: 2, target: s)
382
+
383
+ node.to_xml(builder)
384
+
385
+ today = Time.now.strftime('%Y-%m-%d')
386
+
387
+ s.should == <<EOS
388
+ <components>
389
+ <component id="component-1">
390
+ <doc name="schedule1">
391
+ <meta>
392
+ <identification source="#slaw">
393
+ <FRBRWork>
394
+ <FRBRthis value="/za/act/1980/01/schedule1"/>
395
+ <FRBRuri value="/za/act/1980/01"/>
396
+ <FRBRalias value="Schedule 1"/>
397
+ <FRBRdate date="1980-01-01" name="Generation"/>
398
+ <FRBRauthor href="#council" as="#author"/>
399
+ <FRBRcountry value="za"/>
400
+ </FRBRWork>
401
+ <FRBRExpression>
402
+ <FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
403
+ <FRBRuri value="/za/act/1980/01/eng@"/>
404
+ <FRBRdate date="1980-01-01" name="Generation"/>
405
+ <FRBRauthor href="#council" as="#author"/>
406
+ <FRBRlanguage language="eng"/>
407
+ </FRBRExpression>
408
+ <FRBRManifestation>
409
+ <FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
410
+ <FRBRuri value="/za/act/1980/01/eng@"/>
411
+ <FRBRdate date="#{today}" name="Generation"/>
412
+ <FRBRauthor href="#slaw" as="#author"/>
413
+ </FRBRManifestation>
414
+ </identification>
415
+ </meta>
416
+ <mainBody>
417
+ <article id="schedule-1">
418
+ <heading>A Title</heading>
419
+ <content>
420
+ <p>1. Foo</p>
421
+ <p>2. Bar</p>
422
+ </content>
423
+ </article>
424
+ </mainBody>
425
+ </doc>
426
+ </component>
427
+ <component id="component-2">
428
+ <doc name="schedule2">
429
+ <meta>
430
+ <identification source="#slaw">
431
+ <FRBRWork>
432
+ <FRBRthis value="/za/act/1980/01/schedule2"/>
433
+ <FRBRuri value="/za/act/1980/01"/>
434
+ <FRBRalias value="Schedule 2"/>
435
+ <FRBRdate date="1980-01-01" name="Generation"/>
436
+ <FRBRauthor href="#council" as="#author"/>
437
+ <FRBRcountry value="za"/>
438
+ </FRBRWork>
439
+ <FRBRExpression>
440
+ <FRBRthis value="/za/act/1980/01/eng@/schedule2"/>
441
+ <FRBRuri value="/za/act/1980/01/eng@"/>
442
+ <FRBRdate date="1980-01-01" name="Generation"/>
443
+ <FRBRauthor href="#council" as="#author"/>
444
+ <FRBRlanguage language="eng"/>
445
+ </FRBRExpression>
446
+ <FRBRManifestation>
447
+ <FRBRthis value="/za/act/1980/01/eng@/schedule2"/>
448
+ <FRBRuri value="/za/act/1980/01/eng@"/>
449
+ <FRBRdate date="#{today}" name="Generation"/>
450
+ <FRBRauthor href="#slaw" as="#author"/>
451
+ </FRBRManifestation>
452
+ </identification>
453
+ </meta>
454
+ <mainBody>
455
+ <article id="schedule-2">
456
+ <heading>Another Title</heading>
457
+ <content>
458
+ <p>Baz</p>
459
+ <p>Boom</p>
460
+ </content>
461
+ </article>
462
+ </mainBody>
463
+ </doc>
464
+ </component>
465
+ </components>
466
+ EOS
467
+
468
+ end
469
+
470
+ it 'should serialise a single schedule without a heading' do
471
+ node = parse :schedules, <<EOS
472
+ Schedule "1"
473
+ Other than as is set out hereinbelow, no signs other than locality bound signs, temporary signs including loose portable sign, estate agents signs, newspaper headline posters and posters (the erection of which must comply with the appropriate schedules pertinent thereto) shall be erected on Municipal owned land.
474
+ 1. Foo
475
+ 2. Bar
476
+ EOS
477
+
478
+ s = ""
479
+ builder = ::Builder::XmlMarkup.new(indent: 2, target: s)
480
+
481
+ node.to_xml(builder)
482
+
483
+ today = Time.now.strftime('%Y-%m-%d')
484
+
485
+ s.should == <<EOS
486
+ <components>
487
+ <component id="component-1">
488
+ <doc name="schedule1">
489
+ <meta>
490
+ <identification source="#slaw">
491
+ <FRBRWork>
492
+ <FRBRthis value="/za/act/1980/01/schedule1"/>
493
+ <FRBRuri value="/za/act/1980/01"/>
494
+ <FRBRalias value="Schedule 1"/>
495
+ <FRBRdate date="1980-01-01" name="Generation"/>
496
+ <FRBRauthor href="#council" as="#author"/>
497
+ <FRBRcountry value="za"/>
498
+ </FRBRWork>
499
+ <FRBRExpression>
500
+ <FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
501
+ <FRBRuri value="/za/act/1980/01/eng@"/>
502
+ <FRBRdate date="1980-01-01" name="Generation"/>
503
+ <FRBRauthor href="#council" as="#author"/>
504
+ <FRBRlanguage language="eng"/>
505
+ </FRBRExpression>
506
+ <FRBRManifestation>
507
+ <FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
508
+ <FRBRuri value="/za/act/1980/01/eng@"/>
509
+ <FRBRdate date="#{today}" name="Generation"/>
510
+ <FRBRauthor href="#slaw" as="#author"/>
511
+ </FRBRManifestation>
512
+ </identification>
513
+ </meta>
514
+ <mainBody>
515
+ <article id="schedule-1">
516
+ <content>
517
+ <p>Other than as is set out hereinbelow, no signs other than locality bound signs, temporary signs including loose portable sign, estate agents signs, newspaper headline posters and posters (the erection of which must comply with the appropriate schedules pertinent thereto) shall be erected on Municipal owned land.</p>
518
+ <p>1. Foo</p>
519
+ <p>2. Bar</p>
520
+ </content>
521
+ </article>
522
+ </mainBody>
523
+ </doc>
524
+ </component>
525
+ </components>
526
+ EOS
527
+ end
367
528
  end
368
529
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-05 00:00:00.000000000 Z
11
+ date: 2015-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler