slaw 9.1.0 → 10.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c60448bb3a8c2cb9bad02e434eab59805129a845b13f6f95b6f062b3a61ef15
4
- data.tar.gz: dfcb7d9a0413b8c61448af102da840e41cb3f942ec0a249c35c5ec4808c4e090
3
+ metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
4
+ data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
5
5
  SHA512:
6
- metadata.gz: 9930cf182f41f54ab0fc377a9f3d6a48d3cd8f466124ad9d3b404bd0739036a7e9615bdc1b715a16b94458b1215ed8bf77a1ab0a4d40092044faf1ecb2559caf
7
- data.tar.gz: 8db5857b43d42ccbacb24c48b2649ae49c431830a5e22a1ceeb121a16e6b8cc10f3a5757bbd6c64393e24c0e34d59486cd11e3a227badf32bfb92ebcfb08f8ce
6
+ metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
7
+ data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw) [![Gem Version](https://badge.fury.io/rb/slaw.svg)](https://badge.fury.io/rb/slaw)
2
2
 
3
- Slaw is a lightweight library for generating Akoma Ntoso 2.0 Act XML from plain text documents.
3
+ Slaw is a lightweight library for generating Akoma Ntoso 3.0 Act XML from plain text documents.
4
4
  It is used to power [Indigo](https://github.com/laws-africa/indigo) and uses grammars developed for the legal
5
5
  tradition in South Africa, although other traditions are supported.
6
6
 
@@ -86,6 +86,32 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.3.1 (11 January 2021)
90
+
91
+ * Strip ASCII, Unicode General Punctuation and Unicode Supplemental Punctuation characters from num elements when building eIds
92
+
93
+ ### 10.2.0 (4 September 2020)
94
+
95
+ * support inline superscript `^^text^^`
96
+ * support inline subscript `_^text^_`
97
+
98
+ ### 10.1.0 (18 June 2020)
99
+
100
+ * hcontainer elements have name attributes, to be compliant with AKN 3.0
101
+
102
+ ### 10.0.0 (12 June 2020)
103
+
104
+ * BREAKING: Create XML with AKN 3 namespace (http://docs.oasis-open.org/legaldocml/ns/akn/3.0), AKN2 is no longer supported
105
+ * BREAKING: replace id attributes with eId attributes
106
+ * BREAKING: serialize schedules as attachments to the act, not as components that are peers of the act
107
+ * BREAKING: anonymous blocks are serialized as hcontainers, not paragraphs
108
+ * BREAKING: crossheading hcontainer IDs correctly use hcontainer
109
+ * Remove unnecessary schemaLocation header in root element
110
+
111
+ ### 9.2.0 (10 June 2020)
112
+
113
+ * Subpart numbers are optional
114
+
89
115
  ### 9.1.0 (15 April 2020)
90
116
 
91
117
  * Subsections can have numbers such as 1.1A and 1.1bis
data/bin/slaw CHANGED
@@ -19,6 +19,7 @@ class SlawCLI < Thor
19
19
  option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
20
20
  option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
21
21
  option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
22
+ option :namespace, enum: ['akn3'], default: 'akn3', desc: 'AKN XML namespace to use.'
22
23
  def parse(name)
23
24
  logging
24
25
 
@@ -33,6 +34,11 @@ class SlawCLI < Thor
33
34
  text = extractor.extract_from_file(name)
34
35
  end
35
36
 
37
+ case options[:namespace]
38
+ when 'akn3'
39
+ Slaw.akn_namespace = Slaw::AKN3_NS
40
+ end
41
+
36
42
  generator = Slaw::ActGenerator.new(options[:grammar] || 'za')
37
43
 
38
44
  if options[:fragment]
@@ -49,7 +55,7 @@ class SlawCLI < Thor
49
55
 
50
56
  if options[:id_prefix]
51
57
  prefix = options[:id_prefix]
52
- prefix += "." unless prefix.end_with?('.')
58
+ prefix += "__" unless prefix.end_with?('__')
53
59
  generator.builder.fragment_id_prefix = prefix
54
60
  end
55
61
  end
@@ -4,16 +4,14 @@ module Slaw
4
4
  # Counters for generating element IDs. This is a hash from the element ID
5
5
  # prefix, to another hash that maps the element type name to a count.
6
6
  #
7
- # For backwards compatibility, counters always start at -1, and must be
8
- # incremented before being used. This ensures that element ids start at 0.
9
- # This is NOT compatible with AKN 3.0 which requires that element numbers
10
- # start at 1.
7
+ # Counters always start at 0, and must be incremented before being used.
8
+ # This ensures that element ids start at 1, as per AKN 3.0 spec.
11
9
  #
12
10
  # eg.
13
11
  #
14
12
  # section-1 => paragraph => 2
15
13
  #
16
- @@counters = Hash.new{ |h, k| h[k] = Hash.new(-1) }
14
+ @@counters = Hash.new{ |h, k| h[k] = Hash.new(0) }
17
15
 
18
16
  def self.counters
19
17
  @@counters
@@ -22,6 +20,37 @@ module Slaw
22
20
  def self.reset!
23
21
  @@counters.clear
24
22
  end
23
+
24
+ # Clean a <num> value for use in an eId
25
+ # See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
26
+ #
27
+ # "The number part of the identifiers of such elements corresponds to the
28
+ # stripping of all final punctuation, meaningless separations as well as
29
+ # redundant characters in the content of the <num> element. The
30
+ # representation is case-sensitive."
31
+ #
32
+ # Our algorithm is:
33
+ # 1. strip all leading and trailing whitespace and punctuation (using the unicode punctuation blocks)
34
+ # 2. strip all whitespace
35
+ # 3. replace all remaining punctuation with a hyphen.
36
+ #
37
+ # The General Punctuation block is \u2000-\u206F, and the Supplemental Punctuation block is \u2E00-\u2E7F.
38
+ #
39
+ # (i) -> i
40
+ # 1.2. -> 1-2
41
+ # “2.3“ -> 2-3
42
+ # 3a bis -> 3abis
43
+ def self.clean(num)
44
+ # leading whitespace and punctuation
45
+ num = num.gsub(/^[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '')
46
+ # trailing whitespace and punctuation
47
+ num.gsub!(/[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+$/, '')
48
+ # whitespace
49
+ num.gsub!(/\s/, '')
50
+ # remaining punctuation to a hyphen
51
+ num.gsub!(/[\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '-')
52
+ num
53
+ end
25
54
  end
26
55
  end
27
56
  end
@@ -20,7 +20,7 @@ module Slaw
20
20
  end
21
21
 
22
22
  rule inline_item
23
- remark / image / ref / bold / italics / [^\n]
23
+ remark / image / ref / bold / italics / superscript / subscript / [^\n]
24
24
  <InlineItem>
25
25
  end
26
26
 
@@ -57,6 +57,18 @@ module Slaw
57
57
  <Ref>
58
58
  end
59
59
 
60
+ rule superscript
61
+ # ^^foo^^
62
+ '^^' content:(!'^^' inline_item)+ '^^'
63
+ <Superscript>
64
+ end
65
+
66
+ rule subscript
67
+ # _^foo^_
68
+ '_^' content:(!'^_' inline_item)+ '^_'
69
+ <Subscript>
70
+ end
71
+
60
72
  end
61
73
  end
62
74
  end
@@ -71,6 +71,26 @@ module Slaw
71
71
  end
72
72
  end
73
73
 
74
+ class Superscript < Treetop::Runtime::SyntaxNode
75
+ def to_xml(b, idprefix)
76
+ b.sup { |b|
77
+ for e in content.elements
78
+ e.inline_item.to_xml(b, idprefix)
79
+ end
80
+ }
81
+ end
82
+ end
83
+
84
+ class Subscript < Treetop::Runtime::SyntaxNode
85
+ def to_xml(b, idprefix)
86
+ b.sub { |b|
87
+ for e in content.elements
88
+ e.inline_item.to_xml(b, idprefix)
89
+ end
90
+ }
91
+ end
92
+ end
93
+
74
94
  end
75
95
  end
76
96
  end
@@ -10,7 +10,7 @@ module Slaw
10
10
 
11
11
  class ScheduleContainer < Treetop::Runtime::SyntaxNode
12
12
  def to_xml(b, idprefix="")
13
- b.components { |b|
13
+ b.attachments { |b|
14
14
  schedules.children.elements.each_with_index { |e, i|
15
15
  e.to_xml(b, idprefix, i+1)
16
16
  }
@@ -86,6 +86,9 @@ module Slaw
86
86
  end
87
87
 
88
88
  def to_xml(b, idprefix=nil, i=1)
89
+ # reset counters for this new schedule document
90
+ Slaw::Grammars::Counters.reset!
91
+
89
92
  heading_text = self.schedule_title.heading_text
90
93
  if not heading_text
91
94
  heading_text = "Schedule"
@@ -95,12 +98,13 @@ module Slaw
95
98
  # the schedule id is derived from the heading
96
99
  schedule_id = self.schedule_id(heading_text, i)
97
100
 
98
- b.component(id: "component-#{schedule_id}") { |b|
99
- b.doc_(name: schedule_id) { |b|
101
+ b.attachment(eId: "att_#{i}") { |b|
102
+ schedule_title.to_xml(b, '', heading_text)
103
+ b.doc_(name: "schedule") { |b|
100
104
  b.meta { |b|
101
105
  b.identification(source: "#slaw") { |b|
102
106
  b.FRBRWork { |b|
103
- b.FRBRthis(value: "#{WORK_URI}/#{schedule_id}")
107
+ b.FRBRthis(value: "#{WORK_URI}/!#{schedule_id}")
104
108
  b.FRBRuri(value: WORK_URI)
105
109
  b.FRBRalias(value: heading_text)
106
110
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
@@ -108,14 +112,14 @@ module Slaw
108
112
  b.FRBRcountry(value: 'za')
109
113
  }
110
114
  b.FRBRExpression { |b|
111
- b.FRBRthis(value: "#{EXPRESSION_URI}/#{schedule_id}")
115
+ b.FRBRthis(value: "#{EXPRESSION_URI}/!#{schedule_id}")
112
116
  b.FRBRuri(value: EXPRESSION_URI)
113
117
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
114
118
  b.FRBRauthor(href: '#council')
115
119
  b.FRBRlanguage(language: 'eng')
116
120
  }
117
121
  b.FRBRManifestation { |b|
118
- b.FRBRthis(value: "#{MANIFESTATION_URI}/#{schedule_id}")
122
+ b.FRBRthis(value: "#{MANIFESTATION_URI}/!#{schedule_id}")
119
123
  b.FRBRuri(value: MANIFESTATION_URI)
120
124
  b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
121
125
  b.FRBRauthor(href: '#slaw')
@@ -124,14 +128,7 @@ module Slaw
124
128
  }
125
129
 
126
130
  b.mainBody { |b|
127
- idprefix = "#{schedule_id}."
128
-
129
- # there is no good AKN hierarchy container for schedules, so we
130
- # use hcontainer instead
131
- b.hcontainer(id: schedule_id, name: "schedule") { |b|
132
- schedule_title.to_xml(b, idprefix, heading_text)
133
- body.children.elements.each_with_index { |e| e.to_xml(b, idprefix, i) } if body.is_a? Body
134
- }
131
+ body.children.elements.each_with_index { |e| e.to_xml(b, '', i) } if body.is_a? Body
135
132
  }
136
133
  }
137
134
  }
@@ -3,7 +3,9 @@ module Slaw
3
3
  module Tables
4
4
  class Table < Treetop::Runtime::SyntaxNode
5
5
  def to_xml(b, idprefix, i=0)
6
- b.table(id: "#{idprefix}table#{i}") { |b|
6
+ cnt = Slaw::Grammars::Counters.counters[idprefix]['table'] += 1
7
+
8
+ b.table(eId: "#{idprefix}table_#{cnt}") { |b|
7
9
  # we'll gather cells into this row list
8
10
  rows = []
9
11
  cells = []
@@ -257,7 +257,7 @@ module Slaw
257
257
  end
258
258
 
259
259
  rule subpart_heading_prefix
260
- 'subpart'i space alphanums [ :-]*
260
+ 'subpart'i num:(space alphanums)? [ :-]*
261
261
  end
262
262
 
263
263
  rule chapter_heading_prefix
@@ -12,13 +12,13 @@ module Slaw
12
12
  MANIFESTATION_URI = EXPRESSION_URI
13
13
 
14
14
  def to_xml(b, idprefix=nil, i=0)
15
- b.act(contains: "originalVersion") { |b|
15
+ b.act(contains: 'originalVersion', name: 'act') { |b|
16
16
  write_meta(b)
17
17
  write_preface(b)
18
18
  write_preamble(b)
19
19
  write_body(b)
20
+ write_schedules(b)
20
21
  }
21
- write_schedules(b)
22
22
  end
23
23
 
24
24
  def write_meta(b)
@@ -26,8 +26,8 @@ module Slaw
26
26
  write_identification(b)
27
27
 
28
28
  b.references(source: "#this") {
29
- b.TLCOrganization(id: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
30
- b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council', showAs: "Council")
29
+ b.TLCOrganization(eId: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
30
+ b.TLCOrganization(eId: 'council', href: '/ontology/organization/za/council', showAs: "Council")
31
31
  }
32
32
  }
33
33
  end
@@ -38,7 +38,7 @@ module Slaw
38
38
  b.FRBRWork { |b|
39
39
  b.FRBRthis(value: "#{WORK_URI}/main")
40
40
  b.FRBRuri(value: WORK_URI)
41
- b.FRBRalias(value: 'Short Title')
41
+ b.FRBRalias(value: 'Short Title', name: 'title')
42
42
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
43
43
  b.FRBRauthor(href: '#council')
44
44
  b.FRBRcountry(value: 'za')
@@ -125,7 +125,7 @@ module Slaw
125
125
  if !stmts.empty?
126
126
  b.preamble { |b|
127
127
  stmts.each { |e|
128
- e.preamble_statement.to_xml(b, "")
128
+ e.preamble_statement.to_xml(b, "preamble__")
129
129
  }
130
130
  }
131
131
  end
@@ -138,11 +138,11 @@ module Slaw
138
138
  end
139
139
 
140
140
  def to_xml(b, id_prefix='', *args)
141
- id = id_prefix + "part-#{num}"
141
+ id = id_prefix + "part_#{Slaw::Grammars::Counters.clean(num)}"
142
142
 
143
- b.part(id: id) { |b|
143
+ b.part(eId: id) { |b|
144
144
  heading.to_xml(b)
145
- children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
145
+ children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
146
146
  }
147
147
  end
148
148
  end
@@ -168,22 +168,29 @@ module Slaw
168
168
  end
169
169
 
170
170
  def to_xml(b, id_prefix='', *args)
171
- id = id_prefix + "subpart-#{num}"
171
+ num = self.num
172
+ if num.empty?
173
+ num = Slaw::Grammars::Counters.counters[id_prefix]['subpart'] += 1
174
+ else
175
+ num = Slaw::Grammars::Counters.clean(num)
176
+ end
172
177
 
173
- b.subpart(id: id) { |b|
178
+ id = id_prefix + "subpart_#{num}"
179
+
180
+ b.subpart(eId: id) { |b|
174
181
  heading.to_xml(b)
175
- children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
182
+ children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
176
183
  }
177
184
  end
178
185
  end
179
186
 
180
187
  class SubpartHeading < Treetop::Runtime::SyntaxNode
181
188
  def num
182
- subpart_heading_prefix.alphanums.text_value
189
+ subpart_heading_prefix.num.text_value.strip()
183
190
  end
184
191
 
185
192
  def to_xml(b)
186
- b.num(num)
193
+ b.num(num) unless self.num.empty?
187
194
  if heading.respond_to? :inline_items
188
195
  b.heading { |b|
189
196
  heading.inline_items.to_xml(b)
@@ -198,11 +205,11 @@ module Slaw
198
205
  end
199
206
 
200
207
  def to_xml(b, id_prefix='', *args)
201
- id = id_prefix + "chapter-#{num}"
208
+ id = id_prefix + "chp_#{Slaw::Grammars::Counters.clean(num)}"
202
209
 
203
- b.chapter(id: id) { |b|
210
+ b.chapter(eId: id) { |b|
204
211
  heading.to_xml(b)
205
- children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
212
+ children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
206
213
  }
207
214
  end
208
215
  end
@@ -228,11 +235,11 @@ module Slaw
228
235
  end
229
236
 
230
237
  def to_xml(b, *args)
231
- id = "section-#{num}"
232
- b.section(id: id) { |b|
238
+ id = "sec_#{Slaw::Grammars::Counters.clean(num)}"
239
+ b.section(eId: id) { |b|
233
240
  section_title.to_xml(b)
234
241
 
235
- idprefix = "#{id}."
242
+ idprefix = "#{id}__"
236
243
  children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
237
244
  }
238
245
  end
@@ -303,11 +310,11 @@ module Slaw
303
310
 
304
311
  class BlockElements < Treetop::Runtime::SyntaxNode
305
312
  def to_xml(b, idprefix='', i=0)
306
- cnt = Slaw::Grammars::Counters.counters[idprefix]['paragraph'] += 1
307
- id = "#{idprefix}paragraph#{cnt}"
308
- idprefix = "#{id}."
313
+ cnt = Slaw::Grammars::Counters.counters[idprefix]['hcontainer'] += 1
314
+ id = "#{idprefix}hcontainer_#{cnt}"
315
+ idprefix = "#{id}__"
309
316
 
310
- b.paragraph(id: id) { |b|
317
+ b.hcontainer(eId: id, name: 'hcontainer') { |b|
311
318
  b.content { |b|
312
319
  elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
313
320
  }
@@ -321,10 +328,10 @@ module Slaw
321
328
  end
322
329
 
323
330
  def to_xml(b, idprefix, i)
324
- id = idprefix + num.gsub(/[()]/, '')
325
- idprefix = id + "."
331
+ id = idprefix + "subsec_" + Slaw::Grammars::Counters.clean(num)
332
+ idprefix = id + "__"
326
333
 
327
- b.subsection(id: id) { |b|
334
+ b.subsection(eId: id) { |b|
328
335
  b.num(num)
329
336
  block_elements_with_inline.to_xml(b, idprefix)
330
337
  }
@@ -336,10 +343,10 @@ module Slaw
336
343
  # yield to it a builder to insert a listIntroduction node
337
344
  def to_xml(b, idprefix, i=0, &block)
338
345
  cnt = Slaw::Grammars::Counters.counters[idprefix]['list'] += 1
339
- id = idprefix + "list#{cnt}"
340
- idprefix = id + '.'
346
+ id = idprefix + "list_#{cnt}"
347
+ idprefix = id + '__'
341
348
 
342
- b.blockList(id: id, renest: true) { |b|
349
+ b.blockList(eId: id, renest: true) { |b|
343
350
  b.listIntroduction { |b| yield b } if block_given?
344
351
 
345
352
  elements.each { |e| e.to_xml(b, idprefix) }
@@ -353,7 +360,7 @@ module Slaw
353
360
  end
354
361
 
355
362
  def to_xml(b, idprefix)
356
- b.item(id: idprefix + num.gsub(/[()]/, '')) { |b|
363
+ b.item(eId: idprefix + "item_" + Slaw::Grammars::Counters.clean(num)) { |b|
357
364
  b.num(num)
358
365
  b.p { |b|
359
366
  item_content.inline_items.to_xml(b, idprefix) if respond_to? :item_content and item_content.respond_to? :inline_items
@@ -364,10 +371,10 @@ module Slaw
364
371
 
365
372
  class Crossheading < Treetop::Runtime::SyntaxNode
366
373
  def to_xml(b, idprefix, i=0)
367
- cnt = Slaw::Grammars::Counters.counters[idprefix]['crossheading'] += 1
368
- id = "#{idprefix}crossheading-#{cnt}"
374
+ cnt = Slaw::Grammars::Counters.counters[idprefix]['hcontainer'] += 1
375
+ id = "#{idprefix}hcontainer_#{cnt}"
369
376
 
370
- b.hcontainer(id: id, name: 'crossheading') { |b|
377
+ b.hcontainer(eId: id, name: 'crossheading') { |b|
371
378
  b.heading { |b|
372
379
  inline_items.to_xml(b, idprefix)
373
380
  }