slaw 8.0.0 → 10.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0c018a71314291d2304ba458300e360da8555bfe6df1d4348fcba8bb4501700
4
- data.tar.gz: 5c68a881a0d52f54b28998fe195d877a068361c3750a254e8f88e3479c24f056
3
+ metadata.gz: ada9248a6af947faeae4212b3d1cffe7b912d90712baf8c7273cc0fbb107c926
4
+ data.tar.gz: 01ea2d767da2b46bdbaea17566998c46b09e293e334a454e5022d63104981021
5
5
  SHA512:
6
- metadata.gz: 5538b79f68d5db2289d713a0db9760147bb3baf4a986d63d0a9097fc86af95df6e6b3c8637eeaccd25834b4c79b8cadedf50c107c60833c8fb90382d7559f43f
7
- data.tar.gz: d9bdcf5780887407f8ab397e765b6df83de5d54e1898a71792f0560a74044d1216e41b30e6166cc86974719fc4007513db82340c927117dec6087b974b176f1b
6
+ metadata.gz: 04a40af1f73282bdf912213451ca7e37e7538a77373c42ae294979bf91bf240b0a22e4b09e3816f303324f3e6e463cf2d16f81b240fd94d692e13c9fe0dc1d87
7
+ data.tar.gz: 49cb6f91a31977c3bc6808baee29da7e014a4743d677234b9da83d1ec9cfbdd539daee2b03d6b9878382d7b4bd0b63fe6227f8f083151d412bced1dbc40ef6a1
@@ -1,7 +1,7 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - 2.7.0
3
4
  - 2.6.2
4
5
  - 2.5.4
5
- - 2.4.5
6
6
  before_install:
7
7
  - gem update bundler
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw) [![Gem Version](https://badge.fury.io/rb/slaw.svg)](https://badge.fury.io/rb/slaw)
2
2
 
3
- Slaw is a lightweight library for generating Akoma Ntoso 2.0 Act XML from plain text documents.
3
+ Slaw is a lightweight library for generating Akoma Ntoso 3.0 Act XML from plain text documents.
4
4
  It is used to power [Indigo](https://github.com/laws-africa/indigo) and uses grammars developed for the legal
5
5
  tradition in South Africa, although other traditions are supported.
6
6
 
@@ -79,8 +79,38 @@ You can create your own grammar by creating a gem that provides these files and
79
79
  7. Push to the branch: `git push origin my-new-feature`
80
80
  8. Create a new Pull Request
81
81
 
82
+ ## Releasing
83
+
84
+ 1. Update `lib/slaw/version.rb`
85
+ 2. Run `rake release`
86
+
82
87
  ## Changelog
83
88
 
89
+ ### 10.0.0 (12 June 2020)
90
+
91
+ * BREAKING: Create XML with AKN 3 namespace (http://docs.oasis-open.org/legaldocml/ns/akn/3.0), AKN2 is no longer supported
92
+ * BREAKING: replace id attributes with eId attributes
93
+ * BREAKING: serialize schedules as attachments to the act, not as components that are peers of the act
94
+ * BREAKING: anonymous blocks are serialized as hcontainers, not paragraphs
95
+ * BREAKING: crossheading hcontainer IDs correctly use hcontainer
96
+ * Remove unnecessary schemaLocation header in root element
97
+
98
+ ### 9.2.0 (10 June 2020)
99
+
100
+ * Subpart numbers are optional
101
+
102
+ ### 9.1.0 (15 April 2020)
103
+
104
+ * Subsections can have numbers such as 1.1A and 1.1bis
105
+
106
+ ### 9.0.0 (17 Mar 2020)
107
+
108
+ * Support SUBPART
109
+
110
+ ### 8.0.1 (26 Feb 2020)
111
+
112
+ * Fix bug with id prefix on schedules container
113
+
84
114
  ### 8.0.0 (19 Feb 2020)
85
115
 
86
116
  * Obey --id-prefix for group nodes
data/bin/slaw CHANGED
@@ -19,6 +19,7 @@ class SlawCLI < Thor
19
19
  option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
20
20
  option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
21
21
  option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
22
+ option :namespace, enum: ['akn3'], default: 'akn3', desc: 'AKN XML namespace to use.'
22
23
  def parse(name)
23
24
  logging
24
25
 
@@ -33,6 +34,11 @@ class SlawCLI < Thor
33
34
  text = extractor.extract_from_file(name)
34
35
  end
35
36
 
37
+ case options[:namespace]
38
+ when 'akn3'
39
+ Slaw.akn_namespace = Slaw::AKN3_NS
40
+ end
41
+
36
42
  generator = Slaw::ActGenerator.new(options[:grammar] || 'za')
37
43
 
38
44
  if options[:fragment]
@@ -49,7 +55,7 @@ class SlawCLI < Thor
49
55
 
50
56
  if options[:id_prefix]
51
57
  prefix = options[:id_prefix]
52
- prefix += "." unless prefix.end_with?('.')
58
+ prefix += "__" unless prefix.end_with?('__')
53
59
  generator.builder.fragment_id_prefix = prefix
54
60
  end
55
61
  end
@@ -4,16 +4,14 @@ module Slaw
4
4
  # Counters for generating element IDs. This is a hash from the element ID
5
5
  # prefix, to another hash that maps the element type name to a count.
6
6
  #
7
- # For backwards compatibility, counters always start at -1, and must be
8
- # incremented before being used. This ensures that element ids start at 0.
9
- # This is NOT compatible with AKN 3.0 which requires that element numbers
10
- # start at 1.
7
+ # Counters always start at 0, and must be incremented before being used.
8
+ # This ensures that element ids start at 1, as per AKN 3.0 spec.
11
9
  #
12
10
  # eg.
13
11
  #
14
12
  # section-1 => paragraph => 2
15
13
  #
16
- @@counters = Hash.new{ |h, k| h[k] = Hash.new(-1) }
14
+ @@counters = Hash.new{ |h, k| h[k] = Hash.new(0) }
17
15
 
18
16
  def self.counters
19
17
  @@counters
@@ -22,6 +20,25 @@ module Slaw
22
20
  def self.reset!
23
21
  @@counters.clear
24
22
  end
23
+
24
+ # Clean a <num> value for use in an eId
25
+ # See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
26
+ #
27
+ # The number part of the identifiers of such elements corresponds to the
28
+ # stripping of all final punctuation, meaningless separations as well as
29
+ # redundant characters in the content of the <num> element. The
30
+ # representation is case-sensitive
31
+ #
32
+ # (i) -> i
33
+ # 1.2. -> 1-2
34
+ # 3a bis -> 3abis
35
+ def self.clean(num)
36
+ num
37
+ .gsub(/[ ()\[\]]/, '')
38
+ .gsub(/\.+$/, '')
39
+ .gsub(/^\.+/, '')
40
+ .gsub(/\.+/, '-')
41
+ end
25
42
  end
26
43
  end
27
44
  end
@@ -9,10 +9,10 @@ module Slaw
9
9
  MANIFESTATION_URI = EXPRESSION_URI
10
10
 
11
11
  class ScheduleContainer < Treetop::Runtime::SyntaxNode
12
- def to_xml(b)
13
- b.components { |b|
12
+ def to_xml(b, idprefix="")
13
+ b.attachments { |b|
14
14
  schedules.children.elements.each_with_index { |e, i|
15
- e.to_xml(b, "", i+1)
15
+ e.to_xml(b, idprefix, i+1)
16
16
  }
17
17
  }
18
18
  end
@@ -86,6 +86,9 @@ module Slaw
86
86
  end
87
87
 
88
88
  def to_xml(b, idprefix=nil, i=1)
89
+ # reset counters for this new schedule document
90
+ Slaw::Grammars::Counters.reset!
91
+
89
92
  heading_text = self.schedule_title.heading_text
90
93
  if not heading_text
91
94
  heading_text = "Schedule"
@@ -95,12 +98,13 @@ module Slaw
95
98
  # the schedule id is derived from the heading
96
99
  schedule_id = self.schedule_id(heading_text, i)
97
100
 
98
- b.component(id: "component-#{schedule_id}") { |b|
99
- b.doc_(name: schedule_id) { |b|
101
+ b.attachment(eId: "att_#{i}") { |b|
102
+ schedule_title.to_xml(b, '', heading_text)
103
+ b.doc_(name: "schedule") { |b|
100
104
  b.meta { |b|
101
105
  b.identification(source: "#slaw") { |b|
102
106
  b.FRBRWork { |b|
103
- b.FRBRthis(value: "#{WORK_URI}/#{schedule_id}")
107
+ b.FRBRthis(value: "#{WORK_URI}/!#{schedule_id}")
104
108
  b.FRBRuri(value: WORK_URI)
105
109
  b.FRBRalias(value: heading_text)
106
110
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
@@ -108,14 +112,14 @@ module Slaw
108
112
  b.FRBRcountry(value: 'za')
109
113
  }
110
114
  b.FRBRExpression { |b|
111
- b.FRBRthis(value: "#{EXPRESSION_URI}/#{schedule_id}")
115
+ b.FRBRthis(value: "#{EXPRESSION_URI}/!#{schedule_id}")
112
116
  b.FRBRuri(value: EXPRESSION_URI)
113
117
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
114
118
  b.FRBRauthor(href: '#council')
115
119
  b.FRBRlanguage(language: 'eng')
116
120
  }
117
121
  b.FRBRManifestation { |b|
118
- b.FRBRthis(value: "#{MANIFESTATION_URI}/#{schedule_id}")
122
+ b.FRBRthis(value: "#{MANIFESTATION_URI}/!#{schedule_id}")
119
123
  b.FRBRuri(value: MANIFESTATION_URI)
120
124
  b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
121
125
  b.FRBRauthor(href: '#slaw')
@@ -124,14 +128,7 @@ module Slaw
124
128
  }
125
129
 
126
130
  b.mainBody { |b|
127
- idprefix = "#{schedule_id}."
128
-
129
- # there is no good AKN hierarchy container for schedules, so we
130
- # use hcontainer instead
131
- b.hcontainer(id: schedule_id, name: "schedule") { |b|
132
- schedule_title.to_xml(b, idprefix, heading_text)
133
- body.children.elements.each_with_index { |e| e.to_xml(b, idprefix, i) } if body.is_a? Body
134
- }
131
+ body.children.elements.each_with_index { |e| e.to_xml(b, '', i) } if body.is_a? Body
135
132
  }
136
133
  }
137
134
  }
@@ -3,7 +3,9 @@ module Slaw
3
3
  module Tables
4
4
  class Table < Treetop::Runtime::SyntaxNode
5
5
  def to_xml(b, idprefix, i=0)
6
- b.table(id: "#{idprefix}table#{i}") { |b|
6
+ cnt = Slaw::Grammars::Counters.counters[idprefix]['table'] += 1
7
+
8
+ b.table(eId: "#{idprefix}table_#{cnt}") { |b|
7
9
  # we'll gather cells into this row list
8
10
  rows = []
9
11
  cells = []
@@ -20,7 +20,11 @@ module Slaw
20
20
  end
21
21
 
22
22
  rule dotted_number_2
23
- number '.' number
23
+ # 9.1
24
+ # 9.1A
25
+ # 9.1A1
26
+ # NOT: 9.A
27
+ number '.' number alphanums?
24
28
  end
25
29
 
26
30
  rule number
@@ -44,37 +44,43 @@ module Slaw
44
44
 
45
45
  rule body
46
46
  ('BODY' space? eol)?
47
- children:(chapter / part / section / subsection / generic_container)* <Body>
47
+ children:(chapter / part / subpart / section / subsection / generic_container)* <Body>
48
48
  end
49
49
 
50
50
  # chapter (parts allowed)
51
51
  rule chapter
52
52
  heading:chapter_heading
53
- children:(part_no_chapter / section / subsection / generic_container)*
53
+ children:(part_no_chapter / subpart / section / subsection / generic_container)*
54
54
  <Chapter>
55
55
  end
56
56
 
57
57
  # part (chapters allowed)
58
58
  rule part
59
59
  heading:part_heading
60
- children:(chapter_no_part / section / subsection / generic_container)*
60
+ children:(chapter_no_part / subpart / section / subsection / generic_container)*
61
61
  <Part>
62
62
  end
63
63
 
64
64
  # part (no chapters)
65
65
  rule part_no_chapter
66
66
  heading:part_heading
67
- children:(section / subsection / generic_container)*
67
+ children:(subpart / section / subsection / generic_container)*
68
68
  <Part>
69
69
  end
70
70
 
71
71
  # chapter (no parts)
72
72
  rule chapter_no_part
73
73
  heading:chapter_heading
74
- children:(section / subsection / generic_container)*
74
+ children:(subpart / section / subsection / generic_container)*
75
75
  <Chapter>
76
76
  end
77
77
 
78
+ rule subpart
79
+ heading:subpart_heading
80
+ children:(section / subsection / generic_container)*
81
+ <Subpart>
82
+ end
83
+
78
84
  rule section
79
85
  section_title
80
86
  children:(subsection / generic_container)* <Section>
@@ -134,6 +140,10 @@ module Slaw
134
140
  children:part_no_chapter+ <GroupNode>
135
141
  end
136
142
 
143
+ rule subparts
144
+ children:subpart+ <GroupNode>
145
+ end
146
+
137
147
  rule sections
138
148
  children:section+ <GroupNode>
139
149
  end
@@ -151,6 +161,11 @@ module Slaw
151
161
  <PartHeading>
152
162
  end
153
163
 
164
+ rule subpart_heading
165
+ space? subpart_heading_prefix heading:(newline? space? inline_items)? eol
166
+ <SubpartHeading>
167
+ end
168
+
154
169
  rule section_title
155
170
  section_title_1 / section_1_title
156
171
  end
@@ -241,6 +256,10 @@ module Slaw
241
256
  'part'i space alphanums [ :-]*
242
257
  end
243
258
 
259
+ rule subpart_heading_prefix
260
+ 'subpart'i num:(space alphanums)? [ :-]*
261
+ end
262
+
244
263
  rule chapter_heading_prefix
245
264
  'chapter'i space alphanums [ :-]*
246
265
  end
@@ -275,12 +294,12 @@ module Slaw
275
294
 
276
295
  rule body_hierarchy_prefix
277
296
  # Text that indicates the start of a hierarchy element, in the body
278
- chapter_heading / part_heading / section_title / schedule_title / subsection_prefix / crossheading
297
+ chapter_heading / part_heading / subpart_heading / section_title / schedule_title / subsection_prefix / crossheading
279
298
  end
280
299
 
281
300
  rule non_body_hierarchy_prefix
282
301
  # Text that indicates the start of a hierarchy element, in the preamble or preface
283
- chapter_heading / part_heading / section_title / schedule_title / crossheading
302
+ chapter_heading / part_heading / subpart_heading / section_title / schedule_title / crossheading
284
303
  end
285
304
 
286
305
  include Slaw::Grammars::Inlines
@@ -12,13 +12,13 @@ module Slaw
12
12
  MANIFESTATION_URI = EXPRESSION_URI
13
13
 
14
14
  def to_xml(b, idprefix=nil, i=0)
15
- b.act(contains: "originalVersion") { |b|
15
+ b.act(contains: 'originalVersion', name: 'act') { |b|
16
16
  write_meta(b)
17
17
  write_preface(b)
18
18
  write_preamble(b)
19
19
  write_body(b)
20
+ write_schedules(b)
20
21
  }
21
- write_schedules(b)
22
22
  end
23
23
 
24
24
  def write_meta(b)
@@ -26,8 +26,8 @@ module Slaw
26
26
  write_identification(b)
27
27
 
28
28
  b.references(source: "#this") {
29
- b.TLCOrganization(id: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
30
- b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council', showAs: "Council")
29
+ b.TLCOrganization(eId: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
30
+ b.TLCOrganization(eId: 'council', href: '/ontology/organization/za/council', showAs: "Council")
31
31
  }
32
32
  }
33
33
  end
@@ -38,7 +38,7 @@ module Slaw
38
38
  b.FRBRWork { |b|
39
39
  b.FRBRthis(value: "#{WORK_URI}/main")
40
40
  b.FRBRuri(value: WORK_URI)
41
- b.FRBRalias(value: 'Short Title')
41
+ b.FRBRalias(value: 'Short Title', name: 'title')
42
42
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
43
43
  b.FRBRauthor(href: '#council')
44
44
  b.FRBRcountry(value: 'za')
@@ -125,7 +125,7 @@ module Slaw
125
125
  if !stmts.empty?
126
126
  b.preamble { |b|
127
127
  stmts.each { |e|
128
- e.preamble_statement.to_xml(b, "")
128
+ e.preamble_statement.to_xml(b, "preamble__")
129
129
  }
130
130
  }
131
131
  end
@@ -138,11 +138,11 @@ module Slaw
138
138
  end
139
139
 
140
140
  def to_xml(b, id_prefix='', *args)
141
- id = id_prefix + "part-#{num}"
141
+ id = id_prefix + "part_#{Slaw::Grammars::Counters.clean(num)}"
142
142
 
143
- b.part(id: id) { |b|
143
+ b.part(eId: id) { |b|
144
144
  heading.to_xml(b)
145
- children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
145
+ children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
146
146
  }
147
147
  end
148
148
  end
@@ -162,17 +162,54 @@ module Slaw
162
162
  end
163
163
  end
164
164
 
165
+ class Subpart < Treetop::Runtime::SyntaxNode
166
+ def num
167
+ heading.num
168
+ end
169
+
170
+ def to_xml(b, id_prefix='', *args)
171
+ num = self.num
172
+ if num.empty?
173
+ num = Slaw::Grammars::Counters.counters[id_prefix]['subpart'] += 1
174
+ else
175
+ num = Slaw::Grammars::Counters.clean(num)
176
+ end
177
+
178
+ id = id_prefix + "subpart_#{num}"
179
+
180
+ b.subpart(eId: id) { |b|
181
+ heading.to_xml(b)
182
+ children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
183
+ }
184
+ end
185
+ end
186
+
187
+ class SubpartHeading < Treetop::Runtime::SyntaxNode
188
+ def num
189
+ subpart_heading_prefix.num.text_value.strip()
190
+ end
191
+
192
+ def to_xml(b)
193
+ b.num(num) unless self.num.empty?
194
+ if heading.respond_to? :inline_items
195
+ b.heading { |b|
196
+ heading.inline_items.to_xml(b)
197
+ }
198
+ end
199
+ end
200
+ end
201
+
165
202
  class Chapter < Treetop::Runtime::SyntaxNode
166
203
  def num
167
204
  heading.num
168
205
  end
169
206
 
170
207
  def to_xml(b, id_prefix='', *args)
171
- id = id_prefix + "chapter-#{num}"
208
+ id = id_prefix + "chp_#{Slaw::Grammars::Counters.clean(num)}"
172
209
 
173
- b.chapter(id: id) { |b|
210
+ b.chapter(eId: id) { |b|
174
211
  heading.to_xml(b)
175
- children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
212
+ children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
176
213
  }
177
214
  end
178
215
  end
@@ -198,11 +235,11 @@ module Slaw
198
235
  end
199
236
 
200
237
  def to_xml(b, *args)
201
- id = "section-#{num}"
202
- b.section(id: id) { |b|
238
+ id = "sec_#{Slaw::Grammars::Counters.clean(num)}"
239
+ b.section(eId: id) { |b|
203
240
  section_title.to_xml(b)
204
241
 
205
- idprefix = "#{id}."
242
+ idprefix = "#{id}__"
206
243
  children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
207
244
  }
208
245
  end
@@ -273,11 +310,11 @@ module Slaw
273
310
 
274
311
  class BlockElements < Treetop::Runtime::SyntaxNode
275
312
  def to_xml(b, idprefix='', i=0)
276
- cnt = Slaw::Grammars::Counters.counters[idprefix]['paragraph'] += 1
277
- id = "#{idprefix}paragraph#{cnt}"
278
- idprefix = "#{id}."
313
+ cnt = Slaw::Grammars::Counters.counters[idprefix]['hcontainer'] += 1
314
+ id = "#{idprefix}hcontainer_#{cnt}"
315
+ idprefix = "#{id}__"
279
316
 
280
- b.paragraph(id: id) { |b|
317
+ b.hcontainer(eId: id) { |b|
281
318
  b.content { |b|
282
319
  elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
283
320
  }
@@ -291,10 +328,10 @@ module Slaw
291
328
  end
292
329
 
293
330
  def to_xml(b, idprefix, i)
294
- id = idprefix + num.gsub(/[()]/, '')
295
- idprefix = id + "."
331
+ id = idprefix + "subsec_" + Slaw::Grammars::Counters.clean(num)
332
+ idprefix = id + "__"
296
333
 
297
- b.subsection(id: id) { |b|
334
+ b.subsection(eId: id) { |b|
298
335
  b.num(num)
299
336
  block_elements_with_inline.to_xml(b, idprefix)
300
337
  }
@@ -306,10 +343,10 @@ module Slaw
306
343
  # yield to it a builder to insert a listIntroduction node
307
344
  def to_xml(b, idprefix, i=0, &block)
308
345
  cnt = Slaw::Grammars::Counters.counters[idprefix]['list'] += 1
309
- id = idprefix + "list#{cnt}"
310
- idprefix = id + '.'
346
+ id = idprefix + "list_#{cnt}"
347
+ idprefix = id + '__'
311
348
 
312
- b.blockList(id: id, renest: true) { |b|
349
+ b.blockList(eId: id, renest: true) { |b|
313
350
  b.listIntroduction { |b| yield b } if block_given?
314
351
 
315
352
  elements.each { |e| e.to_xml(b, idprefix) }
@@ -323,7 +360,7 @@ module Slaw
323
360
  end
324
361
 
325
362
  def to_xml(b, idprefix)
326
- b.item(id: idprefix + num.gsub(/[()]/, '')) { |b|
363
+ b.item(eId: idprefix + "item_" + Slaw::Grammars::Counters.clean(num)) { |b|
327
364
  b.num(num)
328
365
  b.p { |b|
329
366
  item_content.inline_items.to_xml(b, idprefix) if respond_to? :item_content and item_content.respond_to? :inline_items
@@ -334,10 +371,10 @@ module Slaw
334
371
 
335
372
  class Crossheading < Treetop::Runtime::SyntaxNode
336
373
  def to_xml(b, idprefix, i=0)
337
- cnt = Slaw::Grammars::Counters.counters[idprefix]['crossheading'] += 1
338
- id = "#{idprefix}crossheading-#{cnt}"
374
+ cnt = Slaw::Grammars::Counters.counters[idprefix]['hcontainer'] += 1
375
+ id = "#{idprefix}hcontainer_#{cnt}"
339
376
 
340
- b.hcontainer(id: id, name: 'crossheading') { |b|
377
+ b.hcontainer(eId: id, name: 'crossheading') { |b|
341
378
  b.heading { |b|
342
379
  inline_items.to_xml(b, idprefix)
343
380
  }