slaw 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/slaw +13 -13
- data/lib/slaw.rb +1 -1
- data/lib/slaw/extract/extractor.rb +16 -4
- data/lib/slaw/generator.rb +36 -0
- data/lib/slaw/parse/builder.rb +0 -1
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/{bylaw.treetop → act.treetop} +4 -3
- data/lib/slaw/za/{bylaw_nodes.rb → act_nodes.rb} +72 -53
- data/slaw.gemspec +1 -0
- data/spec/act_spec.rb +56 -0
- data/spec/bylaw_spec.rb +10 -29
- data/spec/za/{bylaw_spec.rb → act_spec.rb} +11 -12
- metadata +23 -7
- data/lib/slaw/za/bylaw_generator.rb +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d293949a3ac2383cd254f0efd2268ef24286f1d
|
4
|
+
data.tar.gz: 2439db865559b2f9b494b7755004d3f7a7d9b4ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a7af42dc109723ea0908e8bb3ba9cbf4269f923dbd8a68e6cabd87f53e98dec59884cb7d4daf0df8de98b3d209302a9ba214e45d04fadffd372fabf91c9db26
|
7
|
+
data.tar.gz: 41bac579c874b9ef29aaf5a87e243fd9ba268efe46c60e94cdc728e0e4d0f01b37b60b40fb0fa80a82889a43ac44174c6d8385386fb72a98c37dd0c5868c3b97
|
data/README.md
CHANGED
@@ -40,7 +40,7 @@ installed by default on most systems (including Mac). On Ubuntu you can use:
|
|
40
40
|
|
41
41
|
The simplest way to use Slaw is via the commandline:
|
42
42
|
|
43
|
-
$ slaw
|
43
|
+
$ slaw parse myfile.pdf
|
44
44
|
|
45
45
|
## Overview
|
46
46
|
|
@@ -79,7 +79,7 @@ extractor = Slaw::Extract::Extractor.new
|
|
79
79
|
text = extractor.extract_from_pdf('/path/to/file.pdf')
|
80
80
|
|
81
81
|
# parse the text into a XML and
|
82
|
-
generator = Slaw::ZA::BylawGenerator.new
|
82
|
+
generator = Slaw::ActGenerator.new
|
83
83
|
bylaw = generator.generate_from_text(text)
|
84
84
|
puts bylaw.to_xml(indent: 2)
|
85
85
|
|
data/bin/slaw
CHANGED
@@ -4,13 +4,14 @@ require 'thor'
|
|
4
4
|
require 'slaw'
|
5
5
|
|
6
6
|
class SlawCLI < Thor
|
7
|
-
desc "convert FILE", "convert FILE into Akoma Ntoso XML"
|
8
|
-
option :input, enum: ['text', 'pdf'], desc: "Type of input, determined by file extension by default."
|
9
|
-
option :output, enum: ['text', 'xml'], default: 'xml', desc: "Type of output required."
|
10
|
-
|
11
7
|
# TODO: support different grammars and locales
|
12
8
|
|
13
|
-
|
9
|
+
desc "parse FILE", "parse FILE into Akoma Ntoso XML"
|
10
|
+
option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
|
11
|
+
option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
|
12
|
+
option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
|
13
|
+
def parse(name)
|
14
|
+
Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
|
14
15
|
extractor = Slaw::Extract::Extractor.new
|
15
16
|
|
16
17
|
case options[:input]
|
@@ -22,14 +23,13 @@ class SlawCLI < Thor
|
|
22
23
|
text = extractor.extract_from_file(name)
|
23
24
|
end
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
26
|
+
generator = Slaw::ActGenerator.new
|
27
|
+
act = generator.generate_from_text(text)
|
28
|
+
|
29
|
+
# definitions?
|
30
|
+
generator.builder.link_definitions(act.doc) if options[:definitions]
|
31
|
+
|
32
|
+
puts act.to_xml(indent: 2)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
data/lib/slaw.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'tempfile'
|
3
|
+
require 'mimemagic'
|
3
4
|
|
4
5
|
module Slaw
|
5
6
|
module Extract
|
@@ -31,13 +32,19 @@ module Slaw
|
|
31
32
|
def extract_from_file(filename)
|
32
33
|
ext = filename[-4..-1].downcase
|
33
34
|
|
34
|
-
|
35
|
-
|
35
|
+
mimetype = get_mimetype(filename)
|
36
|
+
|
37
|
+
case mimetype && mimetype.type
|
38
|
+
when 'application/pdf'
|
36
39
|
extract_from_pdf(filename)
|
37
|
-
when '
|
40
|
+
when 'text/plain', nil
|
38
41
|
extract_from_text(filename)
|
39
42
|
else
|
40
|
-
|
43
|
+
if mimetype.text?
|
44
|
+
extract_from_text(filename)
|
45
|
+
else
|
46
|
+
raise ArgumentError.new("Unsupported file type #{ext} (#{mimetype || unknown})")
|
47
|
+
end
|
41
48
|
end
|
42
49
|
end
|
43
50
|
|
@@ -103,6 +110,11 @@ module Slaw
|
|
103
110
|
end
|
104
111
|
end
|
105
112
|
|
113
|
+
def get_mimetype(filename)
|
114
|
+
File.open(filename) { |f| MimeMagic.by_magic(f) } \
|
115
|
+
|| MimeMagic.by_path(filename)
|
116
|
+
end
|
117
|
+
|
106
118
|
# Get location of the pdftotext executable for all instances.
|
107
119
|
def self.pdftotext_path
|
108
120
|
@@pdftotext_path
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Slaw
|
2
|
+
# Base class for generating Act documents
|
3
|
+
class ActGenerator
|
4
|
+
Treetop.load(File.dirname(__FILE__) + "/za/act.treetop")
|
5
|
+
|
6
|
+
# [Treetop::Runtime::CompiledParser] compiled parser
|
7
|
+
attr_accessor :parser
|
8
|
+
|
9
|
+
# [Slaw::Parse::Builder] builder used by the generator
|
10
|
+
attr_accessor :builder
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@parser = Slaw::ZA::ActParser.new
|
14
|
+
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
15
|
+
@cleanser = Slaw::Parse::Cleanser.new
|
16
|
+
@document_class = Slaw::Act
|
17
|
+
end
|
18
|
+
|
19
|
+
# Generate a Slaw::Act instance from plain text.
|
20
|
+
#
|
21
|
+
# @param text [String] plain text
|
22
|
+
#
|
23
|
+
# @return [Slaw::Act] the resulting act
|
24
|
+
def generate_from_text(text)
|
25
|
+
act = @document_class.new
|
26
|
+
act.doc = @builder.parse_and_process_text(cleanup(text))
|
27
|
+
act
|
28
|
+
end
|
29
|
+
|
30
|
+
def cleanup(text)
|
31
|
+
text = @cleanser.cleanup(text)
|
32
|
+
text = @cleanser.reformat(text)
|
33
|
+
text
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/slaw/parse/builder.rb
CHANGED
data/lib/slaw/version.rb
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
require 'slaw/parse/grammar_helpers'
|
4
|
+
require 'slaw/za/act_nodes'
|
4
5
|
|
5
6
|
module Slaw
|
6
7
|
module ZA
|
7
|
-
grammar Bylaw
|
8
|
+
grammar Act
|
8
9
|
include Slaw::Parse::GrammarHelpers
|
9
10
|
|
10
11
|
########
|
11
12
|
# major containers
|
12
13
|
|
13
|
-
rule
|
14
|
+
rule act
|
14
15
|
preamble
|
15
16
|
chapters:chapter*
|
16
|
-
schedules:schedules <
|
17
|
+
schedules:schedules <Act>
|
17
18
|
end
|
18
19
|
|
19
20
|
rule preamble
|
@@ -1,58 +1,77 @@
|
|
1
1
|
module Slaw
|
2
2
|
module ZA
|
3
|
-
module
|
4
|
-
class
|
3
|
+
module Act
|
4
|
+
class Act < Treetop::Runtime::SyntaxNode
|
5
|
+
FRBR_URI = '/za/act/1980/01'
|
6
|
+
|
5
7
|
def to_xml(b)
|
6
8
|
b.act(contains: "originalVersion") { |b|
|
7
|
-
b
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
b.FRBRalias(value: 'By-Law Short Title')
|
14
|
-
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
15
|
-
b.FRBRauthor(href: '#council', as: '#author')
|
16
|
-
b.FRBRcountry(value: 'za')
|
17
|
-
}
|
18
|
-
b.FRBRExpression { |b|
|
19
|
-
b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
|
20
|
-
b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
|
21
|
-
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
22
|
-
b.FRBRauthor(href: '#council', as: '#author')
|
23
|
-
b.FRBRlanguage(language: 'eng')
|
24
|
-
}
|
25
|
-
b.FRBRManifestation { |b|
|
26
|
-
b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
|
27
|
-
b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
|
28
|
-
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
29
|
-
b.FRBRauthor(href: '#openbylaws', as: '#author')
|
30
|
-
}
|
31
|
-
}
|
9
|
+
write_meta(b)
|
10
|
+
write_preamble(b)
|
11
|
+
write_body(b)
|
12
|
+
write_schedules(b)
|
13
|
+
}
|
14
|
+
end
|
32
15
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
showAs: 'Province of Western Cape: Provincial Gazette')
|
16
|
+
def write_meta(b)
|
17
|
+
b.meta { |b|
|
18
|
+
write_identification(b)
|
37
19
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
20
|
+
b.publication(date: '1980-01-01',
|
21
|
+
name: 'Publication Name',
|
22
|
+
number: 'XXXX',
|
23
|
+
showAs: 'Publication Name')
|
24
|
+
|
25
|
+
b.references(source: "#this") {
|
26
|
+
b.TLCOrganization(id: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
|
27
|
+
b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council', showAs: "Council")
|
28
|
+
b.TLCRole(id: 'author', href: '/ontology/role/author', showAs: 'Author')
|
43
29
|
}
|
30
|
+
}
|
31
|
+
end
|
44
32
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
33
|
+
def write_identification(b)
|
34
|
+
b.identification(source: "#slaw") { |b|
|
35
|
+
# use stub values so that we can generate a validating document
|
36
|
+
b.FRBRWork { |b|
|
37
|
+
b.FRBRthis(value: "#{FRBR_URI}/main")
|
38
|
+
b.FRBRuri(value: '/za/act/locale/1980/name')
|
39
|
+
b.FRBRalias(value: 'Short Title')
|
40
|
+
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
41
|
+
b.FRBRauthor(href: '#council', as: '#author')
|
42
|
+
b.FRBRcountry(value: 'za')
|
43
|
+
}
|
44
|
+
b.FRBRExpression { |b|
|
45
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
|
46
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
|
47
|
+
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
48
|
+
b.FRBRauthor(href: '#council', as: '#author')
|
49
|
+
b.FRBRlanguage(language: 'eng')
|
50
|
+
}
|
51
|
+
b.FRBRManifestation { |b|
|
52
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
|
53
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
|
54
|
+
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
55
|
+
b.FRBRauthor(href: '#slaw', as: '#author')
|
56
|
+
}
|
57
|
+
}
|
58
|
+
end
|
50
59
|
|
51
|
-
|
52
|
-
|
60
|
+
def write_preamble(b)
|
61
|
+
if preamble.text_value != ""
|
62
|
+
b.preamble { |b|
|
63
|
+
preamble.to_xml(b)
|
53
64
|
}
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def write_body(b)
|
69
|
+
b.body { |b|
|
70
|
+
chapters.elements.each { |e| e.to_xml(b) }
|
54
71
|
}
|
72
|
+
end
|
55
73
|
|
74
|
+
def write_schedules(b)
|
56
75
|
schedules.to_xml(b)
|
57
76
|
end
|
58
77
|
end
|
@@ -179,7 +198,7 @@ module Slaw
|
|
179
198
|
# a section title of the form:
|
180
199
|
#
|
181
200
|
# Definitions
|
182
|
-
# 1. In this
|
201
|
+
# 1. In this act...
|
183
202
|
|
184
203
|
def num
|
185
204
|
section_title_prefix.number_letter.text_value
|
@@ -194,7 +213,7 @@ module Slaw
|
|
194
213
|
# a section title of the form:
|
195
214
|
#
|
196
215
|
# 1. Definitions
|
197
|
-
# In this
|
216
|
+
# In this act...
|
198
217
|
#
|
199
218
|
# In this format, the title is optional and the section content may
|
200
219
|
# start where we think the title is.
|
@@ -299,26 +318,26 @@ module Slaw
|
|
299
318
|
b.component(id: 'component-0') { |b|
|
300
319
|
b.doc(name: 'schedules') { |b|
|
301
320
|
b.meta { |b|
|
302
|
-
b.identification(source: "#
|
321
|
+
b.identification(source: "#slaw") { |b|
|
303
322
|
b.FRBRWork { |b|
|
304
|
-
b.FRBRthis(value: '/za/
|
305
|
-
b.FRBRuri(value: '/za/
|
323
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules')
|
324
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/schedules')
|
306
325
|
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
307
326
|
b.FRBRauthor(href: '#council', as: '#author')
|
308
327
|
b.FRBRcountry(value: 'za')
|
309
328
|
}
|
310
329
|
b.FRBRExpression { |b|
|
311
|
-
b.FRBRthis(value: '/za/
|
312
|
-
b.FRBRuri(value: '/za/
|
330
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main//schedules/eng@')
|
331
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
|
313
332
|
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
314
333
|
b.FRBRauthor(href: '#council', as: '#author')
|
315
334
|
b.FRBRlanguage(language: 'eng')
|
316
335
|
}
|
317
336
|
b.FRBRManifestation { |b|
|
318
|
-
b.FRBRthis(value: '/za/
|
319
|
-
b.FRBRuri(value: '/za/
|
337
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules/eng@')
|
338
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
|
320
339
|
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
321
|
-
b.FRBRauthor(href: '#openbylaws', as: '#author')
|
340
|
+
b.FRBRauthor(href: '#slaw', as: '#author')
|
322
341
|
}
|
323
342
|
}
|
324
343
|
}
|
data/slaw.gemspec
CHANGED
data/spec/act_spec.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'slaw'
|
5
|
+
|
6
|
+
describe Slaw::Act do
|
7
|
+
let(:filename) { File.dirname(__FILE__) + "/fixtures/community-fire-safety.xml" }
|
8
|
+
subject { Slaw::Act.new(filename) }
|
9
|
+
|
10
|
+
it 'should have correct basic properties' do
|
11
|
+
subject.title.should == 'Community Fire Safety By-law'
|
12
|
+
subject.amended?.should be_true
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should set the title correctly' do
|
16
|
+
subject.title = 'foo'
|
17
|
+
subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS)['value'].should == 'foo'
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should set the title if it doesnt exist' do
|
21
|
+
subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS).remove
|
22
|
+
subject.title = 'bar'
|
23
|
+
subject.title.should == 'bar'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should set the publication details' do
|
27
|
+
subject.meta.at_xpath('./a:publication', a: Slaw::NS).remove
|
28
|
+
|
29
|
+
subject.published!(name: 'foo', number: '1234', date: '2014-01-01')
|
30
|
+
subject.publication['name'].should == 'foo'
|
31
|
+
subject.publication['showAs'].should == 'foo'
|
32
|
+
subject.publication['number'].should == '1234'
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should get/set the work date' do
|
36
|
+
subject.date.should == '2002-02-28'
|
37
|
+
|
38
|
+
subject.date = '2014-01-01'
|
39
|
+
subject.date.should == '2014-01-01'
|
40
|
+
subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRdate[@name="Generation"]', a: Slaw::NS)['date'].should == '2014-01-01'
|
41
|
+
subject.meta.at_xpath('./a:identification/a:FRBRExpression/a:FRBRdate[@name="Generation"]', a: Slaw::NS)['date'].should == '2014-01-01'
|
42
|
+
|
43
|
+
subject.id_uri.should == '/za/by-law/2014/2002'
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should update the uri when the year changes' do
|
47
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
48
|
+
subject.year = '1980'
|
49
|
+
subject.id_uri.should == '/za/by-law/1980/2002'
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should validate' do
|
53
|
+
subject.validate.should == []
|
54
|
+
subject.validates?.should be_true
|
55
|
+
end
|
56
|
+
end
|
data/spec/bylaw_spec.rb
CHANGED
@@ -12,9 +12,16 @@ describe Slaw::ByLaw do
|
|
12
12
|
subject.amended?.should be_true
|
13
13
|
end
|
14
14
|
|
15
|
-
it 'should
|
16
|
-
subject.
|
17
|
-
subject.
|
15
|
+
it 'should update the uri when the region changes' do
|
16
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
17
|
+
subject.region = 'foo-bar'
|
18
|
+
subject.id_uri.should == '/za/by-law/foo-bar/2002/community-fire-safety'
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should update the uri when the name changes' do
|
22
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
23
|
+
subject.name = 'foo-bar'
|
24
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/foo-bar'
|
18
25
|
end
|
19
26
|
|
20
27
|
it 'should set the title if it doesnt exist' do
|
@@ -23,15 +30,6 @@ describe Slaw::ByLaw do
|
|
23
30
|
subject.title.should == 'bar as amended'
|
24
31
|
end
|
25
32
|
|
26
|
-
it 'should set the publication details' do
|
27
|
-
subject.meta.at_xpath('./a:publication', a: Slaw::NS).remove
|
28
|
-
|
29
|
-
subject.published!(name: 'foo', number: '1234', date: '2014-01-01')
|
30
|
-
subject.publication['name'].should == 'foo'
|
31
|
-
subject.publication['showAs'].should == 'foo'
|
32
|
-
subject.publication['number'].should == '1234'
|
33
|
-
end
|
34
|
-
|
35
33
|
it 'should get/set the work date' do
|
36
34
|
subject.date.should == '2002-02-28'
|
37
35
|
|
@@ -48,21 +46,4 @@ describe Slaw::ByLaw do
|
|
48
46
|
subject.year = '1980'
|
49
47
|
subject.id_uri.should == '/za/by-law/cape-town/1980/community-fire-safety'
|
50
48
|
end
|
51
|
-
|
52
|
-
it 'should update the uri when the region changes' do
|
53
|
-
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
54
|
-
subject.region = 'foo-bar'
|
55
|
-
subject.id_uri.should == '/za/by-law/foo-bar/2002/community-fire-safety'
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'should update the uri when the name changes' do
|
59
|
-
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
60
|
-
subject.name = 'foo-bar'
|
61
|
-
subject.id_uri.should == '/za/by-law/cape-town/2002/foo-bar'
|
62
|
-
end
|
63
|
-
|
64
|
-
it 'should validate' do
|
65
|
-
subject.validate.should == []
|
66
|
-
subject.validates?.should be_true
|
67
|
-
end
|
68
49
|
end
|
@@ -3,9 +3,8 @@
|
|
3
3
|
require 'builder'
|
4
4
|
|
5
5
|
require 'slaw'
|
6
|
-
require 'slaw/za/bylaw_generator'
|
7
6
|
|
8
|
-
describe Slaw::ZA::BylawGenerator do
|
7
|
+
describe Slaw::ActGenerator do
|
9
8
|
def parse(rule, s)
|
10
9
|
subject.builder.text_to_syntax_tree(s, {root: rule})
|
11
10
|
end
|
@@ -64,7 +63,7 @@ EOS
|
|
64
63
|
|
65
64
|
it 'should handle parts and odd section numbers' do
|
66
65
|
subject.parser.options = {section_number_after_title: false}
|
67
|
-
node = parse :
|
66
|
+
node = parse :act, <<EOS
|
68
67
|
PART 1
|
69
68
|
PREVENTION AND SUPPRESSION OF HEALTH NUISANCES
|
70
69
|
1.
|
@@ -189,7 +188,7 @@ EOS
|
|
189
188
|
|
190
189
|
context 'preamble' do
|
191
190
|
it 'should consider any text at the start to be preamble' do
|
192
|
-
node = parse :
|
191
|
+
node = parse :act, <<EOS
|
193
192
|
foo
|
194
193
|
bar
|
195
194
|
(1) stuff
|
@@ -208,7 +207,7 @@ baz
|
|
208
207
|
end
|
209
208
|
|
210
209
|
it 'should support an optional preamble' do
|
211
|
-
node = parse :
|
210
|
+
node = parse :act, <<EOS
|
212
211
|
PREAMBLE
|
213
212
|
foo
|
214
213
|
1. Section
|
@@ -219,7 +218,7 @@ EOS
|
|
219
218
|
end
|
220
219
|
|
221
220
|
it 'should support no preamble' do
|
222
|
-
node = parse :
|
221
|
+
node = parse :act, <<EOS
|
223
222
|
1. Section
|
224
223
|
bar
|
225
224
|
EOS
|
@@ -235,7 +234,7 @@ EOS
|
|
235
234
|
context 'sections' do
|
236
235
|
it 'should handle section numbers after title' do
|
237
236
|
subject.parser.options = {section_number_after_title: true}
|
238
|
-
node = parse :
|
237
|
+
node = parse :act, <<EOS
|
239
238
|
Section
|
240
239
|
1. (1) hello
|
241
240
|
EOS
|
@@ -247,7 +246,7 @@ EOS
|
|
247
246
|
|
248
247
|
it 'should handle section numbers before title' do
|
249
248
|
subject.parser.options = {section_number_after_title: false}
|
250
|
-
node = parse :
|
249
|
+
node = parse :act, <<EOS
|
251
250
|
1. Section
|
252
251
|
(1) hello
|
253
252
|
EOS
|
@@ -259,7 +258,7 @@ EOS
|
|
259
258
|
|
260
259
|
it 'should handle section numbers without a dot' do
|
261
260
|
subject.parser.options = {section_number_after_title: false}
|
262
|
-
node = parse :
|
261
|
+
node = parse :act, <<EOS
|
263
262
|
1 A section
|
264
263
|
(1) hello
|
265
264
|
2 Another section
|
@@ -277,7 +276,7 @@ EOS
|
|
277
276
|
|
278
277
|
it 'should handle sections without titles' do
|
279
278
|
subject.parser.options = {section_number_after_title: false}
|
280
|
-
node = parse :
|
279
|
+
node = parse :act, <<EOS
|
281
280
|
1. No owner or occupier of any shop or business premises or vacant land, blah blah
|
282
281
|
2. Notwithstanding the provision of any other By-law or legislation no person shall—
|
283
282
|
EOS
|
@@ -294,7 +293,7 @@ EOS
|
|
294
293
|
|
295
294
|
it 'should handle sections without titles and with subsections' do
|
296
295
|
subject.parser.options = {section_number_after_title: false}
|
297
|
-
node = parse :
|
296
|
+
node = parse :act, <<EOS
|
298
297
|
10. (1) Transporters must remove medical waste.
|
299
298
|
(2) Without limiting generality, stuff.
|
300
299
|
EOS
|
@@ -308,7 +307,7 @@ EOS
|
|
308
307
|
|
309
308
|
it 'should realise complex section titles are actually section content' do
|
310
309
|
subject.parser.options = {section_number_after_title: false}
|
311
|
-
node = parse :
|
310
|
+
node = parse :act, <<EOS
|
312
311
|
10. The owner of any premises which is let or sublet to more than one tenant, shall maintain at all times in a clean and sanitary condition every part of such premises as may be used in common by more than one tenant.
|
313
312
|
11. No person shall keep, cause or suffer to be kept any factory or trade premises so as to cause or give rise to smells or effluvia that constitute a health nuisance.
|
314
313
|
EOS
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 0.19.1
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: mimemagic
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.2.1
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.2.1
|
125
139
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
126
140
|
acts from plain text and PDF documents.
|
127
141
|
email:
|
@@ -143,6 +157,7 @@ files:
|
|
143
157
|
- lib/slaw/bylaw.rb
|
144
158
|
- lib/slaw/collection.rb
|
145
159
|
- lib/slaw/extract/extractor.rb
|
160
|
+
- lib/slaw/generator.rb
|
146
161
|
- lib/slaw/lifecycle_event.rb
|
147
162
|
- lib/slaw/logging.rb
|
148
163
|
- lib/slaw/namespace.rb
|
@@ -159,10 +174,10 @@ files:
|
|
159
174
|
- lib/slaw/schemas/xml.xsd
|
160
175
|
- lib/slaw/version.rb
|
161
176
|
- lib/slaw/xml_support.rb
|
162
|
-
- lib/slaw/za/bylaw.treetop
|
163
|
-
- lib/slaw/za/bylaw_generator.rb
|
164
|
-
- lib/slaw/za/bylaw_nodes.rb
|
177
|
+
- lib/slaw/za/act.treetop
|
178
|
+
- lib/slaw/za/act_nodes.rb
|
165
179
|
- slaw.gemspec
|
180
|
+
- spec/act_spec.rb
|
166
181
|
- spec/bylaw_spec.rb
|
167
182
|
- spec/extract/extractor_spec.rb
|
168
183
|
- spec/fixtures/community-fire-safety.xml
|
@@ -170,7 +185,7 @@ files:
|
|
170
185
|
- spec/parse/cleanser_spec.rb
|
171
186
|
- spec/spec_helper.rb
|
172
187
|
- spec/xml_helpers.rb
|
173
|
-
- spec/za/bylaw_spec.rb
|
188
|
+
- spec/za/act_spec.rb
|
174
189
|
homepage: ''
|
175
190
|
licenses:
|
176
191
|
- MIT
|
@@ -196,6 +211,7 @@ signing_key:
|
|
196
211
|
specification_version: 4
|
197
212
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
198
213
|
test_files:
|
214
|
+
- spec/act_spec.rb
|
199
215
|
- spec/bylaw_spec.rb
|
200
216
|
- spec/extract/extractor_spec.rb
|
201
217
|
- spec/fixtures/community-fire-safety.xml
|
@@ -203,4 +219,4 @@ test_files:
|
|
203
219
|
- spec/parse/cleanser_spec.rb
|
204
220
|
- spec/spec_helper.rb
|
205
221
|
- spec/xml_helpers.rb
|
206
|
-
- spec/za/bylaw_spec.rb
|
222
|
+
- spec/za/act_spec.rb
|
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'slaw/za/bylaw_nodes'
|
2
|
-
|
3
|
-
module Slaw
|
4
|
-
# Support specifically for South Africa
|
5
|
-
module ZA
|
6
|
-
|
7
|
-
# Support class for generating South African bylaws
|
8
|
-
class BylawGenerator
|
9
|
-
Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
|
10
|
-
|
11
|
-
# [Treetop::Runtime::CompiledParser] compiled bylaw parser
|
12
|
-
attr_accessor :parser
|
13
|
-
|
14
|
-
# [Slaw::Parse::Builder] builder used by the generator
|
15
|
-
attr_accessor :builder
|
16
|
-
|
17
|
-
def initialize
|
18
|
-
@parser = Slaw::ZA::BylawParser.new
|
19
|
-
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
20
|
-
@cleanser = Slaw::Parse::Cleanser.new
|
21
|
-
end
|
22
|
-
|
23
|
-
# Generate a Slaw::Bylaw instance from plain text.
|
24
|
-
#
|
25
|
-
# @param text [String] plain text
|
26
|
-
#
|
27
|
-
# @return [Slaw::ByLaw] the resulting bylaw
|
28
|
-
def generate_from_text(text)
|
29
|
-
bylaw = Slaw::ByLaw.new
|
30
|
-
bylaw.doc = @builder.parse_and_process_text(cleanup(text))
|
31
|
-
bylaw
|
32
|
-
end
|
33
|
-
|
34
|
-
def cleanup(text)
|
35
|
-
text = @cleanser.cleanup(text)
|
36
|
-
text = @cleanser.reformat(text)
|
37
|
-
text
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|