slaw 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/slaw +13 -13
- data/lib/slaw.rb +1 -1
- data/lib/slaw/extract/extractor.rb +16 -4
- data/lib/slaw/generator.rb +36 -0
- data/lib/slaw/parse/builder.rb +0 -1
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/{bylaw.treetop → act.treetop} +4 -3
- data/lib/slaw/za/{bylaw_nodes.rb → act_nodes.rb} +72 -53
- data/slaw.gemspec +1 -0
- data/spec/act_spec.rb +56 -0
- data/spec/bylaw_spec.rb +10 -29
- data/spec/za/{bylaw_spec.rb → act_spec.rb} +11 -12
- metadata +23 -7
- data/lib/slaw/za/bylaw_generator.rb +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d293949a3ac2383cd254f0efd2268ef24286f1d
|
4
|
+
data.tar.gz: 2439db865559b2f9b494b7755004d3f7a7d9b4ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a7af42dc109723ea0908e8bb3ba9cbf4269f923dbd8a68e6cabd87f53e98dec59884cb7d4daf0df8de98b3d209302a9ba214e45d04fadffd372fabf91c9db26
|
7
|
+
data.tar.gz: 41bac579c874b9ef29aaf5a87e243fd9ba268efe46c60e94cdc728e0e4d0f01b37b60b40fb0fa80a82889a43ac44174c6d8385386fb72a98c37dd0c5868c3b97
|
data/README.md
CHANGED
@@ -40,7 +40,7 @@ installed by default on most systems (including Mac). On Ubuntu you can use:
|
|
40
40
|
|
41
41
|
The simplest way to use Slaw is via the commandline:
|
42
42
|
|
43
|
-
$ slaw
|
43
|
+
$ slaw parse myfile.pdf
|
44
44
|
|
45
45
|
## Overview
|
46
46
|
|
@@ -79,7 +79,7 @@ extractor = Slaw::Extract::Extractor.new
|
|
79
79
|
text = extractor.extract_from_pdf('/path/to/file.pdf')
|
80
80
|
|
81
81
|
# parse the text into a XML and
|
82
|
-
generator = Slaw::ZA::BylawGenerator.new
|
82
|
+
generator = Slaw::ActGenerator.new
|
83
83
|
bylaw = generator.generate_from_text(text)
|
84
84
|
puts bylaw.to_xml(indent: 2)
|
85
85
|
|
data/bin/slaw
CHANGED
@@ -4,13 +4,14 @@ require 'thor'
|
|
4
4
|
require 'slaw'
|
5
5
|
|
6
6
|
class SlawCLI < Thor
|
7
|
-
desc "convert FILE", "convert FILE into Akoma Ntoso XML"
|
8
|
-
option :input, enum: ['text', 'pdf'], desc: "Type of input, determined by file extension by default."
|
9
|
-
option :output, enum: ['text', 'xml'], default: 'xml', desc: "Type of output required."
|
10
|
-
|
11
7
|
# TODO: support different grammars and locales
|
12
8
|
|
13
|
-
|
9
|
+
desc "parse FILE", "parse FILE into Akoma Ntoso XML"
|
10
|
+
option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
|
11
|
+
option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
|
12
|
+
option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
|
13
|
+
def parse(name)
|
14
|
+
Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
|
14
15
|
extractor = Slaw::Extract::Extractor.new
|
15
16
|
|
16
17
|
case options[:input]
|
@@ -22,14 +23,13 @@ class SlawCLI < Thor
|
|
22
23
|
text = extractor.extract_from_file(name)
|
23
24
|
end
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
26
|
+
generator = Slaw::ActGenerator.new
|
27
|
+
act = generator.generate_from_text(text)
|
28
|
+
|
29
|
+
# definitions?
|
30
|
+
generator.builder.link_definitions(act.doc) if options[:definitions]
|
31
|
+
|
32
|
+
puts act.to_xml(indent: 2)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
data/lib/slaw.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'tempfile'
|
3
|
+
require 'mimemagic'
|
3
4
|
|
4
5
|
module Slaw
|
5
6
|
module Extract
|
@@ -31,13 +32,19 @@ module Slaw
|
|
31
32
|
def extract_from_file(filename)
|
32
33
|
ext = filename[-4..-1].downcase
|
33
34
|
|
34
|
-
|
35
|
-
|
35
|
+
mimetype = get_mimetype(filename)
|
36
|
+
|
37
|
+
case mimetype && mimetype.type
|
38
|
+
when 'application/pdf'
|
36
39
|
extract_from_pdf(filename)
|
37
|
-
when '
|
40
|
+
when 'text/plain', nil
|
38
41
|
extract_from_text(filename)
|
39
42
|
else
|
40
|
-
|
43
|
+
if mimetype.text?
|
44
|
+
extract_from_text(filename)
|
45
|
+
else
|
46
|
+
raise ArgumentError.new("Unsupported file type #{ext} (#{mimetype || unknown})")
|
47
|
+
end
|
41
48
|
end
|
42
49
|
end
|
43
50
|
|
@@ -103,6 +110,11 @@ module Slaw
|
|
103
110
|
end
|
104
111
|
end
|
105
112
|
|
113
|
+
def get_mimetype(filename)
|
114
|
+
File.open(filename) { |f| MimeMagic.by_magic(f) } \
|
115
|
+
|| MimeMagic.by_path(filename)
|
116
|
+
end
|
117
|
+
|
106
118
|
# Get location of the pdftotext executable for all instances.
|
107
119
|
def self.pdftotext_path
|
108
120
|
@@pdftotext_path
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Slaw
|
2
|
+
# Base class for generating Act documents
|
3
|
+
class ActGenerator
|
4
|
+
Treetop.load(File.dirname(__FILE__) + "/za/act.treetop")
|
5
|
+
|
6
|
+
# [Treetop::Runtime::CompiledParser] compiled parser
|
7
|
+
attr_accessor :parser
|
8
|
+
|
9
|
+
# [Slaw::Parse::Builder] builder used by the generator
|
10
|
+
attr_accessor :builder
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@parser = Slaw::ZA::ActParser.new
|
14
|
+
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
15
|
+
@cleanser = Slaw::Parse::Cleanser.new
|
16
|
+
@document_class = Slaw::Act
|
17
|
+
end
|
18
|
+
|
19
|
+
# Generate a Slaw::Act instance from plain text.
|
20
|
+
#
|
21
|
+
# @param text [String] plain text
|
22
|
+
#
|
23
|
+
# @return [Slaw::Act] the resulting act
|
24
|
+
def generate_from_text(text)
|
25
|
+
act = @document_class.new
|
26
|
+
act.doc = @builder.parse_and_process_text(cleanup(text))
|
27
|
+
act
|
28
|
+
end
|
29
|
+
|
30
|
+
def cleanup(text)
|
31
|
+
text = @cleanser.cleanup(text)
|
32
|
+
text = @cleanser.reformat(text)
|
33
|
+
text
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/slaw/parse/builder.rb
CHANGED
data/lib/slaw/version.rb
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
require 'slaw/parse/grammar_helpers'
|
4
|
+
require 'slaw/za/act_nodes'
|
4
5
|
|
5
6
|
module Slaw
|
6
7
|
module ZA
|
7
|
-
grammar Bylaw
|
8
|
+
grammar Act
|
8
9
|
include Slaw::Parse::GrammarHelpers
|
9
10
|
|
10
11
|
########
|
11
12
|
# major containers
|
12
13
|
|
13
|
-
rule
|
14
|
+
rule act
|
14
15
|
preamble
|
15
16
|
chapters:chapter*
|
16
|
-
schedules:schedules <
|
17
|
+
schedules:schedules <Act>
|
17
18
|
end
|
18
19
|
|
19
20
|
rule preamble
|
@@ -1,58 +1,77 @@
|
|
1
1
|
module Slaw
|
2
2
|
module ZA
|
3
|
-
module
|
4
|
-
class
|
3
|
+
module Act
|
4
|
+
class Act < Treetop::Runtime::SyntaxNode
|
5
|
+
FRBR_URI = '/za/act/1980/01'
|
6
|
+
|
5
7
|
def to_xml(b)
|
6
8
|
b.act(contains: "originalVersion") { |b|
|
7
|
-
b
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
b.FRBRalias(value: 'By-Law Short Title')
|
14
|
-
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
15
|
-
b.FRBRauthor(href: '#council', as: '#author')
|
16
|
-
b.FRBRcountry(value: 'za')
|
17
|
-
}
|
18
|
-
b.FRBRExpression { |b|
|
19
|
-
b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
|
20
|
-
b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
|
21
|
-
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
22
|
-
b.FRBRauthor(href: '#council', as: '#author')
|
23
|
-
b.FRBRlanguage(language: 'eng')
|
24
|
-
}
|
25
|
-
b.FRBRManifestation { |b|
|
26
|
-
b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
|
27
|
-
b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
|
28
|
-
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
29
|
-
b.FRBRauthor(href: '#openbylaws', as: '#author')
|
30
|
-
}
|
31
|
-
}
|
9
|
+
write_meta(b)
|
10
|
+
write_preamble(b)
|
11
|
+
write_body(b)
|
12
|
+
write_schedules(b)
|
13
|
+
}
|
14
|
+
end
|
32
15
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
showAs: 'Province of Western Cape: Provincial Gazette')
|
16
|
+
def write_meta(b)
|
17
|
+
b.meta { |b|
|
18
|
+
write_identification(b)
|
37
19
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
20
|
+
b.publication(date: '1980-01-01',
|
21
|
+
name: 'Publication Name',
|
22
|
+
number: 'XXXX',
|
23
|
+
showAs: 'Publication Name')
|
24
|
+
|
25
|
+
b.references(source: "#this") {
|
26
|
+
b.TLCOrganization(id: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
|
27
|
+
b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council', showAs: "Council")
|
28
|
+
b.TLCRole(id: 'author', href: '/ontology/role/author', showAs: 'Author')
|
43
29
|
}
|
30
|
+
}
|
31
|
+
end
|
44
32
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
33
|
+
def write_identification(b)
|
34
|
+
b.identification(source: "#slaw") { |b|
|
35
|
+
# use stub values so that we can generate a validating document
|
36
|
+
b.FRBRWork { |b|
|
37
|
+
b.FRBRthis(value: "#{FRBR_URI}/main")
|
38
|
+
b.FRBRuri(value: '/za/act/locale/1980/name')
|
39
|
+
b.FRBRalias(value: 'Short Title')
|
40
|
+
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
41
|
+
b.FRBRauthor(href: '#council', as: '#author')
|
42
|
+
b.FRBRcountry(value: 'za')
|
43
|
+
}
|
44
|
+
b.FRBRExpression { |b|
|
45
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
|
46
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
|
47
|
+
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
48
|
+
b.FRBRauthor(href: '#council', as: '#author')
|
49
|
+
b.FRBRlanguage(language: 'eng')
|
50
|
+
}
|
51
|
+
b.FRBRManifestation { |b|
|
52
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
|
53
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
|
54
|
+
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
55
|
+
b.FRBRauthor(href: '#slaw', as: '#author')
|
56
|
+
}
|
57
|
+
}
|
58
|
+
end
|
50
59
|
|
51
|
-
|
52
|
-
|
60
|
+
def write_preamble(b)
|
61
|
+
if preamble.text_value != ""
|
62
|
+
b.preamble { |b|
|
63
|
+
preamble.to_xml(b)
|
53
64
|
}
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def write_body(b)
|
69
|
+
b.body { |b|
|
70
|
+
chapters.elements.each { |e| e.to_xml(b) }
|
54
71
|
}
|
72
|
+
end
|
55
73
|
|
74
|
+
def write_schedules(b)
|
56
75
|
schedules.to_xml(b)
|
57
76
|
end
|
58
77
|
end
|
@@ -179,7 +198,7 @@ module Slaw
|
|
179
198
|
# a section title of the form:
|
180
199
|
#
|
181
200
|
# Definitions
|
182
|
-
# 1. In this
|
201
|
+
# 1. In this act...
|
183
202
|
|
184
203
|
def num
|
185
204
|
section_title_prefix.number_letter.text_value
|
@@ -194,7 +213,7 @@ module Slaw
|
|
194
213
|
# a section title of the form:
|
195
214
|
#
|
196
215
|
# 1. Definitions
|
197
|
-
# In this
|
216
|
+
# In this act...
|
198
217
|
#
|
199
218
|
# In this format, the title is optional and the section content may
|
200
219
|
# start where we think the title is.
|
@@ -299,26 +318,26 @@ module Slaw
|
|
299
318
|
b.component(id: 'component-0') { |b|
|
300
319
|
b.doc(name: 'schedules') { |b|
|
301
320
|
b.meta { |b|
|
302
|
-
b.identification(source: "#openbylaws") { |b|
|
321
|
+
b.identification(source: "#slaw") { |b|
|
303
322
|
b.FRBRWork { |b|
|
304
|
-
b.FRBRthis(value: '/za/
|
305
|
-
b.FRBRuri(value: '/za/
|
323
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules')
|
324
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/schedules')
|
306
325
|
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
307
326
|
b.FRBRauthor(href: '#council', as: '#author')
|
308
327
|
b.FRBRcountry(value: 'za')
|
309
328
|
}
|
310
329
|
b.FRBRExpression { |b|
|
311
|
-
b.FRBRthis(value: '/za/
|
312
|
-
b.FRBRuri(value: '/za/
|
330
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main//schedules/eng@')
|
331
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
|
313
332
|
b.FRBRdate(date: '1980-01-01', name: 'Generation')
|
314
333
|
b.FRBRauthor(href: '#council', as: '#author')
|
315
334
|
b.FRBRlanguage(language: 'eng')
|
316
335
|
}
|
317
336
|
b.FRBRManifestation { |b|
|
318
|
-
b.FRBRthis(value: '/za/
|
319
|
-
b.FRBRuri(value: '/za/
|
337
|
+
b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules/eng@')
|
338
|
+
b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
|
320
339
|
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
|
321
|
-
b.FRBRauthor(href: '#openbylaws', as: '#author')
|
340
|
+
b.FRBRauthor(href: '#slaw', as: '#author')
|
322
341
|
}
|
323
342
|
}
|
324
343
|
}
|
data/slaw.gemspec
CHANGED
data/spec/act_spec.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'slaw'
|
5
|
+
|
6
|
+
describe Slaw::Act do
|
7
|
+
let(:filename) { File.dirname(__FILE__) + "/fixtures/community-fire-safety.xml" }
|
8
|
+
subject { Slaw::Act.new(filename) }
|
9
|
+
|
10
|
+
it 'should have correct basic properties' do
|
11
|
+
subject.title.should == 'Community Fire Safety By-law'
|
12
|
+
subject.amended?.should be_true
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should set the title correctly' do
|
16
|
+
subject.title = 'foo'
|
17
|
+
subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS)['value'].should == 'foo'
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should set the title if it doesnt exist' do
|
21
|
+
subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS).remove
|
22
|
+
subject.title = 'bar'
|
23
|
+
subject.title.should == 'bar'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should set the publication details' do
|
27
|
+
subject.meta.at_xpath('./a:publication', a: Slaw::NS).remove
|
28
|
+
|
29
|
+
subject.published!(name: 'foo', number: '1234', date: '2014-01-01')
|
30
|
+
subject.publication['name'].should == 'foo'
|
31
|
+
subject.publication['showAs'].should == 'foo'
|
32
|
+
subject.publication['number'].should == '1234'
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should get/set the work date' do
|
36
|
+
subject.date.should == '2002-02-28'
|
37
|
+
|
38
|
+
subject.date = '2014-01-01'
|
39
|
+
subject.date.should == '2014-01-01'
|
40
|
+
subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRdate[@name="Generation"]', a: Slaw::NS)['date'].should == '2014-01-01'
|
41
|
+
subject.meta.at_xpath('./a:identification/a:FRBRExpression/a:FRBRdate[@name="Generation"]', a: Slaw::NS)['date'].should == '2014-01-01'
|
42
|
+
|
43
|
+
subject.id_uri.should == '/za/by-law/2014/2002'
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should update the uri when the year changes' do
|
47
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
48
|
+
subject.year = '1980'
|
49
|
+
subject.id_uri.should == '/za/by-law/1980/2002'
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should validate' do
|
53
|
+
subject.validate.should == []
|
54
|
+
subject.validates?.should be_true
|
55
|
+
end
|
56
|
+
end
|
data/spec/bylaw_spec.rb
CHANGED
@@ -12,9 +12,16 @@ describe Slaw::ByLaw do
|
|
12
12
|
subject.amended?.should be_true
|
13
13
|
end
|
14
14
|
|
15
|
-
it 'should
|
16
|
-
subject.
|
17
|
-
subject.
|
15
|
+
it 'should update the uri when the region changes' do
|
16
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
17
|
+
subject.region = 'foo-bar'
|
18
|
+
subject.id_uri.should == '/za/by-law/foo-bar/2002/community-fire-safety'
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should update the uri when the name changes' do
|
22
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
23
|
+
subject.name = 'foo-bar'
|
24
|
+
subject.id_uri.should == '/za/by-law/cape-town/2002/foo-bar'
|
18
25
|
end
|
19
26
|
|
20
27
|
it 'should set the title if it doesnt exist' do
|
@@ -23,15 +30,6 @@ describe Slaw::ByLaw do
|
|
23
30
|
subject.title.should == 'bar as amended'
|
24
31
|
end
|
25
32
|
|
26
|
-
it 'should set the publication details' do
|
27
|
-
subject.meta.at_xpath('./a:publication', a: Slaw::NS).remove
|
28
|
-
|
29
|
-
subject.published!(name: 'foo', number: '1234', date: '2014-01-01')
|
30
|
-
subject.publication['name'].should == 'foo'
|
31
|
-
subject.publication['showAs'].should == 'foo'
|
32
|
-
subject.publication['number'].should == '1234'
|
33
|
-
end
|
34
|
-
|
35
33
|
it 'should get/set the work date' do
|
36
34
|
subject.date.should == '2002-02-28'
|
37
35
|
|
@@ -48,21 +46,4 @@ describe Slaw::ByLaw do
|
|
48
46
|
subject.year = '1980'
|
49
47
|
subject.id_uri.should == '/za/by-law/cape-town/1980/community-fire-safety'
|
50
48
|
end
|
51
|
-
|
52
|
-
it 'should update the uri when the region changes' do
|
53
|
-
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
54
|
-
subject.region = 'foo-bar'
|
55
|
-
subject.id_uri.should == '/za/by-law/foo-bar/2002/community-fire-safety'
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'should update the uri when the name changes' do
|
59
|
-
subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
|
60
|
-
subject.name = 'foo-bar'
|
61
|
-
subject.id_uri.should == '/za/by-law/cape-town/2002/foo-bar'
|
62
|
-
end
|
63
|
-
|
64
|
-
it 'should validate' do
|
65
|
-
subject.validate.should == []
|
66
|
-
subject.validates?.should be_true
|
67
|
-
end
|
68
49
|
end
|
@@ -3,9 +3,8 @@
|
|
3
3
|
require 'builder'
|
4
4
|
|
5
5
|
require 'slaw'
|
6
|
-
require 'slaw/za/bylaw_generator'
|
7
6
|
|
8
|
-
describe Slaw::ZA::BylawGenerator do
|
7
|
+
describe Slaw::ActGenerator do
|
9
8
|
def parse(rule, s)
|
10
9
|
subject.builder.text_to_syntax_tree(s, {root: rule})
|
11
10
|
end
|
@@ -64,7 +63,7 @@ EOS
|
|
64
63
|
|
65
64
|
it 'should handle parts and odd section numbers' do
|
66
65
|
subject.parser.options = {section_number_after_title: false}
|
67
|
-
node = parse :
|
66
|
+
node = parse :act, <<EOS
|
68
67
|
PART 1
|
69
68
|
PREVENTION AND SUPPRESSION OF HEALTH NUISANCES
|
70
69
|
1.
|
@@ -189,7 +188,7 @@ EOS
|
|
189
188
|
|
190
189
|
context 'preamble' do
|
191
190
|
it 'should consider any text at the start to be preamble' do
|
192
|
-
node = parse :
|
191
|
+
node = parse :act, <<EOS
|
193
192
|
foo
|
194
193
|
bar
|
195
194
|
(1) stuff
|
@@ -208,7 +207,7 @@ baz
|
|
208
207
|
end
|
209
208
|
|
210
209
|
it 'should support an optional preamble' do
|
211
|
-
node = parse :
|
210
|
+
node = parse :act, <<EOS
|
212
211
|
PREAMBLE
|
213
212
|
foo
|
214
213
|
1. Section
|
@@ -219,7 +218,7 @@ EOS
|
|
219
218
|
end
|
220
219
|
|
221
220
|
it 'should support no preamble' do
|
222
|
-
node = parse :
|
221
|
+
node = parse :act, <<EOS
|
223
222
|
1. Section
|
224
223
|
bar
|
225
224
|
EOS
|
@@ -235,7 +234,7 @@ EOS
|
|
235
234
|
context 'sections' do
|
236
235
|
it 'should handle section numbers after title' do
|
237
236
|
subject.parser.options = {section_number_after_title: true}
|
238
|
-
node = parse :
|
237
|
+
node = parse :act, <<EOS
|
239
238
|
Section
|
240
239
|
1. (1) hello
|
241
240
|
EOS
|
@@ -247,7 +246,7 @@ EOS
|
|
247
246
|
|
248
247
|
it 'should handle section numbers before title' do
|
249
248
|
subject.parser.options = {section_number_after_title: false}
|
250
|
-
node = parse :
|
249
|
+
node = parse :act, <<EOS
|
251
250
|
1. Section
|
252
251
|
(1) hello
|
253
252
|
EOS
|
@@ -259,7 +258,7 @@ EOS
|
|
259
258
|
|
260
259
|
it 'should handle section numbers without a dot' do
|
261
260
|
subject.parser.options = {section_number_after_title: false}
|
262
|
-
node = parse :
|
261
|
+
node = parse :act, <<EOS
|
263
262
|
1 A section
|
264
263
|
(1) hello
|
265
264
|
2 Another section
|
@@ -277,7 +276,7 @@ EOS
|
|
277
276
|
|
278
277
|
it 'should handle sections without titles' do
|
279
278
|
subject.parser.options = {section_number_after_title: false}
|
280
|
-
node = parse :
|
279
|
+
node = parse :act, <<EOS
|
281
280
|
1. No owner or occupier of any shop or business premises or vacant land, blah blah
|
282
281
|
2. Notwithstanding the provision of any other By-law or legislation no person shall—
|
283
282
|
EOS
|
@@ -294,7 +293,7 @@ EOS
|
|
294
293
|
|
295
294
|
it 'should handle sections without titles and with subsections' do
|
296
295
|
subject.parser.options = {section_number_after_title: false}
|
297
|
-
node = parse :
|
296
|
+
node = parse :act, <<EOS
|
298
297
|
10. (1) Transporters must remove medical waste.
|
299
298
|
(2) Without limiting generality, stuff.
|
300
299
|
EOS
|
@@ -308,7 +307,7 @@ EOS
|
|
308
307
|
|
309
308
|
it 'should realise complex section titles are actually section content' do
|
310
309
|
subject.parser.options = {section_number_after_title: false}
|
311
|
-
node = parse :
|
310
|
+
node = parse :act, <<EOS
|
312
311
|
10. The owner of any premises which is let or sublet to more than one tenant, shall maintain at all times in a clean and sanitary condition every part of such premises as may be used in common by more than one tenant.
|
313
312
|
11. No person shall keep, cause or suffer to be kept any factory or trade premises so as to cause or give rise to smells or effluvia that constitute a health nuisance.
|
314
313
|
EOS
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 0.19.1
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: mimemagic
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.2.1
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.2.1
|
125
139
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
126
140
|
acts from plain text and PDF documents.
|
127
141
|
email:
|
@@ -143,6 +157,7 @@ files:
|
|
143
157
|
- lib/slaw/bylaw.rb
|
144
158
|
- lib/slaw/collection.rb
|
145
159
|
- lib/slaw/extract/extractor.rb
|
160
|
+
- lib/slaw/generator.rb
|
146
161
|
- lib/slaw/lifecycle_event.rb
|
147
162
|
- lib/slaw/logging.rb
|
148
163
|
- lib/slaw/namespace.rb
|
@@ -159,10 +174,10 @@ files:
|
|
159
174
|
- lib/slaw/schemas/xml.xsd
|
160
175
|
- lib/slaw/version.rb
|
161
176
|
- lib/slaw/xml_support.rb
|
162
|
-
- lib/slaw/za/bylaw.treetop
|
163
|
-
- lib/slaw/za/bylaw_generator.rb
|
164
|
-
- lib/slaw/za/bylaw_nodes.rb
|
177
|
+
- lib/slaw/za/act.treetop
|
178
|
+
- lib/slaw/za/act_nodes.rb
|
165
179
|
- slaw.gemspec
|
180
|
+
- spec/act_spec.rb
|
166
181
|
- spec/bylaw_spec.rb
|
167
182
|
- spec/extract/extractor_spec.rb
|
168
183
|
- spec/fixtures/community-fire-safety.xml
|
@@ -170,7 +185,7 @@ files:
|
|
170
185
|
- spec/parse/cleanser_spec.rb
|
171
186
|
- spec/spec_helper.rb
|
172
187
|
- spec/xml_helpers.rb
|
173
|
-
- spec/za/bylaw_spec.rb
|
188
|
+
- spec/za/act_spec.rb
|
174
189
|
homepage: ''
|
175
190
|
licenses:
|
176
191
|
- MIT
|
@@ -196,6 +211,7 @@ signing_key:
|
|
196
211
|
specification_version: 4
|
197
212
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
198
213
|
test_files:
|
214
|
+
- spec/act_spec.rb
|
199
215
|
- spec/bylaw_spec.rb
|
200
216
|
- spec/extract/extractor_spec.rb
|
201
217
|
- spec/fixtures/community-fire-safety.xml
|
@@ -203,4 +219,4 @@ test_files:
|
|
203
219
|
- spec/parse/cleanser_spec.rb
|
204
220
|
- spec/spec_helper.rb
|
205
221
|
- spec/xml_helpers.rb
|
206
|
-
- spec/za/bylaw_spec.rb
|
222
|
+
- spec/za/act_spec.rb
|
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'slaw/za/bylaw_nodes'
|
2
|
-
|
3
|
-
module Slaw
|
4
|
-
# Support specifically for South Africa
|
5
|
-
module ZA
|
6
|
-
|
7
|
-
# Support class for generating South African bylaws
|
8
|
-
class BylawGenerator
|
9
|
-
Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
|
10
|
-
|
11
|
-
# [Treetop::Runtime::CompiledParser] compiled bylaw parser
|
12
|
-
attr_accessor :parser
|
13
|
-
|
14
|
-
# [Slaw::Parse::Builder] builder used by the generator
|
15
|
-
attr_accessor :builder
|
16
|
-
|
17
|
-
def initialize
|
18
|
-
@parser = Slaw::ZA::BylawParser.new
|
19
|
-
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
20
|
-
@cleanser = Slaw::Parse::Cleanser.new
|
21
|
-
end
|
22
|
-
|
23
|
-
# Generate a Slaw::Bylaw instance from plain text.
|
24
|
-
#
|
25
|
-
# @param text [String] plain text
|
26
|
-
#
|
27
|
-
# @return [Slaw::ByLaw] the resulting bylaw
|
28
|
-
def generate_from_text(text)
|
29
|
-
bylaw = Slaw::ByLaw.new
|
30
|
-
bylaw.doc = @builder.parse_and_process_text(cleanup(text))
|
31
|
-
bylaw
|
32
|
-
end
|
33
|
-
|
34
|
-
def cleanup(text)
|
35
|
-
text = @cleanser.cleanup(text)
|
36
|
-
text = @cleanser.reformat(text)
|
37
|
-
text
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|