slaw 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 017be61dfca724d0875193ce8043557300bb1a45
4
- data.tar.gz: b42b32a4e9eb80761b856e343d0c2c278c934adb
3
+ metadata.gz: 5d293949a3ac2383cd254f0efd2268ef24286f1d
4
+ data.tar.gz: 2439db865559b2f9b494b7755004d3f7a7d9b4ba
5
5
  SHA512:
6
- metadata.gz: b2654b2c2242caff29794f601bfcccd8b6898ed1879e0ccf35bbdebfc3ed511bf68cbf2d5ad63eb4c6b3c898cbe9674686c1e4233198aa4e8ae85192ac6c63d1
7
- data.tar.gz: ffd5e6fa31da9d4fb9c3766fcd850994418a108fb364eac9eab873b394ea0c9ba50c039deeffd5abd94eeeee7c5068c4399faf8503006d4795aa3a5990e33528
6
+ metadata.gz: 2a7af42dc109723ea0908e8bb3ba9cbf4269f923dbd8a68e6cabd87f53e98dec59884cb7d4daf0df8de98b3d209302a9ba214e45d04fadffd372fabf91c9db26
7
+ data.tar.gz: 41bac579c874b9ef29aaf5a87e243fd9ba268efe46c60e94cdc728e0e4d0f01b37b60b40fb0fa80a82889a43ac44174c6d8385386fb72a98c37dd0c5868c3b97
data/README.md CHANGED
@@ -40,7 +40,7 @@ installed by default on most systems (including Mac). On Ubuntu you can use:
40
40
 
41
41
  The simplest way to use Slaw is via the command line:
42
42
 
43
- $ slaw convert myfile.pdf
43
+ $ slaw parse myfile.pdf
44
44
 
45
45
  ## Overview
46
46
 
@@ -79,7 +79,7 @@ extractor = Slaw::Extract::Extractor.new
79
79
  text = extractor.extract_from_pdf('/path/to/file.pdf')
80
80
 
81
81
  # parse the text into XML and print it
82
- generator = Slaw::ZA::BylawGenerator.new
82
+ generator = Slaw::ActGenerator.new
83
83
  bylaw = generator.generate_from_text(text)
84
84
  puts bylaw.to_xml(indent: 2)
85
85
 
data/bin/slaw CHANGED
@@ -4,13 +4,14 @@ require 'thor'
4
4
  require 'slaw'
5
5
 
6
6
  class SlawCLI < Thor
7
- desc "convert FILE", "convert FILE into Akoma Ntoso XML"
8
- option :input, enum: ['text', 'pdf'], desc: "Type of input, determined by file extension by default."
9
- option :output, enum: ['text', 'xml'], default: 'xml', desc: "Type of output required."
10
-
11
7
  # TODO: support different grammars and locales
12
8
 
13
- def convert(name)
9
+ desc "parse FILE", "parse FILE into Akoma Ntoso XML"
10
+ option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
11
+ option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
12
+ option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
13
+ def parse(name)
14
+ Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
14
15
  extractor = Slaw::Extract::Extractor.new
15
16
 
16
17
  case options[:input]
@@ -22,14 +23,13 @@ class SlawCLI < Thor
22
23
  text = extractor.extract_from_file(name)
23
24
  end
24
25
 
25
- case options[:output]
26
- when 'text'
27
- puts text
28
- when 'xml'
29
- generator = Slaw::ZA::BylawGenerator.new
30
- bylaw = generator.generate_from_text(text)
31
- puts bylaw.to_xml(indent: 2)
32
- end
26
+ generator = Slaw::ActGenerator.new
27
+ act = generator.generate_from_text(text)
28
+
29
+ # find and link definitions only when the --definitions option is given
30
+ generator.builder.link_definitions(act.doc) if options[:definitions]
31
+
32
+ puts act.to_xml(indent: 2)
33
33
  end
34
34
  end
35
35
 
data/lib/slaw.rb CHANGED
@@ -17,7 +17,7 @@ require 'slaw/parse/builder'
17
17
  require 'slaw/parse/cleanser'
18
18
  require 'slaw/parse/error'
19
19
 
20
- require 'slaw/za/bylaw_generator'
20
+ require 'slaw/generator'
21
21
  require 'slaw/extract/extractor'
22
22
 
23
23
  module Slaw
@@ -1,5 +1,6 @@
1
1
  require 'open3'
2
2
  require 'tempfile'
3
+ require 'mimemagic'
3
4
 
4
5
  module Slaw
5
6
  module Extract
@@ -31,13 +32,19 @@ module Slaw
31
32
  def extract_from_file(filename)
32
33
  ext = filename[-4..-1].downcase
33
34
 
34
- case ext
35
- when '.pdf'
35
+ mimetype = get_mimetype(filename)
36
+
37
+ case mimetype && mimetype.type
38
+ when 'application/pdf'
36
39
  extract_from_pdf(filename)
37
- when '.txt'
40
+ when 'text/plain', nil
38
41
  extract_from_text(filename)
39
42
  else
40
- raise ArgumentError.new("Unsupported file type #{ext}")
43
+ if mimetype.text?
44
+ extract_from_text(filename)
45
+ else
46
+ raise ArgumentError.new("Unsupported file type #{ext} (#{mimetype || unknown})")
47
+ end
41
48
  end
42
49
  end
43
50
 
@@ -103,6 +110,11 @@ module Slaw
103
110
  end
104
111
  end
105
112
 
113
+ def get_mimetype(filename)
114
+ File.open(filename) { |f| MimeMagic.by_magic(f) } \
115
+ || MimeMagic.by_path(filename)
116
+ end
117
+
106
118
  # Get location of the pdftotext executable for all instances.
107
119
  def self.pdftotext_path
108
120
  @@pdftotext_path
@@ -0,0 +1,36 @@
1
+ module Slaw
2
+ # Base class for generating Act documents
3
+ class ActGenerator
4
+ Treetop.load(File.dirname(__FILE__) + "/za/act.treetop")
5
+
6
+ # [Treetop::Runtime::CompiledParser] compiled parser
7
+ attr_accessor :parser
8
+
9
+ # [Slaw::Parse::Builder] builder used by the generator
10
+ attr_accessor :builder
11
+
12
+ def initialize
13
+ @parser = Slaw::ZA::ActParser.new
14
+ @builder = Slaw::Parse::Builder.new(parser: @parser)
15
+ @cleanser = Slaw::Parse::Cleanser.new
16
+ @document_class = Slaw::Act
17
+ end
18
+
19
+ # Generate a Slaw::Act instance from plain text.
20
+ #
21
+ # @param text [String] plain text
22
+ #
23
+ # @return [Slaw::Act] the resulting act
24
+ def generate_from_text(text)
25
+ act = @document_class.new
26
+ act.doc = @builder.parse_and_process_text(cleanup(text))
27
+ act
28
+ end
29
+
30
+ def cleanup(text)
31
+ text = @cleanser.cleanup(text)
32
+ text = @cleanser.reformat(text)
33
+ text
34
+ end
35
+ end
36
+ end
@@ -139,7 +139,6 @@ module Slaw
139
139
  def postprocess(doc)
140
140
  normalise_headings(doc)
141
141
  find_short_title(doc)
142
- link_definitions(doc)
143
142
  nest_blocklists(doc)
144
143
 
145
144
  doc
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -1,19 +1,20 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  require 'slaw/parse/grammar_helpers'
4
+ require 'slaw/za/act_nodes'
4
5
 
5
6
  module Slaw
6
7
  module ZA
7
- grammar Bylaw
8
+ grammar Act
8
9
  include Slaw::Parse::GrammarHelpers
9
10
 
10
11
  ########
11
12
  # major containers
12
13
 
13
- rule bylaw
14
+ rule act
14
15
  preamble
15
16
  chapters:chapter*
16
- schedules:schedules <Bylaw>
17
+ schedules:schedules <Act>
17
18
  end
18
19
 
19
20
  rule preamble
@@ -1,58 +1,77 @@
1
1
  module Slaw
2
2
  module ZA
3
- module Bylaw
4
- class Bylaw < Treetop::Runtime::SyntaxNode
3
+ module Act
4
+ class Act < Treetop::Runtime::SyntaxNode
5
+ FRBR_URI = '/za/act/1980/01'
6
+
5
7
  def to_xml(b)
6
8
  b.act(contains: "originalVersion") { |b|
7
- b.meta { |b|
8
- b.identification(source: "#openbylaws") { |b|
9
- # TODO: correct values
10
- b.FRBRWork { |b|
11
- b.FRBRthis(value: '/za/by-law/locale/1980/name/main')
12
- b.FRBRuri(value: '/za/by-law/locale/1980/name')
13
- b.FRBRalias(value: 'By-Law Short Title')
14
- b.FRBRdate(date: '1980-01-01', name: 'Generation')
15
- b.FRBRauthor(href: '#council', as: '#author')
16
- b.FRBRcountry(value: 'za')
17
- }
18
- b.FRBRExpression { |b|
19
- b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
20
- b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
21
- b.FRBRdate(date: '1980-01-01', name: 'Generation')
22
- b.FRBRauthor(href: '#council', as: '#author')
23
- b.FRBRlanguage(language: 'eng')
24
- }
25
- b.FRBRManifestation { |b|
26
- b.FRBRthis(value: '/za/by-law/locale/1980/name/main/eng@')
27
- b.FRBRuri(value: '/za/by-law/locale/1980/name/eng@')
28
- b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
29
- b.FRBRauthor(href: '#openbylaws', as: '#author')
30
- }
31
- }
9
+ write_meta(b)
10
+ write_preamble(b)
11
+ write_body(b)
12
+ write_schedules(b)
13
+ }
14
+ end
32
15
 
33
- b.publication(date: '1980-01-01',
34
- name: 'Province of Western Cape: Provincial Gazette',
35
- number: 'XXXX',
36
- showAs: 'Province of Western Cape: Provincial Gazette')
16
+ def write_meta(b)
17
+ b.meta { |b|
18
+ write_identification(b)
37
19
 
38
- b.references(source: "#this") {
39
- b.TLCOrganization(id: 'openbylaws', href: 'http://openbylaws.org.za', showAs: "openbylaws.org.za")
40
- b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council.cape-town', showAs: "Cape Town City Council")
41
- b.TLCRole(id: 'author', href: '/ontology/role/author', showAs: 'Author')
42
- }
20
+ b.publication(date: '1980-01-01',
21
+ name: 'Publication Name',
22
+ number: 'XXXX',
23
+ showAs: 'Publication Name')
24
+
25
+ b.references(source: "#this") {
26
+ b.TLCOrganization(id: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
27
+ b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council', showAs: "Council")
28
+ b.TLCRole(id: 'author', href: '/ontology/role/author', showAs: 'Author')
43
29
  }
30
+ }
31
+ end
44
32
 
45
- if preamble.text_value != ""
46
- b.preamble { |b|
47
- preamble.to_xml(b)
48
- }
49
- end
33
+ def write_identification(b)
34
+ b.identification(source: "#slaw") { |b|
35
+ # use stub values so that we can generate a validating document
36
+ b.FRBRWork { |b|
37
+ b.FRBRthis(value: "#{FRBR_URI}/main")
38
+ b.FRBRuri(value: '/za/act/locale/1980/name')
39
+ b.FRBRalias(value: 'Short Title')
40
+ b.FRBRdate(date: '1980-01-01', name: 'Generation')
41
+ b.FRBRauthor(href: '#council', as: '#author')
42
+ b.FRBRcountry(value: 'za')
43
+ }
44
+ b.FRBRExpression { |b|
45
+ b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
46
+ b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
47
+ b.FRBRdate(date: '1980-01-01', name: 'Generation')
48
+ b.FRBRauthor(href: '#council', as: '#author')
49
+ b.FRBRlanguage(language: 'eng')
50
+ }
51
+ b.FRBRManifestation { |b|
52
+ b.FRBRthis(value: '/za/act/locale/1980/name/main/eng@')
53
+ b.FRBRuri(value: '/za/act/locale/1980/name/eng@')
54
+ b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
55
+ b.FRBRauthor(href: '#slaw', as: '#author')
56
+ }
57
+ }
58
+ end
50
59
 
51
- b.body { |b|
52
- chapters.elements.each { |e| e.to_xml(b) }
60
+ def write_preamble(b)
61
+ if preamble.text_value != ""
62
+ b.preamble { |b|
63
+ preamble.to_xml(b)
53
64
  }
65
+ end
66
+ end
67
+
68
+ def write_body(b)
69
+ b.body { |b|
70
+ chapters.elements.each { |e| e.to_xml(b) }
54
71
  }
72
+ end
55
73
 
74
+ def write_schedules(b)
56
75
  schedules.to_xml(b)
57
76
  end
58
77
  end
@@ -179,7 +198,7 @@ module Slaw
179
198
  # a section title of the form:
180
199
  #
181
200
  # Definitions
182
- # 1. In this by-law...
201
+ # 1. In this act...
183
202
 
184
203
  def num
185
204
  section_title_prefix.number_letter.text_value
@@ -194,7 +213,7 @@ module Slaw
194
213
  # a section title of the form:
195
214
  #
196
215
  # 1. Definitions
197
- # In this by-law...
216
+ # In this act...
198
217
  #
199
218
  # In this format, the title is optional and the section content may
200
219
  # start where we think the title is.
@@ -299,26 +318,26 @@ module Slaw
299
318
  b.component(id: 'component-0') { |b|
300
319
  b.doc(name: 'schedules') { |b|
301
320
  b.meta { |b|
302
- b.identification(source: "#openbylaws") { |b|
321
+ b.identification(source: "#slaw") { |b|
303
322
  b.FRBRWork { |b|
304
- b.FRBRthis(value: '/za/by-law/locale/1980/name/main/schedules')
305
- b.FRBRuri(value: '/za/by-law/locale/1980/name/schedules')
323
+ b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules')
324
+ b.FRBRuri(value: '/za/act/locale/1980/name/schedules')
306
325
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
307
326
  b.FRBRauthor(href: '#council', as: '#author')
308
327
  b.FRBRcountry(value: 'za')
309
328
  }
310
329
  b.FRBRExpression { |b|
311
- b.FRBRthis(value: '/za/by-law/locale/1980/name/main//schedules/eng@')
312
- b.FRBRuri(value: '/za/by-law/locale/1980/name/schedules/eng@')
330
+ b.FRBRthis(value: '/za/act/locale/1980/name/main//schedules/eng@')
331
+ b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
313
332
  b.FRBRdate(date: '1980-01-01', name: 'Generation')
314
333
  b.FRBRauthor(href: '#council', as: '#author')
315
334
  b.FRBRlanguage(language: 'eng')
316
335
  }
317
336
  b.FRBRManifestation { |b|
318
- b.FRBRthis(value: '/za/by-law/locale/1980/name/main/schedules/eng@')
319
- b.FRBRuri(value: '/za/by-law/locale/1980/name/schedules/eng@')
337
+ b.FRBRthis(value: '/za/act/locale/1980/name/main/schedules/eng@')
338
+ b.FRBRuri(value: '/za/act/locale/1980/name/schedules/eng@')
320
339
  b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
321
- b.FRBRauthor(href: '#openbylaws', as: '#author')
340
+ b.FRBRauthor(href: '#slaw', as: '#author')
322
341
  }
323
342
  }
324
343
  }
data/slaw.gemspec CHANGED
@@ -27,4 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_runtime_dependency "builder", "~> 3.2.2"
28
28
  spec.add_runtime_dependency "log4r", "~> 1.1.10"
29
29
  spec.add_runtime_dependency "thor", "~> 0.19.1"
30
+ spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
30
31
  end
data/spec/act_spec.rb ADDED
@@ -0,0 +1,56 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+ require 'slaw'
5
+
6
+ describe Slaw::Act do
7
+ let(:filename) { File.dirname(__FILE__) + "/fixtures/community-fire-safety.xml" }
8
+ subject { Slaw::Act.new(filename) }
9
+
10
+ it 'should have correct basic properties' do
11
+ subject.title.should == 'Community Fire Safety By-law'
12
+ subject.amended?.should be_true
13
+ end
14
+
15
+ it 'should set the title correctly' do
16
+ subject.title = 'foo'
17
+ subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS)['value'].should == 'foo'
18
+ end
19
+
20
+ it 'should set the title if it doesnt exist' do
21
+ subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS).remove
22
+ subject.title = 'bar'
23
+ subject.title.should == 'bar'
24
+ end
25
+
26
+ it 'should set the publication details' do
27
+ subject.meta.at_xpath('./a:publication', a: Slaw::NS).remove
28
+
29
+ subject.published!(name: 'foo', number: '1234', date: '2014-01-01')
30
+ subject.publication['name'].should == 'foo'
31
+ subject.publication['showAs'].should == 'foo'
32
+ subject.publication['number'].should == '1234'
33
+ end
34
+
35
+ it 'should get/set the work date' do
36
+ subject.date.should == '2002-02-28'
37
+
38
+ subject.date = '2014-01-01'
39
+ subject.date.should == '2014-01-01'
40
+ subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRdate[@name="Generation"]', a: Slaw::NS)['date'].should == '2014-01-01'
41
+ subject.meta.at_xpath('./a:identification/a:FRBRExpression/a:FRBRdate[@name="Generation"]', a: Slaw::NS)['date'].should == '2014-01-01'
42
+
43
+ subject.id_uri.should == '/za/by-law/2014/2002'
44
+ end
45
+
46
+ it 'should update the uri when the year changes' do
47
+ subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
48
+ subject.year = '1980'
49
+ subject.id_uri.should == '/za/by-law/1980/2002'
50
+ end
51
+
52
+ it 'should validate' do
53
+ subject.validate.should == []
54
+ subject.validates?.should be_true
55
+ end
56
+ end
data/spec/bylaw_spec.rb CHANGED
@@ -12,9 +12,16 @@ describe Slaw::ByLaw do
12
12
  subject.amended?.should be_true
13
13
  end
14
14
 
15
- it 'should set the title correctly' do
16
- subject.title = 'foo'
17
- subject.meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: Slaw::NS)['value'].should == 'foo'
15
+ it 'should update the uri when the region changes' do
16
+ subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
17
+ subject.region = 'foo-bar'
18
+ subject.id_uri.should == '/za/by-law/foo-bar/2002/community-fire-safety'
19
+ end
20
+
21
+ it 'should update the uri when the name changes' do
22
+ subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
23
+ subject.name = 'foo-bar'
24
+ subject.id_uri.should == '/za/by-law/cape-town/2002/foo-bar'
18
25
  end
19
26
 
20
27
  it 'should set the title if it doesnt exist' do
@@ -23,15 +30,6 @@ describe Slaw::ByLaw do
23
30
  subject.title.should == 'bar as amended'
24
31
  end
25
32
 
26
- it 'should set the publication details' do
27
- subject.meta.at_xpath('./a:publication', a: Slaw::NS).remove
28
-
29
- subject.published!(name: 'foo', number: '1234', date: '2014-01-01')
30
- subject.publication['name'].should == 'foo'
31
- subject.publication['showAs'].should == 'foo'
32
- subject.publication['number'].should == '1234'
33
- end
34
-
35
33
  it 'should get/set the work date' do
36
34
  subject.date.should == '2002-02-28'
37
35
 
@@ -48,21 +46,4 @@ describe Slaw::ByLaw do
48
46
  subject.year = '1980'
49
47
  subject.id_uri.should == '/za/by-law/cape-town/1980/community-fire-safety'
50
48
  end
51
-
52
- it 'should update the uri when the region changes' do
53
- subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
54
- subject.region = 'foo-bar'
55
- subject.id_uri.should == '/za/by-law/foo-bar/2002/community-fire-safety'
56
- end
57
-
58
- it 'should update the uri when the name changes' do
59
- subject.id_uri.should == '/za/by-law/cape-town/2002/community-fire-safety'
60
- subject.name = 'foo-bar'
61
- subject.id_uri.should == '/za/by-law/cape-town/2002/foo-bar'
62
- end
63
-
64
- it 'should validate' do
65
- subject.validate.should == []
66
- subject.validates?.should be_true
67
- end
68
49
  end
@@ -3,9 +3,8 @@
3
3
  require 'builder'
4
4
 
5
5
  require 'slaw'
6
- require 'slaw/za/bylaw_generator'
7
6
 
8
- describe Slaw::ZA::BylawGenerator do
7
+ describe Slaw::ActGenerator do
9
8
  def parse(rule, s)
10
9
  subject.builder.text_to_syntax_tree(s, {root: rule})
11
10
  end
@@ -64,7 +63,7 @@ EOS
64
63
 
65
64
  it 'should handle parts and odd section numbers' do
66
65
  subject.parser.options = {section_number_after_title: false}
67
- node = parse :bylaw, <<EOS
66
+ node = parse :act, <<EOS
68
67
  PART 1
69
68
  PREVENTION AND SUPPRESSION OF HEALTH NUISANCES
70
69
  1.
@@ -189,7 +188,7 @@ EOS
189
188
 
190
189
  context 'preamble' do
191
190
  it 'should consider any text at the start to be preamble' do
192
- node = parse :bylaw, <<EOS
191
+ node = parse :act, <<EOS
193
192
  foo
194
193
  bar
195
194
  (1) stuff
@@ -208,7 +207,7 @@ baz
208
207
  end
209
208
 
210
209
  it 'should support an optional preamble' do
211
- node = parse :bylaw, <<EOS
210
+ node = parse :act, <<EOS
212
211
  PREAMBLE
213
212
  foo
214
213
  1. Section
@@ -219,7 +218,7 @@ EOS
219
218
  end
220
219
 
221
220
  it 'should support no preamble' do
222
- node = parse :bylaw, <<EOS
221
+ node = parse :act, <<EOS
223
222
  1. Section
224
223
  bar
225
224
  EOS
@@ -235,7 +234,7 @@ EOS
235
234
  context 'sections' do
236
235
  it 'should handle section numbers after title' do
237
236
  subject.parser.options = {section_number_after_title: true}
238
- node = parse :bylaw, <<EOS
237
+ node = parse :act, <<EOS
239
238
  Section
240
239
  1. (1) hello
241
240
  EOS
@@ -247,7 +246,7 @@ EOS
247
246
 
248
247
  it 'should handle section numbers before title' do
249
248
  subject.parser.options = {section_number_after_title: false}
250
- node = parse :bylaw, <<EOS
249
+ node = parse :act, <<EOS
251
250
  1. Section
252
251
  (1) hello
253
252
  EOS
@@ -259,7 +258,7 @@ EOS
259
258
 
260
259
  it 'should handle section numbers without a dot' do
261
260
  subject.parser.options = {section_number_after_title: false}
262
- node = parse :bylaw, <<EOS
261
+ node = parse :act, <<EOS
263
262
  1 A section
264
263
  (1) hello
265
264
  2 Another section
@@ -277,7 +276,7 @@ EOS
277
276
 
278
277
  it 'should handle sections without titles' do
279
278
  subject.parser.options = {section_number_after_title: false}
280
- node = parse :bylaw, <<EOS
279
+ node = parse :act, <<EOS
281
280
  1. No owner or occupier of any shop or business premises or vacant land, blah blah
282
281
  2. Notwithstanding the provision of any other By-law or legislation no person shall—
283
282
  EOS
@@ -294,7 +293,7 @@ EOS
294
293
 
295
294
  it 'should handle sections without titles and with subsections' do
296
295
  subject.parser.options = {section_number_after_title: false}
297
- node = parse :bylaw, <<EOS
296
+ node = parse :act, <<EOS
298
297
  10. (1) Transporters must remove medical waste.
299
298
  (2) Without limiting generality, stuff.
300
299
  EOS
@@ -308,7 +307,7 @@ EOS
308
307
 
309
308
  it 'should realise complex section titles are actually section content' do
310
309
  subject.parser.options = {section_number_after_title: false}
311
- node = parse :bylaw, <<EOS
310
+ node = parse :act, <<EOS
312
311
  10. The owner of any premises which is let or sublet to more than one tenant, shall maintain at all times in a clean and sanitary condition every part of such premises as may be used in common by more than one tenant.
313
312
  11. No person shall keep, cause or suffer to be kept any factory or trade premises so as to cause or give rise to smells or effluvia that constitute a health nuisance.
314
313
  EOS
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-04 00:00:00.000000000 Z
11
+ date: 2015-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: 0.19.1
125
+ - !ruby/object:Gem::Dependency
126
+ name: mimemagic
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.2.1
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.2.1
125
139
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
126
140
  acts from plain text and PDF documents.
127
141
  email:
@@ -143,6 +157,7 @@ files:
143
157
  - lib/slaw/bylaw.rb
144
158
  - lib/slaw/collection.rb
145
159
  - lib/slaw/extract/extractor.rb
160
+ - lib/slaw/generator.rb
146
161
  - lib/slaw/lifecycle_event.rb
147
162
  - lib/slaw/logging.rb
148
163
  - lib/slaw/namespace.rb
@@ -159,10 +174,10 @@ files:
159
174
  - lib/slaw/schemas/xml.xsd
160
175
  - lib/slaw/version.rb
161
176
  - lib/slaw/xml_support.rb
162
- - lib/slaw/za/bylaw.treetop
163
- - lib/slaw/za/bylaw_generator.rb
164
- - lib/slaw/za/bylaw_nodes.rb
177
+ - lib/slaw/za/act.treetop
178
+ - lib/slaw/za/act_nodes.rb
165
179
  - slaw.gemspec
180
+ - spec/act_spec.rb
166
181
  - spec/bylaw_spec.rb
167
182
  - spec/extract/extractor_spec.rb
168
183
  - spec/fixtures/community-fire-safety.xml
@@ -170,7 +185,7 @@ files:
170
185
  - spec/parse/cleanser_spec.rb
171
186
  - spec/spec_helper.rb
172
187
  - spec/xml_helpers.rb
173
- - spec/za/bylaw_spec.rb
188
+ - spec/za/act_spec.rb
174
189
  homepage: ''
175
190
  licenses:
176
191
  - MIT
@@ -196,6 +211,7 @@ signing_key:
196
211
  specification_version: 4
197
212
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.
198
213
  test_files:
214
+ - spec/act_spec.rb
199
215
  - spec/bylaw_spec.rb
200
216
  - spec/extract/extractor_spec.rb
201
217
  - spec/fixtures/community-fire-safety.xml
@@ -203,4 +219,4 @@ test_files:
203
219
  - spec/parse/cleanser_spec.rb
204
220
  - spec/spec_helper.rb
205
221
  - spec/xml_helpers.rb
206
- - spec/za/bylaw_spec.rb
222
+ - spec/za/act_spec.rb
@@ -1,41 +0,0 @@
1
- require 'slaw/za/bylaw_nodes'
2
-
3
- module Slaw
4
- # Support specifically for South Africa
5
- module ZA
6
-
7
- # Support class for generating South African bylaws
8
- class BylawGenerator
9
- Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
10
-
11
- # [Treetop::Runtime::CompiledParser] compiled bylaw parser
12
- attr_accessor :parser
13
-
14
- # [Slaw::Parse::Builder] builder used by the generator
15
- attr_accessor :builder
16
-
17
- def initialize
18
- @parser = Slaw::ZA::BylawParser.new
19
- @builder = Slaw::Parse::Builder.new(parser: @parser)
20
- @cleanser = Slaw::Parse::Cleanser.new
21
- end
22
-
23
- # Generate a Slaw::Bylaw instance from plain text.
24
- #
25
- # @param text [String] plain text
26
- #
27
- # @return [Slaw::ByLaw] the resulting bylaw
28
- def generate_from_text(text)
29
- bylaw = Slaw::ByLaw.new
30
- bylaw.doc = @builder.parse_and_process_text(cleanup(text))
31
- bylaw
32
- end
33
-
34
- def cleanup(text)
35
- text = @cleanser.cleanup(text)
36
- text = @cleanser.reformat(text)
37
- text
38
- end
39
- end
40
- end
41
- end