slaw 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 680301c5ade280eb7da5ea92c509f491631824f2
4
- data.tar.gz: f2ddd5a99631121bf3693da5f229e38a6f590142
3
+ metadata.gz: 405a0b941536c74c13588e1bfb4350c566337626
4
+ data.tar.gz: 809e1fd9fd4ada655d3531b4eb31702398490a42
5
5
  SHA512:
6
- metadata.gz: 844130f24fa5e4e7e2acd8bacc9381bbd043591676a4fd22e9f1deec87e99b813f3062e4c4ec7286aca4ec0fe2a17161c39d85f5a07c8819192c82cd6203e474
7
- data.tar.gz: de11ab3cb747c7341209e79f131506f6e2fc44065a73d95bb936c2b36b348646644024b0b657252106cd4c6d9f1b792ca4f7884e8f45fff4bda453da0a736cb7
6
+ metadata.gz: a636be697e3589db697232bc01876a864ee8c02eb4548232b9db8addc2c3d9fb0a5004ffeb94f12494e88889b2954c3edd61100a865ce9329b5eddad7381fbe8
7
+ data.tar.gz: 57e78d5489aa950436b2e7dc3ebe7d19a639b86c3f4fa3b95add5edfa9adbb0185b6f191a012f74ba4fc35100432096ac1ea09b928327afa214aaedd1a2c070c
data/lib/slaw/act.rb CHANGED
@@ -63,7 +63,14 @@ module Slaw
63
63
  # Parse the XML contained in the file-like object `io`
64
64
  # @param io [file-like] io object with XML
65
65
  def parse(io)
66
- @doc = Nokogiri::XML(io)
66
+ self.doc = Nokogiri::XML(io)
67
+ end
68
+
69
+ # Set the XML document backing this bylaw.
70
+ #
71
+ # @param doc [Nokogiri::XML::Document] document
72
+ def doc=(doc)
73
+ @doc = doc
67
74
  @meta = @doc.at_xpath('/a:akomaNtoso/a:act/a:meta', a: NS)
68
75
  @body = @doc.at_xpath('/a:akomaNtoso/a:act/a:body', a: NS)
69
76
 
@@ -12,61 +12,77 @@ module Slaw
12
12
  # XML document.
13
13
  #
14
14
  # @example Parse some text into a well-formed document
15
- # builder = Slaw::Builder.new
15
+ # builder = Slaw::Builder.new(parser: parser)
16
16
  # xml = builder.parse_text(text)
17
17
  # doc = builder.parse_xml(xml)
18
18
  # builder.postprocess(doc)
19
19
  #
20
20
  # @example A quicker way to build a well-formed document
21
- # builder = Slaw::Builder.new
22
21
  # doc = builder.parse_and_process_text(text)
23
22
  #
24
23
  class Builder
25
24
  include Slaw::Namespace
26
25
  include Slaw::Logging
27
26
 
28
- Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
27
+ @@parsers = {}
29
28
 
30
- # [Hash] A Hash of options that are made available to the parser when parsing.
31
- attr_accessor :parse_options
32
-
33
- def initialize(parse_options={})
34
- @parse_options = parse_options
29
+ # Create a new builder.
30
+ #
31
+ # Specify either `:parser`, or both `:grammar_file` and `:grammar_class`.
32
+ #
33
+ # @option opts [Treetop::Runtime::CompiledParser] :parser parser to use
34
+ # @option opts [String] :grammar_file grammar filename to load a parser from
35
+ # @option opts [String] :grammar_class name of the class that the grammar will generate
36
+ def initialize(opts={})
37
+ if opts[:parser]
38
+ @parser = opts[:parser]
39
+ elsif opts[:grammar_file] and opts[:grammar_class]
40
+ if @@parsers[opts[:grammar_class]]
41
+ # already compiled the grammar, just use it
42
+ @parser = @@parsers[opts[:grammar_class]]
43
+ else
44
+ # load the grammar
45
+ Treetop.load(opts[:grammar_file])
46
+ cls = eval(opts[:grammar_class])
47
+ @parser = cls.new
48
+ end
49
+ else
50
+ raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
51
+ end
35
52
  end
36
53
 
37
54
  # Do all the work necessary to parse text into a well-formed XML document.
38
55
  #
39
56
  # @param text [String] the text to parse
40
- # @param root [Symbol] the root element of the grammar
57
+ # @param parse_options [Hash] options to pass to the parser
41
58
  #
42
59
  # @return [Nokogiri::XML::Document] a well formed document
43
- def parse_and_process_text(text, root=:bylaw)
44
- postprocess(parse_xml(parse_text(text, root)))
60
+ def parse_and_process_text(text, parse_options={})
61
+ postprocess(parse_xml(parse_text(text, parse_options)))
45
62
  end
46
63
 
47
64
  # Parse text into XML. You should still run {#postprocess} on the
48
65
  # resulting XML to normalise it.
49
66
  #
50
67
  # @param text [String] the text to parse
51
- # @param root [Symbol] the root element of the grammar
68
+ # @param parse_options [Hash] options to pass to the parser
52
69
  #
53
70
  # @return [String] an XML string
54
- def parse_text(text, root=:bylaw)
55
- tree = text_to_syntax_tree(text, root)
71
+ def parse_text(text, parse_options={})
72
+ tree = text_to_syntax_tree(text, parse_options)
56
73
  xml_from_syntax_tree(tree)
57
74
  end
58
75
 
59
76
  # Parse plain text into a syntax tree.
60
77
  #
61
78
  # @param text [String] the text to parse
62
- # @param root [Symbol] the root element of the grammar
79
+ # @param parse_options [Hash] options to pass to the parser
63
80
  #
64
- # @return [Object] the root of the resulting parse tree, usually a Treetop::Node object
65
- def text_to_syntax_tree(text, root=:bylaw)
66
- parser = Slaw::Parse::BylawParser.new
67
- parser.options = @parse_options
68
-
69
- tree = parser.parse(text, {root: root})
81
+ # @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
82
+ def text_to_syntax_tree(text, parse_options={})
83
+ logger.info("Parsing...")
84
+ tree = @parser.parse(text, parse_options)
85
+ logger.info("Parsed!")
70
86
 
71
87
  if tree.nil?
72
88
  raise Slaw::Parse::ParseError.new(parser.failure_reason || "Couldn't match to grammar",
@@ -80,7 +96,7 @@ module Slaw
80
96
  # Generate an XML document from the given syntax tree. You should still
81
97
  # run {#postprocess} on the resulting XML to normalise it.
82
98
  #
83
- # @param tree [Object] a Treetop::Node object
99
+ # @param tree [Object] a Treetop::Runtime::SyntaxNode object
84
100
  #
85
101
  # @return [String] an XML string
86
102
  def xml_from_syntax_tree(tree)
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -3,7 +3,7 @@
3
3
  require 'slaw/parse/grammar_helpers'
4
4
 
5
5
  module Slaw
6
- module Parse
6
+ module ZA
7
7
  grammar Bylaw
8
8
  include Slaw::Parse::GrammarHelpers
9
9
 
@@ -0,0 +1,41 @@
1
+ require 'slaw/za/bylaw_nodes'
2
+
3
+ module Slaw
4
+ # Support specifically for South Africa
5
+ module ZA
6
+
7
+ # Support class for generating South African bylaws
8
+ class BylawGenerator
9
+ Treetop.load(File.dirname(__FILE__) + "/bylaw.treetop")
10
+
11
+ # [Treetop::Runtime::CompiledParser] compiled bylaw parser
12
+ attr_accessor :parser
13
+
14
+ # [Slaw::Parse::Builder] builder used by the generator
15
+ attr_accessor :builder
16
+
17
+ def initialize
18
+ @parser = Slaw::ZA::BylawParser.new
19
+ @builder = Slaw::Parse::Builder.new(parser: @parser)
20
+ @cleanser = Slaw::Parse::Cleanser.new
21
+ end
22
+
23
+ # Generate a Slaw::ByLaw instance from plain text.
24
+ #
25
+ # @param text [String] plain text
26
+ #
27
+ # @return [Slaw::ByLaw] the resulting bylaw
28
+ def generate_from_text(text)
29
+ bylaw = Slaw::ByLaw.new
30
+ bylaw.doc = @builder.parse_and_process_text(cleanup(text))
31
+ bylaw
32
+ end
33
+
34
+ def cleanup(text)
35
+ text = @cleanser.cleanup(text)
36
+ text = @cleanser.reformat(text)
37
+ text
38
+ end
39
+ end
40
+ end
41
+ end
@@ -1,5 +1,5 @@
1
1
  module Slaw
2
- module Parse
2
+ module ZA
3
3
  module Bylaw
4
4
  class Bylaw < Treetop::Runtime::SyntaxNode
5
5
  def to_xml(b)
data/lib/slaw.rb CHANGED
@@ -16,8 +16,6 @@ require 'slaw/parse/blocklists'
16
16
  require 'slaw/parse/builder'
17
17
  require 'slaw/parse/cleanser'
18
18
  require 'slaw/parse/error'
19
- require 'slaw/parse/grammar_helpers'
20
- require 'slaw/parse/nodes'
21
19
 
22
20
  require 'slaw/extract/extractor'
23
21
 
@@ -4,6 +4,9 @@ require 'spec_helper'
4
4
  require 'slaw'
5
5
 
6
6
  describe Slaw::Parse::Builder do
7
+ let(:parser) { double("parser") }
8
+ subject { Slaw::Parse::Builder.new(parser: parser) }
9
+
7
10
  describe '#nest_blocklists' do
8
11
  it 'should nest simple blocks' do
9
12
  doc = xml2doc(subsection(<<XML
@@ -1,16 +1,18 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'slaw'
4
3
  require 'builder'
5
4
 
6
- describe Slaw::Parse::Builder do
5
+ require 'slaw'
6
+ require 'slaw/za/bylaw_generator'
7
+
8
+ describe Slaw::ZA::BylawGenerator do
7
9
  def parse(rule, s)
8
- subject.text_to_syntax_tree(s, rule)
10
+ subject.builder.text_to_syntax_tree(s, {root: rule})
9
11
  end
10
12
 
11
13
  def should_parse(rule, s)
12
14
  s << "\n" unless s.end_with?("\n")
13
- tree = subject.text_to_syntax_tree(s, rule)
15
+ tree = subject.builder.text_to_syntax_tree(s, {root: rule})
14
16
 
15
17
  if not tree
16
18
  raise Exception.new(subject.failure_reason || "Couldn't match to grammar") if tree.nil?
@@ -61,7 +63,7 @@ EOS
61
63
  end
62
64
 
63
65
  it 'should handle parts and odd section numbers' do
64
- subject.parse_options = {section_number_after_title: false}
66
+ subject.parser.options = {section_number_after_title: false}
65
67
  node = parse :bylaw, <<EOS
66
68
  PART 1
67
69
  PREVENTION AND SUPPRESSION OF HEALTH NUISANCES
@@ -232,7 +234,7 @@ EOS
232
234
 
233
235
  context 'sections' do
234
236
  it 'should handle section numbers after title' do
235
- subject.parse_options = {section_number_after_title: true}
237
+ subject.parser.options = {section_number_after_title: true}
236
238
  node = parse :bylaw, <<EOS
237
239
  Section
238
240
  1. (1) hello
@@ -244,7 +246,7 @@ EOS
244
246
  end
245
247
 
246
248
  it 'should handle section numbers before title' do
247
- subject.parse_options = {section_number_after_title: false}
249
+ subject.parser.options = {section_number_after_title: false}
248
250
  node = parse :bylaw, <<EOS
249
251
  1. Section
250
252
  (1) hello
@@ -256,7 +258,7 @@ EOS
256
258
  end
257
259
 
258
260
  it 'should handle section numbers without a dot' do
259
- subject.parse_options = {section_number_after_title: false}
261
+ subject.parser.options = {section_number_after_title: false}
260
262
  node = parse :bylaw, <<EOS
261
263
  1 A section
262
264
  (1) hello
@@ -274,7 +276,7 @@ EOS
274
276
  end
275
277
 
276
278
  it 'should handle sections without titles' do
277
- subject.parse_options = {section_number_after_title: false}
279
+ subject.parser.options = {section_number_after_title: false}
278
280
  node = parse :bylaw, <<EOS
279
281
  1. No owner or occupier of any shop or business premises or vacant land, blah blah
280
282
  2. Notwithstanding the provision of any other By-law or legislation no person shall—
@@ -291,7 +293,7 @@ EOS
291
293
  end
292
294
 
293
295
  it 'should handle sections without titles and with subsections' do
294
- subject.parse_options = {section_number_after_title: false}
296
+ subject.parser.options = {section_number_after_title: false}
295
297
  node = parse :bylaw, <<EOS
296
298
  10. (1) Transporters must remove medical waste.
297
299
  (2) Without limiting generality, stuff.
@@ -305,7 +307,7 @@ EOS
305
307
  end
306
308
 
307
309
  it 'should realise complex section titles are actually section content' do
308
- subject.parse_options = {section_number_after_title: false}
310
+ subject.parser.options = {section_number_after_title: false}
309
311
  node = parse :bylaw, <<EOS
310
312
  10. The owner of any premises which is let or sublet to more than one tenant, shall maintain at all times in a clean and sanitary condition every part of such premises as may be used in common by more than one tenant.
311
313
  11. No person shall keep, cause or suffer to be kept any factory or trade premises so as to cause or give rise to smells or effluvia that constitute a health nuisance.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-18 00:00:00.000000000 Z
11
+ date: 2014-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -132,24 +132,25 @@ files:
132
132
  - lib/slaw/namespace.rb
133
133
  - lib/slaw/parse/blocklists.rb
134
134
  - lib/slaw/parse/builder.rb
135
- - lib/slaw/parse/bylaw.treetop
136
135
  - lib/slaw/parse/cleanser.rb
137
136
  - lib/slaw/parse/error.rb
138
137
  - lib/slaw/parse/grammar_helpers.rb
139
- - lib/slaw/parse/nodes.rb
140
138
  - lib/slaw/render/html.rb
141
139
  - lib/slaw/render/xsl/act.xsl
142
140
  - lib/slaw/render/xsl/elements.xsl
143
141
  - lib/slaw/render/xsl/fragment.xsl
144
142
  - lib/slaw/version.rb
145
143
  - lib/slaw/xml_support.rb
144
+ - lib/slaw/za/bylaw.treetop
145
+ - lib/slaw/za/bylaw_generator.rb
146
+ - lib/slaw/za/bylaw_nodes.rb
146
147
  - slaw.gemspec
147
148
  - spec/extract/extractor_spec.rb
148
149
  - spec/parse/builder_spec.rb
149
- - spec/parse/bylaw_spec.rb
150
150
  - spec/parse/cleanser_spec.rb
151
151
  - spec/spec_helper.rb
152
152
  - spec/xml_helpers.rb
153
+ - spec/za/bylaw_spec.rb
153
154
  homepage: ''
154
155
  licenses:
155
156
  - MIT
@@ -177,7 +178,7 @@ summary: A lightweight library for using Akoma Ntoso acts in Ruby.
177
178
  test_files:
178
179
  - spec/extract/extractor_spec.rb
179
180
  - spec/parse/builder_spec.rb
180
- - spec/parse/bylaw_spec.rb
181
181
  - spec/parse/cleanser_spec.rb
182
182
  - spec/spec_helper.rb
183
183
  - spec/xml_helpers.rb
184
+ - spec/za/bylaw_spec.rb