slaw 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5ab33b04df46f9d2d087f7a22ce9c0e7d12278e
4
- data.tar.gz: 276c7b704a30c435d0dfbce2e74c34cd2e902b51
3
+ metadata.gz: 75915642fffe0d65b3d4386745c12cd019e3d715
4
+ data.tar.gz: fb183f90a301b479fd4d422d7da479f8796e808d
5
5
  SHA512:
6
- metadata.gz: 7632de6a68c70b2ce44a4854ec4bcb35867f269447b8c339286056301ef57d73548edabddd5a0ea963d4bc19c66b37d571beb1f8f635d7dc533ca32e1f57bd35
7
- data.tar.gz: 6e4a1d0778b6616f6956fba51a1b604514a036f188d89232984d7f57dd5f75f3bad2b031507d9d2eeeb1f470d9b85fef063d055719e97853df7d8665f4715a1a
6
+ metadata.gz: acf9d6a07f1a927461394721228a8ebd6548fe79c5604e59f36463bdb4bd0a25df1bafa8aa391a937ed45756f1a70f85e9c48469d752c92817c04f392f62c169
7
+ data.tar.gz: 4ada4ce9e47e9d71bdcf55f1fc8c94c7977e03711b46d0ec86ceae17f8df5a2efcc7a23aa171adf967a0c24a3fd889d80f2aefdee3ebd6c6fb0f0893f14598d6
data/bin/slaw CHANGED
@@ -12,9 +12,15 @@ class SlawCLI < Thor
12
12
  option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
13
13
  option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
14
14
  option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
15
+ option :fragment, type: :string, desc: "Akoma Ntoso element name that the imported text represents. Support depends on the grammar."
16
+ option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
15
17
  def parse(name)
16
18
  logging
17
19
 
20
+ if options[:fragment] and options[:definitions]
21
+ raise Thor::Error.new("--definitions can't be used together with --fragment")
22
+ end
23
+
18
24
  Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
19
25
  extractor = Slaw::Extract::Extractor.new
20
26
 
@@ -28,6 +34,20 @@ class SlawCLI < Thor
28
34
  end
29
35
 
30
36
  generator = Slaw::ActGenerator.new
37
+
38
+ if options[:fragment]
39
+ generator.document_class = Slaw::Fragment
40
+ fragment = options[:fragment]
41
+ fragment = 'act' if fragment.downcase == 'akomantoso'
42
+ generator.builder.parse_options[:root] = fragment
43
+
44
+ if options[:id_prefix]
45
+ prefix = options[:id_prefix]
46
+ prefix += "." unless prefix.end_with?('.')
47
+ generator.builder.fragment_id_prefix = prefix
48
+ end
49
+ end
50
+
31
51
  act = generator.generate_from_text(text)
32
52
 
33
53
  # definitions?
data/lib/slaw/act.rb CHANGED
@@ -1,4 +1,31 @@
1
1
  module Slaw
2
+ class AknBase
3
+ include Slaw::Namespace
4
+
5
+ attr_accessor :doc
6
+
7
+ # Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
8
+ # The most useful argument is usually `indent: 2` if you like your XML perdy.
9
+ #
10
+ # @return [String] serialized XML
11
+ def to_xml(*args)
12
+ @doc.to_xml(*args)
13
+ end
14
+
15
+ # Parse the XML contained in the file-like or String object `io`
16
+ #
17
+ # @param io [String, file-like] io object or String with XML
18
+ def parse(io)
19
+ self.doc = Nokogiri::XML(io)
20
+ end
21
+ end
22
+
23
+ # A fragment is a part of a larger document and doesn't have the context associated
24
+ # with the document.
25
+ class Fragment < AknBase
26
+ alias_method :fragment, :doc
27
+ end
28
+
2
29
  # An Act wraps a single {http://www.akomantoso.org/ AkomaNtoso 2.0 XML} act document in the form of a
3
30
  # Nokogiri::XML::Document object.
4
31
  #
@@ -7,8 +34,7 @@ module Slaw
7
34
  # identifying whether it has been amended ({#amended?}), repealed
8
35
  # ({#repealed?}) or what chapters ({#chapters}), parts ({#parts}) and
9
36
  # sections ({#sections}) it contains.
10
- class Act
11
- include Slaw::Namespace
37
+ class Act < AknBase
12
38
 
13
39
  # Allow us to jump from the XML document for an act to the
14
40
  # Act instance itself
@@ -66,13 +92,6 @@ module Slaw
66
92
 
67
93
  File.open(filename) { |f| parse(f) }
68
94
  end
69
-
70
- # Parse the XML contained in the file-like or String object `io`
71
- #
72
- # @param io [String, file-like] io object or String with XML
73
- def parse(io)
74
- self.doc = Nokogiri::XML(io)
75
- end
76
95
 
77
96
  # Set the XML document backing this bylaw.
78
97
  #
@@ -403,14 +422,6 @@ module Slaw
403
422
  validate.empty?
404
423
  end
405
424
 
406
- # Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
407
- # The most useful argument is usually `indent: 2` if you like your XML perdy.
408
- #
409
- # @return [String] serialized XML
410
- def to_xml(*args)
411
- @doc.to_xml(*args)
412
- end
413
-
414
425
  def inspect
415
426
  "<#{self.class.name} @id_uri=\"#{@id_uri}\">"
416
427
  end
@@ -9,6 +9,9 @@ module Slaw
9
9
  # [Slaw::Parse::Builder] builder used by the generator
10
10
  attr_accessor :builder
11
11
 
12
+ # The type that will hold the generated document
13
+ attr_accessor :document_class
14
+
12
15
  def initialize
13
16
  @parser = Slaw::ZA::ActParser.new
14
17
  @builder = Slaw::Parse::Builder.new(parser: @parser)
@@ -26,6 +26,12 @@ module Slaw
26
26
 
27
27
  @@parsers = {}
28
28
 
29
+ # Additional hash of options to be provided to the parser when parsing.
30
+ attr_accessor :parse_options
31
+
32
+ # Prefix to use when generating IDs for fragments
33
+ attr_accessor :fragment_id_prefix
34
+
29
35
  # Create a new builder.
30
36
  #
31
37
  # Specify either `:parser` or `:grammar_file` and `:grammar_class`.
@@ -49,6 +55,8 @@ module Slaw
49
55
  else
50
56
  raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
51
57
  end
58
+
59
+ @parse_options = {}
52
60
  end
53
61
 
54
62
  # Do all the work necessary to parse text into a well-formed XML document.
@@ -65,7 +73,7 @@ module Slaw
65
73
  # resulting XML to normalise it.
66
74
  #
67
75
  # @param text [String] the text to parse
68
- # @param parse_options [Hash] options to parse to the parser
76
+ # @param parse_options [Hash] options to pass to the parser
69
77
  #
70
78
  # @return [String] an XML string
71
79
  def parse_text(text, parse_options={})
@@ -76,11 +84,12 @@ module Slaw
76
84
  # Parse plain text into a syntax tree.
77
85
  #
78
86
  # @param text [String] the text to parse
79
- # @param parse_options [Hash] options to parse to the parser
87
+ # @param parse_options [Hash] options to pass to the parser
80
88
  #
81
89
  # @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
82
90
  def text_to_syntax_tree(text, parse_options={})
83
91
  logger.info("Parsing...")
92
+ parse_options = @parse_options.dup.update(parse_options)
84
93
  tree = @parser.parse(text, parse_options)
85
94
  logger.info("Parsed!")
86
95
 
@@ -107,7 +116,14 @@ module Slaw
107
116
  builder.akomaNtoso("xmlns:xsi"=> "http://www.w3.org/2001/XMLSchema-instance",
108
117
  "xsi:schemaLocation" => "http://www.akomantoso.org/2.0 akomantoso20.xsd",
109
118
  "xmlns" => NS) { |b|
110
- tree.to_xml(b)
119
+ args = [b]
120
+
121
+ # should we provide an id prefix?
122
+ arity = tree.method('to_xml').arity
123
+ arity = arity.abs-1 if arity < 0
124
+ args << (fragment_id_prefix || "") if arity > 1
125
+
126
+ tree.to_xml(*args)
111
127
  }
112
128
 
113
129
  s
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.6.0"
2
+ VERSION = "0.6.2"
3
3
  end
@@ -82,7 +82,7 @@ module Slaw
82
82
  #
83
83
  # Additionally, the section title is optional.
84
84
  !{ |s| options[:section_number_after_title] }
85
- section_title_prefix section_title:section_title_content? eol?
85
+ space? section_title_prefix section_title:section_title_content? eol?
86
86
  <SectionTitleType2>
87
87
  end
88
88
 
@@ -7,7 +7,7 @@ module Slaw
7
7
  EXPRESSION_URI = "#{FRBR_URI}/eng@"
8
8
  MANIFESTATION_URI = EXPRESSION_URI
9
9
 
10
- def to_xml(b)
10
+ def to_xml(b, idprefix)
11
11
  b.act(contains: "originalVersion") { |b|
12
12
  write_meta(b)
13
13
  write_preamble(b)
@@ -192,7 +192,7 @@ module Slaw
192
192
 
193
193
  idprefix = "#{id}."
194
194
 
195
- subsections.elements.each_with_index { |e, i| e.to_xml(b, i, idprefix) }
195
+ subsections.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
196
196
  }
197
197
  end
198
198
  end
@@ -231,7 +231,7 @@ module Slaw
231
231
  end
232
232
 
233
233
  class Subsection < Treetop::Runtime::SyntaxNode
234
- def to_xml(b, i, idprefix)
234
+ def to_xml(b, idprefix, i=0)
235
235
  if statement.is_a?(NumberedStatement)
236
236
  attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
237
237
  else
@@ -246,9 +246,9 @@ module Slaw
246
246
  b.content { |b|
247
247
  if blocklist and blocklist.is_a?(Blocklist)
248
248
  if statement.content
249
- blocklist.to_xml(b, i, idprefix) { |b| b << statement.content.text_value }
249
+ blocklist.to_xml(b, idprefix, i) { |b| b << statement.content.text_value }
250
250
  else
251
- blocklist.to_xml(b, i, idprefix)
251
+ blocklist.to_xml(b, idprefix, i)
252
252
  end
253
253
  else
254
254
  # raw content
@@ -283,7 +283,7 @@ module Slaw
283
283
  class Blocklist < Treetop::Runtime::SyntaxNode
284
284
  # Render a block list to xml. If a block is given,
285
285
  # yield to it a builder to insert a listIntroduction node
286
- def to_xml(b, i, idprefix, &block)
286
+ def to_xml(b, idprefix, i=0, &block)
287
287
  id = idprefix + "list#{i}"
288
288
  idprefix = id + '.'
289
289
 
@@ -382,7 +382,7 @@ module Slaw
382
382
  end
383
383
  end
384
384
 
385
- def to_xml(b, i)
385
+ def to_xml(b, idprefix, i=0)
386
386
  n = num.nil? ? i : num
387
387
  id = "schedule-#{n}"
388
388
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-28 00:00:00.000000000 Z
11
+ date: 2015-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler