slaw 0.6.0 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5ab33b04df46f9d2d087f7a22ce9c0e7d12278e
4
- data.tar.gz: 276c7b704a30c435d0dfbce2e74c34cd2e902b51
3
+ metadata.gz: 75915642fffe0d65b3d4386745c12cd019e3d715
4
+ data.tar.gz: fb183f90a301b479fd4d422d7da479f8796e808d
5
5
  SHA512:
6
- metadata.gz: 7632de6a68c70b2ce44a4854ec4bcb35867f269447b8c339286056301ef57d73548edabddd5a0ea963d4bc19c66b37d571beb1f8f635d7dc533ca32e1f57bd35
7
- data.tar.gz: 6e4a1d0778b6616f6956fba51a1b604514a036f188d89232984d7f57dd5f75f3bad2b031507d9d2eeeb1f470d9b85fef063d055719e97853df7d8665f4715a1a
6
+ metadata.gz: acf9d6a07f1a927461394721228a8ebd6548fe79c5604e59f36463bdb4bd0a25df1bafa8aa391a937ed45756f1a70f85e9c48469d752c92817c04f392f62c169
7
+ data.tar.gz: 4ada4ce9e47e9d71bdcf55f1fc8c94c7977e03711b46d0ec86ceae17f8df5a2efcc7a23aa171adf967a0c24a3fd889d80f2aefdee3ebd6c6fb0f0893f14598d6
data/bin/slaw CHANGED
@@ -12,9 +12,15 @@ class SlawCLI < Thor
12
12
  option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
13
13
  option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
14
14
  option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
15
+ option :fragment, type: :string, desc: "Akoma Ntoso element name that the imported text represents. Support depends on the grammar."
16
+ option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
15
17
  def parse(name)
16
18
  logging
17
19
 
20
+ if options[:fragment] and options[:definitions]
21
+ raise Thor::Error.new("--definitions can't be used together with --fragment")
22
+ end
23
+
18
24
  Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
19
25
  extractor = Slaw::Extract::Extractor.new
20
26
 
@@ -28,6 +34,20 @@ class SlawCLI < Thor
28
34
  end
29
35
 
30
36
  generator = Slaw::ActGenerator.new
37
+
38
+ if options[:fragment]
39
+ generator.document_class = Slaw::Fragment
40
+ fragment = options[:fragment]
41
+ fragment = 'act' if fragment.downcase == 'akomantoso'
42
+ generator.builder.parse_options[:root] = fragment
43
+
44
+ if options[:id_prefix]
45
+ prefix = options[:id_prefix]
46
+ prefix += "." unless prefix.end_with?('.')
47
+ generator.builder.fragment_id_prefix = prefix
48
+ end
49
+ end
50
+
31
51
  act = generator.generate_from_text(text)
32
52
 
33
53
  # definitions?
data/lib/slaw/act.rb CHANGED
@@ -1,4 +1,31 @@
1
1
  module Slaw
2
+ class AknBase
3
+ include Slaw::Namespace
4
+
5
+ attr_accessor :doc
6
+
7
+ # Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
8
+ # The most useful argument is usually `indent: 2` if you like your XML perdy.
9
+ #
10
+ # @return [String] serialized XML
11
+ def to_xml(*args)
12
+ @doc.to_xml(*args)
13
+ end
14
+
15
+ # Parse the XML contained in the file-like or String object `io`
16
+ #
17
+ # @param io [String, file-like] io object or String with XML
18
+ def parse(io)
19
+ self.doc = Nokogiri::XML(io)
20
+ end
21
+ end
22
+
23
+ # A fragment is a part of a larger document and doesn't have the context associated
24
+ # with the document.
25
+ class Fragment < AknBase
26
+ alias_method :fragment, :doc
27
+ end
28
+
2
29
  # An Act wraps a single {http://www.akomantoso.org/ AkomaNtoso 2.0 XML} act document in the form of a
3
30
  # Nokogiri::XML::Document object.
4
31
  #
@@ -7,8 +34,7 @@ module Slaw
7
34
  # identifying whether it has been amended ({#amended?}), repealed
8
35
  # ({#repealed?}) or what chapters ({#chapters}), parts ({#parts}) and
9
36
  # sections ({#sections}) it contains.
10
- class Act
11
- include Slaw::Namespace
37
+ class Act < AknBase
12
38
 
13
39
  # Allow us to jump from the XML document for an act to the
14
40
  # Act instance itself
@@ -66,13 +92,6 @@ module Slaw
66
92
 
67
93
  File.open(filename) { |f| parse(f) }
68
94
  end
69
-
70
- # Parse the XML contained in the file-like or String object `io`
71
- #
72
- # @param io [String, file-like] io object or String with XML
73
- def parse(io)
74
- self.doc = Nokogiri::XML(io)
75
- end
76
95
 
77
96
  # Set the XML document backing this bylaw.
78
97
  #
@@ -403,14 +422,6 @@ module Slaw
403
422
  validate.empty?
404
423
  end
405
424
 
406
- # Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
407
- # The most useful argument is usually `indent: 2` if you like your XML perdy.
408
- #
409
- # @return [String] serialized XML
410
- def to_xml(*args)
411
- @doc.to_xml(*args)
412
- end
413
-
414
425
  def inspect
415
426
  "<#{self.class.name} @id_uri=\"#{@id_uri}\">"
416
427
  end
@@ -9,6 +9,9 @@ module Slaw
9
9
  # [Slaw::Parse::Builder] builder used by the generator
10
10
  attr_accessor :builder
11
11
 
12
+ # The type that will hold the generated document
13
+ attr_accessor :document_class
14
+
12
15
  def initialize
13
16
  @parser = Slaw::ZA::ActParser.new
14
17
  @builder = Slaw::Parse::Builder.new(parser: @parser)
@@ -26,6 +26,12 @@ module Slaw
26
26
 
27
27
  @@parsers = {}
28
28
 
29
+ # Additional hash of options to be provided to the parser when parsing.
30
+ attr_accessor :parse_options
31
+
32
+ # Prefix to use when generating IDs for fragments
33
+ attr_accessor :fragment_id_prefix
34
+
29
35
  # Create a new builder.
30
36
  #
31
37
  # Specify either `:parser` or `:grammar_file` and `:grammar_class`.
@@ -49,6 +55,8 @@ module Slaw
49
55
  else
50
56
  raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
51
57
  end
58
+
59
+ @parse_options = {}
52
60
  end
53
61
 
54
62
  # Do all the work necessary to parse text into a well-formed XML document.
@@ -65,7 +73,7 @@ module Slaw
65
73
  # resulting XML to normalise it.
66
74
  #
67
75
  # @param text [String] the text to parse
68
- # @param parse_options [Hash] options to parse to the parser
76
+ # @param parse_options [Hash] options to pass to the parser
69
77
  #
70
78
  # @return [String] an XML string
71
79
  def parse_text(text, parse_options={})
@@ -76,11 +84,12 @@ module Slaw
76
84
  # Parse plain text into a syntax tree.
77
85
  #
78
86
  # @param text [String] the text to parse
79
- # @param parse_options [Hash] options to parse to the parser
87
+ # @param parse_options [Hash] options to pass to the parser
80
88
  #
81
89
  # @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
82
90
  def text_to_syntax_tree(text, parse_options={})
83
91
  logger.info("Parsing...")
92
+ parse_options = @parse_options.dup.update(parse_options)
84
93
  tree = @parser.parse(text, parse_options)
85
94
  logger.info("Parsed!")
86
95
 
@@ -107,7 +116,14 @@ module Slaw
107
116
  builder.akomaNtoso("xmlns:xsi"=> "http://www.w3.org/2001/XMLSchema-instance",
108
117
  "xsi:schemaLocation" => "http://www.akomantoso.org/2.0 akomantoso20.xsd",
109
118
  "xmlns" => NS) { |b|
110
- tree.to_xml(b)
119
+ args = [b]
120
+
121
+ # should we provide an id prefix?
122
+ arity = tree.method('to_xml').arity
123
+ arity = arity.abs-1 if arity < 0
124
+ args << (fragment_id_prefix || "") if arity > 1
125
+
126
+ tree.to_xml(*args)
111
127
  }
112
128
 
113
129
  s
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.6.0"
2
+ VERSION = "0.6.2"
3
3
  end
@@ -82,7 +82,7 @@ module Slaw
82
82
  #
83
83
  # Additionally, the section title is optional.
84
84
  !{ |s| options[:section_number_after_title] }
85
- section_title_prefix section_title:section_title_content? eol?
85
+ space? section_title_prefix section_title:section_title_content? eol?
86
86
  <SectionTitleType2>
87
87
  end
88
88
 
@@ -7,7 +7,7 @@ module Slaw
7
7
  EXPRESSION_URI = "#{FRBR_URI}/eng@"
8
8
  MANIFESTATION_URI = EXPRESSION_URI
9
9
 
10
- def to_xml(b)
10
+ def to_xml(b, idprefix)
11
11
  b.act(contains: "originalVersion") { |b|
12
12
  write_meta(b)
13
13
  write_preamble(b)
@@ -192,7 +192,7 @@ module Slaw
192
192
 
193
193
  idprefix = "#{id}."
194
194
 
195
- subsections.elements.each_with_index { |e, i| e.to_xml(b, i, idprefix) }
195
+ subsections.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
196
196
  }
197
197
  end
198
198
  end
@@ -231,7 +231,7 @@ module Slaw
231
231
  end
232
232
 
233
233
  class Subsection < Treetop::Runtime::SyntaxNode
234
- def to_xml(b, i, idprefix)
234
+ def to_xml(b, idprefix, i=0)
235
235
  if statement.is_a?(NumberedStatement)
236
236
  attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
237
237
  else
@@ -246,9 +246,9 @@ module Slaw
246
246
  b.content { |b|
247
247
  if blocklist and blocklist.is_a?(Blocklist)
248
248
  if statement.content
249
- blocklist.to_xml(b, i, idprefix) { |b| b << statement.content.text_value }
249
+ blocklist.to_xml(b, idprefix, i) { |b| b << statement.content.text_value }
250
250
  else
251
- blocklist.to_xml(b, i, idprefix)
251
+ blocklist.to_xml(b, idprefix, i)
252
252
  end
253
253
  else
254
254
  # raw content
@@ -283,7 +283,7 @@ module Slaw
283
283
  class Blocklist < Treetop::Runtime::SyntaxNode
284
284
  # Render a block list to xml. If a block is given,
285
285
  # yield to it a builder to insert a listIntroduction node
286
- def to_xml(b, i, idprefix, &block)
286
+ def to_xml(b, idprefix, i=0, &block)
287
287
  id = idprefix + "list#{i}"
288
288
  idprefix = id + '.'
289
289
 
@@ -382,7 +382,7 @@ module Slaw
382
382
  end
383
383
  end
384
384
 
385
- def to_xml(b, i)
385
+ def to_xml(b, idprefix, i=0)
386
386
  n = num.nil? ? i : num
387
387
  id = "schedule-#{n}"
388
388
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-28 00:00:00.000000000 Z
11
+ date: 2015-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler