slaw 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/slaw +20 -0
- data/lib/slaw/act.rb +28 -17
- data/lib/slaw/generator.rb +3 -0
- data/lib/slaw/parse/builder.rb +19 -3
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +1 -1
- data/lib/slaw/za/act_nodes.rb +7 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75915642fffe0d65b3d4386745c12cd019e3d715
|
4
|
+
data.tar.gz: fb183f90a301b479fd4d422d7da479f8796e808d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acf9d6a07f1a927461394721228a8ebd6548fe79c5604e59f36463bdb4bd0a25df1bafa8aa391a937ed45756f1a70f85e9c48469d752c92817c04f392f62c169
|
7
|
+
data.tar.gz: 4ada4ce9e47e9d71bdcf55f1fc8c94c7977e03711b46d0ec86ceae17f8df5a2efcc7a23aa171adf967a0c24a3fd889d80f2aefdee3ebd6c6fb0f0893f14598d6
|
data/bin/slaw
CHANGED
@@ -12,9 +12,15 @@ class SlawCLI < Thor
|
|
12
12
|
option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
|
13
13
|
option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
|
14
14
|
option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
|
15
|
+
option :fragment, type: :string, desc: "Akoma Ntoso element name that the imported text represents. Support depends on the grammar."
|
16
|
+
option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
|
15
17
|
def parse(name)
|
16
18
|
logging
|
17
19
|
|
20
|
+
if options[:fragment] and options[:definitions]
|
21
|
+
raise Thor::Error.new("--definitions can't be used together with --fragment")
|
22
|
+
end
|
23
|
+
|
18
24
|
Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
|
19
25
|
extractor = Slaw::Extract::Extractor.new
|
20
26
|
|
@@ -28,6 +34,20 @@ class SlawCLI < Thor
|
|
28
34
|
end
|
29
35
|
|
30
36
|
generator = Slaw::ActGenerator.new
|
37
|
+
|
38
|
+
if options[:fragment]
|
39
|
+
generator.document_class = Slaw::Fragment
|
40
|
+
fragment = options[:fragment]
|
41
|
+
fragment = 'act' if fragment.downcase == 'akomantoso'
|
42
|
+
generator.builder.parse_options[:root] = fragment
|
43
|
+
|
44
|
+
if options[:id_prefix]
|
45
|
+
prefix = options[:id_prefix]
|
46
|
+
prefix += "." unless prefix.end_with?('.')
|
47
|
+
generator.builder.fragment_id_prefix = prefix
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
31
51
|
act = generator.generate_from_text(text)
|
32
52
|
|
33
53
|
# definitions?
|
data/lib/slaw/act.rb
CHANGED
@@ -1,4 +1,31 @@
|
|
1
1
|
module Slaw
|
2
|
+
class AknBase
|
3
|
+
include Slaw::Namespace
|
4
|
+
|
5
|
+
attr_accessor :doc
|
6
|
+
|
7
|
+
# Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
|
8
|
+
# The most useful argument is usually `indent: 2` if you like your XML perdy.
|
9
|
+
#
|
10
|
+
# @return [String] serialized XML
|
11
|
+
def to_xml(*args)
|
12
|
+
@doc.to_xml(*args)
|
13
|
+
end
|
14
|
+
|
15
|
+
# Parse the XML contained in the file-like or String object `io`
|
16
|
+
#
|
17
|
+
# @param io [String, file-like] io object or String with XML
|
18
|
+
def parse(io)
|
19
|
+
self.doc = Nokogiri::XML(io)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# A fragment is a part of a larger document and doesn't have the context associated
|
24
|
+
# with the document.
|
25
|
+
class Fragment < AknBase
|
26
|
+
alias_method :fragment, :doc
|
27
|
+
end
|
28
|
+
|
2
29
|
# An Act wraps a single {http://www.akomantoso.org/ AkomaNtoso 2.0 XML} act document in the form of a
|
3
30
|
# Nokogiri::XML::Document object.
|
4
31
|
#
|
@@ -7,8 +34,7 @@ module Slaw
|
|
7
34
|
# identifying whether it has been amended ({#amended?}), repealed
|
8
35
|
# ({#repealed?}) or what chapters ({#chapters}), parts ({#parts}) and
|
9
36
|
# sections ({#sections}) it contains.
|
10
|
-
class Act
|
11
|
-
include Slaw::Namespace
|
37
|
+
class Act < AknBase
|
12
38
|
|
13
39
|
# Allow us to jump from the XML document for an act to the
|
14
40
|
# Act instance itself
|
@@ -66,13 +92,6 @@ module Slaw
|
|
66
92
|
|
67
93
|
File.open(filename) { |f| parse(f) }
|
68
94
|
end
|
69
|
-
|
70
|
-
# Parse the XML contained in the file-like or String object `io`
|
71
|
-
#
|
72
|
-
# @param io [String, file-like] io object or String with XML
|
73
|
-
def parse(io)
|
74
|
-
self.doc = Nokogiri::XML(io)
|
75
|
-
end
|
76
95
|
|
77
96
|
# Set the XML document backing this bylaw.
|
78
97
|
#
|
@@ -403,14 +422,6 @@ module Slaw
|
|
403
422
|
validate.empty?
|
404
423
|
end
|
405
424
|
|
406
|
-
# Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
|
407
|
-
# The most useful argument is usually `indent: 2` if you like your XML perdy.
|
408
|
-
#
|
409
|
-
# @return [String] serialized XML
|
410
|
-
def to_xml(*args)
|
411
|
-
@doc.to_xml(*args)
|
412
|
-
end
|
413
|
-
|
414
425
|
def inspect
|
415
426
|
"<#{self.class.name} @id_uri=\"#{@id_uri}\">"
|
416
427
|
end
|
data/lib/slaw/generator.rb
CHANGED
@@ -9,6 +9,9 @@ module Slaw
|
|
9
9
|
# [Slaw::Parse::Builder] builder used by the generator
|
10
10
|
attr_accessor :builder
|
11
11
|
|
12
|
+
# The type that will hold the generated document
|
13
|
+
attr_accessor :document_class
|
14
|
+
|
12
15
|
def initialize
|
13
16
|
@parser = Slaw::ZA::ActParser.new
|
14
17
|
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -26,6 +26,12 @@ module Slaw
|
|
26
26
|
|
27
27
|
@@parsers = {}
|
28
28
|
|
29
|
+
# Additional hash of options to be provided to the parser when parsing.
|
30
|
+
attr_accessor :parse_options
|
31
|
+
|
32
|
+
# Prefix to use when generating IDs for fragments
|
33
|
+
attr_accessor :fragment_id_prefix
|
34
|
+
|
29
35
|
# Create a new builder.
|
30
36
|
#
|
31
37
|
# Specify either `:parser` or `:grammar_file` and `:grammar_class`.
|
@@ -49,6 +55,8 @@ module Slaw
|
|
49
55
|
else
|
50
56
|
raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
|
51
57
|
end
|
58
|
+
|
59
|
+
@parse_options = {}
|
52
60
|
end
|
53
61
|
|
54
62
|
# Do all the work necessary to parse text into a well-formed XML document.
|
@@ -65,7 +73,7 @@ module Slaw
|
|
65
73
|
# resulting XML to normalise it.
|
66
74
|
#
|
67
75
|
# @param text [String] the text to parse
|
68
|
-
# @param parse_options [Hash] options to
|
76
|
+
# @param parse_options [Hash] options to pass to the parser
|
69
77
|
#
|
70
78
|
# @return [String] an XML string
|
71
79
|
def parse_text(text, parse_options={})
|
@@ -76,11 +84,12 @@ module Slaw
|
|
76
84
|
# Parse plain text into a syntax tree.
|
77
85
|
#
|
78
86
|
# @param text [String] the text to parse
|
79
|
-
# @param parse_options [Hash] options to
|
87
|
+
# @param parse_options [Hash] options to pass to the parser
|
80
88
|
#
|
81
89
|
# @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
|
82
90
|
def text_to_syntax_tree(text, parse_options={})
|
83
91
|
logger.info("Parsing...")
|
92
|
+
parse_options = @parse_options.dup.update(parse_options)
|
84
93
|
tree = @parser.parse(text, parse_options)
|
85
94
|
logger.info("Parsed!")
|
86
95
|
|
@@ -107,7 +116,14 @@ module Slaw
|
|
107
116
|
builder.akomaNtoso("xmlns:xsi"=> "http://www.w3.org/2001/XMLSchema-instance",
|
108
117
|
"xsi:schemaLocation" => "http://www.akomantoso.org/2.0 akomantoso20.xsd",
|
109
118
|
"xmlns" => NS) { |b|
|
110
|
-
|
119
|
+
args = [b]
|
120
|
+
|
121
|
+
# should we provide an id prefix?
|
122
|
+
arity = tree.method('to_xml').arity
|
123
|
+
arity = arity.abs-1 if arity < 0
|
124
|
+
args << (fragment_id_prefix || "") if arity > 1
|
125
|
+
|
126
|
+
tree.to_xml(*args)
|
111
127
|
}
|
112
128
|
|
113
129
|
s
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act.treetop
CHANGED
@@ -82,7 +82,7 @@ module Slaw
|
|
82
82
|
#
|
83
83
|
# Additionally, the section title is optional.
|
84
84
|
!{ |s| options[:section_number_after_title] }
|
85
|
-
section_title_prefix section_title:section_title_content? eol?
|
85
|
+
space? section_title_prefix section_title:section_title_content? eol?
|
86
86
|
<SectionTitleType2>
|
87
87
|
end
|
88
88
|
|
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -7,7 +7,7 @@ module Slaw
|
|
7
7
|
EXPRESSION_URI = "#{FRBR_URI}/eng@"
|
8
8
|
MANIFESTATION_URI = EXPRESSION_URI
|
9
9
|
|
10
|
-
def to_xml(b)
|
10
|
+
def to_xml(b, idprefix)
|
11
11
|
b.act(contains: "originalVersion") { |b|
|
12
12
|
write_meta(b)
|
13
13
|
write_preamble(b)
|
@@ -192,7 +192,7 @@ module Slaw
|
|
192
192
|
|
193
193
|
idprefix = "#{id}."
|
194
194
|
|
195
|
-
subsections.elements.each_with_index { |e, i| e.to_xml(b,
|
195
|
+
subsections.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
196
196
|
}
|
197
197
|
end
|
198
198
|
end
|
@@ -231,7 +231,7 @@ module Slaw
|
|
231
231
|
end
|
232
232
|
|
233
233
|
class Subsection < Treetop::Runtime::SyntaxNode
|
234
|
-
def to_xml(b,
|
234
|
+
def to_xml(b, idprefix, i=0)
|
235
235
|
if statement.is_a?(NumberedStatement)
|
236
236
|
attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
|
237
237
|
else
|
@@ -246,9 +246,9 @@ module Slaw
|
|
246
246
|
b.content { |b|
|
247
247
|
if blocklist and blocklist.is_a?(Blocklist)
|
248
248
|
if statement.content
|
249
|
-
blocklist.to_xml(b,
|
249
|
+
blocklist.to_xml(b, idprefix, i) { |b| b << statement.content.text_value }
|
250
250
|
else
|
251
|
-
blocklist.to_xml(b,
|
251
|
+
blocklist.to_xml(b, idprefix, i)
|
252
252
|
end
|
253
253
|
else
|
254
254
|
# raw content
|
@@ -283,7 +283,7 @@ module Slaw
|
|
283
283
|
class Blocklist < Treetop::Runtime::SyntaxNode
|
284
284
|
# Render a block list to xml. If a block is given,
|
285
285
|
# yield to it a builder to insert a listIntroduction node
|
286
|
-
def to_xml(b,
|
286
|
+
def to_xml(b, idprefix, i=0, &block)
|
287
287
|
id = idprefix + "list#{i}"
|
288
288
|
idprefix = id + '.'
|
289
289
|
|
@@ -382,7 +382,7 @@ module Slaw
|
|
382
382
|
end
|
383
383
|
end
|
384
384
|
|
385
|
-
def to_xml(b, i)
|
385
|
+
def to_xml(b, idprefix, i=0)
|
386
386
|
n = num.nil? ? i : num
|
387
387
|
id = "schedule-#{n}"
|
388
388
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|