slaw 0.6.0 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/slaw +20 -0
- data/lib/slaw/act.rb +28 -17
- data/lib/slaw/generator.rb +3 -0
- data/lib/slaw/parse/builder.rb +19 -3
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +1 -1
- data/lib/slaw/za/act_nodes.rb +7 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75915642fffe0d65b3d4386745c12cd019e3d715
|
4
|
+
data.tar.gz: fb183f90a301b479fd4d422d7da479f8796e808d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acf9d6a07f1a927461394721228a8ebd6548fe79c5604e59f36463bdb4bd0a25df1bafa8aa391a937ed45756f1a70f85e9c48469d752c92817c04f392f62c169
|
7
|
+
data.tar.gz: 4ada4ce9e47e9d71bdcf55f1fc8c94c7977e03711b46d0ec86ceae17f8df5a2efcc7a23aa171adf967a0c24a3fd889d80f2aefdee3ebd6c6fb0f0893f14598d6
|
data/bin/slaw
CHANGED
@@ -12,9 +12,15 @@ class SlawCLI < Thor
|
|
12
12
|
option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
|
13
13
|
option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
|
14
14
|
option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
|
15
|
+
option :fragment, type: :string, desc: "Akoma Ntoso element name that the imported text represents. Support depends on the grammar."
|
16
|
+
option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
|
15
17
|
def parse(name)
|
16
18
|
logging
|
17
19
|
|
20
|
+
if options[:fragment] and options[:definitions]
|
21
|
+
raise Thor::Error.new("--definitions can't be used together with --fragment")
|
22
|
+
end
|
23
|
+
|
18
24
|
Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
|
19
25
|
extractor = Slaw::Extract::Extractor.new
|
20
26
|
|
@@ -28,6 +34,20 @@ class SlawCLI < Thor
|
|
28
34
|
end
|
29
35
|
|
30
36
|
generator = Slaw::ActGenerator.new
|
37
|
+
|
38
|
+
if options[:fragment]
|
39
|
+
generator.document_class = Slaw::Fragment
|
40
|
+
fragment = options[:fragment]
|
41
|
+
fragment = 'act' if fragment.downcase == 'akomantoso'
|
42
|
+
generator.builder.parse_options[:root] = fragment
|
43
|
+
|
44
|
+
if options[:id_prefix]
|
45
|
+
prefix = options[:id_prefix]
|
46
|
+
prefix += "." unless prefix.end_with?('.')
|
47
|
+
generator.builder.fragment_id_prefix = prefix
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
31
51
|
act = generator.generate_from_text(text)
|
32
52
|
|
33
53
|
# definitions?
|
data/lib/slaw/act.rb
CHANGED
@@ -1,4 +1,31 @@
|
|
1
1
|
module Slaw
|
2
|
+
class AknBase
|
3
|
+
include Slaw::Namespace
|
4
|
+
|
5
|
+
attr_accessor :doc
|
6
|
+
|
7
|
+
# Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
|
8
|
+
# The most useful argument is usually `indent: 2` if you like your XML perdy.
|
9
|
+
#
|
10
|
+
# @return [String] serialized XML
|
11
|
+
def to_xml(*args)
|
12
|
+
@doc.to_xml(*args)
|
13
|
+
end
|
14
|
+
|
15
|
+
# Parse the XML contained in the file-like or String object `io`
|
16
|
+
#
|
17
|
+
# @param io [String, file-like] io object or String with XML
|
18
|
+
def parse(io)
|
19
|
+
self.doc = Nokogiri::XML(io)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# A fragment is a part of a larger document and doesn't have the context associated
|
24
|
+
# with the document.
|
25
|
+
class Fragment < AknBase
|
26
|
+
alias_method :fragment, :doc
|
27
|
+
end
|
28
|
+
|
2
29
|
# An Act wraps a single {http://www.akomantoso.org/ AkomaNtoso 2.0 XML} act document in the form of a
|
3
30
|
# Nokogiri::XML::Document object.
|
4
31
|
#
|
@@ -7,8 +34,7 @@ module Slaw
|
|
7
34
|
# identifying whether it has been amended ({#amended?}), repealed
|
8
35
|
# ({#repealed?}) or what chapters ({#chapters}), parts ({#parts}) and
|
9
36
|
# sections ({#sections}) it contains.
|
10
|
-
class Act
|
11
|
-
include Slaw::Namespace
|
37
|
+
class Act < AknBase
|
12
38
|
|
13
39
|
# Allow us to jump from the XML document for an act to the
|
14
40
|
# Act instance itself
|
@@ -66,13 +92,6 @@ module Slaw
|
|
66
92
|
|
67
93
|
File.open(filename) { |f| parse(f) }
|
68
94
|
end
|
69
|
-
|
70
|
-
# Parse the XML contained in the file-like or String object `io`
|
71
|
-
#
|
72
|
-
# @param io [String, file-like] io object or String with XML
|
73
|
-
def parse(io)
|
74
|
-
self.doc = Nokogiri::XML(io)
|
75
|
-
end
|
76
95
|
|
77
96
|
# Set the XML document backing this bylaw.
|
78
97
|
#
|
@@ -403,14 +422,6 @@ module Slaw
|
|
403
422
|
validate.empty?
|
404
423
|
end
|
405
424
|
|
406
|
-
# Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
|
407
|
-
# The most useful argument is usually `indent: 2` if you like your XML perdy.
|
408
|
-
#
|
409
|
-
# @return [String] serialized XML
|
410
|
-
def to_xml(*args)
|
411
|
-
@doc.to_xml(*args)
|
412
|
-
end
|
413
|
-
|
414
425
|
def inspect
|
415
426
|
"<#{self.class.name} @id_uri=\"#{@id_uri}\">"
|
416
427
|
end
|
data/lib/slaw/generator.rb
CHANGED
@@ -9,6 +9,9 @@ module Slaw
|
|
9
9
|
# [Slaw::Parse::Builder] builder used by the generator
|
10
10
|
attr_accessor :builder
|
11
11
|
|
12
|
+
# The type that will hold the generated document
|
13
|
+
attr_accessor :document_class
|
14
|
+
|
12
15
|
def initialize
|
13
16
|
@parser = Slaw::ZA::ActParser.new
|
14
17
|
@builder = Slaw::Parse::Builder.new(parser: @parser)
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -26,6 +26,12 @@ module Slaw
|
|
26
26
|
|
27
27
|
@@parsers = {}
|
28
28
|
|
29
|
+
# Additional hash of options to be provided to the parser when parsing.
|
30
|
+
attr_accessor :parse_options
|
31
|
+
|
32
|
+
# Prefix to use when generating IDs for fragments
|
33
|
+
attr_accessor :fragment_id_prefix
|
34
|
+
|
29
35
|
# Create a new builder.
|
30
36
|
#
|
31
37
|
# Specify either `:parser` or `:grammar_file` and `:grammar_class`.
|
@@ -49,6 +55,8 @@ module Slaw
|
|
49
55
|
else
|
50
56
|
raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
|
51
57
|
end
|
58
|
+
|
59
|
+
@parse_options = {}
|
52
60
|
end
|
53
61
|
|
54
62
|
# Do all the work necessary to parse text into a well-formed XML document.
|
@@ -65,7 +73,7 @@ module Slaw
|
|
65
73
|
# resulting XML to normalise it.
|
66
74
|
#
|
67
75
|
# @param text [String] the text to parse
|
68
|
-
# @param parse_options [Hash] options to
|
76
|
+
# @param parse_options [Hash] options to pass to the parser
|
69
77
|
#
|
70
78
|
# @return [String] an XML string
|
71
79
|
def parse_text(text, parse_options={})
|
@@ -76,11 +84,12 @@ module Slaw
|
|
76
84
|
# Parse plain text into a syntax tree.
|
77
85
|
#
|
78
86
|
# @param text [String] the text to parse
|
79
|
-
# @param parse_options [Hash] options to
|
87
|
+
# @param parse_options [Hash] options to pass to the parser
|
80
88
|
#
|
81
89
|
# @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
|
82
90
|
def text_to_syntax_tree(text, parse_options={})
|
83
91
|
logger.info("Parsing...")
|
92
|
+
parse_options = @parse_options.dup.update(parse_options)
|
84
93
|
tree = @parser.parse(text, parse_options)
|
85
94
|
logger.info("Parsed!")
|
86
95
|
|
@@ -107,7 +116,14 @@ module Slaw
|
|
107
116
|
builder.akomaNtoso("xmlns:xsi"=> "http://www.w3.org/2001/XMLSchema-instance",
|
108
117
|
"xsi:schemaLocation" => "http://www.akomantoso.org/2.0 akomantoso20.xsd",
|
109
118
|
"xmlns" => NS) { |b|
|
110
|
-
|
119
|
+
args = [b]
|
120
|
+
|
121
|
+
# should we provide an id prefix?
|
122
|
+
arity = tree.method('to_xml').arity
|
123
|
+
arity = arity.abs-1 if arity < 0
|
124
|
+
args << (fragment_id_prefix || "") if arity > 1
|
125
|
+
|
126
|
+
tree.to_xml(*args)
|
111
127
|
}
|
112
128
|
|
113
129
|
s
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act.treetop
CHANGED
@@ -82,7 +82,7 @@ module Slaw
|
|
82
82
|
#
|
83
83
|
# Additionally, the section title is optional.
|
84
84
|
!{ |s| options[:section_number_after_title] }
|
85
|
-
section_title_prefix section_title:section_title_content? eol?
|
85
|
+
space? section_title_prefix section_title:section_title_content? eol?
|
86
86
|
<SectionTitleType2>
|
87
87
|
end
|
88
88
|
|
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -7,7 +7,7 @@ module Slaw
|
|
7
7
|
EXPRESSION_URI = "#{FRBR_URI}/eng@"
|
8
8
|
MANIFESTATION_URI = EXPRESSION_URI
|
9
9
|
|
10
|
-
def to_xml(b)
|
10
|
+
def to_xml(b, idprefix)
|
11
11
|
b.act(contains: "originalVersion") { |b|
|
12
12
|
write_meta(b)
|
13
13
|
write_preamble(b)
|
@@ -192,7 +192,7 @@ module Slaw
|
|
192
192
|
|
193
193
|
idprefix = "#{id}."
|
194
194
|
|
195
|
-
subsections.elements.each_with_index { |e, i| e.to_xml(b,
|
195
|
+
subsections.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
196
196
|
}
|
197
197
|
end
|
198
198
|
end
|
@@ -231,7 +231,7 @@ module Slaw
|
|
231
231
|
end
|
232
232
|
|
233
233
|
class Subsection < Treetop::Runtime::SyntaxNode
|
234
|
-
def to_xml(b,
|
234
|
+
def to_xml(b, idprefix, i=0)
|
235
235
|
if statement.is_a?(NumberedStatement)
|
236
236
|
attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
|
237
237
|
else
|
@@ -246,9 +246,9 @@ module Slaw
|
|
246
246
|
b.content { |b|
|
247
247
|
if blocklist and blocklist.is_a?(Blocklist)
|
248
248
|
if statement.content
|
249
|
-
blocklist.to_xml(b,
|
249
|
+
blocklist.to_xml(b, idprefix, i) { |b| b << statement.content.text_value }
|
250
250
|
else
|
251
|
-
blocklist.to_xml(b,
|
251
|
+
blocklist.to_xml(b, idprefix, i)
|
252
252
|
end
|
253
253
|
else
|
254
254
|
# raw content
|
@@ -283,7 +283,7 @@ module Slaw
|
|
283
283
|
class Blocklist < Treetop::Runtime::SyntaxNode
|
284
284
|
# Render a block list to xml. If a block is given,
|
285
285
|
# yield to it a builder to insert a listIntroduction node
|
286
|
-
def to_xml(b,
|
286
|
+
def to_xml(b, idprefix, i=0, &block)
|
287
287
|
id = idprefix + "list#{i}"
|
288
288
|
idprefix = id + '.'
|
289
289
|
|
@@ -382,7 +382,7 @@ module Slaw
|
|
382
382
|
end
|
383
383
|
end
|
384
384
|
|
385
|
-
def to_xml(b, i)
|
385
|
+
def to_xml(b, idprefix, i=0)
|
386
386
|
n = num.nil? ? i : num
|
387
387
|
id = "schedule-#{n}"
|
388
388
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|