slaw 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +5 -1
- data/bin/slaw +3 -0
- data/lib/slaw/grammars/terminals.treetop +0 -8
- data/lib/slaw/parse/builder.rb +35 -1
- data/lib/slaw/version.rb +1 -1
- data/slaw.gemspec +6 -6
- data/spec/generator_spec.rb +3 -3
- data/spec/za/act_block_spec.rb +3 -3
- metadata +15 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9caa9135d576212d4a7d6b2300bfe65fad7ef36cf938243f59ea002b25a41273
|
4
|
+
data.tar.gz: faf61f38ee828b2d0f3024ca42ee7fe6ad560acfde021f1cdcbd1cfacdc73fd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e69b0a96eb59ae723d8f3c217d7e0dfe13a236908f894061072a680d33e0e318496ec99f267672fcf49fb347cde79bc21c8bd9a2d449f315e6548b2f43cde84
|
7
|
+
data.tar.gz: 380eb2d660d556715ce1339d841c44efe07c5c88271ae10bc4d9e44d648c420788c60137b917daf6f1768d745b273ad8fa3b7b6559ccec873f201b305c129e37
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Slaw [](http://travis-ci.org/longhotsummer/slaw)
|
1
|
+
# Slaw [](http://travis-ci.org/longhotsummer/slaw) [](https://badge.fury.io/rb/slaw)
|
2
2
|
|
3
3
|
Slaw is a lightweight library for generating Akoma Ntoso 2.0 Act XML from plain text documents.
|
4
4
|
It is used to power [Indigo](https://github.com/OpenUpSA/indigo) and uses grammars developed for the legal
|
@@ -84,6 +84,10 @@ You can create your own grammar by creating a gem that provides these files and
|
|
84
84
|
|
85
85
|
## Changelog
|
86
86
|
|
87
|
+
### 3.1.0 (29 March 2019)
|
88
|
+
|
89
|
+
* Add --ascii flag to %-encode utf-8 strings into US-ASCII for speed. See https://github.com/cjheath/treetop/issues/31
|
90
|
+
|
87
91
|
### 3.0.0 (28 March 2019)
|
88
92
|
|
89
93
|
* Inline bold and italics
|
data/bin/slaw
CHANGED
@@ -18,6 +18,7 @@ class SlawCLI < Thor
|
|
18
18
|
option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
|
19
19
|
option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
|
20
20
|
option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
|
21
|
+
option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
|
21
22
|
def parse(name)
|
22
23
|
logging
|
23
24
|
|
@@ -65,6 +66,8 @@ class SlawCLI < Thor
|
|
65
66
|
generator.parser.options[:section_number_after_title] = after
|
66
67
|
end
|
67
68
|
|
69
|
+
generator.builder.force_ascii = options[:ascii]
|
70
|
+
|
68
71
|
begin
|
69
72
|
act = generator.generate_from_text(text)
|
70
73
|
rescue Slaw::Parse::ParseError => e
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
require 'uri'
|
3
4
|
require 'treetop'
|
4
5
|
|
5
6
|
module Slaw
|
@@ -32,6 +33,9 @@ module Slaw
|
|
32
33
|
# Prefix to use when generating IDs for fragments
|
33
34
|
attr_accessor :fragment_id_prefix
|
34
35
|
|
36
|
+
# Should the parser re-encode the string as ASCII?
|
37
|
+
attr_accessor :force_ascii
|
38
|
+
|
35
39
|
# Create a new builder.
|
36
40
|
#
|
37
41
|
# Specify either `:parser` or `:grammar_file` and `:grammar_class`.
|
@@ -41,6 +45,7 @@ module Slaw
|
|
41
45
|
def initialize(opts={})
|
42
46
|
@parser = opts[:parser]
|
43
47
|
@parse_options = opts[:parse_optiosn] || {}
|
48
|
+
@force_ascii = false
|
44
49
|
end
|
45
50
|
|
46
51
|
# Do all the work necessary to parse text into a well-formed XML document.
|
@@ -77,8 +82,37 @@ module Slaw
|
|
77
82
|
# @return [String] an XML string
|
78
83
|
def parse_text(text, parse_options={})
|
79
84
|
text = preprocess(text)
|
85
|
+
|
86
|
+
text = escape_utf8(text) if @force_ascii
|
87
|
+
|
80
88
|
tree = text_to_syntax_tree(text, parse_options)
|
81
|
-
xml_from_syntax_tree(tree)
|
89
|
+
xml = xml_from_syntax_tree(tree)
|
90
|
+
|
91
|
+
xml = unescape_utf8(xml) if @force_ascii
|
92
|
+
|
93
|
+
xml
|
94
|
+
end
|
95
|
+
|
96
|
+
# Use %-encoding to escape everything outside of the US_ASCII range,
|
97
|
+
# including encoding % itself.
|
98
|
+
#
|
99
|
+
# This can have a huge performance benefit. String lookups on utf-8 strings
|
100
|
+
# are linear in Ruby, while string lookups on US_ASCII encoded strings
|
101
|
+
# are constant time.
|
102
|
+
#
|
103
|
+
# This option can only be used if the grammar doesn't include non-ascii literals.
|
104
|
+
#
|
105
|
+
# See https://github.com/cjheath/treetop/issues/31
|
106
|
+
def escape_utf8(text)
|
107
|
+
unsafe = (0..126).to_a - ['%'.ord]
|
108
|
+
unsafe = unsafe.map { |i| '\u%04x' % i }
|
109
|
+
unsafe = Regexp.new('[^' + unsafe.join('') + ']')
|
110
|
+
|
111
|
+
URI::DEFAULT_PARSER.escape(text, unsafe)
|
112
|
+
end
|
113
|
+
|
114
|
+
def unescape_utf8(xml)
|
115
|
+
URI.unescape(xml)
|
82
116
|
end
|
83
117
|
|
84
118
|
# Parse plain text into a syntax tree.
|
data/lib/slaw/version.rb
CHANGED
data/slaw.gemspec
CHANGED
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "rake", "~>
|
22
|
-
spec.add_development_dependency "rspec", "~>
|
21
|
+
spec.add_development_dependency "rake", "~> 12.3"
|
22
|
+
spec.add_development_dependency "rspec", "~> 3.8"
|
23
23
|
|
24
|
-
spec.add_runtime_dependency "nokogiri", "~> 1.8
|
24
|
+
spec.add_runtime_dependency "nokogiri", "~> 1.8"
|
25
25
|
spec.add_runtime_dependency "treetop", "~> 1.5"
|
26
|
-
spec.add_runtime_dependency "log4r", "~> 1.1
|
27
|
-
spec.add_runtime_dependency "thor", "~> 0.
|
28
|
-
spec.add_runtime_dependency "mimemagic", "~> 0.2
|
26
|
+
spec.add_runtime_dependency "log4r", "~> 1.1"
|
27
|
+
spec.add_runtime_dependency "thor", "~> 0.20"
|
28
|
+
spec.add_runtime_dependency "mimemagic", "~> 0.2"
|
29
29
|
end
|
data/spec/generator_spec.rb
CHANGED
@@ -16,13 +16,13 @@ Section title
|
|
16
16
|
Another section title
|
17
17
|
2. Section content that is long.
|
18
18
|
"
|
19
|
-
subject.guess_section_number_after_title(text).should
|
19
|
+
subject.guess_section_number_after_title(text).should be true
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
context 'section number before title' do
|
24
24
|
it 'should default to false' do
|
25
|
-
subject.guess_section_number_after_title("").should
|
25
|
+
subject.guess_section_number_after_title("").should be false
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'should be false' do
|
@@ -36,7 +36,7 @@ Some content.
|
|
36
36
|
|
37
37
|
Some content.
|
38
38
|
"
|
39
|
-
subject.guess_section_number_after_title(text).should
|
39
|
+
subject.guess_section_number_after_title(text).should be false
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -1973,12 +1973,12 @@ EOS
|
|
1973
1973
|
it 'should handle a clause with a remark' do
|
1974
1974
|
node = parse :inline_items, "simple [[remark]]. text"
|
1975
1975
|
node.text_value.should == "simple [[remark]]. text"
|
1976
|
-
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should
|
1976
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
|
1977
1977
|
|
1978
1978
|
node = parse :inline_items, "simple [[remark]][[another]] text"
|
1979
1979
|
node.text_value.should == "simple [[remark]][[another]] text"
|
1980
|
-
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should
|
1981
|
-
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should
|
1980
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
|
1981
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
|
1982
1982
|
end
|
1983
1983
|
end
|
1984
1984
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -16,42 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: '12.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '12.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '3.8'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '3.8'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.8
|
47
|
+
version: '1.8'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.8
|
54
|
+
version: '1.8'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: treetop
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,42 +72,42 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.1
|
75
|
+
version: '1.1'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.1
|
82
|
+
version: '1.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: thor
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: '0.20'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: '0.20'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: mimemagic
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.2
|
103
|
+
version: '0.2'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.2
|
110
|
+
version: '0.2'
|
111
111
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
112
112
|
acts from plain text and PDF documents.
|
113
113
|
email:
|
@@ -181,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
181
|
- !ruby/object:Gem::Version
|
182
182
|
version: '0'
|
183
183
|
requirements: []
|
184
|
-
|
185
|
-
rubygems_version: 2.6.12
|
184
|
+
rubygems_version: 3.0.3
|
186
185
|
signing_key:
|
187
186
|
specification_version: 4
|
188
187
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|