slaw 3.0.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +5 -1
- data/bin/slaw +3 -0
- data/lib/slaw/grammars/terminals.treetop +0 -8
- data/lib/slaw/parse/builder.rb +35 -1
- data/lib/slaw/version.rb +1 -1
- data/slaw.gemspec +6 -6
- data/spec/generator_spec.rb +3 -3
- data/spec/za/act_block_spec.rb +3 -3
- metadata +15 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9caa9135d576212d4a7d6b2300bfe65fad7ef36cf938243f59ea002b25a41273
|
4
|
+
data.tar.gz: faf61f38ee828b2d0f3024ca42ee7fe6ad560acfde021f1cdcbd1cfacdc73fd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e69b0a96eb59ae723d8f3c217d7e0dfe13a236908f894061072a680d33e0e318496ec99f267672fcf49fb347cde79bc21c8bd9a2d449f315e6548b2f43cde84
|
7
|
+
data.tar.gz: 380eb2d660d556715ce1339d841c44efe07c5c88271ae10bc4d9e44d648c420788c60137b917daf6f1768d745b273ad8fa3b7b6559ccec873f201b305c129e37
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw)
|
1
|
+
# Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw) [![Gem Version](https://badge.fury.io/rb/slaw.svg)](https://badge.fury.io/rb/slaw)
|
2
2
|
|
3
3
|
Slaw is a lightweight library for generating Akoma Ntoso 2.0 Act XML from plain text documents.
|
4
4
|
It is used to power [Indigo](https://github.com/OpenUpSA/indigo) and uses grammars developed for the legal
|
@@ -84,6 +84,10 @@ You can create your own grammar by creating a gem that provides these files and
|
|
84
84
|
|
85
85
|
## Changelog
|
86
86
|
|
87
|
+
### 3.1.0 (29 March 2019)
|
88
|
+
|
89
|
+
* Add --ascii flag to %-encode utf-8 strings into US-ASCII for speed. See https://github.com/cjheath/treetop/issues/31
|
90
|
+
|
87
91
|
### 3.0.0 (28 March 2019)
|
88
92
|
|
89
93
|
* Inline bold and italics
|
data/bin/slaw
CHANGED
@@ -18,6 +18,7 @@ class SlawCLI < Thor
|
|
18
18
|
option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
|
19
19
|
option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
|
20
20
|
option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
|
21
|
+
option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
|
21
22
|
def parse(name)
|
22
23
|
logging
|
23
24
|
|
@@ -65,6 +66,8 @@ class SlawCLI < Thor
|
|
65
66
|
generator.parser.options[:section_number_after_title] = after
|
66
67
|
end
|
67
68
|
|
69
|
+
generator.builder.force_ascii = options[:ascii]
|
70
|
+
|
68
71
|
begin
|
69
72
|
act = generator.generate_from_text(text)
|
70
73
|
rescue Slaw::Parse::ParseError => e
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
require 'uri'
|
3
4
|
require 'treetop'
|
4
5
|
|
5
6
|
module Slaw
|
@@ -32,6 +33,9 @@ module Slaw
|
|
32
33
|
# Prefix to use when generating IDs for fragments
|
33
34
|
attr_accessor :fragment_id_prefix
|
34
35
|
|
36
|
+
# Should the text be re-encoded as ASCII before parsing?
|
37
|
+
attr_accessor :force_ascii
|
38
|
+
|
35
39
|
# Create a new builder.
|
36
40
|
#
|
37
41
|
# Specify either `:parser` or `:grammar_file` and `:grammar_class`.
|
@@ -41,6 +45,7 @@ module Slaw
|
|
41
45
|
def initialize(opts={})
|
42
46
|
@parser = opts[:parser]
|
43
47
|
@parse_options = opts[:parse_optiosn] || {}
|
48
|
+
@force_ascii = false
|
44
49
|
end
|
45
50
|
|
46
51
|
# Do all the work necessary to parse text into a well-formed XML document.
|
@@ -77,8 +82,37 @@ module Slaw
|
|
77
82
|
# @return [String] an XML string
|
78
83
|
def parse_text(text, parse_options={})
|
79
84
|
text = preprocess(text)
|
85
|
+
|
86
|
+
text = escape_utf8(text) if @force_ascii
|
87
|
+
|
80
88
|
tree = text_to_syntax_tree(text, parse_options)
|
81
|
-
xml_from_syntax_tree(tree)
|
89
|
+
xml = xml_from_syntax_tree(tree)
|
90
|
+
|
91
|
+
xml = unescape_utf8(xml) if @force_ascii
|
92
|
+
|
93
|
+
xml
|
94
|
+
end
|
95
|
+
|
96
|
+
# Use %-encoding to escape everything outside of the US_ASCII range,
|
97
|
+
# including encoding % itself.
|
98
|
+
#
|
99
|
+
# This can have a huge performance benefit. String lookups on utf-8 strings
|
100
|
+
# are linear in Ruby, while string lookups on US_ASCII encoded strings
|
101
|
+
# are constant time.
|
102
|
+
#
|
103
|
+
# This option can only be used if the grammar doesn't include non-ascii literals.
|
104
|
+
#
|
105
|
+
# See https://github.com/cjheath/treetop/issues/31
|
106
|
+
def escape_utf8(text)
|
107
|
+
unsafe = (0..126).to_a - ['%'.ord]
|
108
|
+
unsafe = unsafe.map { |i| '\u%04x' % i }
|
109
|
+
unsafe = Regexp.new('[^' + unsafe.join('') + ']')
|
110
|
+
|
111
|
+
URI::DEFAULT_PARSER.escape(text, unsafe)
|
112
|
+
end
|
113
|
+
|
114
|
+
def unescape_utf8(xml)
|
115
|
+
URI.unescape(xml)
|
82
116
|
end
|
83
117
|
|
84
118
|
# Parse plain text into a syntax tree.
|
data/lib/slaw/version.rb
CHANGED
data/slaw.gemspec
CHANGED
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "rake", "~>
|
22
|
-
spec.add_development_dependency "rspec", "~>
|
21
|
+
spec.add_development_dependency "rake", "~> 12.3"
|
22
|
+
spec.add_development_dependency "rspec", "~> 3.8"
|
23
23
|
|
24
|
-
spec.add_runtime_dependency "nokogiri", "~> 1.8
|
24
|
+
spec.add_runtime_dependency "nokogiri", "~> 1.8"
|
25
25
|
spec.add_runtime_dependency "treetop", "~> 1.5"
|
26
|
-
spec.add_runtime_dependency "log4r", "~> 1.1
|
27
|
-
spec.add_runtime_dependency "thor", "~> 0.
|
28
|
-
spec.add_runtime_dependency "mimemagic", "~> 0.2
|
26
|
+
spec.add_runtime_dependency "log4r", "~> 1.1"
|
27
|
+
spec.add_runtime_dependency "thor", "~> 0.20"
|
28
|
+
spec.add_runtime_dependency "mimemagic", "~> 0.2"
|
29
29
|
end
|
data/spec/generator_spec.rb
CHANGED
@@ -16,13 +16,13 @@ Section title
|
|
16
16
|
Another section title
|
17
17
|
2. Section content that is long.
|
18
18
|
"
|
19
|
-
subject.guess_section_number_after_title(text).should
|
19
|
+
subject.guess_section_number_after_title(text).should be true
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
context 'section number before title' do
|
24
24
|
it 'should default to false' do
|
25
|
-
subject.guess_section_number_after_title("").should
|
25
|
+
subject.guess_section_number_after_title("").should be false
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'should be false' do
|
@@ -36,7 +36,7 @@ Some content.
|
|
36
36
|
|
37
37
|
Some content.
|
38
38
|
"
|
39
|
-
subject.guess_section_number_after_title(text).should
|
39
|
+
subject.guess_section_number_after_title(text).should be false
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -1973,12 +1973,12 @@ EOS
|
|
1973
1973
|
it 'should handle a clause with a remark' do
|
1974
1974
|
node = parse :inline_items, "simple [[remark]]. text"
|
1975
1975
|
node.text_value.should == "simple [[remark]]. text"
|
1976
|
-
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should
|
1976
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
|
1977
1977
|
|
1978
1978
|
node = parse :inline_items, "simple [[remark]][[another]] text"
|
1979
1979
|
node.text_value.should == "simple [[remark]][[another]] text"
|
1980
|
-
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should
|
1981
|
-
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should
|
1980
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
|
1981
|
+
node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
|
1982
1982
|
end
|
1983
1983
|
end
|
1984
1984
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -16,42 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: '12.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '12.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '3.8'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '3.8'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.8
|
47
|
+
version: '1.8'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.8
|
54
|
+
version: '1.8'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: treetop
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,42 +72,42 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.1
|
75
|
+
version: '1.1'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.1
|
82
|
+
version: '1.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: thor
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: '0.20'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: '0.20'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: mimemagic
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.2
|
103
|
+
version: '0.2'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.2
|
110
|
+
version: '0.2'
|
111
111
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
112
112
|
acts from plain text and PDF documents.
|
113
113
|
email:
|
@@ -181,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
181
|
- !ruby/object:Gem::Version
|
182
182
|
version: '0'
|
183
183
|
requirements: []
|
184
|
-
|
185
|
-
rubygems_version: 2.6.12
|
184
|
+
rubygems_version: 3.0.3
|
186
185
|
signing_key:
|
187
186
|
specification_version: 4
|
188
187
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|