slaw 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 655dd4179f692952514dc6341d332b92bb5ed09f
4
- data.tar.gz: f94a6f5d09da791132c901343cdc7c0f228822ec
2
+ SHA256:
3
+ metadata.gz: 9caa9135d576212d4a7d6b2300bfe65fad7ef36cf938243f59ea002b25a41273
4
+ data.tar.gz: faf61f38ee828b2d0f3024ca42ee7fe6ad560acfde021f1cdcbd1cfacdc73fd4
5
5
  SHA512:
6
- metadata.gz: dae4cc911be85f416e81252489f7914d8da142aa0eacf6e92a15c4f314f7569394eb718cad620ccabe098c352c87f02c4a5ae0bdb8cc7430523a4db1f65add54
7
- data.tar.gz: ea1bc757d9a29b0f998366f36bd5c5baec6b2b0196705369a06a9ded88aafb1740ac27e364a9ccd1b5320a14a00cb57179911294cd858fd94f5106a2ff5f2d18
6
+ metadata.gz: 7e69b0a96eb59ae723d8f3c217d7e0dfe13a236908f894061072a680d33e0e318496ec99f267672fcf49fb347cde79bc21c8bd9a2d449f315e6548b2f43cde84
7
+ data.tar.gz: 380eb2d660d556715ce1339d841c44efe07c5c88271ae10bc4d9e44d648c420788c60137b917daf6f1768d745b273ad8fa3b7b6559ccec873f201b305c129e37
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw)
1
+ # Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw) [![Gem Version](https://badge.fury.io/rb/slaw.svg)](https://badge.fury.io/rb/slaw)
2
2
 
3
3
  Slaw is a lightweight library for generating Akoma Ntoso 2.0 Act XML from plain text documents.
4
4
  It is used to power [Indigo](https://github.com/OpenUpSA/indigo) and uses grammars developed for the legal
@@ -84,6 +84,10 @@ You can create your own grammar by creating a gem that provides these files and
84
84
 
85
85
  ## Changelog
86
86
 
87
+ ### 3.1.0 (29 March 2019)
88
+
89
+ * Add --ascii flag to %-encode utf-8 strings into US-ASCII for speed. See https://github.com/cjheath/treetop/issues/31
90
+
87
91
  ### 3.0.0 (28 March 2019)
88
92
 
89
93
  * Inline bold and italics
data/bin/slaw CHANGED
@@ -18,6 +18,7 @@ class SlawCLI < Thor
18
18
  option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
19
19
  option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
20
20
  option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
21
+ option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
21
22
  def parse(name)
22
23
  logging
23
24
 
@@ -65,6 +66,8 @@ class SlawCLI < Thor
65
66
  generator.parser.options[:section_number_after_title] = after
66
67
  end
67
68
 
69
+ generator.builder.force_ascii = options[:ascii]
70
+
68
71
  begin
69
72
  act = generator.generate_from_text(text)
70
73
  rescue Slaw::Parse::ParseError => e
@@ -48,14 +48,6 @@ module Slaw
48
48
  [a-zA-Z0-9]+
49
49
  end
50
50
 
51
- rule quotes
52
- ["“”]
53
- end
54
-
55
- rule non_quotes
56
- [^"“”]
57
- end
58
-
59
51
  ##########
60
52
  # whitespace
61
53
 
@@ -1,5 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ require 'uri'
3
4
  require 'treetop'
4
5
 
5
6
  module Slaw
@@ -32,6 +33,9 @@ module Slaw
32
33
  # Prefix to use when generating IDs for fragments
33
34
  attr_accessor :fragment_id_prefix
34
35
 
36
+ # Should the parser re-encode the string as ASCII before parsing?
37
+ attr_accessor :force_ascii
38
+
35
39
  # Create a new builder.
36
40
  #
37
41
  # Specify either `:parser` or `:grammar_file` and `:grammar_class`.
@@ -41,6 +45,7 @@ module Slaw
41
45
  def initialize(opts={})
42
46
  @parser = opts[:parser]
43
47
  @parse_options = opts[:parse_optiosn] || {}
48
+ @force_ascii = false
44
49
  end
45
50
 
46
51
  # Do all the work necessary to parse text into a well-formed XML document.
@@ -77,8 +82,37 @@ module Slaw
77
82
  # @return [String] an XML string
78
83
  def parse_text(text, parse_options={})
79
84
  text = preprocess(text)
85
+
86
+ text = escape_utf8(text) if @force_ascii
87
+
80
88
  tree = text_to_syntax_tree(text, parse_options)
81
- xml_from_syntax_tree(tree)
89
+ xml = xml_from_syntax_tree(tree)
90
+
91
+ xml = unescape_utf8(xml) if @force_ascii
92
+
93
+ xml
94
+ end
95
+
96
+ # Use %-encoding to escape everything outside of the US_ASCII range,
97
+ # including encoding % itself.
98
+ #
99
+ # This can have a huge performance benefit. Character-offset lookups on UTF-8 strings
100
+ # are linear in Ruby, while the same lookups on US-ASCII encoded strings
101
+ # are constant time.
102
+ #
103
+ # This option can only be used if the grammar doesn't include non-ascii literals.
104
+ #
105
+ # See https://github.com/cjheath/treetop/issues/31
106
+ def escape_utf8(text)
107
+ unsafe = (0..126).to_a - ['%'.ord]
108
+ unsafe = unsafe.map { |i| '\u%04x' % i }
109
+ unsafe = Regexp.new('[^' + unsafe.join('') + ']')
110
+
111
+ URI::DEFAULT_PARSER.escape(text, unsafe)
112
+ end
113
+
114
+ def unescape_utf8(xml)
115
+ URI.unescape(xml)
82
116
  end
83
117
 
84
118
  # Parse plain text into a syntax tree.
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "3.0.0"
2
+ VERSION = "3.1.0"
3
3
  end
data/slaw.gemspec CHANGED
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_development_dependency "rake", "~> 10.3.1"
22
- spec.add_development_dependency "rspec", "~> 2.14.1"
21
+ spec.add_development_dependency "rake", "~> 12.3"
22
+ spec.add_development_dependency "rspec", "~> 3.8"
23
23
 
24
- spec.add_runtime_dependency "nokogiri", "~> 1.8.5"
24
+ spec.add_runtime_dependency "nokogiri", "~> 1.8"
25
25
  spec.add_runtime_dependency "treetop", "~> 1.5"
26
- spec.add_runtime_dependency "log4r", "~> 1.1.10"
27
- spec.add_runtime_dependency "thor", "~> 0.19.1"
28
- spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
26
+ spec.add_runtime_dependency "log4r", "~> 1.1"
27
+ spec.add_runtime_dependency "thor", "~> 0.20"
28
+ spec.add_runtime_dependency "mimemagic", "~> 0.2"
29
29
  end
@@ -16,13 +16,13 @@ Section title
16
16
  Another section title
17
17
  2. Section content that is long.
18
18
  "
19
- subject.guess_section_number_after_title(text).should be_true
19
+ subject.guess_section_number_after_title(text).should be true
20
20
  end
21
21
  end
22
22
 
23
23
  context 'section number before title' do
24
24
  it 'should default to false' do
25
- subject.guess_section_number_after_title("").should be_false
25
+ subject.guess_section_number_after_title("").should be false
26
26
  end
27
27
 
28
28
  it 'should be false' do
@@ -36,7 +36,7 @@ Some content.
36
36
 
37
37
  Some content.
38
38
  "
39
- subject.guess_section_number_after_title(text).should be_false
39
+ subject.guess_section_number_after_title(text).should be false
40
40
  end
41
41
  end
42
42
  end
@@ -1973,12 +1973,12 @@ EOS
1973
1973
  it 'should handle a clause with a remark' do
1974
1974
  node = parse :inline_items, "simple [[remark]]. text"
1975
1975
  node.text_value.should == "simple [[remark]]. text"
1976
- node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
1976
+ node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
1977
1977
 
1978
1978
  node = parse :inline_items, "simple [[remark]][[another]] text"
1979
1979
  node.text_value.should == "simple [[remark]][[another]] text"
1980
- node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
1981
- node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
1980
+ node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
1981
+ node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
1982
1982
  end
1983
1983
  end
1984
1984
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-28 00:00:00.000000000 Z
11
+ date: 2019-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -16,42 +16,42 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 10.3.1
19
+ version: '12.3'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 10.3.1
26
+ version: '12.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.14.1
33
+ version: '3.8'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.14.1
40
+ version: '3.8'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 1.8.5
47
+ version: '1.8'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 1.8.5
54
+ version: '1.8'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: treetop
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -72,42 +72,42 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.1.10
75
+ version: '1.1'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.1.10
82
+ version: '1.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: thor
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 0.19.1
89
+ version: '0.20'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 0.19.1
96
+ version: '0.20'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: mimemagic
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 0.2.1
103
+ version: '0.2'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 0.2.1
110
+ version: '0.2'
111
111
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
112
112
  acts from plain text and PDF documents.
113
113
  email:
@@ -181,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
181
  - !ruby/object:Gem::Version
182
182
  version: '0'
183
183
  requirements: []
184
- rubyforge_project:
185
- rubygems_version: 2.6.12
184
+ rubygems_version: 3.0.3
186
185
  signing_key:
187
186
  specification_version: 4
188
187
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.