slaw 3.0.0 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 655dd4179f692952514dc6341d332b92bb5ed09f
4
- data.tar.gz: f94a6f5d09da791132c901343cdc7c0f228822ec
2
+ SHA256:
3
+ metadata.gz: 9caa9135d576212d4a7d6b2300bfe65fad7ef36cf938243f59ea002b25a41273
4
+ data.tar.gz: faf61f38ee828b2d0f3024ca42ee7fe6ad560acfde021f1cdcbd1cfacdc73fd4
5
5
  SHA512:
6
- metadata.gz: dae4cc911be85f416e81252489f7914d8da142aa0eacf6e92a15c4f314f7569394eb718cad620ccabe098c352c87f02c4a5ae0bdb8cc7430523a4db1f65add54
7
- data.tar.gz: ea1bc757d9a29b0f998366f36bd5c5baec6b2b0196705369a06a9ded88aafb1740ac27e364a9ccd1b5320a14a00cb57179911294cd858fd94f5106a2ff5f2d18
6
+ metadata.gz: 7e69b0a96eb59ae723d8f3c217d7e0dfe13a236908f894061072a680d33e0e318496ec99f267672fcf49fb347cde79bc21c8bd9a2d449f315e6548b2f43cde84
7
+ data.tar.gz: 380eb2d660d556715ce1339d841c44efe07c5c88271ae10bc4d9e44d648c420788c60137b917daf6f1768d745b273ad8fa3b7b6559ccec873f201b305c129e37
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw)
1
+ # Slaw [![Build Status](https://travis-ci.org/longhotsummer/slaw.svg)](http://travis-ci.org/longhotsummer/slaw) [![Gem Version](https://badge.fury.io/rb/slaw.svg)](https://badge.fury.io/rb/slaw)
2
2
 
3
3
  Slaw is a lightweight library for generating Akoma Ntoso 2.0 Act XML from plain text documents.
4
4
  It is used to power [Indigo](https://github.com/OpenUpSA/indigo) and uses grammars developed for the legal
@@ -84,6 +84,10 @@ You can create your own grammar by creating a gem that provides these files and
84
84
 
85
85
  ## Changelog
86
86
 
87
+ ### 3.1.0 (29 March 2019)
88
+
89
+ * Add --ascii flag to %-encode utf-8 strings into US-ASCII for speed. See https://github.com/cjheath/treetop/issues/31
90
+
87
91
  ### 3.0.0 (28 March 2019)
88
92
 
89
93
  * Inline bold and italics
data/bin/slaw CHANGED
@@ -18,6 +18,7 @@ class SlawCLI < Thor
18
18
  option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
19
19
  option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
20
20
  option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
21
+ option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
21
22
  def parse(name)
22
23
  logging
23
24
 
@@ -65,6 +66,8 @@ class SlawCLI < Thor
65
66
  generator.parser.options[:section_number_after_title] = after
66
67
  end
67
68
 
69
+ generator.builder.force_ascii = options[:ascii]
70
+
68
71
  begin
69
72
  act = generator.generate_from_text(text)
70
73
  rescue Slaw::Parse::ParseError => e
@@ -48,14 +48,6 @@ module Slaw
48
48
  [a-zA-Z0-9]+
49
49
  end
50
50
 
51
- rule quotes
52
- ["“”]
53
- end
54
-
55
- rule non_quotes
56
- [^"“”]
57
- end
58
-
59
51
  ##########
60
52
  # whitespace
61
53
 
@@ -1,5 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ require 'uri'
3
4
  require 'treetop'
4
5
 
5
6
  module Slaw
@@ -32,6 +33,9 @@ module Slaw
32
33
  # Prefix to use when generating IDs for fragments
33
34
  attr_accessor :fragment_id_prefix
34
35
 
36
+ # Should the parser re-encode the string as ASCII before parsing?
37
+ attr_accessor :force_ascii
38
+
35
39
  # Create a new builder.
36
40
  #
37
41
  # Specify either `:parser` or `:grammar_file` and `:grammar_class`.
@@ -41,6 +45,7 @@ module Slaw
41
45
  def initialize(opts={})
42
46
  @parser = opts[:parser]
43
47
  @parse_options = opts[:parse_optiosn] || {}
48
+ @force_ascii = false
44
49
  end
45
50
 
46
51
  # Do all the work necessary to parse text into a well-formed XML document.
@@ -77,8 +82,37 @@ module Slaw
77
82
  # @return [String] an XML string
78
83
  def parse_text(text, parse_options={})
79
84
  text = preprocess(text)
85
+
86
+ text = escape_utf8(text) if @force_ascii
87
+
80
88
  tree = text_to_syntax_tree(text, parse_options)
81
- xml_from_syntax_tree(tree)
89
+ xml = xml_from_syntax_tree(tree)
90
+
91
+ xml = unescape_utf8(xml) if @force_ascii
92
+
93
+ xml
94
+ end
95
+
96
+ # Use %-encoding to escape everything outside of the US_ASCII range,
97
+ # including encoding % itself.
98
+ #
99
+ # This can have a huge performance benefit. Character-index lookups on UTF-8 strings
100
+ # are linear-time in Ruby, while the same lookups on US-ASCII encoded strings
101
+ # are constant time.
102
+ #
103
+ # This option can only be used if the grammar doesn't include non-ascii literals.
104
+ #
105
+ # See https://github.com/cjheath/treetop/issues/31
106
+ def escape_utf8(text)
107
+ unsafe = (0..126).to_a - ['%'.ord]
108
+ unsafe = unsafe.map { |i| '\u%04x' % i }
109
+ unsafe = Regexp.new('[^' + unsafe.join('') + ']')
110
+
111
+ URI::DEFAULT_PARSER.escape(text, unsafe)
112
+ end
113
+
114
+ def unescape_utf8(xml)
115
+ URI.unescape(xml)
82
116
  end
83
117
 
84
118
  # Parse plain text into a syntax tree.
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "3.0.0"
2
+ VERSION = "3.1.0"
3
3
  end
data/slaw.gemspec CHANGED
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_development_dependency "rake", "~> 10.3.1"
22
- spec.add_development_dependency "rspec", "~> 2.14.1"
21
+ spec.add_development_dependency "rake", "~> 12.3"
22
+ spec.add_development_dependency "rspec", "~> 3.8"
23
23
 
24
- spec.add_runtime_dependency "nokogiri", "~> 1.8.5"
24
+ spec.add_runtime_dependency "nokogiri", "~> 1.8"
25
25
  spec.add_runtime_dependency "treetop", "~> 1.5"
26
- spec.add_runtime_dependency "log4r", "~> 1.1.10"
27
- spec.add_runtime_dependency "thor", "~> 0.19.1"
28
- spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
26
+ spec.add_runtime_dependency "log4r", "~> 1.1"
27
+ spec.add_runtime_dependency "thor", "~> 0.20"
28
+ spec.add_runtime_dependency "mimemagic", "~> 0.2"
29
29
  end
@@ -16,13 +16,13 @@ Section title
16
16
  Another section title
17
17
  2. Section content that is long.
18
18
  "
19
- subject.guess_section_number_after_title(text).should be_true
19
+ subject.guess_section_number_after_title(text).should be true
20
20
  end
21
21
  end
22
22
 
23
23
  context 'section number before title' do
24
24
  it 'should default to false' do
25
- subject.guess_section_number_after_title("").should be_false
25
+ subject.guess_section_number_after_title("").should be false
26
26
  end
27
27
 
28
28
  it 'should be false' do
@@ -36,7 +36,7 @@ Some content.
36
36
 
37
37
  Some content.
38
38
  "
39
- subject.guess_section_number_after_title(text).should be_false
39
+ subject.guess_section_number_after_title(text).should be false
40
40
  end
41
41
  end
42
42
  end
@@ -1973,12 +1973,12 @@ EOS
1973
1973
  it 'should handle a clause with a remark' do
1974
1974
  node = parse :inline_items, "simple [[remark]]. text"
1975
1975
  node.text_value.should == "simple [[remark]]. text"
1976
- node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
1976
+ node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
1977
1977
 
1978
1978
  node = parse :inline_items, "simple [[remark]][[another]] text"
1979
1979
  node.text_value.should == "simple [[remark]][[another]] text"
1980
- node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
1981
- node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be_true
1980
+ node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
1981
+ node.elements[7].is_a?(Slaw::Grammars::ZA::Act::Remark).should be true
1982
1982
  end
1983
1983
  end
1984
1984
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-28 00:00:00.000000000 Z
11
+ date: 2019-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -16,42 +16,42 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 10.3.1
19
+ version: '12.3'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 10.3.1
26
+ version: '12.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.14.1
33
+ version: '3.8'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.14.1
40
+ version: '3.8'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 1.8.5
47
+ version: '1.8'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 1.8.5
54
+ version: '1.8'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: treetop
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -72,42 +72,42 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.1.10
75
+ version: '1.1'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.1.10
82
+ version: '1.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: thor
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 0.19.1
89
+ version: '0.20'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 0.19.1
96
+ version: '0.20'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: mimemagic
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 0.2.1
103
+ version: '0.2'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 0.2.1
110
+ version: '0.2'
111
111
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
112
112
  acts from plain text and PDF documents.
113
113
  email:
@@ -181,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
181
  - !ruby/object:Gem::Version
182
182
  version: '0'
183
183
  requirements: []
184
- rubyforge_project:
185
- rubygems_version: 2.6.12
184
+ rubygems_version: 3.0.3
186
185
  signing_key:
187
186
  specification_version: 4
188
187
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.