slaw 10.2.0 → 10.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/slaw/grammars/counters.rb +18 -6
- data/lib/slaw/version.rb +1 -1
- data/spec/counters_spec.rb +38 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
|
4
|
+
data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
|
7
|
+
data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
|
data/README.md
CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.3.1 (11 January 2021)
|
90
|
+
|
91
|
+
* Strip ascii, unicode general and unicode supplemental punctuation from num elements when building eIds
|
92
|
+
|
89
93
|
### 10.2.0 (4 September 2020)
|
90
94
|
|
91
95
|
* support inline superscript `^^text^^`
|
data/lib/slaw/grammars/counters.rb
CHANGED
@@ -24,20 +24,32 @@ module Slaw
|
|
24
24
|
# Clean a <num> value for use in an eId
|
25
25
|
# See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
|
26
26
|
#
|
27
|
-
# The number part of the identifiers of such elements corresponds to the
|
27
|
+
# "The number part of the identifiers of such elements corresponds to the
|
28
28
|
# stripping of all final punctuation, meaningless separations as well as
|
29
29
|
# redundant characters in the content of the <num> element. The
|
30
|
-
# representation is case-sensitive
|
30
|
+
# representation is case-sensitive."
|
31
|
+
#
|
32
|
+
# Our algorithm is:
|
33
|
+
# 1. strip all leading and trailing whitespace and punctuation (using the unicode punctuation blocks)
|
34
|
+
# 2. strip all whitespace
|
35
|
+
# 3. replace all remaining punctuation with a hyphen.
|
36
|
+
#
|
37
|
+
# The General Punctuation block is \u2000-\u206F, and the Supplemental Punctuation block is \u2E00-\u2E7F.
|
31
38
|
#
|
32
39
|
# (i) -> i
|
33
40
|
# 1.2. -> 1-2
|
41
|
+
# “2.3“ -> 2-3
|
34
42
|
# 3a bis -> 3abis
|
35
43
|
def self.clean(num)
|
44
|
+
# leading whitespace and punctuation
|
45
|
+
num = num.gsub(/^[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '')
|
46
|
+
# trailing whitespace and punctuation
|
47
|
+
num.gsub!(/[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+$/, '')
|
48
|
+
# whitespace
|
49
|
+
num.gsub!(/\s/, '')
|
50
|
+
# remaining punctuation to a hyphen
|
51
|
+
num.gsub!(/[\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '-')
|
36
52
|
num
|
37
|
-
.gsub(/[ ()\[\]]/, '')
|
38
|
-
.gsub(/\.+$/, '')
|
39
|
-
.gsub(/^\.+/, '')
|
40
|
-
.gsub(/\.+/, '-')
|
41
53
|
end
|
42
54
|
end
|
43
55
|
end
|
data/lib/slaw/version.rb
CHANGED
data/spec/counters_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
require 'slaw'
|
6
|
+
|
7
|
+
describe Slaw::Grammars::Counters do
|
8
|
+
describe '#clean' do
|
9
|
+
it 'should remove leading and trailing punctuation' do
|
10
|
+
described_class.clean("").should == ""
|
11
|
+
described_class.clean(" ").should == ""
|
12
|
+
described_class.clean("( )").should == ""
|
13
|
+
described_class.clean("(123.4-5)").should == "123-4-5"
|
14
|
+
described_class.clean("(312.32.7)").should == "312-32-7"
|
15
|
+
described_class.clean("(312_32_7)").should == "312-32-7"
|
16
|
+
described_class.clean("(6)").should == "6"
|
17
|
+
described_class.clean("[16]").should == "16"
|
18
|
+
described_class.clean("(i)").should == "i"
|
19
|
+
described_class.clean("[i]").should == "i"
|
20
|
+
described_class.clean("(2bis)").should == "2bis"
|
21
|
+
described_class.clean('"1.2.').should == "1-2"
|
22
|
+
described_class.clean("1.2.").should == "1-2"
|
23
|
+
described_class.clean("“2.3").should == "2-3"
|
24
|
+
described_class.clean("2,3").should == "2-3"
|
25
|
+
described_class.clean("2,3, 4,").should == "2-3-4"
|
26
|
+
described_class.clean("3a bis").should == "3abis"
|
27
|
+
described_class.clean("3é").should == "3é"
|
28
|
+
described_class.clean(" -3a--4,9").should == "3a-4-9"
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should handle non-arabic numerals' do
|
32
|
+
# hebrew aleph
|
33
|
+
described_class.clean("(א)").should == "א"
|
34
|
+
# chinese 3
|
35
|
+
described_class.clean("(三)").should == "三"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.2.0
|
4
|
+
version: 10.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -155,6 +155,7 @@ files:
|
|
155
155
|
- lib/slaw/version.rb
|
156
156
|
- lib/slaw/xml_support.rb
|
157
157
|
- slaw.gemspec
|
158
|
+
- spec/counters_spec.rb
|
158
159
|
- spec/extract/extractor_spec.rb
|
159
160
|
- spec/fixtures/community-fire-safety.xml
|
160
161
|
- spec/generator_spec.rb
|
@@ -192,6 +193,7 @@ signing_key:
|
|
192
193
|
specification_version: 4
|
193
194
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
194
195
|
test_files:
|
196
|
+
- spec/counters_spec.rb
|
195
197
|
- spec/extract/extractor_spec.rb
|
196
198
|
- spec/fixtures/community-fire-safety.xml
|
197
199
|
- spec/generator_spec.rb
|