slaw 10.2.0 → 10.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/slaw/grammars/counters.rb +18 -6
- data/lib/slaw/version.rb +1 -1
- data/spec/counters_spec.rb +38 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
|
4
|
+
data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
|
7
|
+
data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
|
data/README.md
CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.3.1 (11 January 2021)
|
90
|
+
|
91
|
+
* Strip ASCII punctuation and characters in the Unicode General Punctuation and Supplemental Punctuation blocks from `<num>` elements when building eIds
|
92
|
+
|
89
93
|
### 10.2.0 (4 September 2020)
|
90
94
|
|
91
95
|
* support inline superscript `^^text^^`
|
data/lib/slaw/grammars/counters.rb
CHANGED
@@ -24,20 +24,32 @@ module Slaw
|
|
24
24
|
# Clean a <num> value for use in an eId
|
25
25
|
# See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
|
26
26
|
#
|
27
|
-
# The number part of the identifiers of such elements corresponds to the
|
27
|
+
# "The number part of the identifiers of such elements corresponds to the
|
28
28
|
# stripping of all final punctuation, meaningless separations as well as
|
29
29
|
# redundant characters in the content of the <num> element. The
|
30
|
-
# representation is case-sensitive
|
30
|
+
# representation is case-sensitive."
|
31
|
+
#
|
32
|
+
# Our algorithm is:
|
33
|
+
# 1. strip all leading and trailing whitespace and punctuation (using the unicode punctuation blocks)
|
34
|
+
# 2. strip all whitespace
|
35
|
+
# 3. replace all remaining punctuation with a hyphen.
|
36
|
+
#
|
37
|
+
# The General Punctuation block is \u2000-\u206F, and the Supplemental Punctuation block is \u2E00-\u2E7F.
|
31
38
|
#
|
32
39
|
# (i) -> i
|
33
40
|
# 1.2. -> 1-2
|
41
|
+
# “2.3” -> 2-3
|
34
42
|
# 3a bis -> 3abis
|
35
43
|
def self.clean(num)
|
44
|
+
# leading whitespace and punctuation
|
45
|
+
num = num.gsub(/^[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '')
|
46
|
+
# trailing whitespace and punctuation
|
47
|
+
num.gsub!(/[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+$/, '')
|
48
|
+
# whitespace
|
49
|
+
num.gsub!(/\s/, '')
|
50
|
+
# remaining punctuation to a hyphen
|
51
|
+
num.gsub!(/[\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '-')
|
36
52
|
num
|
37
|
-
.gsub(/[ ()\[\]]/, '')
|
38
|
-
.gsub(/\.+$/, '')
|
39
|
-
.gsub(/^\.+/, '')
|
40
|
-
.gsub(/\.+/, '-')
|
41
53
|
end
|
42
54
|
end
|
43
55
|
end
|
data/lib/slaw/version.rb
CHANGED
data/spec/counters_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
require 'slaw'
|
6
|
+
|
7
|
+
describe Slaw::Grammars::Counters do
|
8
|
+
describe '#clean' do
|
9
|
+
it 'should remove leading and trailing punctuation' do
|
10
|
+
described_class.clean("").should == ""
|
11
|
+
described_class.clean(" ").should == ""
|
12
|
+
described_class.clean("( )").should == ""
|
13
|
+
described_class.clean("(123.4-5)").should == "123-4-5"
|
14
|
+
described_class.clean("(312.32.7)").should == "312-32-7"
|
15
|
+
described_class.clean("(312_32_7)").should == "312-32-7"
|
16
|
+
described_class.clean("(6)").should == "6"
|
17
|
+
described_class.clean("[16]").should == "16"
|
18
|
+
described_class.clean("(i)").should == "i"
|
19
|
+
described_class.clean("[i]").should == "i"
|
20
|
+
described_class.clean("(2bis)").should == "2bis"
|
21
|
+
described_class.clean('"1.2.').should == "1-2"
|
22
|
+
described_class.clean("1.2.").should == "1-2"
|
23
|
+
described_class.clean("“2.3").should == "2-3"
|
24
|
+
described_class.clean("2,3").should == "2-3"
|
25
|
+
described_class.clean("2,3, 4,").should == "2-3-4"
|
26
|
+
described_class.clean("3a bis").should == "3abis"
|
27
|
+
described_class.clean("3é").should == "3é"
|
28
|
+
described_class.clean(" -3a--4,9").should == "3a-4-9"
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should handle non-arabic numerals' do
|
32
|
+
# hebrew aleph
|
33
|
+
described_class.clean("(א)").should == "א"
|
34
|
+
# chinese 3
|
35
|
+
described_class.clean("(三)").should == "三"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.2.0
|
4
|
+
version: 10.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -155,6 +155,7 @@ files:
|
|
155
155
|
- lib/slaw/version.rb
|
156
156
|
- lib/slaw/xml_support.rb
|
157
157
|
- slaw.gemspec
|
158
|
+
- spec/counters_spec.rb
|
158
159
|
- spec/extract/extractor_spec.rb
|
159
160
|
- spec/fixtures/community-fire-safety.xml
|
160
161
|
- spec/generator_spec.rb
|
@@ -192,6 +193,7 @@ signing_key:
|
|
192
193
|
specification_version: 4
|
193
194
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
194
195
|
test_files:
|
196
|
+
- spec/counters_spec.rb
|
195
197
|
- spec/extract/extractor_spec.rb
|
196
198
|
- spec/fixtures/community-fire-safety.xml
|
197
199
|
- spec/generator_spec.rb
|