slaw 10.2.0 → 10.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f76b66445a595e5130d1a3af8b153498d41007416bcd7c5ced418d4339030303
4
- data.tar.gz: cd28f4839c8ecd4b430a6111a3c5ec3df764af84674b1e6205d54ac27b3c734f
3
+ metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
4
+ data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
5
5
  SHA512:
6
- metadata.gz: f04d34041c3dc21f552b81af8d331d4440cd7c36d94e38007d15be8b467cca15b2bedbc1b5d1fb7d652ebd2b263f695a02afd337ce20df5c0e5eca7b1ec02050
7
- data.tar.gz: dae3ef8b12f13761543a90b1711b301af9a3368cbde2387c8c00adf717c6a297ddfcfe108b9ef53073b370178574877a978fb8968d63317ed736ca1c49417232
6
+ metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
7
+ data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
data/README.md CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.3.1 (11 January 2021)
90
+
91
+ * Strip ASCII punctuation and characters in the Unicode General Punctuation and Supplemental Punctuation blocks from num elements when building eIds
92
+
89
93
  ### 10.2.0 (4 September 2020)
90
94
 
91
95
  * support inline superscript `^^text^^`
@@ -24,20 +24,32 @@ module Slaw
24
24
  # Clean a <num> value for use in an eId
25
25
  # See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
26
26
  #
27
- # The number part of the identifiers of such elements corresponds to the
27
+ # "The number part of the identifiers of such elements corresponds to the
28
28
  # stripping of all final punctuation, meaningless separations as well as
29
29
  # redundant characters in the content of the <num> element. The
30
- # representation is case-sensitive
30
+ # representation is case-sensitive."
31
+ #
32
+ # Our algorithm is:
33
+ # 1. strip all leading and trailing whitespace and punctuation (using the unicode punctuation blocks)
34
+ # 2. strip all whitespace
35
+ # 3. replace each remaining run of punctuation with a single hyphen.
36
+ #
37
+ # The General Punctuation block is \u2000-\u206F, and the Supplemental Punctuation block is \u2E00-\u2E7F.
31
38
  #
32
39
  # (i) -> i
33
40
  # 1.2. -> 1-2
41
+ # “2.3” -> 2-3
34
42
  # 3a bis -> 3abis
35
43
  def self.clean(num)
44
+ # leading whitespace and punctuation
45
+ num = num.gsub(/^[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '')
46
+ # trailing whitespace and punctuation
47
+ num.gsub!(/[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+$/, '')
48
+ # whitespace
49
+ num.gsub!(/\s/, '')
50
+ # remaining punctuation to a hyphen
51
+ num.gsub!(/[\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '-')
36
52
  num
37
- .gsub(/[ ()\[\]]/, '')
38
- .gsub(/\.+$/, '')
39
- .gsub(/^\.+/, '')
40
- .gsub(/\.+/, '-')
41
53
  end
42
54
  end
43
55
  end
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "10.2.0"
2
+ VERSION = "10.3.1"
3
3
  end
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ require 'slaw'
6
+
7
+ describe Slaw::Grammars::Counters do
8
+ describe '#clean' do
9
+ it 'should remove leading and trailing punctuation' do
10
+ described_class.clean("").should == ""
11
+ described_class.clean(" ").should == ""
12
+ described_class.clean("( )").should == ""
13
+ described_class.clean("(123.4-5)").should == "123-4-5"
14
+ described_class.clean("(312.32.7)").should == "312-32-7"
15
+ described_class.clean("(312_32_7)").should == "312-32-7"
16
+ described_class.clean("(6)").should == "6"
17
+ described_class.clean("[16]").should == "16"
18
+ described_class.clean("(i)").should == "i"
19
+ described_class.clean("[i]").should == "i"
20
+ described_class.clean("(2bis)").should == "2bis"
21
+ described_class.clean('"1.2.').should == "1-2"
22
+ described_class.clean("1.2.").should == "1-2"
23
+ described_class.clean("“2.3").should == "2-3"
24
+ described_class.clean("2,3").should == "2-3"
25
+ described_class.clean("2,3, 4,").should == "2-3-4"
26
+ described_class.clean("3a bis").should == "3abis"
27
+ described_class.clean("3é").should == "3é"
28
+ described_class.clean(" -3a--4,9").should == "3a-4-9"
29
+ end
30
+
31
+ it 'should handle non-arabic numerals' do
32
+ # hebrew aleph
33
+ described_class.clean("(א)").should == "א"
34
+ # chinese 3
35
+ described_class.clean("(三)").should == "三"
36
+ end
37
+ end
38
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.2.0
4
+ version: 10.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-04 00:00:00.000000000 Z
11
+ date: 2021-01-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -155,6 +155,7 @@ files:
155
155
  - lib/slaw/version.rb
156
156
  - lib/slaw/xml_support.rb
157
157
  - slaw.gemspec
158
+ - spec/counters_spec.rb
158
159
  - spec/extract/extractor_spec.rb
159
160
  - spec/fixtures/community-fire-safety.xml
160
161
  - spec/generator_spec.rb
@@ -192,6 +193,7 @@ signing_key:
192
193
  specification_version: 4
193
194
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.
194
195
  test_files:
196
+ - spec/counters_spec.rb
195
197
  - spec/extract/extractor_spec.rb
196
198
  - spec/fixtures/community-fire-safety.xml
197
199
  - spec/generator_spec.rb