slaw 10.2.0 → 10.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f76b66445a595e5130d1a3af8b153498d41007416bcd7c5ced418d4339030303
4
- data.tar.gz: cd28f4839c8ecd4b430a6111a3c5ec3df764af84674b1e6205d54ac27b3c734f
3
+ metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
4
+ data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
5
5
  SHA512:
6
- metadata.gz: f04d34041c3dc21f552b81af8d331d4440cd7c36d94e38007d15be8b467cca15b2bedbc1b5d1fb7d652ebd2b263f695a02afd337ce20df5c0e5eca7b1ec02050
7
- data.tar.gz: dae3ef8b12f13761543a90b1711b301af9a3368cbde2387c8c00adf717c6a297ddfcfe108b9ef53073b370178574877a978fb8968d63317ed736ca1c49417232
6
+ metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
7
+ data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
data/README.md CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.3.1 (11 January 2021)
90
+
91
+ * Strip ASCII punctuation and characters in the Unicode General Punctuation and Supplemental Punctuation blocks from num elements when building eIds
92
+
89
93
  ### 10.2.0 (4 September 2020)
90
94
 
91
95
  * support inline superscript `^^text^^`
@@ -24,20 +24,32 @@ module Slaw
24
24
  # Clean a <num> value for use in an eId
25
25
  # See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
26
26
  #
27
- # The number part of the identifiers of such elements corresponds to the
27
+ # "The number part of the identifiers of such elements corresponds to the
28
28
  # stripping of all final punctuation, meaningless separations as well as
29
29
  # redundant characters in the content of the <num> element. The
30
- # representation is case-sensitive
30
+ # representation is case-sensitive."
31
+ #
32
+ # Our algorithm is:
33
+ # 1. strip all leading and trailing whitespace and punctuation (ASCII punctuation plus the Unicode punctuation blocks)
34
+ # 2. strip all whitespace
35
+ # 3. replace each remaining run of punctuation with a single hyphen.
36
+ #
37
+ # The General Punctuation block is \u2000-\u206F, and the Supplemental Punctuation block is \u2E00-\u2E7F.
31
38
  #
32
39
  # (i) -> i
33
40
  # 1.2. -> 1-2
41
+ # “2.3” -> 2-3
34
42
  # 3a bis -> 3abis
35
43
  def self.clean(num)
44
+ # leading whitespace and punctuation
45
+ num = num.gsub(/^[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '')
46
+ # trailing whitespace and punctuation
47
+ num.gsub!(/[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+$/, '')
48
+ # whitespace
49
+ num.gsub!(/\s/, '')
50
+ # remaining punctuation to a hyphen
51
+ num.gsub!(/[\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '-')
36
52
  num
37
- .gsub(/[ ()\[\]]/, '')
38
- .gsub(/\.+$/, '')
39
- .gsub(/^\.+/, '')
40
- .gsub(/\.+/, '-')
41
53
  end
42
54
  end
43
55
  end
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "10.2.0"
2
+ VERSION = "10.3.1"
3
3
  end
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ require 'slaw'
6
+
7
+ describe Slaw::Grammars::Counters do
8
+ describe '#clean' do
9
+ it 'should remove leading and trailing punctuation' do
10
+ described_class.clean("").should == ""
11
+ described_class.clean(" ").should == ""
12
+ described_class.clean("( )").should == ""
13
+ described_class.clean("(123.4-5)").should == "123-4-5"
14
+ described_class.clean("(312.32.7)").should == "312-32-7"
15
+ described_class.clean("(312_32_7)").should == "312-32-7"
16
+ described_class.clean("(6)").should == "6"
17
+ described_class.clean("[16]").should == "16"
18
+ described_class.clean("(i)").should == "i"
19
+ described_class.clean("[i]").should == "i"
20
+ described_class.clean("(2bis)").should == "2bis"
21
+ described_class.clean('"1.2.').should == "1-2"
22
+ described_class.clean("1.2.").should == "1-2"
23
+ described_class.clean("“2.3").should == "2-3"
24
+ described_class.clean("2,3").should == "2-3"
25
+ described_class.clean("2,3, 4,").should == "2-3-4"
26
+ described_class.clean("3a bis").should == "3abis"
27
+ described_class.clean("3é").should == "3é"
28
+ described_class.clean(" -3a--4,9").should == "3a-4-9"
29
+ end
30
+
31
+ it 'should handle non-arabic numerals' do
32
+ # hebrew aleph
33
+ described_class.clean("(א)").should == "א"
34
+ # chinese 3
35
+ described_class.clean("(三)").should == "三"
36
+ end
37
+ end
38
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.2.0
4
+ version: 10.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-04 00:00:00.000000000 Z
11
+ date: 2021-01-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -155,6 +155,7 @@ files:
155
155
  - lib/slaw/version.rb
156
156
  - lib/slaw/xml_support.rb
157
157
  - slaw.gemspec
158
+ - spec/counters_spec.rb
158
159
  - spec/extract/extractor_spec.rb
159
160
  - spec/fixtures/community-fire-safety.xml
160
161
  - spec/generator_spec.rb
@@ -192,6 +193,7 @@ signing_key:
192
193
  specification_version: 4
193
194
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.
194
195
  test_files:
196
+ - spec/counters_spec.rb
195
197
  - spec/extract/extractor_spec.rb
196
198
  - spec/fixtures/community-fire-safety.xml
197
199
  - spec/generator_spec.rb