pragmatic_segmenter 0.3.23 → 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c66c757c1b4bd8d090e88d7db6c627720f58f6f26e6fab9916a20e8bc15471c
4
- data.tar.gz: da3a9088f72c90ddde6f0deda67d3e3b4ea3bed317970416deef794e0f594d89
3
+ metadata.gz: 51ae71a6650fcd15671ac767d26ebe1315a9ea655d8fbf6e29ef9e4fa668fc93
4
+ data.tar.gz: 786246dc9e80872b423013fed2d69e0cba48cc7a7d5a693a3165b4cdf61fe00d
5
5
  SHA512:
6
- metadata.gz: 503c52965b2f98eebbc24e1215204c45307958a0279d56834e0c929d18625e81ac8c5c78779efb1a5946b5fdda5d8496b54a72b009ad6b2a597a70c4ba0fff66
7
- data.tar.gz: f23773139a3a6d9f45cecaacabb363a7fb825a21eb76b40514abf4d0407191ed3b1afa887a5bc5328626abe2dbac5864895add62a1da036234036984d19a3454
6
+ metadata.gz: a830c5787a3818bc274b69aabd82bf5f837ba76c43921970c26a59f229d69bb24b698ff27389056ed6c536216edefdf4fa12338affbe883929b492065554af4c
7
+ data.tar.gz: f86cd6a66eaeb1890b5ddb2316d5ede734061b78212a490f8092bd20845cdb4dd47fac374972244785170ae266af21c566cbf59dd1a5667151ccd651269b72d8
data/NEWS CHANGED
@@ -1,4 +1,9 @@
1
- 0.3.22 (2021-05-03):
1
+ 0.3.24 (2024-08-12):
2
+
3
+ * Bug Fix: Catastrophic backtracking in regular expression for numerical references
4
+ * Improvement: Remove unicode dependency
5
+
6
+ 0.3.23 (2021-05-03):
2
7
 
3
8
  * Improvement: Refactor for Ruby 3.0 compatibility
4
9
 
data/README.md CHANGED
@@ -890,6 +890,10 @@ To test the relative performance of different segmentation tools and libraries I
890
890
  **Version 0.3.23**
891
891
  * Refactor for Ruby 3.0 compatibility
892
892
 
893
+ **Version 0.3.24**
894
+ * Fix catastrophic backtracking in regular expression for numerical references
895
+ * Remove unicode dependency
896
+
893
897
  ## Contributing
894
898
 
895
899
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -47,7 +47,7 @@ module PragmaticSegmenter
47
47
  # Rubular: http://rubular.com/r/mQ8Es9bxtk
48
48
  CONTINUOUS_PUNCTUATION_REGEX = /(?<=\S)(!|\?){3,}(?=(\s|\z|$))/
49
49
 
50
- NUMBERED_REFERENCE_REGEX = /(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)*\b\d{1,3}\])+|((\d{1,3}\s?)*\d{1,3}))(\s)(?=[A-Z])/
50
+ NUMBERED_REFERENCE_REGEX = /(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)?\b\d{1,3}\])+|((\d{1,3}\s?){0,3}\d{1,3}))(\s)(?=[A-Z])/
51
51
 
52
52
  # Rubular: http://rubular.com/r/yqa4Rit8EY
53
53
  PossessiveAbbreviationRule = Rule.new(/\.(?='s\s)|\.(?='s$)|\.(?='s\z)/, '∯')
data/lib/pragmatic_segmenter/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PragmaticSegmenter
4
- VERSION = "0.3.23"
4
+ VERSION = "0.3.24"
5
5
  end
data/lib/unicode.rb ADDED
@@ -0,0 +1,5 @@
1
+ module Unicode
2
+ def self.downcase(text)
3
+ text.downcase
4
+ end
5
+ end
data/pragmatic_segmenter.gemspec CHANGED
@@ -18,7 +18,6 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_runtime_dependency "unicode"
22
21
  spec.add_development_dependency "bundler", ">= 1.7"
23
22
  spec.add_development_dependency "rake", ">= 12.3.3"
24
23
  spec.add_development_dependency "rspec"
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.23
4
+ version: 0.3.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-02 00:00:00.000000000 Z
11
+ date: 2024-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: unicode
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: bundler
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +122,7 @@ files:
136
122
  - lib/pragmatic_segmenter/segmenter.rb
137
123
  - lib/pragmatic_segmenter/types.rb
138
124
  - lib/pragmatic_segmenter/version.rb
125
+ - lib/unicode.rb
139
126
  - pragmatic_segmenter.gemspec
140
127
  - spec/performance_spec.rb
141
128
  - spec/pragmatic_segmenter/languages/amharic_spec.rb
@@ -181,8 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
168
  - !ruby/object:Gem::Version
182
169
  version: '0'
183
170
  requirements: []
184
- rubyforge_project:
185
- rubygems_version: 2.7.6
171
+ rubygems_version: 3.3.26
186
172
  signing_key:
187
173
  specification_version: 4
188
174
  summary: A rule-based sentence boundary detection gem that works out-of-the-box across