pragmatic_segmenter 0.3.23 → 0.3.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c66c757c1b4bd8d090e88d7db6c627720f58f6f26e6fab9916a20e8bc15471c
4
- data.tar.gz: da3a9088f72c90ddde6f0deda67d3e3b4ea3bed317970416deef794e0f594d89
3
+ metadata.gz: 51ae71a6650fcd15671ac767d26ebe1315a9ea655d8fbf6e29ef9e4fa668fc93
4
+ data.tar.gz: 786246dc9e80872b423013fed2d69e0cba48cc7a7d5a693a3165b4cdf61fe00d
5
5
  SHA512:
6
- metadata.gz: 503c52965b2f98eebbc24e1215204c45307958a0279d56834e0c929d18625e81ac8c5c78779efb1a5946b5fdda5d8496b54a72b009ad6b2a597a70c4ba0fff66
7
- data.tar.gz: f23773139a3a6d9f45cecaacabb363a7fb825a21eb76b40514abf4d0407191ed3b1afa887a5bc5328626abe2dbac5864895add62a1da036234036984d19a3454
6
+ metadata.gz: a830c5787a3818bc274b69aabd82bf5f837ba76c43921970c26a59f229d69bb24b698ff27389056ed6c536216edefdf4fa12338affbe883929b492065554af4c
7
+ data.tar.gz: f86cd6a66eaeb1890b5ddb2316d5ede734061b78212a490f8092bd20845cdb4dd47fac374972244785170ae266af21c566cbf59dd1a5667151ccd651269b72d8
data/NEWS CHANGED
@@ -1,4 +1,9 @@
1
- 0.3.22 (2021-05-03):
1
+ 0.3.24 (2024-08-12):
2
+
3
+ * Bug Fix: Catastrophic backtracking in regular expression for numerical references
4
+ * Improvement: Remove unicode dependency
5
+
6
+ 0.3.23 (2021-05-03):
2
7
 
3
8
  * Improvement: Refactor for Ruby 3.0 compatibility
4
9
 
data/README.md CHANGED
@@ -890,6 +890,10 @@ To test the relative performance of different segmentation tools and libraries I
890
890
  **Version 0.3.23**
891
891
  * Refactor for Ruby 3.0 compatibility
892
892
 
893
+ **Version 0.3.24**
894
+ * Fix catastrophic backtracking in regular expression for numerical references
895
+ * Remove unicode dependency
896
+
893
897
  ## Contributing
894
898
 
895
899
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
data/lib/pragmatic_segmenter/languages/common.rb CHANGED
@@ -47,7 +47,7 @@ module PragmaticSegmenter
47
47
  # Rubular: http://rubular.com/r/mQ8Es9bxtk
48
48
  CONTINUOUS_PUNCTUATION_REGEX = /(?<=\S)(!|\?){3,}(?=(\s|\z|$))/
49
49
 
50
- NUMBERED_REFERENCE_REGEX = /(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)*\b\d{1,3}\])+|((\d{1,3}\s?)*\d{1,3}))(\s)(?=[A-Z])/
50
+ NUMBERED_REFERENCE_REGEX = /(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)?\b\d{1,3}\])+|((\d{1,3}\s?){0,3}\d{1,3}))(\s)(?=[A-Z])/
51
51
 
52
52
  # Rubular: http://rubular.com/r/yqa4Rit8EY
53
53
  PossessiveAbbreviationRule = Rule.new(/\.(?='s\s)|\.(?='s$)|\.(?='s\z)/, '∯')
data/lib/pragmatic_segmenter/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PragmaticSegmenter
4
- VERSION = "0.3.23"
4
+ VERSION = "0.3.24"
5
5
  end
data/lib/unicode.rb ADDED
@@ -0,0 +1,5 @@
1
+ module Unicode
2
+ def self.downcase(text)
3
+ text.downcase
4
+ end
5
+ end
data/pragmatic_segmenter.gemspec CHANGED
@@ -18,7 +18,6 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_runtime_dependency "unicode"
22
21
  spec.add_development_dependency "bundler", ">= 1.7"
23
22
  spec.add_development_dependency "rake", ">= 12.3.3"
24
23
  spec.add_development_dependency "rspec"
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.23
4
+ version: 0.3.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-02 00:00:00.000000000 Z
11
+ date: 2024-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: unicode
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: bundler
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +122,7 @@ files:
136
122
  - lib/pragmatic_segmenter/segmenter.rb
137
123
  - lib/pragmatic_segmenter/types.rb
138
124
  - lib/pragmatic_segmenter/version.rb
125
+ - lib/unicode.rb
139
126
  - pragmatic_segmenter.gemspec
140
127
  - spec/performance_spec.rb
141
128
  - spec/pragmatic_segmenter/languages/amharic_spec.rb
@@ -181,8 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
168
  - !ruby/object:Gem::Version
182
169
  version: '0'
183
170
  requirements: []
184
- rubyforge_project:
185
- rubygems_version: 2.7.6
171
+ rubygems_version: 3.3.26
186
172
  signing_key:
187
173
  specification_version: 4
188
174
  summary: A rule-based sentence boundary detection gem that works out-of-the-box across