pragmatic_segmenter 0.3.23 → 0.3.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS +6 -1
- data/README.md +4 -0
- data/lib/pragmatic_segmenter/languages/common/numbers.rb +1 -1
- data/lib/pragmatic_segmenter/version.rb +1 -1
- data/lib/unicode.rb +5 -0
- data/pragmatic_segmenter.gemspec +0 -1
- metadata +4 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51ae71a6650fcd15671ac767d26ebe1315a9ea655d8fbf6e29ef9e4fa668fc93
|
4
|
+
data.tar.gz: 786246dc9e80872b423013fed2d69e0cba48cc7a7d5a693a3165b4cdf61fe00d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a830c5787a3818bc274b69aabd82bf5f837ba76c43921970c26a59f229d69bb24b698ff27389056ed6c536216edefdf4fa12338affbe883929b492065554af4c
|
7
|
+
data.tar.gz: f86cd6a66eaeb1890b5ddb2316d5ede734061b78212a490f8092bd20845cdb4dd47fac374972244785170ae266af21c566cbf59dd1a5667151ccd651269b72d8
|
data/NEWS
CHANGED
data/README.md
CHANGED
@@ -890,6 +890,10 @@ To test the relative performance of different segmentation tools and libraries I
|
|
890
890
|
**Version 0.3.23**
|
891
891
|
* Refactor for Ruby 3.0 compatibility
|
892
892
|
|
893
|
+
**Version 0.3.24**
|
894
|
+
* Fix catastrophic backtracking in regular expression for numerical references
|
895
|
+
* Remove unicode dependency
|
896
|
+
|
893
897
|
## Contributing
|
894
898
|
|
895
899
|
If you find a text that is incorrectly segmented using this gem, please submit an issue.
|
@@ -47,7 +47,7 @@ module PragmaticSegmenter
|
|
47
47
|
# Rubular: http://rubular.com/r/mQ8Es9bxtk
|
48
48
|
CONTINUOUS_PUNCTUATION_REGEX = /(?<=\S)(!|\?){3,}(?=(\s|\z|$))/
|
49
49
|
|
50
|
-
NUMBERED_REFERENCE_REGEX = /(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)
|
50
|
+
NUMBERED_REFERENCE_REGEX = /(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)?\b\d{1,3}\])+|((\d{1,3}\s?){0,3}\d{1,3}))(\s)(?=[A-Z])/
|
51
51
|
|
52
52
|
# Rubular: http://rubular.com/r/yqa4Rit8EY
|
53
53
|
PossessiveAbbreviationRule = Rule.new(/\.(?='s\s)|\.(?='s$)|\.(?='s\z)/, '∯')
|
data/lib/unicode.rb
ADDED
data/pragmatic_segmenter.gemspec
CHANGED
@@ -18,7 +18,6 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_runtime_dependency "unicode"
|
22
21
|
spec.add_development_dependency "bundler", ">= 1.7"
|
23
22
|
spec.add_development_dependency "rake", ">= 12.3.3"
|
24
23
|
spec.add_development_dependency "rspec"
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pragmatic_segmenter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.23
|
4
|
+
version: 0.3.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: unicode
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: bundler
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +122,7 @@ files:
|
|
136
122
|
- lib/pragmatic_segmenter/segmenter.rb
|
137
123
|
- lib/pragmatic_segmenter/types.rb
|
138
124
|
- lib/pragmatic_segmenter/version.rb
|
125
|
+
- lib/unicode.rb
|
139
126
|
- pragmatic_segmenter.gemspec
|
140
127
|
- spec/performance_spec.rb
|
141
128
|
- spec/pragmatic_segmenter/languages/amharic_spec.rb
|
@@ -181,8 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
168
|
- !ruby/object:Gem::Version
|
182
169
|
version: '0'
|
183
170
|
requirements: []
|
184
|
-
|
185
|
-
rubygems_version: 2.7.6
|
171
|
+
rubygems_version: 3.3.26
|
186
172
|
signing_key:
|
187
173
|
specification_version: 4
|
188
174
|
summary: A rule-based sentence boundary detection gem that works out-of-the-box across
|