pragmatic_segmenter 0.3.12 → 0.3.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e6a6df4ee1ee432fdf8c5a480db27f05c79b169
4
- data.tar.gz: c041b6b02d37dc873e9b99df10579fd6b4cda651
3
+ metadata.gz: c313d610281828819a76463bd3b42590927307e2
4
+ data.tar.gz: 3c9f340a197450a6dffac38f4a3c0b378f3b8edf
5
5
  SHA512:
6
- metadata.gz: 1c15ba49df9d8b9c176f2e89ceba0c4d89390dee86db73d7bc1a3b076e2e9811e9308d1bfe59075faf41342699c9fd7c9bec0555d57c28cefedc1462479348bc
7
- data.tar.gz: 94ae2baac22caef699c8251405fa7b44403b16fb7365b26af257ecb163714be1815ef93ab9a47de5babf16e79e5203a6912a2a6beec34f2e4ca266a026df8ad7
6
+ metadata.gz: d8756402fcb03f456f27d8359b4e8adfd970fed42a89bdfebab9b7a75b25d83dd9b4db19b1f7982b71214c573500b519cbc1920b6a54fa2632a3521dabee9a68
7
+ data.tar.gz: 911a665d2609086e20aff601161e63ce22410e18c0d169fc9d8032cef029062aa7a3cdafa082c55db1ab78ae6f2e5f7a1f302246c716f5393a815ed9aec890dc
data/.gitignore CHANGED
@@ -12,3 +12,4 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ .DS_Store
data/NEWS CHANGED
@@ -1,3 +1,7 @@
1
+ 0.3.13 (2017-01-17):
2
+
3
+ * Bug Fix: Unexpected sentence break between abbreviation and hyphen
4
+
1
5
  0.3.12 (2016-12-12):
2
6
 
3
7
  * Bug Fix: Issue with words with leading apostrophes
data/README.md CHANGED
@@ -847,6 +847,9 @@ To test the relative performance of different segmentation tools and libraries I
847
847
  **Version 0.3.12**
848
848
  * Fix issue involving words with leading apostrophes
849
849
 
850
+ **Version 0.3.13**
851
+ * Fix issue involving unexpected sentence break between abbreviation and hyphen
852
+
850
853
  ## Contributing
851
854
 
852
855
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -113,7 +113,7 @@ module PragmaticSegmenter
113
113
  end
114
114
 
115
115
  def replace_period_of_abbr(txt, abbr)
116
- txt.gsub!(/(?<=\s#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))|(?<=^#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))/, '∯')
116
+ txt.gsub!(/(?<=\s#{abbr.strip})\.(?=((\.|\:|-|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))|(?<=^#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))/, '∯')
117
117
  txt.gsub!(/(?<=\s#{abbr.strip})\.(?=,)|(?<=^#{abbr.strip})\.(?=,)/, '∯')
118
118
  txt
119
119
  end
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.3.12"
2
+ VERSION = "0.3.13"
3
3
  end
@@ -1359,5 +1359,15 @@ RSpec.describe PragmaticSegmenter::Languages::English, "(en)" do
1359
1359
  ps = PragmaticSegmenter::Segmenter.new(text: "I wrote this in the ’nineties. It has four sentences. This is the third, isn't it? And this is the last")
1360
1360
  expect(ps.segment).to eq(["I wrote this in the ’nineties.", "It has four sentences.", "This is the third, isn't it?", "And this is the last"])
1361
1361
  end
1362
+
1363
# Example for the 0.3.13 fix: an abbreviation immediately followed by a
# hyphen ("Ph.D.-level") must stay inside a single segment. The segmenter is
# built with clean: false.
+ it "correctly segments text #109" do
1364
+ ps = PragmaticSegmenter::Segmenter.new(text: "He has Ph.D.-level training", clean: false)
1365
+ expect(ps.segment).to eq(["He has Ph.D.-level training"])
1366
+ end
1367
+
1368
# Companion example: the same abbreviation followed by a space and a
# lowercase word ("Ph.D. level") must also yield exactly one segment. The
# segmenter is built with clean: false.
+ it "correctly segments text #110" do
1369
+ ps = PragmaticSegmenter::Segmenter.new(text: "He has Ph.D. level training", clean: false)
1370
+ expect(ps.segment).to eq(["He has Ph.D. level training"])
1371
+ end
1362
1372
  end
1363
1373
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.12
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-13 00:00:00.000000000 Z
11
+ date: 2017-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  version: '0'
179
179
  requirements: []
180
180
  rubyforge_project:
181
- rubygems_version: 2.5.2
181
+ rubygems_version: 2.6.8
182
182
  signing_key:
183
183
  specification_version: 4
184
184
  summary: A rule-based sentence boundary detection gem that works out-of-the-box across