pragmatic_segmenter 0.3.12 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e6a6df4ee1ee432fdf8c5a480db27f05c79b169
4
- data.tar.gz: c041b6b02d37dc873e9b99df10579fd6b4cda651
3
+ metadata.gz: c313d610281828819a76463bd3b42590927307e2
4
+ data.tar.gz: 3c9f340a197450a6dffac38f4a3c0b378f3b8edf
5
5
  SHA512:
6
- metadata.gz: 1c15ba49df9d8b9c176f2e89ceba0c4d89390dee86db73d7bc1a3b076e2e9811e9308d1bfe59075faf41342699c9fd7c9bec0555d57c28cefedc1462479348bc
7
- data.tar.gz: 94ae2baac22caef699c8251405fa7b44403b16fb7365b26af257ecb163714be1815ef93ab9a47de5babf16e79e5203a6912a2a6beec34f2e4ca266a026df8ad7
6
+ metadata.gz: d8756402fcb03f456f27d8359b4e8adfd970fed42a89bdfebab9b7a75b25d83dd9b4db19b1f7982b71214c573500b519cbc1920b6a54fa2632a3521dabee9a68
7
+ data.tar.gz: 911a665d2609086e20aff601161e63ce22410e18c0d169fc9d8032cef029062aa7a3cdafa082c55db1ab78ae6f2e5f7a1f302246c716f5393a815ed9aec890dc
data/.gitignore CHANGED
@@ -12,3 +12,4 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ .DS_Store
data/NEWS CHANGED
@@ -1,3 +1,7 @@
1
+ 0.3.13 (2017-01-17):
2
+
3
+ * Bug Fix: Unexpected sentence break between abbreviation and hyphen
4
+
1
5
  0.3.12 (2016-12-12):
2
6
 
3
7
  * Bug Fix: Issue with words with leading apostrophes
data/README.md CHANGED
@@ -847,6 +847,9 @@ To test the relative performance of different segmentation tools and libraries I
847
847
  **Version 0.3.12**
848
848
  * Fix issue involving words with leading apostrophes
849
849
 
850
+ **Version 0.3.13**
851
+ * Fix issue involving unexpected sentence break between abbreviation and hyphen
852
+
850
853
  ## Contributing
851
854
 
852
855
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -113,7 +113,7 @@ module PragmaticSegmenter
113
113
  end
114
114
 
115
115
  def replace_period_of_abbr(txt, abbr)
116
- txt.gsub!(/(?<=\s#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))|(?<=^#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))/, '∯')
116
+ txt.gsub!(/(?<=\s#{abbr.strip})\.(?=((\.|\:|-|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))|(?<=^#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))/, '∯')
117
117
  txt.gsub!(/(?<=\s#{abbr.strip})\.(?=,)|(?<=^#{abbr.strip})\.(?=,)/, '∯')
118
118
  txt
119
119
  end
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.3.12"
2
+ VERSION = "0.3.13"
3
3
  end
@@ -1359,5 +1359,15 @@ RSpec.describe PragmaticSegmenter::Languages::English, "(en)" do
1359
1359
  ps = PragmaticSegmenter::Segmenter.new(text: "I wrote this in the ’nineties. It has four sentences. This is the third, isn't it? And this is the last")
1360
1360
  expect(ps.segment).to eq(["I wrote this in the ’nineties.", "It has four sentences.", "This is the third, isn't it?", "And this is the last"])
1361
1361
  end
1362
+
1363
+ it "correctly segments text #109" do
1364
+ ps = PragmaticSegmenter::Segmenter.new(text: "He has Ph.D.-level training", clean: false)
1365
+ expect(ps.segment).to eq(["He has Ph.D.-level training"])
1366
+ end
1367
+
1368
+ it "correctly segments text #110" do
1369
+ ps = PragmaticSegmenter::Segmenter.new(text: "He has Ph.D. level training", clean: false)
1370
+ expect(ps.segment).to eq(["He has Ph.D. level training"])
1371
+ end
1362
1372
  end
1363
1373
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.12
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-13 00:00:00.000000000 Z
11
+ date: 2017-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  version: '0'
179
179
  requirements: []
180
180
  rubyforge_project:
181
- rubygems_version: 2.5.2
181
+ rubygems_version: 2.6.8
182
182
  signing_key:
183
183
  specification_version: 4
184
184
  summary: A rule-based sentence boundary detection gem that works out-of-the-box across