RubyGems - pragmatic_segmenter - Versions diffs - 0.3.12 → 0.3.13 - Mend

pragmatic_segmenter 0.3.12 → 0.3.13

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/NEWS +4 -0
data/README.md +3 -0
data/lib/pragmatic_segmenter/abbreviation_replacer.rb +1 -1
data/lib/pragmatic_segmenter/version.rb +1 -1
data/spec/pragmatic_segmenter/languages/english_spec.rb +10 -0
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 4e6a6df4ee1ee432fdf8c5a480db27f05c79b169
-  data.tar.gz: c041b6b02d37dc873e9b99df10579fd6b4cda651
+  metadata.gz: c313d610281828819a76463bd3b42590927307e2
+  data.tar.gz: 3c9f340a197450a6dffac38f4a3c0b378f3b8edf
 SHA512:
-  metadata.gz: 1c15ba49df9d8b9c176f2e89ceba0c4d89390dee86db73d7bc1a3b076e2e9811e9308d1bfe59075faf41342699c9fd7c9bec0555d57c28cefedc1462479348bc
-  data.tar.gz: 94ae2baac22caef699c8251405fa7b44403b16fb7365b26af257ecb163714be1815ef93ab9a47de5babf16e79e5203a6912a2a6beec34f2e4ca266a026df8ad7
+  metadata.gz: d8756402fcb03f456f27d8359b4e8adfd970fed42a89bdfebab9b7a75b25d83dd9b4db19b1f7982b71214c573500b519cbc1920b6a54fa2632a3521dabee9a68
+  data.tar.gz: 911a665d2609086e20aff601161e63ce22410e18c0d169fc9d8032cef029062aa7a3cdafa082c55db1ab78ae6f2e5f7a1f302246c716f5393a815ed9aec890dc

data/.gitignore CHANGED Viewed

@@ -12,3 +12,4 @@
 *.o
 *.a
 mkmf.log
+.DS_Store

data/NEWS CHANGED Viewed

@@ -1,3 +1,7 @@
+0.3.13 (2017-01-17):
+* Bug Fix: Unexpected sentence break between abbreviation and hyphen
 0.3.12 (2016-12-12):
 * Bug Fix: Issue with words with leading apostrophes

data/README.md CHANGED Viewed

@@ -847,6 +847,9 @@ To test the relative performance of different segmentation tools and libraries I
 **Version 0.3.12**
 * Fix issue involving words with leading apostrophes
+**Version 0.3.13**
+* Fix issue involving unexpected sentence break between abbreviation and hyphen
 ## Contributing
 If you find a text that is incorrectly segmented using this gem, please submit an issue.

data/lib/pragmatic_segmenter/abbreviation_replacer.rb CHANGED Viewed

@@ -113,7 +113,7 @@ module PragmaticSegmenter
     end
     def replace_period_of_abbr(txt, abbr)
-      txt.gsub!(/(?<=\s#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))|(?<=^#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))/, '∯')
+      txt.gsub!(/(?<=\s#{abbr.strip})\.(?=((\.|\:|-|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))|(?<=^#{abbr.strip})\.(?=((\.|\:|\?)|(\s([a-z]|I\s|I'm|I'll|\d))))/, '∯')
       txt.gsub!(/(?<=\s#{abbr.strip})\.(?=,)|(?<=^#{abbr.strip})\.(?=,)/, '∯')
       txt
     end

data/lib/pragmatic_segmenter/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module PragmaticSegmenter
-  VERSION = "0.3.12"
+  VERSION = "0.3.13"
 end

data/spec/pragmatic_segmenter/languages/english_spec.rb CHANGED Viewed

@@ -1359,5 +1359,15 @@ RSpec.describe PragmaticSegmenter::Languages::English, "(en)" do
       ps = PragmaticSegmenter::Segmenter.new(text: "I wrote this in the ’nineties.  It has four sentences.  This is the third, isn't it?  And this is the last")
       expect(ps.segment).to eq(["I wrote this in the ’nineties.", "It has four sentences.", "This is the third, isn't it?", "And this is the last"])
     end
+    it "correctly segments text #109" do
+      ps = PragmaticSegmenter::Segmenter.new(text: "He has Ph.D.-level training", clean: false)
+      expect(ps.segment).to eq(["He has Ph.D.-level training"])
+    end
+    it "correctly segments text #110" do
+      ps = PragmaticSegmenter::Segmenter.new(text: "He has Ph.D. level training", clean: false)
+      expect(ps.segment).to eq(["He has Ph.D. level training"])
+    end
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pragmatic_segmenter
 version: !ruby/object:Gem::Version
-  version: 0.3.12
+  version: 0.3.13
 platform: ruby
 authors:
 - Kevin S. Dias
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-12-13 00:00:00.000000000 Z
+date: 2017-01-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: unicode
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.5.2
+rubygems_version: 2.6.8
 signing_key:
 specification_version: 4
 summary: A rule-based sentence boundary detection gem that works out-of-the-box across