RubyGems - pragmatic_segmenter - Versions diffs - 0.1.4 → 0.1.5 - Mend

pragmatic_segmenter 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/README.md +3 -0
data/lib/pragmatic_segmenter/sentence_boundary_punctuation.rb +1 -1
data/lib/pragmatic_segmenter/version.rb +1 -1
data/spec/pragmatic_segmenter_spec.rb +5 -0
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 518ed98b0c60605da21ddd818f5cc8b5fef7fc99
-  data.tar.gz: 2531a0cb86b3790cbad5ef8a79b7abdbffe09403
+  metadata.gz: a2951fa2242e1eb7ce0898862d41fd5239d871f8
+  data.tar.gz: 01d84b2637e84598907ef08e59ec64e90855c2ec
 SHA512:
-  metadata.gz: 721b451df7243ff5fe334abc7d43d8b143ac33fa0aff33aa88d6af2a1b6dda34a92f1c1c0ee7bb8bf2cae85830f0aa4af05acdcecb16f9e0a0f58c773bc21ba4
-  data.tar.gz: 7eb98fdb49c723b37e23fe5d36fd28510b789feec731c30abb8387d24f7c9415aba1fafad304671fab3a105d5a1348cdd3348fe889d941360ad328714142818b
+  metadata.gz: 25ec02bc41c649a57b1bc993848bed546514568c74e2e0ec5b6a2242fbbb579e61e199210f8ea4349ceadee5a06ad0fa76c7c60196762903cab984bbad3c2286
+  data.tar.gz: 925a24d8c131813ddb4f80a8865697baa0efe05ef781cabfc13dfb9b1efc878c2e6e276a46a9f1301049a99b2f64a558b6a9911e0a2f5f8f4c17622df3fd0b2a

data/README.md CHANGED Viewed

@@ -760,6 +760,9 @@ To test the relative performance of different segmentation tools and libraries I
 **Version 0.1.4**
 * Fix missing abbreviations
+**Version 0.1.5**
+* Fix comma at end of quotation bug
 ## Contributing
 If you find a text that is incorrectly segmented using this gem, please submit an issue.

data/lib/pragmatic_segmenter/sentence_boundary_punctuation.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module PragmaticSegmenter
   # This class splits text at sentence boundary punctuation marks
   class SentenceBoundaryPunctuation
-    SENTENCE_BOUNDARY_REGEX = /\u{ff08}(?:[^\u{ff09}])*\u{ff09}(?=\s?[A-Z])|\u{300c}(?:[^\u{300d}])*\u{300d}(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|'(?:[^'])*'(?=\s[A-Z])|"(?:[^"])*"(?=\s[A-Z])|“(?:[^”])*”(?=\s[A-Z])|\S.*?[。．.！!?？ȸȹ☉☈☇☄]/
+    SENTENCE_BOUNDARY_REGEX = /\u{ff08}(?:[^\u{ff09}])*\u{ff09}(?=\s?[A-Z])|\u{300c}(?:[^\u{300d}])*\u{300d}(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|'(?:[^'])*[^,]'(?=\s[A-Z])|"(?:[^"])*[^,]"(?=\s[A-Z])|“(?:[^”])*[^,]”(?=\s[A-Z])|\S.*?[。．.！!?？ȸȹ☉☈☇☄]/
     attr_reader :text
     def initialize(text:)

data/lib/pragmatic_segmenter/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module PragmaticSegmenter
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end

data/spec/pragmatic_segmenter_spec.rb CHANGED Viewed

@@ -928,6 +928,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
         ps = PragmaticSegmenter::Segmenter.new(text: "[15:  12:32]  [16:  firma? 13:28]")
         expect(ps.segment).to eq(["[15:  12:32]  [16:  firma? 13:28]"])
       end
+      it "correctly segments text #092" do
+        ps = PragmaticSegmenter::Segmenter.new(text: "\"It's a good thing that the water is really calm,\" I answered ironically.")
+        expect(ps.segment).to eq(["\"It's a good thing that the water is really calm,\" I answered ironically."])
+      end
     end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pragmatic_segmenter
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - Kevin S. Dias