pragmatic_segmenter 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 518ed98b0c60605da21ddd818f5cc8b5fef7fc99
4
- data.tar.gz: 2531a0cb86b3790cbad5ef8a79b7abdbffe09403
3
+ metadata.gz: a2951fa2242e1eb7ce0898862d41fd5239d871f8
4
+ data.tar.gz: 01d84b2637e84598907ef08e59ec64e90855c2ec
5
5
  SHA512:
6
- metadata.gz: 721b451df7243ff5fe334abc7d43d8b143ac33fa0aff33aa88d6af2a1b6dda34a92f1c1c0ee7bb8bf2cae85830f0aa4af05acdcecb16f9e0a0f58c773bc21ba4
7
- data.tar.gz: 7eb98fdb49c723b37e23fe5d36fd28510b789feec731c30abb8387d24f7c9415aba1fafad304671fab3a105d5a1348cdd3348fe889d941360ad328714142818b
6
+ metadata.gz: 25ec02bc41c649a57b1bc993848bed546514568c74e2e0ec5b6a2242fbbb579e61e199210f8ea4349ceadee5a06ad0fa76c7c60196762903cab984bbad3c2286
7
+ data.tar.gz: 925a24d8c131813ddb4f80a8865697baa0efe05ef781cabfc13dfb9b1efc878c2e6e276a46a9f1301049a99b2f64a558b6a9911e0a2f5f8f4c17622df3fd0b2a
data/README.md CHANGED
@@ -760,6 +760,9 @@ To test the relative performance of different segmentation tools and libraries I
760
760
  **Version 0.1.4**
761
761
  * Fix missing abbreviations
762
762
 
763
+ **Version 0.1.5**
764
+ * Fix comma at end of quotation bug
765
+
763
766
  ## Contributing
764
767
 
765
768
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -3,7 +3,7 @@
3
3
  module PragmaticSegmenter
4
4
  # This class splits text at sentence boundary punctuation marks
5
5
  class SentenceBoundaryPunctuation
6
- SENTENCE_BOUNDARY_REGEX = /\u{ff08}(?:[^\u{ff09}])*\u{ff09}(?=\s?[A-Z])|\u{300c}(?:[^\u{300d}])*\u{300d}(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|'(?:[^'])*'(?=\s[A-Z])|"(?:[^"])*"(?=\s[A-Z])|“(?:[^”])*”(?=\s[A-Z])|\S.*?[。..!!??ȸȹ☉☈☇☄]/
6
+ SENTENCE_BOUNDARY_REGEX = /\u{ff08}(?:[^\u{ff09}])*\u{ff09}(?=\s?[A-Z])|\u{300c}(?:[^\u{300d}])*\u{300d}(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|'(?:[^'])*[^,]'(?=\s[A-Z])|"(?:[^"])*[^,]"(?=\s[A-Z])|“(?:[^”])*[^,]”(?=\s[A-Z])|\S.*?[。..!!??ȸȹ☉☈☇☄]/
7
7
 
8
8
  attr_reader :text
9
9
  def initialize(text:)
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -928,6 +928,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
928
928
  ps = PragmaticSegmenter::Segmenter.new(text: "[15: 12:32] [16: firma? 13:28]")
929
929
  expect(ps.segment).to eq(["[15: 12:32] [16: firma? 13:28]"])
930
930
  end
931
+
932
+ it "correctly segments text #092" do
933
+ ps = PragmaticSegmenter::Segmenter.new(text: "\"It's a good thing that the water is really calm,\" I answered ironically.")
934
+ expect(ps.segment).to eq(["\"It's a good thing that the water is really calm,\" I answered ironically."])
935
+ end
931
936
  end
932
937
  end
933
938
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias