pragmatic_segmenter 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 518ed98b0c60605da21ddd818f5cc8b5fef7fc99
4
- data.tar.gz: 2531a0cb86b3790cbad5ef8a79b7abdbffe09403
3
+ metadata.gz: a2951fa2242e1eb7ce0898862d41fd5239d871f8
4
+ data.tar.gz: 01d84b2637e84598907ef08e59ec64e90855c2ec
5
5
  SHA512:
6
- metadata.gz: 721b451df7243ff5fe334abc7d43d8b143ac33fa0aff33aa88d6af2a1b6dda34a92f1c1c0ee7bb8bf2cae85830f0aa4af05acdcecb16f9e0a0f58c773bc21ba4
7
- data.tar.gz: 7eb98fdb49c723b37e23fe5d36fd28510b789feec731c30abb8387d24f7c9415aba1fafad304671fab3a105d5a1348cdd3348fe889d941360ad328714142818b
6
+ metadata.gz: 25ec02bc41c649a57b1bc993848bed546514568c74e2e0ec5b6a2242fbbb579e61e199210f8ea4349ceadee5a06ad0fa76c7c60196762903cab984bbad3c2286
7
+ data.tar.gz: 925a24d8c131813ddb4f80a8865697baa0efe05ef781cabfc13dfb9b1efc878c2e6e276a46a9f1301049a99b2f64a558b6a9911e0a2f5f8f4c17622df3fd0b2a
data/README.md CHANGED
@@ -760,6 +760,9 @@ To test the relative performance of different segmentation tools and libraries I
760
760
  **Version 0.1.4**
761
761
  * Fix missing abbreviations
762
762
 
763
+ **Version 0.1.5**
764
+ * Fix comma at end of quotation bug
765
+
763
766
  ## Contributing
764
767
 
765
768
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -3,7 +3,7 @@
3
3
  module PragmaticSegmenter
4
4
  # This class splits text at sentence boundary punctuation marks
5
5
  class SentenceBoundaryPunctuation
6
- SENTENCE_BOUNDARY_REGEX = /\u{ff08}(?:[^\u{ff09}])*\u{ff09}(?=\s?[A-Z])|\u{300c}(?:[^\u{300d}])*\u{300d}(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|'(?:[^'])*'(?=\s[A-Z])|"(?:[^"])*"(?=\s[A-Z])|“(?:[^”])*”(?=\s[A-Z])|\S.*?[。..!!??ȸȹ☉☈☇☄]/
6
+ SENTENCE_BOUNDARY_REGEX = /\u{ff08}(?:[^\u{ff09}])*\u{ff09}(?=\s?[A-Z])|\u{300c}(?:[^\u{300d}])*\u{300d}(?=\s[A-Z])|\((?:[^\)]){2,}\)(?=\s[A-Z])|'(?:[^'])*[^,]'(?=\s[A-Z])|"(?:[^"])*[^,]"(?=\s[A-Z])|“(?:[^”])*[^,]”(?=\s[A-Z])|\S.*?[。..!!??ȸȹ☉☈☇☄]/
7
7
 
8
8
  attr_reader :text
9
9
  def initialize(text:)
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -928,6 +928,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
928
928
  ps = PragmaticSegmenter::Segmenter.new(text: "[15: 12:32] [16: firma? 13:28]")
929
929
  expect(ps.segment).to eq(["[15: 12:32] [16: firma? 13:28]"])
930
930
  end
931
+
932
+ it "correctly segments text #092" do
933
+ ps = PragmaticSegmenter::Segmenter.new(text: "\"It's a good thing that the water is really calm,\" I answered ironically.")
934
+ expect(ps.segment).to eq(["\"It's a good thing that the water is really calm,\" I answered ironically."])
935
+ end
931
936
  end
932
937
  end
933
938
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias