pragmatic_segmenter 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ac7a7ecd4d02a9d2df378b3c174ba338238b26df
4
- data.tar.gz: fa33b3bd39e88c065bef560f0d22b3124ed99da8
3
+ metadata.gz: 1708727dc62734ca577e6d9b1c05af49551563a7
4
+ data.tar.gz: 1f00d14609486e72068d3a189851ef9fd9f66a31
5
5
  SHA512:
6
- metadata.gz: ff3c13ce3db23f80d2051fe95b94dc7dd057ed36818f06b6ddff18f14179be88ef0d381250d1bebba3322b5439d59658522fb56440fb082fb69d32baefb54e7b
7
- data.tar.gz: 21b0f050be65715cf1ab020d7ec31780369ef6cb64a6de0ff18176eabd94e5a38073f57ed258bb21c62c1d8eaf4978a6cb66e1e4f10c697549fb319dc762b02c
6
+ metadata.gz: 9a45479d6c2cfa930e2d6bafcaf9648208dee53f79b5d16fa020fe5427210c3b7673663707bf1feda98344fe4765796a060fac8055f2641f107d7d1ce5e51022
7
+ data.tar.gz: c92f8ee9fb924a3b8e46d6efb1fa9fa27eb72a06a66f9e9d64f84efa4458efd42a6d88d63f4c28880cd7ada18f333cd8ab927bb9a580d319d1ff1ec38e3b3664
data/README.md CHANGED
@@ -769,7 +769,10 @@ To test the relative performance of different segmentation tools and libraries I
769
769
  **Version 0.1.7**
770
770
  * Add Alice in Wonderland specs
771
771
  * Fix parenthesis between double quotations bug
772
- * Fix split after quotation ending in dash bug
772
+ * Fix split after quotation ending in dash bug
773
+
774
+ **Version 0.1.8**
775
+ * Fix bug in splitting new sentence after single quotes
773
776
 
774
777
  ## Contributing
775
778
 
@@ -49,7 +49,8 @@ module PragmaticSegmenter
49
49
  def sub_punctuation_between_single_quotes(txt)
50
50
  PragmaticSegmenter::PunctuationReplacer.new(
51
51
  matches_array: txt.scan(BETWEEN_SINGLE_QUOTES_REGEX),
52
- text: txt
52
+ text: txt,
53
+ match_type: 'single'
53
54
  ).replace
54
55
  end
55
56
 
@@ -4,11 +4,15 @@ module PragmaticSegmenter
4
4
  # This class replaces punctuation that is typically a sentence boundary
5
5
  # but in this case is not a sentence boundary.
6
6
  class PunctuationReplacer
7
+ # Rubular: http://rubular.com/r/2YFrKWQUYi
8
+ BETWEEN_SINGLE_QUOTES_REGEX = /(?<=\s)'(?:[^']|'[a-zA-Z])*'/
9
+
7
10
  include Rules
8
- attr_reader :matches_array, :text
9
- def initialize(text:, matches_array:)
11
+ attr_reader :matches_array, :text, :match_type
12
+ def initialize(text:, matches_array:, **args)
10
13
  @text = text
11
14
  @matches_array = matches_array
15
+ @match_type = args[:match_type]
12
16
  end
13
17
 
14
18
  def replace
@@ -29,7 +33,9 @@ module PragmaticSegmenter
29
33
  sub_4 = sub_characters(txt, sub_3, '!', '&ᓴ&')
30
34
  sub_5 = sub_characters(txt, sub_4, '?', '&ᓷ&')
31
35
  sub_6 = sub_characters(txt, sub_5, '?', '&ᓸ&')
32
- sub_7 = sub_characters(txt, sub_6, "'", '&⎋&')
36
+ unless match_type.eql?('single')
37
+ sub_7 = sub_characters(txt, sub_6, "'", '&⎋&')
38
+ end
33
39
  end
34
40
  txt.apply(SubEscapedRegexReservedCharacters::All)
35
41
  end
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -1498,6 +1498,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
1498
1498
  ps = PragmaticSegmenter::Segmenter.new(text: "\"Dinah'll miss me very much to-night, I should think!\" (Dinah was the cat.) \"I hope they'll remember her saucer of milk at tea-time. Dinah, my dear, I wish you were down here with me!\"")
1499
1499
  expect(ps.segment).to eq(["\"Dinah'll miss me very much to-night, I should think!\"", "(Dinah was the cat.)", "\"I hope they'll remember her saucer of milk at tea-time. Dinah, my dear, I wish you were down here with me!\""])
1500
1500
  end
1501
+
1502
+ it "correctly segments text #098" do
1503
+ ps = PragmaticSegmenter::Segmenter.new(text: "Hello. 'This is a test of single quotes.' A new sentence.")
1504
+ expect(ps.segment).to eq(["Hello.", "'This is a test of single quotes.'", "A new sentence."])
1505
+ end
1501
1506
  end
1502
1507
  end
1503
1508
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias