pragmatic_segmenter 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e52d1869830dfba91d5e5a00f15d3529081691e
4
- data.tar.gz: 5754723c0ba657a31e3471d785b034e3c1814e33
3
+ metadata.gz: 9e26400933e02aca93c63db3221fb897f49cdf78
4
+ data.tar.gz: a0199ca43d0f8558ba3fe867c6e32c69787210c9
5
5
  SHA512:
6
- metadata.gz: d3a4700ba0369b60d36f633c4e042a5113197956ad02dff64243c70445bc445435af17689f456327753cead2d9d1cee00db548e2e0dad337341f110194522d2c
7
- data.tar.gz: f8a28d6582e846ad2ffff21b87f4888b922cc4224faaf85e2782ca92671506dbf55fab86c0ddf06ae6363a3851f8ec9dd44f7bc9956a86f2ed2aea4bc6d69fb5
6
+ metadata.gz: 1dca5e20b2b062070b0cd319da6549594add065b670506b7e7dd9cbfe9eb8c83985ef197bf6427ee496d700edab5a526c8165fc1a27567e553f7a5e625edb6fe
7
+ data.tar.gz: a53a4059b4ba41e7d40bf2a21ca1981ae5ed134a8017771f8ecd337bdc96762e2e78ec7c53c00c3ff2e3ac6179cf1e29f24fee703bc6b19fbbf31bc7ff572894
data/README.md CHANGED
@@ -816,6 +816,9 @@ To test the relative performance of different segmentation tools and libraries I
816
816
  **Version 0.3.2**
817
817
  * Add English abbreviations
818
818
 
819
+ **Version 0.3.3**
820
+ * Fix cleaner bug
821
+
819
822
  ## Contributing
820
823
 
821
824
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -44,6 +44,10 @@ module PragmaticSegmenter
44
44
 
45
45
  private
46
46
 
47
+ def abbreviations
48
+ @language::Abbreviation::ABBREVIATIONS
49
+ end
50
+
47
51
  def check_for_no_space_in_between_sentences(txt)
48
52
  words = txt.split(' ')
49
53
  words.each do |word|
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -2600,11 +2600,17 @@ RSpec.describe PragmaticSegmenter::Segmenter do
2600
2600
  end
2601
2601
 
2602
2602
  describe '#clean' do
2603
- it 'cleans the text' do
2603
+ it 'cleans the text #001' do
2604
2604
  ps = PragmaticSegmenter::Cleaner.new(text: "It was a cold \nnight in the city.", language: "en")
2605
2605
  expect(ps.clean).to eq("It was a cold night in the city.")
2606
2606
  end
2607
2607
 
2608
+ it 'cleans the text #002' do
2609
+ text = 'injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled "'
2610
+ ps = PragmaticSegmenter::Cleaner.new(text: text)
2611
+ expect(ps.clean).to eq("injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled \"")
2612
+ end
2613
+
2608
2614
  it 'does not mutate the input string (cleaner)' do
2609
2615
  text = "It was a cold \nnight in the city."
2610
2616
  PragmaticSegmenter::Cleaner.new(text: text, language: "en").clean
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias