pragmatic_segmenter 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e52d1869830dfba91d5e5a00f15d3529081691e
4
- data.tar.gz: 5754723c0ba657a31e3471d785b034e3c1814e33
3
+ metadata.gz: 9e26400933e02aca93c63db3221fb897f49cdf78
4
+ data.tar.gz: a0199ca43d0f8558ba3fe867c6e32c69787210c9
5
5
  SHA512:
6
- metadata.gz: d3a4700ba0369b60d36f633c4e042a5113197956ad02dff64243c70445bc445435af17689f456327753cead2d9d1cee00db548e2e0dad337341f110194522d2c
7
- data.tar.gz: f8a28d6582e846ad2ffff21b87f4888b922cc4224faaf85e2782ca92671506dbf55fab86c0ddf06ae6363a3851f8ec9dd44f7bc9956a86f2ed2aea4bc6d69fb5
6
+ metadata.gz: 1dca5e20b2b062070b0cd319da6549594add065b670506b7e7dd9cbfe9eb8c83985ef197bf6427ee496d700edab5a526c8165fc1a27567e553f7a5e625edb6fe
7
+ data.tar.gz: a53a4059b4ba41e7d40bf2a21ca1981ae5ed134a8017771f8ecd337bdc96762e2e78ec7c53c00c3ff2e3ac6179cf1e29f24fee703bc6b19fbbf31bc7ff572894
data/README.md CHANGED
@@ -816,6 +816,9 @@ To test the relative performance of different segmentation tools and libraries I
816
816
  **Version 0.3.2**
817
817
  * Add English abbreviations
818
818
 
819
+ **Version 0.3.3**
820
+ * Fix cleaner bug
821
+
819
822
  ## Contributing
820
823
 
821
824
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -44,6 +44,10 @@ module PragmaticSegmenter
44
44
 
45
45
  private
46
46
 
47
+ def abbreviations
48
+ @language::Abbreviation::ABBREVIATIONS
49
+ end
50
+
47
51
  def check_for_no_space_in_between_sentences(txt)
48
52
  words = txt.split(' ')
49
53
  words.each do |word|
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -2600,11 +2600,17 @@ RSpec.describe PragmaticSegmenter::Segmenter do
2600
2600
  end
2601
2601
 
2602
2602
  describe '#clean' do
2603
- it 'cleans the text' do
2603
+ it 'cleans the text #001' do
2604
2604
  ps = PragmaticSegmenter::Cleaner.new(text: "It was a cold \nnight in the city.", language: "en")
2605
2605
  expect(ps.clean).to eq("It was a cold night in the city.")
2606
2606
  end
2607
2607
 
2608
+ it 'cleans the text #002' do
2609
+ text = 'injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled "'
2610
+ ps = PragmaticSegmenter::Cleaner.new(text: text)
2611
+ expect(ps.clean).to eq("injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled \"")
2612
+ end
2613
+
2608
2614
  it 'does not mutate the input string (cleaner)' do
2609
2615
  text = "It was a cold \nnight in the city."
2610
2616
  PragmaticSegmenter::Cleaner.new(text: text, language: "en").clean
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias