pragmatic_segmenter 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/pragmatic_segmenter/cleaner.rb +4 -0
- data/lib/pragmatic_segmenter/version.rb +1 -1
- data/spec/pragmatic_segmenter_spec.rb +7 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e26400933e02aca93c63db3221fb897f49cdf78
|
4
|
+
data.tar.gz: a0199ca43d0f8558ba3fe867c6e32c69787210c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1dca5e20b2b062070b0cd319da6549594add065b670506b7e7dd9cbfe9eb8c83985ef197bf6427ee496d700edab5a526c8165fc1a27567e553f7a5e625edb6fe
|
7
|
+
data.tar.gz: a53a4059b4ba41e7d40bf2a21ca1981ae5ed134a8017771f8ecd337bdc96762e2e78ec7c53c00c3ff2e3ac6179cf1e29f24fee703bc6b19fbbf31bc7ff572894
|
data/README.md
CHANGED
@@ -816,6 +816,9 @@ To test the relative performance of different segmentation tools and libraries I
|
|
816
816
|
**Version 0.3.2**
|
817
817
|
* Add English abbreviations
|
818
818
|
|
819
|
+
**Version 0.3.3**
|
820
|
+
* Fix cleaner bug
|
821
|
+
|
819
822
|
## Contributing
|
820
823
|
|
821
824
|
If you find a text that is incorrectly segmented using this gem, please submit an issue.
|
@@ -2600,11 +2600,17 @@ RSpec.describe PragmaticSegmenter::Segmenter do
|
|
2600
2600
|
end
|
2601
2601
|
|
2602
2602
|
describe '#clean' do
|
2603
|
-
it 'cleans the text' do
|
2603
|
+
it 'cleans the text #001' do
|
2604
2604
|
ps = PragmaticSegmenter::Cleaner.new(text: "It was a cold \nnight in the city.", language: "en")
|
2605
2605
|
expect(ps.clean).to eq("It was a cold night in the city.")
|
2606
2606
|
end
|
2607
2607
|
|
2608
|
+
it 'cleans the text #002' do
|
2609
|
+
text = 'injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled "'
|
2610
|
+
ps = PragmaticSegmenter::Cleaner.new(text: text)
|
2611
|
+
expect(ps.clean).to eq("injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled \"")
|
2612
|
+
end
|
2613
|
+
|
2608
2614
|
it 'does not mutate the input string (cleaner)' do
|
2609
2615
|
text = "It was a cold \nnight in the city."
|
2610
2616
|
PragmaticSegmenter::Cleaner.new(text: text, language: "en").clean
|