pragmatic_segmenter 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/pragmatic_segmenter/cleaner.rb +4 -0
- data/lib/pragmatic_segmenter/version.rb +1 -1
- data/spec/pragmatic_segmenter_spec.rb +7 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e26400933e02aca93c63db3221fb897f49cdf78
|
4
|
+
data.tar.gz: a0199ca43d0f8558ba3fe867c6e32c69787210c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1dca5e20b2b062070b0cd319da6549594add065b670506b7e7dd9cbfe9eb8c83985ef197bf6427ee496d700edab5a526c8165fc1a27567e553f7a5e625edb6fe
|
7
|
+
data.tar.gz: a53a4059b4ba41e7d40bf2a21ca1981ae5ed134a8017771f8ecd337bdc96762e2e78ec7c53c00c3ff2e3ac6179cf1e29f24fee703bc6b19fbbf31bc7ff572894
|
data/README.md
CHANGED
@@ -816,6 +816,9 @@ To test the relative performance of different segmentation tools and libraries I
|
|
816
816
|
**Version 0.3.2**
|
817
817
|
* Add English abbreviations
|
818
818
|
|
819
|
+
**Version 0.3.3**
|
820
|
+
* Fix cleaner bug
|
821
|
+
|
819
822
|
## Contributing
|
820
823
|
|
821
824
|
If you find a text that is incorrectly segmented using this gem, please submit an issue.
|
data/spec/pragmatic_segmenter_spec.rb
CHANGED
@@ -2600,11 +2600,17 @@ RSpec.describe PragmaticSegmenter::Segmenter do
|
|
2600
2600
|
end
|
2601
2601
|
|
2602
2602
|
describe '#clean' do
|
2603
|
-
it 'cleans the text' do
|
2603
|
+
it 'cleans the text #001' do
|
2604
2604
|
ps = PragmaticSegmenter::Cleaner.new(text: "It was a cold \nnight in the city.", language: "en")
|
2605
2605
|
expect(ps.clean).to eq("It was a cold night in the city.")
|
2606
2606
|
end
|
2607
2607
|
|
2608
|
+
it 'cleans the text #002' do
|
2609
|
+
text = 'injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled "'
|
2610
|
+
ps = PragmaticSegmenter::Cleaner.new(text: text)
|
2611
|
+
expect(ps.clean).to eq("injections made by the Shareholder through the years. 7 (max.) 3. Specifications/4.Design and function The operating instructions are part of the product and must be kept in the immediate vicinity of the instrument and readily accessible to skilled \"")
|
2612
|
+
end
|
2613
|
+
|
2608
2614
|
it 'does not mutate the input string (cleaner)' do
|
2609
2615
|
text = "It was a cold \nnight in the city."
|
2610
2616
|
PragmaticSegmenter::Cleaner.new(text: text, language: "en").clean
|