pragmatic_segmenter 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/pragmatic_segmenter/list.rb +2 -1
- data/lib/pragmatic_segmenter/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80eb7dfc7aeed8a66ff324dde7a87ea544e55e45
|
4
|
+
data.tar.gz: c1f3daf78133d748d6cf02133937dad1e05ef0ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2c6018bb4d46ccc5ef86bbc548437861fa83cf4948abd09e45d8c4b23c021c48907feb9cd65916a9ef8635ede477f5f0b8e221cb7452b38c236f79a3d0cfa77
|
7
|
+
data.tar.gz: d63e6eb39e52306785e491f1bfd1d4196e992e513a857b8893ef9c24fcab95f515dc863789f5040609436dc116efbac608a7ac010c0015fa159535198b1554ea
|
data/README.md
CHANGED
@@ -738,6 +738,9 @@ To test the relative performance of different segmentation tools and libraries I
|
|
738
738
|
* Add passing spec for new end of sentence abbreviation (EN)
|
739
739
|
* Add roman numeral list support
|
740
740
|
|
741
|
+
**Version 0.0.8**
|
742
|
+
* Fix error in `list.rb`
|
743
|
+
|
741
744
|
## Contributing
|
742
745
|
|
743
746
|
If you find a text that is incorrectly segmented using this gem, please submit an issue.
|
data/lib/pragmatic_segmenter/list.rb
CHANGED
@@ -161,7 +161,8 @@ module PragmaticSegmenter
|
|
161
161
|
def other_items_replacement(a, i, alphabet, list_array, txt, parens)
|
162
162
|
return if alphabet & list_array == [] ||
|
163
163
|
!alphabet.include?(list_array[i - 1]) ||
|
164
|
-
!alphabet.include?(a)
|
164
|
+
!alphabet.include?(a) ||
|
165
|
+
!alphabet.include?(list_array[i + 1])
|
165
166
|
return if alphabet.index(list_array[i + 1]) - alphabet.index(a) != 1 &&
|
166
167
|
(alphabet.index(list_array[i - 1]) - alphabet.index(a)).abs != 1
|
167
168
|
replace_correct_alphabet_list(a, txt, parens)
|