pragmatic_segmenter 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 96aa232b769e566aad1de6ace0fa6d8591e3859c
4
- data.tar.gz: 2ed0518fb7c236821b0986795e7bc1ce5e19e1a8
3
+ metadata.gz: a6efcebc92c7cc21017d9acacea4283de1003094
4
+ data.tar.gz: 0c64c57f9c5e506848295199e9c8195cce89a8d3
5
5
  SHA512:
6
- metadata.gz: fef9536b2c2dc9896aedcb363f89bc10af37c09d66c034d8814ccb784619a9ea82fa3d20e9e69316dfcef06d7013183d9dcbd564430dc7ce2303a7cfbe32ba87
7
- data.tar.gz: 813a64e15c83b9dab5a2fe29c0bebba638fba0663c3debb26442cd42f060fa87e927846309355ec15953ca0a45c90d764fae796533f7d054282bdf8b5bcb06d4
6
+ metadata.gz: edc26a49545a32ae220caac43979861ebe2f28fb892d857d5c920bb857df38a62bfe161ed268168710b155510c780edf1d625e3f61e7e7bdaeaa54c52a1f87aa
7
+ data.tar.gz: 970301bbcb8e984e1c9d3c8ed222831d35fa7a0731af9b9916ca593d746dbad9db2922d20af5690be810de1100df021ef799c93c2e03901381681fb99980ee8a
data/README.md CHANGED
@@ -747,6 +747,9 @@ To test the relative performance of different segmentation tools and libraries I
747
747
  **Version 0.1.0**
748
748
  * Add Kommanditgesellschaft Rule
749
749
 
750
+ **Version 0.1.1**
751
+ * Fix handling of German dates
752
+
750
753
  ## Contributing
751
754
 
752
755
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -32,9 +32,20 @@ module PragmaticSegmenter
32
32
  # Rubular: http://rubular.com/r/ityNMwdghj
33
33
  NegativeNumberPeriodSpaceRule = Rule.new(/(?<=-[0-9]|-([1-9][0-9]))\.(?=\s)/, '∯')
34
34
 
35
+ DE_MONTHS = ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember']
36
+
35
37
  def replace
36
38
  super
37
39
  @text.apply(NumberPeriodSpaceRule).apply(NegativeNumberPeriodSpaceRule)
40
+ replace_period_in_deutsch_dates(@text)
41
+ end
42
+
43
+ def replace_period_in_deutsch_dates(txt)
44
+ DE_MONTHS.each do |month|
45
+ # Rubular: http://rubular.com/r/zlqgj7G5dA
46
+ txt.gsub!(/(?<=\d)\.(?=\s*#{Regexp.escape(month)})/, '∯')
47
+ end
48
+ txt
38
49
  end
39
50
  end
40
51
 
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -1508,6 +1508,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
1508
1508
  ps = PragmaticSegmenter::Segmenter.new(text: "s. vorherige Anmerkung.", language: 'de')
1509
1509
  expect(ps.segment).to eq(["s. vorherige Anmerkung."])
1510
1510
  end
1511
+
1512
+ it 'correctly segments text #033' do
1513
+ ps = PragmaticSegmenter::Segmenter.new(text: "Mit Inkrafttreten des Mindestlohngesetzes (MiLoG) zum 01. Januar 2015 werden in Bezug auf den Einsatz von Leistungs.", language: 'de')
1514
+ expect(ps.segment).to eq(["Mit Inkrafttreten des Mindestlohngesetzes (MiLoG) zum 01. Januar 2015 werden in Bezug auf den Einsatz von Leistungs."])
1515
+ end
1511
1516
  end
1512
1517
  end
1513
1518
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias