pragmatic_segmenter 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 96aa232b769e566aad1de6ace0fa6d8591e3859c
4
- data.tar.gz: 2ed0518fb7c236821b0986795e7bc1ce5e19e1a8
3
+ metadata.gz: a6efcebc92c7cc21017d9acacea4283de1003094
4
+ data.tar.gz: 0c64c57f9c5e506848295199e9c8195cce89a8d3
5
5
  SHA512:
6
- metadata.gz: fef9536b2c2dc9896aedcb363f89bc10af37c09d66c034d8814ccb784619a9ea82fa3d20e9e69316dfcef06d7013183d9dcbd564430dc7ce2303a7cfbe32ba87
7
- data.tar.gz: 813a64e15c83b9dab5a2fe29c0bebba638fba0663c3debb26442cd42f060fa87e927846309355ec15953ca0a45c90d764fae796533f7d054282bdf8b5bcb06d4
6
+ metadata.gz: edc26a49545a32ae220caac43979861ebe2f28fb892d857d5c920bb857df38a62bfe161ed268168710b155510c780edf1d625e3f61e7e7bdaeaa54c52a1f87aa
7
+ data.tar.gz: 970301bbcb8e984e1c9d3c8ed222831d35fa7a0731af9b9916ca593d746dbad9db2922d20af5690be810de1100df021ef799c93c2e03901381681fb99980ee8a
data/README.md CHANGED
@@ -747,6 +747,9 @@ To test the relative performance of different segmentation tools and libraries I
747
747
  **Version 0.1.0**
748
748
  * Add Kommanditgesellschaft Rule
749
749
 
750
+ **Version 0.1.1**
751
+ * Fix handling of German dates
752
+
750
753
  ## Contributing
751
754
 
752
755
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -32,9 +32,20 @@ module PragmaticSegmenter
32
32
  # Rubular: http://rubular.com/r/ityNMwdghj
33
33
  NegativeNumberPeriodSpaceRule = Rule.new(/(?<=-[0-9]|-([1-9][0-9]))\.(?=\s)/, '∯')
34
34
 
35
+ DE_MONTHS = ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember']
36
+
35
37
  def replace
36
38
  super
37
39
  @text.apply(NumberPeriodSpaceRule).apply(NegativeNumberPeriodSpaceRule)
40
+ replace_period_in_deutsch_dates(@text)
41
+ end
42
+
43
+ def replace_period_in_deutsch_dates(txt)
44
+ DE_MONTHS.each do |month|
45
+ # Rubular: http://rubular.com/r/zlqgj7G5dA
46
+ txt.gsub!(/(?<=\d)\.(?=\s*#{Regexp.escape(month)})/, '∯')
47
+ end
48
+ txt
38
49
  end
39
50
  end
40
51
 
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -1508,6 +1508,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
1508
1508
  ps = PragmaticSegmenter::Segmenter.new(text: "s. vorherige Anmerkung.", language: 'de')
1509
1509
  expect(ps.segment).to eq(["s. vorherige Anmerkung."])
1510
1510
  end
1511
+
1512
+ it 'correctly segments text #033' do
1513
+ ps = PragmaticSegmenter::Segmenter.new(text: "Mit Inkrafttreten des Mindestlohngesetzes (MiLoG) zum 01. Januar 2015 werden in Bezug auf den Einsatz von Leistungs.", language: 'de')
1514
+ expect(ps.segment).to eq(["Mit Inkrafttreten des Mindestlohngesetzes (MiLoG) zum 01. Januar 2015 werden in Bezug auf den Einsatz von Leistungs."])
1515
+ end
1511
1516
  end
1512
1517
  end
1513
1518
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias