pragmatic_segmenter 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c54f1c34c1f0bd34858cbd9c3915ec47f8a4d807
4
- data.tar.gz: 19a936bfd8fb6c0046f06a20f50319fb5a2c0ed7
3
+ metadata.gz: 96aa232b769e566aad1de6ace0fa6d8591e3859c
4
+ data.tar.gz: 2ed0518fb7c236821b0986795e7bc1ce5e19e1a8
5
5
  SHA512:
6
- metadata.gz: 824392373d56549289ae89de98976d53afdf74ccde45f3b6069164d8882fbeef7ae04e2fec1261af2baf43223246b4f1f90ebab12b97fa0c22e0d81d6caa56fa
7
- data.tar.gz: 4f43e20c69be0515d81c9d98e33b7ed2aadf05d5110ddd7acae3f971745c8406c03763fdc275afa3b14de3d5aa8846a85638845d90cc4c9fd34f4cf9e120d1d3
6
+ metadata.gz: fef9536b2c2dc9896aedcb363f89bc10af37c09d66c034d8814ccb784619a9ea82fa3d20e9e69316dfcef06d7013183d9dcbd564430dc7ce2303a7cfbe32ba87
7
+ data.tar.gz: 813a64e15c83b9dab5a2fe29c0bebba638fba0663c3debb26442cd42f060fa87e927846309355ec15953ca0a45c90d764fae796533f7d054282bdf8b5bcb06d4
data/README.md CHANGED
@@ -744,6 +744,9 @@ To test the relative performance of different segmentation tools and libraries I
744
744
  **Version 0.0.9**
745
745
  * Improve handling of alphabetical and roman numeral lists
746
746
 
747
+ **Version 0.1.0**
748
+ * Add Kommanditgesellschaft Rule (the period in "Co. KG" is no longer treated as a sentence boundary)
749
+
747
750
  ## Contributing
748
751
 
749
752
  If you find a text that is incorrectly segmented using this gem, please submit an issue.
@@ -9,6 +9,9 @@ module PragmaticSegmenter
9
9
  # Rubular: http://rubular.com/r/yqa4Rit8EY
10
10
  PossessiveAbbreviationRule = Rule.new(/\.(?='s\s)|\.(?='s$)|\.(?='s\z)/, '∯')
11
11
 
12
+ # Rubular: http://rubular.com/r/NEv265G2X2
13
+ KommanditgesellschaftRule = Rule.new(/(?<=Co)\.(?=\sKG)/, '∯')
14
+
12
15
  # Rubular: http://rubular.com/r/xDkpFZ0EgH
13
16
  MULTI_PERIOD_ABBREVIATION_REGEX = /\b[a-z](?:\.[a-z])+[.]/i
14
17
 
@@ -37,6 +40,7 @@ module PragmaticSegmenter
37
40
 
38
41
  def replace
39
42
  @reformatted_text = text.apply(PossessiveAbbreviationRule)
43
+ @reformatted_text = text.apply(KommanditgesellschaftRule)
40
44
  @reformatted_text = PragmaticSegmenter::SingleLetterAbbreviation.new(text: @reformatted_text).replace
41
45
  @reformatted_text = search_for_abbreviations_in_string(@reformatted_text, abbreviations)
42
46
  @reformatted_text = replace_multi_period_abbreviations(@reformatted_text)
@@ -1,3 +1,3 @@
1
1
  module PragmaticSegmenter
2
- VERSION = "0.0.9"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -913,6 +913,11 @@ RSpec.describe PragmaticSegmenter::Segmenter do
913
913
  ps = PragmaticSegmenter::Segmenter.new(text: "A) Hello world. \nB) Hello world.\nC) Hello world.\nD) Hello world.\nE) Hello world.\nF) Hello world.", language: "en")
914
914
  expect(ps.segment).to eq(["A) Hello world.", "B) Hello world.", "C) Hello world.", "D) Hello world.", "E) Hello world.", "F) Hello world."])
915
915
  end
916
+
917
+ it "correctly segments text #089" do
918
+ ps = PragmaticSegmenter::Segmenter.new(text: "The GmbH & Co. KG is a limited partnership with, typically, the sole general partner being a limited liability company.")
919
+ expect(ps.segment).to eq(["The GmbH & Co. KG is a limited partnership with, typically, the sole general partner being a limited liability company."])
920
+ end
916
921
  end
917
922
  end
918
923
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias