semantic-compressor 2.1__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/RECORD +83 -5
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,48 @@
1
+ c
2
+ depuis
3
+ la
4
+ enfin
5
+ certains
6
+ selon
7
+ cet
8
+ car
9
+ ces
10
+ il
11
+ cependant
12
+ pour
13
+ j
14
+ alors
15
+ un
16
+ certes
17
+ les
18
+ nous
19
+ dans
20
+ le
21
+ une
22
+ si
23
+ mais
24
+ en
25
+ dès
26
+ or
27
+ tout
28
+ ils
29
+ l
30
+ mr
31
+ malgré
32
+ elles
33
+
34
+ je
35
+ on
36
+ quand
37
+ pourtant
38
+ cela
39
+ a
40
+ après
41
+ puis
42
+ ce
43
+ elle
44
+ voilà
45
+ cette
46
+ comment
47
+ quant
48
+ ainsi
@@ -0,0 +1,71 @@
1
+ rfr
2
+ t
3
+ c
4
+ a.d
5
+ dk
6
+ he
7
+ mjm
8
+ inkl
9
+ bt
10
+ 69f
11
+ crz
12
+ dr
13
+ st
14
+ ib
15
+ liv
16
+ mrd
17
+ n.r
18
+ rg
19
+ v
20
+ vgl
21
+ mgr
22
+ cs
23
+ prof
24
+ j
25
+ kfr
26
+ bd
27
+ fre
28
+ gfh
29
+ fon
30
+ m
31
+ rp
32
+ nr
33
+ chr
34
+ etc
35
+ hg
36
+ sx
37
+ rz
38
+ 48f
39
+ kmu
40
+ abs
41
+ nkm
42
+ z.b
43
+ usw
44
+ f
45
+ d.h
46
+ lz
47
+ sc
48
+ usf
49
+ gir
50
+ hag
51
+ ff
52
+ mio
53
+ zr
54
+ k
55
+ h
56
+ mey
57
+ bst
58
+ ne
59
+ u.a
60
+ fem
61
+ bzw
62
+
63
+ med
64
+ u
65
+ lts
66
+ fr
67
+ s.o.s
68
+ w
69
+ lib
70
+ k.a
71
+ th
@@ -0,0 +1,28 @@
1
+ ##number## oktober
2
+ ##number## jahrhunderts
3
+ ##number## geburtstag
4
+ ##number## juni
5
+ s ##number##
6
+ ##number## september
7
+ ##number## mai
8
+ ##number## dezember
9
+ ##number## april
10
+ ##number## ahv-revision
11
+ ##number## revision
12
+ ##number## jahrhundert
13
+ ##number## landwirtschaftsbericht
14
+ ##number## altersjahr
15
+ ##number## februar
16
+ a schumpeter
17
+ ##number## freiheit
18
+ ##number## august
19
+ ##number## januar
20
+ ##number## märz
21
+ a meyers
22
+ ##number## november
23
+ ##number## bauetappe
24
+ ##number## ahv-
25
+ ##number## eu-richtlinie
26
+ ##number## juli
27
+ a meyer
28
+ ##number## säule