semantic-compressor 2.0__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +1 -1
  80. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,107 @@
1
+ das
2
+ man
3
+ es
4
+ wir
5
+ dabei
6
+ ferner
7
+ ähnliches
8
+ während
9
+ entscheidend
10
+ ausserdem
11
+ ein
12
+ in
13
+ der
14
+ daraus
15
+ obschon
16
+ beide
17
+ hier
18
+ all
19
+ neben
20
+ solche
21
+ hingegen
22
+ selbstverständlich
23
+ daneben
24
+ hinzu
25
+ vielmehr
26
+ sie
27
+ natürlich
28
+ obwohl
29
+ nun
30
+ doch
31
+ ob
32
+ abgesehen
33
+ überdies
34
+ im
35
+ zweitens
36
+ darin
37
+ erstens
38
+ dieses
39
+ nach
40
+ wer
41
+ da
42
+ interessant
43
+ seit
44
+ zudem
45
+ darüber
46
+ umgekehrt
47
+ ähnlich
48
+ aber
49
+ was
50
+ nachdem
51
+ insbesondere
52
+ statt
53
+ angesichts
54
+ gefragt
55
+ gleiches
56
+ solange
57
+ wenn
58
+ dies
59
+ dass
60
+ wie
61
+ damit
62
+ allerdings
63
+ denn
64
+ letztere
65
+ eine
66
+ selbst
67
+ gleichzeitig
68
+ wo
69
+ weder
70
+ gerade
71
+ unter
72
+ problematischer
73
+ wieso
74
+ dennoch
75
+ bei
76
+ deshalb
77
+ davon
78
+ andernfalls
79
+ er
80
+ die
81
+ anders
82
+ auch
83
+ ebenso
84
+ so
85
+ inzwischen
86
+ sonst
87
+ immerhin
88
+ entsprechend
89
+ danach
90
+ am
91
+ trotz
92
+ trotzdem
93
+ worum
94
+ damals
95
+ dafür
96
+ schliesslich
97
+ gemäss
98
+ demgegenüber
99
+ warum
100
+ letzteres
101
+ mit
102
+ dazu
103
+ anderseits
104
+ ganz
105
+ zwar
106
+ dieser
107
+ diese
@@ -0,0 +1,100 @@
1
+ κλ
2
+ δημ
3
+ χλμ
4
+ σ.τ.ε
5
+ ό.π
6
+ δρχ
7
+ κων
8
+ χρ
9
+ π.α
10
+ ριχ
11
+ π.χρ
12
+ υγ
13
+ tel
14
+ ζ
15
+ ο.π
16
+ βασ
17
+ γλ
18
+ n.c
19
+ d.j
20
+ σωκ
21
+ π
22
+ ιω
23
+ αχ
24
+ βα
25
+ γερ
26
+ εκδ
27
+ κλπ
28
+ φ
29
+ ελ
30
+ οσ
31
+ α
32
+ σελ
33
+ ευ
34
+ ε.έ
35
+ ρ
36
+ ε.τ.α
37
+ λ
38
+ εβ
39
+ θρ
40
+ ν
41
+ βλ
42
+ ηλ
43
+ γ
44
+ αρ
45
+ π.χ
46
+ ε.μ
47
+ κ.μ
48
+ α.ε
49
+ μιχ
50
+ δισ
51
+ ολ
52
+ μ
53
+ κ.ά
54
+ κ
55
+ δηλ
56
+ ε.α.χ
57
+ πρ
58
+ αγ
59
+ μac
60
+ κ.ο.κ
61
+ λ.χ
62
+ θ
63
+ αδσ
64
+ εκατ
65
+ δρη
66
+ εμμ
67
+ δ
68
+ δεκ
69
+ σ.σ
70
+ 55ο
71
+ κκ
72
+ αδ
73
+ τ.μ
74
+ ε.ε
75
+ μ.χ
76
+ ν.μ
77
+ κτλ
78
+ δολ
79
+ κ.ά.π
80
+ αγγ
81
+ μ.κ
82
+ δ.σ
83
+ μπ
84
+ έκδ
85
+ ι
86
+ v
87
+ χαρ
88
+ γρ
89
+ μ.μ.ε
90
+ σχ
91
+ λεκ
92
+ σπ
93
+ πλι
94
+ αθ
95
+ χ
96
+ τζ
97
+ τρισ
98
+ στ
99
+ ευθ
100
+ μ.μ
@@ -0,0 +1,7 @@
1
+ β δερτιλής
2
+ ##number## φιλάρετος
3
+ ο gehry
4
+ η αβεε
5
+ ##number## βλ.
6
+ β παπανδρέου
7
+ σ μ.