semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +37 -3
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,54 @@
1
+ οπως
2
+ πάντως
3
+ δεύτερον
4
+ παράλληλα
5
+ οχι
6
+ ειδικότερα
7
+ τι
8
+ επίσης
9
+ ωστόσο
10
+ ενας
11
+ ηδη
12
+ σύμφωνα
13
+ συγκεκριμένα
14
+ ηταν
15
+ εκεί
16
+ αλλωστε
17
+ πολλοί
18
+ διότι
19
+ οποιος
20
+ τρίτον
21
+ πώς
22
+ ολοι
23
+ ισως
24
+ ο
25
+ ολες
26
+ οι
27
+ γιατί
28
+ αλλοι
29
+ οσοι
30
+ αυτό
31
+ τα
32
+ ολα
33
+ ετσι
34
+ ενα
35
+ πράγματι
36
+ αλλά
37
+ επιπλέον
38
+ δεν
39
+ εχει
40
+ αντίθετα
41
+ οσον
42
+ γι
43
+ αντιθέτως
44
+ ας
45
+ η
46
+ πρόκειται
47
+ αρα
48
+ οσο
49
+ αν
50
+ μετά
51
+ εξάλλου
52
+ το
53
+ οταν
54
+ πέρα
@@ -0,0 +1,125 @@
1
+ t
2
+ b.p
3
+ cer
4
+ sik
5
+ 'ing
6
+ dr
7
+ p.m
8
+ st
9
+ t.t.c
10
+ a.r
11
+ p
12
+ ecc
13
+ t.b
14
+ tel
15
+ etc
16
+ 'on
17
+ mb
18
+ g.dol
19
+ g.d.g
20
+ sè»
21
+ m.p
22
+ b.b
23
+ vs
24
+ s.p.a
25
+ g.b
26
+ v6
27
+ ipp
28
+ s.r
29
+ r.c
30
+ moz
31
+ n.f
32
+ s.mr
33
+ c.s
34
+ g.i.p
35
+ r.i
36
+ a.g
37
+ rc
38
+ 'è»
39
+ 1-o
40
+ e.p
41
+ m.g.b
42
+ gen
43
+ i.e
44
+ s.a
45
+ vic
46
+ g.gi
47
+
48
+ m.cas
49
+ re.po
50
+ giri/min
51
+ e.i
52
+ mrs
53
+ w
54
+ n.d.r
55
+ l4ª
56
+ bad
57
+ p.l.f
58
+ dur
59
+ s.l
60
+ t.s
61
+ wwf
62
+ u.q
63
+ lod
64
+ b.col
65
+ prof
66
+ n.s
67
+ ii.dd
68
+ a.f
69
+ c.i
70
+ op
71
+ end
72
+ g
73
+ 'u.s
74
+ o.b
75
+ t.t
76
+ s.m
77
+ ing
78
+ shi
79
+ oren
80
+ m.l
81
+ f.l.l
82
+ mr
83
+ jvp
84
+ fia
85
+ pag
86
+ e.c
87
+ g.p
88
+ pp
89
+ u
90
+ p.d.v
91
+ c.cer
92
+ cod
93
+ d.p.r
94
+ e.t
95
+ e.st
96
+ h.c
97
+ z
98
+ r
99
+ c.n.r
100
+ o.r
101
+ mons
102
+ j
103
+ jr
104
+ kin
105
+ v6»
106
+ g.p.s
107
+ l.z
108
+ c.a
109
+ m.f
110
+ sig
111
+ s.r.l
112
+ riz
113
+ f
114
+ m.s
115
+ c.c
116
+ l.p
117
+ f.ama
118
+ pi
119
+ s.c
120
+ p.d.p
121
+ ta
122
+ di»
123
+ r.e.s
124
+ n.d
125
+ p2»
@@ -0,0 +1,6 @@
1
+ n ##number##
2
+ s pietro
3
+ s francisco
4
+ c wolf
5
+ s maria
6
+ a r.