semantic-compressor 2.0__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +1 -1
  80. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,39 @@
1
+ most
2
+ he
3
+ since
4
+ so
5
+ both
6
+ these
7
+ it
8
+ nevertheless
9
+ this
10
+ indeed
11
+ however
12
+ instead
13
+ under
14
+ similarly
15
+ some
16
+ though
17
+ while
18
+ when
19
+ in
20
+ despite
21
+ although
22
+ nonetheless
23
+ thus
24
+ there
25
+ if
26
+ the
27
+ nor
28
+ separately
29
+ moreover
30
+ but
31
+ they
32
+ yet
33
+ many
34
+ according
35
+ sales
36
+ among
37
+ meanwhile
38
+ even
39
+ i
@@ -0,0 +1,48 @@
1
+ eos
2
+ c
3
+ a.d
4
+ t.a.s.s
5
+ e.t
6
+ päevapiltnikud
7
+ c.h
8
+ b.p
9
+ amm
10
+ ameerika-mees
11
+ n.-ö
12
+ cm
13
+ b
14
+ mhm
15
+ a.s
16
+ m.e
17
+ j.l
18
+ j
19
+ u.t
20
+ vm
21
+ g.u.n
22
+ hajutada
23
+ p.s
24
+ a.b
25
+ c.h.-r
26
+ i.q
27
+ gr
28
+ fido
29
+ pankurit
30
+ s.v
31
+ l.l
32
+ c.-h
33
+ m.h
34
+ h.l
35
+ m.k
36
+ j.r
37
+ t.k
38
+ k.h
39
+ 89/90
40
+ h
41
+ a
42
+ dost
43
+ v.k
44
+ e.q
45
+ t.j
46
+ m.b
47
+ d
48
+ p.k
@@ -0,0 +1,100 @@
1
+ ##number## juuni
2
+ ##number## novembril
3
+ ##number## juulilt
4
+ r järve-vomm
5
+ ##number## mida
6
+ n liidu
7
+ ##number## milliseid
8
+ ##number## oktoobri
9
+ ##number## iidol
10
+ m e
11
+ ##number## klassist
12
+ ##number## millest
13
+ ##number## august
14
+ ##number## pariis
15
+ ##number## septembrist
16
+ ##number## oktoober
17
+ ##number## märtsini
18
+ ##number## kust
19
+ k mägi
20
+ ##number## detsembrist
21
+ ##number## jaanuari
22
+ ##number## epee
23
+ ##number## nimetage
24
+ ##number## novembrini
25
+ ##number## eluaasta
26
+ s mill
27
+ ##number## helsingi
28
+ ##number## jaanuarini
29
+ ##number## aastail
30
+ ##number## augustil
31
+ ##number## millise
32
+ ##number## juulist
33
+ ##number## mai
34
+ ##number## novembri
35
+ ##number## oktoobrist
36
+ ##number## juunini
37
+ ##number## septembriks
38
+ ##number## detsembril
39
+ p s
40
+ ##number## jaanuar
41
+ ##number## aastate
42
+ ##number## milline
43
+ ##number## kelle
44
+ ##number## jaanuaril
45
+ s stadnikov
46
+ ##number## aastaks
47
+ ##number## stockholm
48
+ ##number## suurim
49
+ ##number## aasta
50
+ ##number## sajandi
51
+ ##number## millega
52
+ ##number## aastast
53
+ ##number## aastal
54
+ ##number## kumb
55
+ ##number## septembril
56
+ ##number## korruselt
57
+ ##number## septembri
58
+ ##number## veebruarini
59
+ ##number## london
60
+ ##number## aastatel
61
+ ##number## september
62
+ ##number## veebruari
63
+ ##number## oktoobrini
64
+ ##number## mail
65
+ m kassovitz
66
+ ##number## action-film
67
+ ##number## mis
68
+ k herkül
69
+ n n
70
+ ##number## detsembrini
71
+ ##number## imre
72
+ t jõgeda
73
+ ##number## casino
74
+ ##number## septembrit
75
+ ##number## augustini
76
+ ##number## juulil
77
+ ##number## november
78
+ ##number## kuupäeval
79
+ ##number## taevas
80
+ ##number## septembrini
81
+ ##number## detsember
82
+ ##number## detsembri
83
+ ##number## juunil
84
+ ##number## augustist
85
+ n jurist
86
+ ##number## missugust
87
+ ##number## aastatesse
88
+ ##number## aprillil
89
+ ##number## augusti
90
+ ##number## oktoobril
91
+ ##number## märtsil
92
+ ##number## a
93
+ ##number## the
94
+ ##number## sajandil
95
+ ##number## aastani
96
+ ##number## juuli
97
+ ##number## septembrile
98
+ ##number## millist
99
+ ##number## millised
100
+ ##number## veebruaril