semantic-compressor 2.0__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +1 -1
  80. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,14 @@
1
+ പല
2
+ ഇവ
3
+ http
4
+ അവ
5
+
6
+ ആൺ
7
+ പഴയ
8
+ ഏൽ
9
+
10
+ coordinates
11
+ അവർ
12
+ helier
13
+ പകൽ
14
+ ഇവർ
@@ -0,0 +1,106 @@
1
+ t
2
+ p.p
3
+ bk
4
+ cc
5
+ pga
6
+ e.e.o
7
+ o.h
8
+ dr
9
+ st
10
+ uh
11
+ kk
12
+ t.d
13
+ h.m
14
+ p
15
+ adm
16
+ nr
17
+ etc
18
+ t.h
19
+ dæhlie-triumf
20
+ ev
21
+ udv
22
+ anm
23
+ ø
24
+ osv
25
+ dm
26
+ hi
27
+ b.b
28
+ inc
29
+ r.c
30
+ d.v.s
31
+ ce
32
+ fr
33
+ chr
34
+ adm.dir
35
+ m.a
36
+ b
37
+ p.t
38
+ m.v
39
+ k
40
+ m.m
41
+ i.l
42
+ mill
43
+ h.e
44
+ d.å
45
+ bl.a
46
+ i.h.h.t
47
+ mrs
48
+ b.i.t
49
+ sam.pol
50
+ o.l
51
+ w
52
+ jfr
53
+ h.g
54
+ str
55
+ mil.org
56
+ c.j
57
+ sifre
58
+ l.t
59
+ t.v
60
+ ex
61
+ gj.v
62
+ pr
63
+ d.y
64
+ j.o
65
+ g.c
66
+ avd
67
+ o.s.v
68
+ pol
69
+ ca
70
+ f.eks
71
+ tjenesteforsømmelse
72
+ mr
73
+ d.c
74
+ sam
75
+ .e
76
+ h.h.v
77
+ f.v
78
+ fenomen
79
+ kl
80
+ hr
81
+ c.h
82
+ miljøvernavdelingen
83
+ h.c
84
+ startstreken
85
+ r
86
+ o.a
87
+ mrd
88
+ a.s
89
+ j.v
90
+ j
91
+ jr
92
+ f.w
93
+ kfr
94
+ l.h
95
+ bås
96
+ schlickernrieder
97
+ f.-eks
98
+ f
99
+ 5.n7
100
+ c.c
101
+ fung
102
+ dvs
103
+ d.e
104
+ wc
105
+ f.å
106
+ th
@@ -0,0 +1,54 @@
1
+ m skjærstad
2
+ ##number## plass
3
+ ##number## omgang
4
+ ##number## oktober
5
+ h rowe
6
+ ##number## juni
7
+ ##number## september
8
+ ##number## klasse
9
+ ##number## mai
10
+ ##number## april
11
+ ##number## nyttårsdag
12
+ d hillgaar
13
+ ##number## etasje
14
+ c sundts
15
+ o henrik
16
+ a himle
17
+ a robinson
18
+ m bjelland
19
+ ##number## d.å.
20
+ ##number## runden
21
+ h spang
22
+ ##number## etappen
23
+ ##number## mars-tog
24
+ ##number## februar
25
+ ##number## divisjonslaget
26
+ ##number## minutt
27
+ g larsen
28
+ ##number## divisjon
29
+ ##number## januar
30
+ ##number## august
31
+ m pedersen
32
+ m lillebø
33
+ ##number## november
34
+ h reimers
35
+ a hoffman
36
+ ##number## jubileumsår
37
+ ##number## desember
38
+ d hagesæther
39
+ ##number## divisjonsnivå
40
+ ##number## omgangen
41
+ n kristiansen
42
+ ##number## skaff
43
+ ##number## divisjonslag
44
+ a pettersen
45
+ ##number## divisjonskamper
46
+ ##number## plassen
47
+ ##number## juli
48
+ c sjaastad
49
+ ##number## runde
50
+ ##number## viseformann
51
+ i eliassen
52
+ ##number## mars
53
+ ##number## nurmis
54
+ i hagen