semantic-compressor 2.1__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/RECORD +83 -5
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,73 @@
1
+ t
2
+ c
3
+ odst
4
+ cit
5
+ tzv
6
+ con
7
+ str
8
+ tov
9
+ otc
10
+ tir
11
+ dr
12
+ itn
13
+ r
14
+ org
15
+ st
16
+ br
17
+ sv
18
+ b
19
+ prof
20
+ xiv
21
+ j
22
+ instituciona
23
+ mag
24
+ št
25
+ p
26
+ op
27
+ roj
28
+ im
29
+ m
30
+ imf
31
+ sms
32
+ g
33
+ ipd
34
+ člov
35
+ reg
36
+ čl
37
+ v.p
38
+ čustven
39
+ pr
40
+ nezaposlenih
41
+ gld
42
+ tj
43
+ š
44
+ zloma
45
+ iii
46
+ rib
47
+ f
48
+ ada
49
+ xvi
50
+ inc
51
+ đ
52
+ l0
53
+ n
54
+ npr
55
+ mt
56
+ feb
57
+ us
58
+ ž
59
+ pos
60
+ t.i
61
+ ico
62
+ b.s
63
+ akti
64
+ vma
65
+ egs
66
+ č
67
+ pov
68
+ sti
69
+ co
70
+ w
71
+ d
72
+ pc9
73
+ oz
@@ -0,0 +1,74 @@
1
+ ##number## novembra
2
+ ##number## redne
3
+ ##number## septembra
4
+ a plavevska
5
+ k zalival
6
+ ##number## decembra
7
+ ##number## uro
8
+ ##number## uri
9
+ v gotovac
10
+ ##number## členom
11
+ s broveta
12
+ ##number## lanski
13
+ ##number## členu
14
+ a novak
15
+ i borštnerja
16
+ ##number## marca
17
+ ##number## junijem
18
+ ##number## iii.
19
+ ##number## obletnico
20
+ ##number## januarja
21
+ l d.
22
+ i španski
23
+ e ljubljana
24
+ ##number## sati
25
+ ##number## mura
26
+ ##number## kongresu
27
+ ##number## ure
28
+ ##number## junija
29
+ i bavčar
30
+ v britanija
31
+ ##number## feb.
32
+ ##number## številki
33
+ ##number## februarja
34
+ ##number## junij
35
+ ##number## oktobra
36
+ ##number## športno
37
+ k b.
38
+ ##number## majem
39
+ ##number## člena
40
+ i p.
41
+ z l
42
+ ##number## stoletju
43
+ ##number## kongresa
44
+ s hribar
45
+ s p.
46
+ ##number## iv
47
+ i b.
48
+ ##number## maj
49
+ ##number## armade
50
+ ##number## marec
51
+ ##number## član
52
+ ##number## odstavka
53
+ ##number## maja
54
+ ##number## julij
55
+ ##number## zkp
56
+ ##number## odstavek
57
+ ##number## točke
58
+ ##number## stoletja
59
+ a puhar
60
+ ##number## julija
61
+ v p.
62
+ ##number## pivovarna
63
+ ##number## čl.
64
+ ##number## december
65
+ ##number## stoletje
66
+ ##number## kongres
67
+ ##number## aprila
68
+ i razreda
69
+ ##number## številko
70
+ v blažič
71
+ ##number## ##number##
72
+ ##number## člen
73
+ ##number## godine
74
+ i u