semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +37 -3
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,64 @@
1
+ kronik
2
+ alligevel
3
+ de
4
+ først
5
+ derfor
6
+ vi
7
+ selv
8
+ hertil
9
+ sådan
10
+ dette
11
+ sport
12
+ man
13
+ foto
14
+ begge
15
+ tag
16
+ dertil
17
+ reuter
18
+ efter
19
+ endelig
20
+ ifølge
21
+ lad
22
+ når
23
+ det
24
+ desuden
25
+ nu
26
+ reuters
27
+ årsagen
28
+ tænk
29
+ samtidig
30
+ udover
31
+ men
32
+ endvidere
33
+ rør
34
+ rb
35
+ udstillingen
36
+ faktabox
37
+ reception
38
+ blandt
39
+ hvad
40
+ skær
41
+ lilot
42
+ derudover
43
+ da
44
+ tilsæt
45
+ denne
46
+ afp
47
+ her
48
+ hvis
49
+ hæld
50
+ problemet
51
+ dermed
52
+ jeg
53
+ grafik
54
+ anmeldelse
55
+ den
56
+ ebbe
57
+ resultatet
58
+ tværtimod
59
+ hans
60
+ måske
61
+ feature
62
+ tillæg
63
+ hun
64
+ han
@@ -0,0 +1,99 @@
1
+ m.j
2
+ t
3
+ ph
4
+ j.h
5
+ p.a.m
6
+ j.m
7
+ dr
8
+ st
9
+ j.b.m
10
+ p
11
+ nr
12
+ h.s
13
+ e.d
14
+ t.e
15
+ a.v
16
+ esb
17
+ s.z
18
+ drs
19
+ b.b
20
+ m.o
21
+ inc
22
+ n
23
+ pensioenfonds
24
+ s.v.p
25
+ bod
26
+ fr
27
+ pk
28
+ r.p
29
+ c.p.j
30
+ v.l.n.r
31
+ chr
32
+ m.v.d
33
+ int
34
+ o.m
35
+ j.v.d
36
+ u.o.m
37
+ f.c
38
+ k
39
+ bijgebracht
40
+ ontwaakte
41
+ m
42
+ j.w
43
+ a.l
44
+ a.v.d
45
+ s.v
46
+ s
47
+ j.d
48
+ binnengekomen
49
+ ds
50
+ schouwburg
51
+ b.v
52
+ h
53
+ a
54
+ j.a
55
+ aanvielen
56
+ h.g
57
+ p.f
58
+ j.l
59
+ mgr
60
+ c.j
61
+ blz
62
+ l.e.h
63
+ w.k
64
+ g
65
+ m.g
66
+ r.v.d
67
+ ing
68
+ v.d
69
+ c.q
70
+ l
71
+ h.p
72
+ mr
73
+ gesch
74
+ e.l
75
+ p.j
76
+ mm
77
+ j.g
78
+ j.f
79
+ c
80
+ f.m
81
+ jl
82
+ r
83
+ o.a
84
+ a.s
85
+ ir
86
+ v
87
+ j
88
+ jr
89
+ e
90
+ m.i.v
91
+ l.a
92
+ f.v.d
93
+ aansluit
94
+ c.c
95
+ a.m
96
+ f.o.j
97
+ m.b
98
+ y
99
+ th
@@ -0,0 +1,37 @@
1
+ ##number## sotelo
2
+ ##number## clas
3
+ ##number## buckler
4
+ ##number## carrera
5
+ ##number## rmo
6
+ ##number## orioli
7
+ w baron
8
+ ##number## morales
9
+ ##number## snotselelaank
10
+ ##number## arcarons
11
+ ##number## cavandoli
12
+ ##number## pdm
13
+ ##number## helvetia
14
+ ##number## panasonic
15
+ ##number## motorola
16
+ w bruinsma
17
+ ##number## heer
18
+ ##number## lotus
19
+ ##number## banesto
20
+ ##number## magnaldi
21
+ w jense
22
+ w heuvelmans
23
+ w spatje
24
+ ##number## telekom
25
+ f kennedy
26
+ ##number## gatorade
27
+ ##number## mg-gb
28
+ ##number## once
29
+ ##number## peterhansel
30
+ ##number## ariostea
31
+ ##number## tvm
32
+ ##number## höl
33
+ ##number## castorama
34
+ ##number## tulip
35
+ b situatie
36
+ ##number## mas
37
+ ##number## lotto