kreuzberg 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (265) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yaml +534 -0
  5. data/Gemfile +9 -0
  6. data/Gemfile.lock +157 -0
  7. data/README.md +421 -0
  8. data/Rakefile +25 -0
  9. data/Steepfile +47 -0
  10. data/examples/async_patterns.rb +340 -0
  11. data/ext/kreuzberg_rb/extconf.rb +35 -0
  12. data/ext/kreuzberg_rb/native/Cargo.toml +36 -0
  13. data/ext/kreuzberg_rb/native/README.md +425 -0
  14. data/ext/kreuzberg_rb/native/build.rs +17 -0
  15. data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -0
  16. data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -0
  17. data/ext/kreuzberg_rb/native/include/strings.h +20 -0
  18. data/ext/kreuzberg_rb/native/include/unistd.h +47 -0
  19. data/ext/kreuzberg_rb/native/src/lib.rs +2939 -0
  20. data/extconf.rb +28 -0
  21. data/kreuzberg.gemspec +105 -0
  22. data/lib/kreuzberg/api_proxy.rb +142 -0
  23. data/lib/kreuzberg/cache_api.rb +45 -0
  24. data/lib/kreuzberg/cli.rb +55 -0
  25. data/lib/kreuzberg/cli_proxy.rb +127 -0
  26. data/lib/kreuzberg/config.rb +684 -0
  27. data/lib/kreuzberg/errors.rb +50 -0
  28. data/lib/kreuzberg/extraction_api.rb +84 -0
  29. data/lib/kreuzberg/mcp_proxy.rb +186 -0
  30. data/lib/kreuzberg/ocr_backend_protocol.rb +113 -0
  31. data/lib/kreuzberg/post_processor_protocol.rb +86 -0
  32. data/lib/kreuzberg/result.rb +216 -0
  33. data/lib/kreuzberg/setup_lib_path.rb +79 -0
  34. data/lib/kreuzberg/validator_protocol.rb +89 -0
  35. data/lib/kreuzberg/version.rb +5 -0
  36. data/lib/kreuzberg.rb +82 -0
  37. data/pkg/kreuzberg-4.0.0.rc1.gem +0 -0
  38. data/sig/kreuzberg/internal.rbs +184 -0
  39. data/sig/kreuzberg.rbs +468 -0
  40. data/spec/binding/cache_spec.rb +227 -0
  41. data/spec/binding/cli_proxy_spec.rb +87 -0
  42. data/spec/binding/cli_spec.rb +54 -0
  43. data/spec/binding/config_spec.rb +345 -0
  44. data/spec/binding/config_validation_spec.rb +283 -0
  45. data/spec/binding/error_handling_spec.rb +213 -0
  46. data/spec/binding/errors_spec.rb +66 -0
  47. data/spec/binding/plugins/ocr_backend_spec.rb +307 -0
  48. data/spec/binding/plugins/postprocessor_spec.rb +269 -0
  49. data/spec/binding/plugins/validator_spec.rb +274 -0
  50. data/spec/examples.txt +104 -0
  51. data/spec/fixtures/config.toml +39 -0
  52. data/spec/fixtures/config.yaml +42 -0
  53. data/spec/fixtures/invalid_config.toml +4 -0
  54. data/spec/smoke/package_spec.rb +178 -0
  55. data/spec/spec_helper.rb +42 -0
  56. data/vendor/kreuzberg/Cargo.toml +134 -0
  57. data/vendor/kreuzberg/README.md +175 -0
  58. data/vendor/kreuzberg/build.rs +460 -0
  59. data/vendor/kreuzberg/src/api/error.rs +81 -0
  60. data/vendor/kreuzberg/src/api/handlers.rs +199 -0
  61. data/vendor/kreuzberg/src/api/mod.rs +79 -0
  62. data/vendor/kreuzberg/src/api/server.rs +353 -0
  63. data/vendor/kreuzberg/src/api/types.rs +170 -0
  64. data/vendor/kreuzberg/src/bin/profile_extract.rs +455 -0
  65. data/vendor/kreuzberg/src/cache/mod.rs +1143 -0
  66. data/vendor/kreuzberg/src/chunking/mod.rs +677 -0
  67. data/vendor/kreuzberg/src/core/batch_mode.rs +35 -0
  68. data/vendor/kreuzberg/src/core/config.rs +1032 -0
  69. data/vendor/kreuzberg/src/core/extractor.rs +903 -0
  70. data/vendor/kreuzberg/src/core/io.rs +327 -0
  71. data/vendor/kreuzberg/src/core/mime.rs +615 -0
  72. data/vendor/kreuzberg/src/core/mod.rs +42 -0
  73. data/vendor/kreuzberg/src/core/pipeline.rs +906 -0
  74. data/vendor/kreuzberg/src/embeddings.rs +323 -0
  75. data/vendor/kreuzberg/src/error.rs +431 -0
  76. data/vendor/kreuzberg/src/extraction/archive.rs +954 -0
  77. data/vendor/kreuzberg/src/extraction/docx.rs +40 -0
  78. data/vendor/kreuzberg/src/extraction/email.rs +854 -0
  79. data/vendor/kreuzberg/src/extraction/excel.rs +688 -0
  80. data/vendor/kreuzberg/src/extraction/html.rs +553 -0
  81. data/vendor/kreuzberg/src/extraction/image.rs +368 -0
  82. data/vendor/kreuzberg/src/extraction/libreoffice.rs +564 -0
  83. data/vendor/kreuzberg/src/extraction/mod.rs +77 -0
  84. data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -0
  85. data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -0
  86. data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -0
  87. data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +128 -0
  88. data/vendor/kreuzberg/src/extraction/pandoc/batch.rs +275 -0
  89. data/vendor/kreuzberg/src/extraction/pandoc/mime_types.rs +178 -0
  90. data/vendor/kreuzberg/src/extraction/pandoc/mod.rs +491 -0
  91. data/vendor/kreuzberg/src/extraction/pandoc/server.rs +496 -0
  92. data/vendor/kreuzberg/src/extraction/pandoc/subprocess.rs +1188 -0
  93. data/vendor/kreuzberg/src/extraction/pandoc/version.rs +162 -0
  94. data/vendor/kreuzberg/src/extraction/pptx.rs +3000 -0
  95. data/vendor/kreuzberg/src/extraction/structured.rs +490 -0
  96. data/vendor/kreuzberg/src/extraction/table.rs +328 -0
  97. data/vendor/kreuzberg/src/extraction/text.rs +269 -0
  98. data/vendor/kreuzberg/src/extraction/xml.rs +333 -0
  99. data/vendor/kreuzberg/src/extractors/archive.rs +425 -0
  100. data/vendor/kreuzberg/src/extractors/docx.rs +479 -0
  101. data/vendor/kreuzberg/src/extractors/email.rs +129 -0
  102. data/vendor/kreuzberg/src/extractors/excel.rs +344 -0
  103. data/vendor/kreuzberg/src/extractors/html.rs +410 -0
  104. data/vendor/kreuzberg/src/extractors/image.rs +195 -0
  105. data/vendor/kreuzberg/src/extractors/mod.rs +268 -0
  106. data/vendor/kreuzberg/src/extractors/pandoc.rs +201 -0
  107. data/vendor/kreuzberg/src/extractors/pdf.rs +496 -0
  108. data/vendor/kreuzberg/src/extractors/pptx.rs +234 -0
  109. data/vendor/kreuzberg/src/extractors/structured.rs +126 -0
  110. data/vendor/kreuzberg/src/extractors/text.rs +242 -0
  111. data/vendor/kreuzberg/src/extractors/xml.rs +128 -0
  112. data/vendor/kreuzberg/src/image/dpi.rs +164 -0
  113. data/vendor/kreuzberg/src/image/mod.rs +6 -0
  114. data/vendor/kreuzberg/src/image/preprocessing.rs +417 -0
  115. data/vendor/kreuzberg/src/image/resize.rs +89 -0
  116. data/vendor/kreuzberg/src/keywords/config.rs +154 -0
  117. data/vendor/kreuzberg/src/keywords/mod.rs +237 -0
  118. data/vendor/kreuzberg/src/keywords/processor.rs +267 -0
  119. data/vendor/kreuzberg/src/keywords/rake.rs +294 -0
  120. data/vendor/kreuzberg/src/keywords/types.rs +68 -0
  121. data/vendor/kreuzberg/src/keywords/yake.rs +163 -0
  122. data/vendor/kreuzberg/src/language_detection/mod.rs +942 -0
  123. data/vendor/kreuzberg/src/lib.rs +102 -0
  124. data/vendor/kreuzberg/src/mcp/mod.rs +32 -0
  125. data/vendor/kreuzberg/src/mcp/server.rs +1966 -0
  126. data/vendor/kreuzberg/src/ocr/cache.rs +469 -0
  127. data/vendor/kreuzberg/src/ocr/error.rs +37 -0
  128. data/vendor/kreuzberg/src/ocr/hocr.rs +216 -0
  129. data/vendor/kreuzberg/src/ocr/mod.rs +58 -0
  130. data/vendor/kreuzberg/src/ocr/processor.rs +847 -0
  131. data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -0
  132. data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -0
  133. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +450 -0
  134. data/vendor/kreuzberg/src/ocr/types.rs +393 -0
  135. data/vendor/kreuzberg/src/ocr/utils.rs +47 -0
  136. data/vendor/kreuzberg/src/ocr/validation.rs +206 -0
  137. data/vendor/kreuzberg/src/pdf/error.rs +122 -0
  138. data/vendor/kreuzberg/src/pdf/images.rs +139 -0
  139. data/vendor/kreuzberg/src/pdf/metadata.rs +346 -0
  140. data/vendor/kreuzberg/src/pdf/mod.rs +50 -0
  141. data/vendor/kreuzberg/src/pdf/rendering.rs +369 -0
  142. data/vendor/kreuzberg/src/pdf/table.rs +420 -0
  143. data/vendor/kreuzberg/src/pdf/text.rs +161 -0
  144. data/vendor/kreuzberg/src/plugins/extractor.rs +1010 -0
  145. data/vendor/kreuzberg/src/plugins/mod.rs +209 -0
  146. data/vendor/kreuzberg/src/plugins/ocr.rs +629 -0
  147. data/vendor/kreuzberg/src/plugins/processor.rs +641 -0
  148. data/vendor/kreuzberg/src/plugins/registry.rs +1324 -0
  149. data/vendor/kreuzberg/src/plugins/traits.rs +258 -0
  150. data/vendor/kreuzberg/src/plugins/validator.rs +955 -0
  151. data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -0
  152. data/vendor/kreuzberg/src/text/mod.rs +19 -0
  153. data/vendor/kreuzberg/src/text/quality.rs +697 -0
  154. data/vendor/kreuzberg/src/text/string_utils.rs +217 -0
  155. data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -0
  156. data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -0
  157. data/vendor/kreuzberg/src/text/token_reduction/core.rs +796 -0
  158. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +902 -0
  159. data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -0
  160. data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -0
  161. data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +147 -0
  162. data/vendor/kreuzberg/src/types.rs +873 -0
  163. data/vendor/kreuzberg/src/utils/mod.rs +17 -0
  164. data/vendor/kreuzberg/src/utils/quality.rs +959 -0
  165. data/vendor/kreuzberg/src/utils/string_utils.rs +381 -0
  166. data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -0
  167. data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -0
  168. data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -0
  169. data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -0
  170. data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -0
  171. data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -0
  172. data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -0
  173. data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -0
  174. data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -0
  175. data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -0
  176. data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -0
  177. data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -0
  178. data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -0
  179. data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -0
  180. data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -0
  181. data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -0
  182. data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -0
  183. data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -0
  184. data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -0
  185. data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -0
  186. data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -0
  187. data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -0
  188. data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -0
  189. data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -0
  190. data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -0
  191. data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -0
  192. data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -0
  193. data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -0
  194. data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -0
  195. data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -0
  196. data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -0
  197. data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -0
  198. data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -0
  199. data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -0
  200. data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -0
  201. data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -0
  202. data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -0
  203. data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -0
  204. data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -0
  205. data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -0
  206. data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -0
  207. data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -0
  208. data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -0
  209. data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -0
  210. data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -0
  211. data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -0
  212. data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -0
  213. data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -0
  214. data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -0
  215. data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -0
  216. data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -0
  217. data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -0
  218. data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -0
  219. data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -0
  220. data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -0
  221. data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -0
  222. data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -0
  223. data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -0
  224. data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -0
  225. data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -0
  226. data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -0
  227. data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -0
  228. data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -0
  229. data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -0
  230. data/vendor/kreuzberg/tests/api_tests.rs +966 -0
  231. data/vendor/kreuzberg/tests/archive_integration.rs +543 -0
  232. data/vendor/kreuzberg/tests/batch_orchestration.rs +542 -0
  233. data/vendor/kreuzberg/tests/batch_processing.rs +304 -0
  234. data/vendor/kreuzberg/tests/chunking_offset_demo.rs +92 -0
  235. data/vendor/kreuzberg/tests/concurrency_stress.rs +509 -0
  236. data/vendor/kreuzberg/tests/config_features.rs +580 -0
  237. data/vendor/kreuzberg/tests/config_loading_tests.rs +439 -0
  238. data/vendor/kreuzberg/tests/core_integration.rs +493 -0
  239. data/vendor/kreuzberg/tests/csv_integration.rs +424 -0
  240. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +124 -0
  241. data/vendor/kreuzberg/tests/email_integration.rs +325 -0
  242. data/vendor/kreuzberg/tests/error_handling.rs +393 -0
  243. data/vendor/kreuzberg/tests/format_integration.rs +159 -0
  244. data/vendor/kreuzberg/tests/helpers/mod.rs +142 -0
  245. data/vendor/kreuzberg/tests/image_integration.rs +253 -0
  246. data/vendor/kreuzberg/tests/keywords_integration.rs +479 -0
  247. data/vendor/kreuzberg/tests/keywords_quality.rs +509 -0
  248. data/vendor/kreuzberg/tests/mime_detection.rs +428 -0
  249. data/vendor/kreuzberg/tests/ocr_configuration.rs +510 -0
  250. data/vendor/kreuzberg/tests/ocr_errors.rs +676 -0
  251. data/vendor/kreuzberg/tests/ocr_quality.rs +627 -0
  252. data/vendor/kreuzberg/tests/ocr_stress.rs +469 -0
  253. data/vendor/kreuzberg/tests/pandoc_integration.rs +503 -0
  254. data/vendor/kreuzberg/tests/pdf_integration.rs +43 -0
  255. data/vendor/kreuzberg/tests/pipeline_integration.rs +1412 -0
  256. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +771 -0
  257. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +561 -0
  258. data/vendor/kreuzberg/tests/plugin_system.rs +921 -0
  259. data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -0
  260. data/vendor/kreuzberg/tests/registry_integration_tests.rs +607 -0
  261. data/vendor/kreuzberg/tests/security_validation.rs +404 -0
  262. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -0
  263. data/vendor/kreuzberg/tests/test_fastembed.rs +609 -0
  264. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -0
  265. metadata +471 -0
@@ -0,0 +1,681 @@
1
+ [
2
+ "!",
3
+ "\"",
4
+ "$",
5
+ "%",
6
+ "&",
7
+ "'",
8
+ "(",
9
+ ")",
10
+ "*",
11
+ "+",
12
+ ",",
13
+ "-",
14
+ ".",
15
+ "...",
16
+ "0",
17
+ "1",
18
+ "2",
19
+ "3",
20
+ "4",
21
+ "5",
22
+ "6",
23
+ "7",
24
+ "8",
25
+ "9",
26
+ ";",
27
+ "<",
28
+ "=",
29
+ ">",
30
+ "?",
31
+ "@",
32
+ "\\",
33
+ "^",
34
+ "_",
35
+ "`",
36
+ "|",
37
+ "~",
38
+ "·",
39
+ "—",
40
+ "——",
41
+ "‘",
42
+ "’",
43
+ "“",
44
+ "”",
45
+ "…",
46
+ "、",
47
+ "。",
48
+ "〈",
49
+ "〉",
50
+ "《",
51
+ "》",
52
+ "가",
53
+ "가까스로",
54
+ "가령",
55
+ "각",
56
+ "각각",
57
+ "각자",
58
+ "각종",
59
+ "갖고말하자면",
60
+ "같다",
61
+ "같이",
62
+ "개의치않고",
63
+ "거니와",
64
+ "거바",
65
+ "거의",
66
+ "것",
67
+ "것과 같이",
68
+ "것들",
69
+ "게다가",
70
+ "게우다",
71
+ "겨우",
72
+ "견지에서",
73
+ "결과에 이르다",
74
+ "결국",
75
+ "결론을 낼 수 있다",
76
+ "겸사겸사",
77
+ "고려하면",
78
+ "고로",
79
+ "곧",
80
+ "공동으로",
81
+ "과",
82
+ "과연",
83
+ "관계가 있다",
84
+ "관계없이",
85
+ "관련이 있다",
86
+ "관하여",
87
+ "관한",
88
+ "관해서는",
89
+ "구",
90
+ "구체적으로",
91
+ "구토하다",
92
+ "그",
93
+ "그들",
94
+ "그때",
95
+ "그래",
96
+ "그래도",
97
+ "그래서",
98
+ "그러나",
99
+ "그러니",
100
+ "그러니까",
101
+ "그러면",
102
+ "그러므로",
103
+ "그러한즉",
104
+ "그런 까닭에",
105
+ "그런데",
106
+ "그런즉",
107
+ "그럼",
108
+ "그럼에도 불구하고",
109
+ "그렇게 함으로써",
110
+ "그렇지",
111
+ "그렇지 않다면",
112
+ "그렇지 않으면",
113
+ "그렇지만",
114
+ "그렇지않으면",
115
+ "그리고",
116
+ "그리하여",
117
+ "그만이다",
118
+ "그에 따르는",
119
+ "그위에",
120
+ "그저",
121
+ "그중에서",
122
+ "그치지 않다",
123
+ "근거로",
124
+ "근거하여",
125
+ "기대여",
126
+ "기점으로",
127
+ "기준으로",
128
+ "기타",
129
+ "까닭으로",
130
+ "까악",
131
+ "까지",
132
+ "까지 미치다",
133
+ "까지도",
134
+ "꽈당",
135
+ "끙끙",
136
+ "끼익",
137
+ "나",
138
+ "나머지는",
139
+ "남들",
140
+ "남짓",
141
+ "너",
142
+ "너희",
143
+ "너희들",
144
+ "네",
145
+ "넷",
146
+ "년",
147
+ "논하지 않다",
148
+ "놀라다",
149
+ "누가 알겠는가",
150
+ "누구",
151
+ "다른",
152
+ "다른 방면으로",
153
+ "다만",
154
+ "다섯",
155
+ "다소",
156
+ "다수",
157
+ "다시 말하자면",
158
+ "다시말하면",
159
+ "다음",
160
+ "다음에",
161
+ "다음으로",
162
+ "단지",
163
+ "답다",
164
+ "당신",
165
+ "당장",
166
+ "대로 하다",
167
+ "대하면",
168
+ "대하여",
169
+ "대해 말하자면",
170
+ "대해서",
171
+ "댕그",
172
+ "더구나",
173
+ "더군다나",
174
+ "더라도",
175
+ "더불어",
176
+ "더욱더",
177
+ "더욱이는",
178
+ "도달하다",
179
+ "도착하다",
180
+ "동시에",
181
+ "동안",
182
+ "된바에야",
183
+ "된이상",
184
+ "두번째로",
185
+ "둘",
186
+ "둥둥",
187
+ "뒤따라",
188
+ "뒤이어",
189
+ "든간에",
190
+ "들",
191
+ "등",
192
+ "등등",
193
+ "딩동",
194
+ "따라",
195
+ "따라서",
196
+ "따위",
197
+ "따지지 않다",
198
+ "딱",
199
+ "때",
200
+ "때가 되어",
201
+ "때문에",
202
+ "또",
203
+ "또한",
204
+ "뚝뚝",
205
+ "라 해도",
206
+ "령",
207
+ "로",
208
+ "로 인하여",
209
+ "로부터",
210
+ "로써",
211
+ "륙",
212
+ "를",
213
+ "마음대로",
214
+ "마저",
215
+ "마저도",
216
+ "마치",
217
+ "막론하고",
218
+ "만 못하다",
219
+ "만약",
220
+ "만약에",
221
+ "만은 아니다",
222
+ "만이 아니다",
223
+ "만일",
224
+ "만큼",
225
+ "말하자면",
226
+ "말할것도 없고",
227
+ "매",
228
+ "매번",
229
+ "메쓰겁다",
230
+ "몇",
231
+ "모",
232
+ "모두",
233
+ "무렵",
234
+ "무릎쓰고",
235
+ "무슨",
236
+ "무엇",
237
+ "무엇때문에",
238
+ "물론",
239
+ "및",
240
+ "바꾸어말하면",
241
+ "바꾸어말하자면",
242
+ "바꾸어서 말하면",
243
+ "바꾸어서 한다면",
244
+ "바꿔 말하면",
245
+ "바로",
246
+ "바와같이",
247
+ "밖에 안된다",
248
+ "반대로",
249
+ "반대로 말하자면",
250
+ "반드시",
251
+ "버금",
252
+ "보는데서",
253
+ "보다더",
254
+ "보드득",
255
+ "본대로",
256
+ "봐",
257
+ "봐라",
258
+ "부류의 사람들",
259
+ "부터",
260
+ "불구하고",
261
+ "불문하고",
262
+ "붕붕",
263
+ "비걱거리다",
264
+ "비교적",
265
+ "비길수 없다",
266
+ "비로소",
267
+ "비록",
268
+ "비슷하다",
269
+ "비추어 보아",
270
+ "비하면",
271
+ "뿐만 아니라",
272
+ "뿐만아니라",
273
+ "뿐이다",
274
+ "삐걱",
275
+ "삐걱거리다",
276
+ "사",
277
+ "삼",
278
+ "상대적으로 말하자면",
279
+ "생각한대로",
280
+ "설령",
281
+ "설마",
282
+ "설사",
283
+ "셋",
284
+ "소생",
285
+ "소인",
286
+ "솨",
287
+ "쉿",
288
+ "습니까",
289
+ "습니다",
290
+ "시각",
291
+ "시간",
292
+ "시작하여",
293
+ "시초에",
294
+ "시키다",
295
+ "실로",
296
+ "심지어",
297
+ "아",
298
+ "아니",
299
+ "아니나다를가",
300
+ "아니라면",
301
+ "아니면",
302
+ "아니었다면",
303
+ "아래윗",
304
+ "아무거나",
305
+ "아무도",
306
+ "아야",
307
+ "아울러",
308
+ "아이",
309
+ "아이고",
310
+ "아이구",
311
+ "아이야",
312
+ "아이쿠",
313
+ "아하",
314
+ "아홉",
315
+ "안 그러면",
316
+ "않기 위하여",
317
+ "않기 위해서",
318
+ "알 수 있다",
319
+ "알았어",
320
+ "앗",
321
+ "앞에서",
322
+ "앞의것",
323
+ "야",
324
+ "약간",
325
+ "양자",
326
+ "어",
327
+ "어기여차",
328
+ "어느",
329
+ "어느 년도",
330
+ "어느것",
331
+ "어느곳",
332
+ "어느때",
333
+ "어느쪽",
334
+ "어느해",
335
+ "어디",
336
+ "어때",
337
+ "어떠한",
338
+ "어떤",
339
+ "어떤것",
340
+ "어떤것들",
341
+ "어떻게",
342
+ "어떻해",
343
+ "어이",
344
+ "어째서",
345
+ "어쨋든",
346
+ "어쩔수 없다",
347
+ "어찌",
348
+ "어찌됏든",
349
+ "어찌됏어",
350
+ "어찌하든지",
351
+ "어찌하여",
352
+ "언제",
353
+ "언젠가",
354
+ "얼마",
355
+ "얼마 안 되는 것",
356
+ "얼마간",
357
+ "얼마나",
358
+ "얼마든지",
359
+ "얼마만큼",
360
+ "얼마큼",
361
+ "엉엉",
362
+ "에",
363
+ "에 가서",
364
+ "에 달려 있다",
365
+ "에 대해",
366
+ "에 있다",
367
+ "에 한하다",
368
+ "에게",
369
+ "에서",
370
+ "여",
371
+ "여기",
372
+ "여덟",
373
+ "여러분",
374
+ "여보시오",
375
+ "여부",
376
+ "여섯",
377
+ "여전히",
378
+ "여차",
379
+ "연관되다",
380
+ "연이서",
381
+ "영",
382
+ "영차",
383
+ "옆사람",
384
+ "예",
385
+ "예를 들면",
386
+ "예를 들자면",
387
+ "예컨대",
388
+ "예하면",
389
+ "오",
390
+ "오로지",
391
+ "오르다",
392
+ "오자마자",
393
+ "오직",
394
+ "오호",
395
+ "오히려",
396
+ "와",
397
+ "와 같은 사람들",
398
+ "와르르",
399
+ "와아",
400
+ "왜",
401
+ "왜냐하면",
402
+ "외에도",
403
+ "요만큼",
404
+ "요만한 것",
405
+ "요만한걸",
406
+ "요컨대",
407
+ "우르르",
408
+ "우리",
409
+ "우리들",
410
+ "우선",
411
+ "우에 종합한것과같이",
412
+ "운운",
413
+ "월",
414
+ "위에서 서술한바와같이",
415
+ "위하여",
416
+ "위해서",
417
+ "윙윙",
418
+ "육",
419
+ "으로",
420
+ "으로 인하여",
421
+ "으로서",
422
+ "으로써",
423
+ "을",
424
+ "응",
425
+ "응당",
426
+ "의",
427
+ "의거하여",
428
+ "의지하여",
429
+ "의해",
430
+ "의해되다",
431
+ "의해서",
432
+ "이",
433
+ "이 되다",
434
+ "이 때문에",
435
+ "이 밖에",
436
+ "이 외에",
437
+ "이 정도의",
438
+ "이것",
439
+ "이곳",
440
+ "이때",
441
+ "이라면",
442
+ "이래",
443
+ "이러이러하다",
444
+ "이러한",
445
+ "이런",
446
+ "이럴정도로",
447
+ "이렇게 많은 것",
448
+ "이렇게되면",
449
+ "이렇게말하자면",
450
+ "이렇구나",
451
+ "이로 인하여",
452
+ "이르기까지",
453
+ "이리하여",
454
+ "이만큼",
455
+ "이번",
456
+ "이봐",
457
+ "이상",
458
+ "이어서",
459
+ "이었다",
460
+ "이와 같다",
461
+ "이와 같은",
462
+ "이와 반대로",
463
+ "이와같다면",
464
+ "이외에도",
465
+ "이용하여",
466
+ "이유만으로",
467
+ "이젠",
468
+ "이지만",
469
+ "이쪽",
470
+ "이천구",
471
+ "이천육",
472
+ "이천칠",
473
+ "이천팔",
474
+ "인 듯하다",
475
+ "인젠",
476
+ "일",
477
+ "일것이다",
478
+ "일곱",
479
+ "일단",
480
+ "일때",
481
+ "일반적으로",
482
+ "일지라도",
483
+ "임에 틀림없다",
484
+ "입각하여",
485
+ "입장에서",
486
+ "잇따라",
487
+ "있다",
488
+ "자",
489
+ "자기",
490
+ "자기집",
491
+ "자마자",
492
+ "자신",
493
+ "잠깐",
494
+ "잠시",
495
+ "저",
496
+ "저것",
497
+ "저것만큼",
498
+ "저기",
499
+ "저쪽",
500
+ "저희",
501
+ "전부",
502
+ "전자",
503
+ "전후",
504
+ "점에서 보아",
505
+ "정도에 이르다",
506
+ "제",
507
+ "제각기",
508
+ "제외하고",
509
+ "조금",
510
+ "조차",
511
+ "조차도",
512
+ "졸졸",
513
+ "좀",
514
+ "좋아",
515
+ "좍좍",
516
+ "주룩주룩",
517
+ "주저하지 않고",
518
+ "줄은 몰랏다",
519
+ "줄은모른다",
520
+ "중에서",
521
+ "중의하나",
522
+ "즈음하여",
523
+ "즉",
524
+ "즉시",
525
+ "지든지",
526
+ "지만",
527
+ "지말고",
528
+ "진짜로",
529
+ "쪽으로",
530
+ "차라리",
531
+ "참",
532
+ "참나",
533
+ "첫번째로",
534
+ "쳇",
535
+ "총적으로",
536
+ "총적으로 말하면",
537
+ "총적으로 보면",
538
+ "칠",
539
+ "콸콸",
540
+ "쾅쾅",
541
+ "쿵",
542
+ "타다",
543
+ "타인",
544
+ "탕탕",
545
+ "토하다",
546
+ "통하여",
547
+ "툭",
548
+ "퉤",
549
+ "틈타",
550
+ "팍",
551
+ "팔",
552
+ "퍽",
553
+ "펄렁",
554
+ "하",
555
+ "하게될것이다",
556
+ "하게하다",
557
+ "하겠는가",
558
+ "하고 있다",
559
+ "하고있었다",
560
+ "하곤하였다",
561
+ "하구나",
562
+ "하기 때문에",
563
+ "하기 위하여",
564
+ "하기는한데",
565
+ "하기만 하면",
566
+ "하기보다는",
567
+ "하기에",
568
+ "하나",
569
+ "하느니",
570
+ "하는 김에",
571
+ "하는 편이 낫다",
572
+ "하는것도",
573
+ "하는것만 못하다",
574
+ "하는것이 낫다",
575
+ "하는바",
576
+ "하더라도",
577
+ "하도다",
578
+ "하도록시키다",
579
+ "하도록하다",
580
+ "하든지",
581
+ "하려고하다",
582
+ "하마터면",
583
+ "하면 할수록",
584
+ "하면된다",
585
+ "하면서",
586
+ "하물며",
587
+ "하여금",
588
+ "하여야",
589
+ "하자마자",
590
+ "하지 않는다면",
591
+ "하지 않도록",
592
+ "하지마",
593
+ "하지마라",
594
+ "하지만",
595
+ "하하",
596
+ "한 까닭에",
597
+ "한 이유는",
598
+ "한 후",
599
+ "한다면",
600
+ "한다면 몰라도",
601
+ "한데",
602
+ "한마디",
603
+ "한적이있다",
604
+ "한켠으로는",
605
+ "한항목",
606
+ "할 따름이다",
607
+ "할 생각이다",
608
+ "할 줄 안다",
609
+ "할 지경이다",
610
+ "할 힘이 있다",
611
+ "할때",
612
+ "할만하다",
613
+ "할망정",
614
+ "할뿐",
615
+ "할수있다",
616
+ "할수있어",
617
+ "할줄알다",
618
+ "할지라도",
619
+ "할지언정",
620
+ "함께",
621
+ "해도된다",
622
+ "해도좋다",
623
+ "해봐요",
624
+ "해서는 안된다",
625
+ "해야한다",
626
+ "해요",
627
+ "했어요",
628
+ "향하다",
629
+ "향하여",
630
+ "향해서",
631
+ "허",
632
+ "허걱",
633
+ "허허",
634
+ "헉",
635
+ "헉헉",
636
+ "헐떡헐떡",
637
+ "형식으로 쓰여",
638
+ "혹시",
639
+ "혹은",
640
+ "혼자",
641
+ "훨씬",
642
+ "휘익",
643
+ "휴",
644
+ "흐흐",
645
+ "흥",
646
+ "힘입어",
647
+ "︿",
648
+ "!",
649
+ "#",
650
+ "$",
651
+ "%",
652
+ "&",
653
+ "(",
654
+ ")",
655
+ "*",
656
+ "+",
657
+ ",",
658
+ "0",
659
+ "1",
660
+ "2",
661
+ "3",
662
+ "4",
663
+ "5",
664
+ "6",
665
+ "7",
666
+ "8",
667
+ "9",
668
+ ":",
669
+ ";",
670
+ "<",
671
+ ">",
672
+ "?",
673
+ "@",
674
+ "[",
675
+ "]",
676
+ "{",
677
+ "|",
678
+ "}",
679
+ "~",
680
+ "¥"
681
+ ]