kreuzberg 4.0.0.pre.rc.8 → 4.0.0.pre.rc.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +14 -14
  3. data/.rspec +3 -3
  4. data/.rubocop.yaml +1 -1
  5. data/.rubocop.yml +538 -538
  6. data/Gemfile +8 -8
  7. data/Gemfile.lock +4 -104
  8. data/README.md +454 -432
  9. data/Rakefile +25 -25
  10. data/Steepfile +47 -47
  11. data/examples/async_patterns.rb +341 -341
  12. data/ext/kreuzberg_rb/extconf.rb +45 -45
  13. data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -2
  14. data/ext/kreuzberg_rb/native/Cargo.lock +6941 -6721
  15. data/ext/kreuzberg_rb/native/Cargo.toml +54 -54
  16. data/ext/kreuzberg_rb/native/README.md +425 -425
  17. data/ext/kreuzberg_rb/native/build.rs +15 -15
  18. data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -11
  19. data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -14
  20. data/ext/kreuzberg_rb/native/include/strings.h +20 -20
  21. data/ext/kreuzberg_rb/native/include/unistd.h +47 -47
  22. data/ext/kreuzberg_rb/native/src/lib.rs +3158 -3135
  23. data/extconf.rb +28 -28
  24. data/kreuzberg.gemspec +214 -182
  25. data/lib/kreuzberg/api_proxy.rb +142 -142
  26. data/lib/kreuzberg/cache_api.rb +81 -46
  27. data/lib/kreuzberg/cli.rb +55 -55
  28. data/lib/kreuzberg/cli_proxy.rb +127 -127
  29. data/lib/kreuzberg/config.rb +724 -724
  30. data/lib/kreuzberg/error_context.rb +80 -32
  31. data/lib/kreuzberg/errors.rb +118 -118
  32. data/lib/kreuzberg/extraction_api.rb +340 -85
  33. data/lib/kreuzberg/mcp_proxy.rb +186 -186
  34. data/lib/kreuzberg/ocr_backend_protocol.rb +113 -113
  35. data/lib/kreuzberg/post_processor_protocol.rb +86 -86
  36. data/lib/kreuzberg/result.rb +279 -279
  37. data/lib/kreuzberg/setup_lib_path.rb +80 -80
  38. data/lib/kreuzberg/validator_protocol.rb +89 -89
  39. data/lib/kreuzberg/version.rb +5 -5
  40. data/lib/kreuzberg.rb +109 -103
  41. data/lib/pdfium.dll +0 -0
  42. data/sig/kreuzberg/internal.rbs +184 -184
  43. data/sig/kreuzberg.rbs +546 -537
  44. data/spec/binding/cache_spec.rb +227 -227
  45. data/spec/binding/cli_proxy_spec.rb +85 -85
  46. data/spec/binding/cli_spec.rb +55 -55
  47. data/spec/binding/config_spec.rb +345 -345
  48. data/spec/binding/config_validation_spec.rb +283 -283
  49. data/spec/binding/error_handling_spec.rb +213 -213
  50. data/spec/binding/errors_spec.rb +66 -66
  51. data/spec/binding/plugins/ocr_backend_spec.rb +307 -307
  52. data/spec/binding/plugins/postprocessor_spec.rb +269 -269
  53. data/spec/binding/plugins/validator_spec.rb +274 -274
  54. data/spec/fixtures/config.toml +39 -39
  55. data/spec/fixtures/config.yaml +41 -41
  56. data/spec/fixtures/invalid_config.toml +4 -4
  57. data/spec/smoke/package_spec.rb +178 -178
  58. data/spec/spec_helper.rb +42 -42
  59. data/vendor/Cargo.toml +45 -0
  60. data/vendor/kreuzberg/Cargo.toml +61 -38
  61. data/vendor/kreuzberg/README.md +230 -221
  62. data/vendor/kreuzberg/benches/otel_overhead.rs +48 -48
  63. data/vendor/kreuzberg/build.rs +843 -891
  64. data/vendor/kreuzberg/src/api/error.rs +81 -81
  65. data/vendor/kreuzberg/src/api/handlers.rs +199 -199
  66. data/vendor/kreuzberg/src/api/mod.rs +79 -79
  67. data/vendor/kreuzberg/src/api/server.rs +353 -353
  68. data/vendor/kreuzberg/src/api/types.rs +170 -170
  69. data/vendor/kreuzberg/src/cache/mod.rs +1167 -1167
  70. data/vendor/kreuzberg/src/chunking/mod.rs +1877 -1877
  71. data/vendor/kreuzberg/src/chunking/processor.rs +220 -220
  72. data/vendor/kreuzberg/src/core/batch_mode.rs +95 -95
  73. data/vendor/kreuzberg/src/core/config.rs +1080 -1080
  74. data/vendor/kreuzberg/src/core/extractor.rs +1156 -1156
  75. data/vendor/kreuzberg/src/core/io.rs +329 -329
  76. data/vendor/kreuzberg/src/core/mime.rs +605 -605
  77. data/vendor/kreuzberg/src/core/mod.rs +47 -47
  78. data/vendor/kreuzberg/src/core/pipeline.rs +1184 -1171
  79. data/vendor/kreuzberg/src/embeddings.rs +500 -432
  80. data/vendor/kreuzberg/src/error.rs +431 -431
  81. data/vendor/kreuzberg/src/extraction/archive.rs +954 -954
  82. data/vendor/kreuzberg/src/extraction/docx.rs +398 -398
  83. data/vendor/kreuzberg/src/extraction/email.rs +854 -854
  84. data/vendor/kreuzberg/src/extraction/excel.rs +688 -688
  85. data/vendor/kreuzberg/src/extraction/html.rs +601 -569
  86. data/vendor/kreuzberg/src/extraction/image.rs +491 -491
  87. data/vendor/kreuzberg/src/extraction/libreoffice.rs +574 -562
  88. data/vendor/kreuzberg/src/extraction/markdown.rs +213 -213
  89. data/vendor/kreuzberg/src/extraction/mod.rs +81 -81
  90. data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -398
  91. data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -247
  92. data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -240
  93. data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +130 -130
  94. data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -284
  95. data/vendor/kreuzberg/src/extraction/pptx.rs +3100 -3100
  96. data/vendor/kreuzberg/src/extraction/structured.rs +490 -490
  97. data/vendor/kreuzberg/src/extraction/table.rs +328 -328
  98. data/vendor/kreuzberg/src/extraction/text.rs +269 -269
  99. data/vendor/kreuzberg/src/extraction/xml.rs +333 -333
  100. data/vendor/kreuzberg/src/extractors/archive.rs +447 -447
  101. data/vendor/kreuzberg/src/extractors/bibtex.rs +470 -470
  102. data/vendor/kreuzberg/src/extractors/docbook.rs +504 -504
  103. data/vendor/kreuzberg/src/extractors/docx.rs +400 -400
  104. data/vendor/kreuzberg/src/extractors/email.rs +157 -157
  105. data/vendor/kreuzberg/src/extractors/epub.rs +708 -708
  106. data/vendor/kreuzberg/src/extractors/excel.rs +345 -345
  107. data/vendor/kreuzberg/src/extractors/fictionbook.rs +492 -492
  108. data/vendor/kreuzberg/src/extractors/html.rs +407 -407
  109. data/vendor/kreuzberg/src/extractors/image.rs +219 -219
  110. data/vendor/kreuzberg/src/extractors/jats.rs +1054 -1054
  111. data/vendor/kreuzberg/src/extractors/jupyter.rs +368 -368
  112. data/vendor/kreuzberg/src/extractors/latex.rs +653 -653
  113. data/vendor/kreuzberg/src/extractors/markdown.rs +701 -701
  114. data/vendor/kreuzberg/src/extractors/mod.rs +429 -429
  115. data/vendor/kreuzberg/src/extractors/odt.rs +628 -628
  116. data/vendor/kreuzberg/src/extractors/opml.rs +635 -635
  117. data/vendor/kreuzberg/src/extractors/orgmode.rs +529 -529
  118. data/vendor/kreuzberg/src/extractors/pdf.rs +749 -673
  119. data/vendor/kreuzberg/src/extractors/pptx.rs +267 -267
  120. data/vendor/kreuzberg/src/extractors/rst.rs +577 -577
  121. data/vendor/kreuzberg/src/extractors/rtf.rs +809 -809
  122. data/vendor/kreuzberg/src/extractors/security.rs +484 -484
  123. data/vendor/kreuzberg/src/extractors/security_tests.rs +367 -367
  124. data/vendor/kreuzberg/src/extractors/structured.rs +142 -142
  125. data/vendor/kreuzberg/src/extractors/text.rs +265 -265
  126. data/vendor/kreuzberg/src/extractors/typst.rs +651 -651
  127. data/vendor/kreuzberg/src/extractors/xml.rs +147 -147
  128. data/vendor/kreuzberg/src/image/dpi.rs +164 -164
  129. data/vendor/kreuzberg/src/image/mod.rs +6 -6
  130. data/vendor/kreuzberg/src/image/preprocessing.rs +417 -417
  131. data/vendor/kreuzberg/src/image/resize.rs +89 -89
  132. data/vendor/kreuzberg/src/keywords/config.rs +154 -154
  133. data/vendor/kreuzberg/src/keywords/mod.rs +237 -237
  134. data/vendor/kreuzberg/src/keywords/processor.rs +275 -275
  135. data/vendor/kreuzberg/src/keywords/rake.rs +293 -293
  136. data/vendor/kreuzberg/src/keywords/types.rs +68 -68
  137. data/vendor/kreuzberg/src/keywords/yake.rs +163 -163
  138. data/vendor/kreuzberg/src/language_detection/mod.rs +985 -985
  139. data/vendor/kreuzberg/src/language_detection/processor.rs +219 -219
  140. data/vendor/kreuzberg/src/lib.rs +113 -113
  141. data/vendor/kreuzberg/src/mcp/mod.rs +35 -35
  142. data/vendor/kreuzberg/src/mcp/server.rs +2076 -2076
  143. data/vendor/kreuzberg/src/ocr/cache.rs +469 -469
  144. data/vendor/kreuzberg/src/ocr/error.rs +37 -37
  145. data/vendor/kreuzberg/src/ocr/hocr.rs +216 -216
  146. data/vendor/kreuzberg/src/ocr/mod.rs +58 -58
  147. data/vendor/kreuzberg/src/ocr/processor.rs +863 -863
  148. data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -4
  149. data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -144
  150. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +452 -452
  151. data/vendor/kreuzberg/src/ocr/types.rs +393 -393
  152. data/vendor/kreuzberg/src/ocr/utils.rs +47 -47
  153. data/vendor/kreuzberg/src/ocr/validation.rs +206 -206
  154. data/vendor/kreuzberg/src/panic_context.rs +154 -154
  155. data/vendor/kreuzberg/src/pdf/bindings.rs +44 -0
  156. data/vendor/kreuzberg/src/pdf/bundled.rs +346 -328
  157. data/vendor/kreuzberg/src/pdf/error.rs +130 -130
  158. data/vendor/kreuzberg/src/pdf/images.rs +139 -139
  159. data/vendor/kreuzberg/src/pdf/metadata.rs +489 -489
  160. data/vendor/kreuzberg/src/pdf/mod.rs +68 -66
  161. data/vendor/kreuzberg/src/pdf/rendering.rs +368 -368
  162. data/vendor/kreuzberg/src/pdf/table.rs +420 -417
  163. data/vendor/kreuzberg/src/pdf/text.rs +240 -240
  164. data/vendor/kreuzberg/src/plugins/extractor.rs +1044 -1044
  165. data/vendor/kreuzberg/src/plugins/mod.rs +212 -212
  166. data/vendor/kreuzberg/src/plugins/ocr.rs +639 -639
  167. data/vendor/kreuzberg/src/plugins/processor.rs +650 -650
  168. data/vendor/kreuzberg/src/plugins/registry.rs +1339 -1339
  169. data/vendor/kreuzberg/src/plugins/traits.rs +258 -258
  170. data/vendor/kreuzberg/src/plugins/validator.rs +967 -967
  171. data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -1470
  172. data/vendor/kreuzberg/src/text/mod.rs +25 -25
  173. data/vendor/kreuzberg/src/text/quality.rs +697 -697
  174. data/vendor/kreuzberg/src/text/quality_processor.rs +219 -219
  175. data/vendor/kreuzberg/src/text/string_utils.rs +217 -217
  176. data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -164
  177. data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -100
  178. data/vendor/kreuzberg/src/text/token_reduction/core.rs +796 -796
  179. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +902 -902
  180. data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -160
  181. data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -619
  182. data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +147 -147
  183. data/vendor/kreuzberg/src/types.rs +1055 -1055
  184. data/vendor/kreuzberg/src/utils/mod.rs +17 -17
  185. data/vendor/kreuzberg/src/utils/quality.rs +959 -959
  186. data/vendor/kreuzberg/src/utils/string_utils.rs +381 -381
  187. data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -53
  188. data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -482
  189. data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -261
  190. data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -400
  191. data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -1205
  192. data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -280
  193. data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -425
  194. data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -172
  195. data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -622
  196. data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -849
  197. data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -1300
  198. data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -175
  199. data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -734
  200. data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -37
  201. data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -100
  202. data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -801
  203. data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -849
  204. data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -693
  205. data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -111
  206. data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -162
  207. data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -226
  208. data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -41
  209. data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -196
  210. data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -227
  211. data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -181
  212. data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -791
  213. data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -47
  214. data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -760
  215. data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -634
  216. data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -136
  217. data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -84
  218. data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -681
  219. data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -64
  220. data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -51
  221. data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -476
  222. data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -163
  223. data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -1
  224. data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -101
  225. data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -477
  226. data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -490
  227. data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -415
  228. data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -223
  229. data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -331
  230. data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -562
  231. data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -436
  232. data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -561
  233. data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -193
  234. data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -420
  235. data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -448
  236. data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -32
  237. data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -33
  238. data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -420
  239. data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -76
  240. data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -129
  241. data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -54
  242. data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -118
  243. data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -149
  244. data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -506
  245. data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -75
  246. data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -519
  247. data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -647
  248. data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -62
  249. data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -796
  250. data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -31
  251. data/vendor/kreuzberg/tests/api_extract_multipart.rs +52 -52
  252. data/vendor/kreuzberg/tests/api_tests.rs +966 -966
  253. data/vendor/kreuzberg/tests/archive_integration.rs +545 -545
  254. data/vendor/kreuzberg/tests/batch_orchestration.rs +556 -556
  255. data/vendor/kreuzberg/tests/batch_processing.rs +318 -318
  256. data/vendor/kreuzberg/tests/bibtex_parity_test.rs +421 -421
  257. data/vendor/kreuzberg/tests/concurrency_stress.rs +533 -533
  258. data/vendor/kreuzberg/tests/config_features.rs +612 -612
  259. data/vendor/kreuzberg/tests/config_loading_tests.rs +416 -416
  260. data/vendor/kreuzberg/tests/core_integration.rs +510 -510
  261. data/vendor/kreuzberg/tests/csv_integration.rs +414 -414
  262. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +500 -500
  263. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -122
  264. data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -370
  265. data/vendor/kreuzberg/tests/email_integration.rs +327 -327
  266. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +275 -275
  267. data/vendor/kreuzberg/tests/error_handling.rs +402 -402
  268. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -228
  269. data/vendor/kreuzberg/tests/format_integration.rs +164 -161
  270. data/vendor/kreuzberg/tests/helpers/mod.rs +142 -142
  271. data/vendor/kreuzberg/tests/html_table_test.rs +551 -551
  272. data/vendor/kreuzberg/tests/image_integration.rs +255 -255
  273. data/vendor/kreuzberg/tests/instrumentation_test.rs +139 -139
  274. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +639 -639
  275. data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +704 -704
  276. data/vendor/kreuzberg/tests/keywords_integration.rs +479 -479
  277. data/vendor/kreuzberg/tests/keywords_quality.rs +509 -509
  278. data/vendor/kreuzberg/tests/latex_extractor_tests.rs +496 -496
  279. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +490 -490
  280. data/vendor/kreuzberg/tests/mime_detection.rs +429 -429
  281. data/vendor/kreuzberg/tests/ocr_configuration.rs +514 -514
  282. data/vendor/kreuzberg/tests/ocr_errors.rs +698 -698
  283. data/vendor/kreuzberg/tests/ocr_quality.rs +629 -629
  284. data/vendor/kreuzberg/tests/ocr_stress.rs +469 -469
  285. data/vendor/kreuzberg/tests/odt_extractor_tests.rs +674 -674
  286. data/vendor/kreuzberg/tests/opml_extractor_tests.rs +616 -616
  287. data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +822 -822
  288. data/vendor/kreuzberg/tests/pdf_integration.rs +45 -45
  289. data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -374
  290. data/vendor/kreuzberg/tests/pipeline_integration.rs +1436 -1436
  291. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -776
  292. data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +560 -560
  293. data/vendor/kreuzberg/tests/plugin_system.rs +927 -927
  294. data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -783
  295. data/vendor/kreuzberg/tests/registry_integration_tests.rs +587 -587
  296. data/vendor/kreuzberg/tests/rst_extractor_tests.rs +694 -694
  297. data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +775 -775
  298. data/vendor/kreuzberg/tests/security_validation.rs +416 -416
  299. data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -888
  300. data/vendor/kreuzberg/tests/test_fastembed.rs +631 -631
  301. data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1260 -1260
  302. data/vendor/kreuzberg/tests/typst_extractor_tests.rs +648 -648
  303. data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -87
  304. data/vendor/kreuzberg-ffi/Cargo.toml +63 -0
  305. data/vendor/kreuzberg-ffi/README.md +851 -0
  306. data/vendor/kreuzberg-ffi/build.rs +176 -0
  307. data/vendor/kreuzberg-ffi/cbindgen.toml +27 -0
  308. data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +12 -0
  309. data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
  310. data/vendor/kreuzberg-ffi/kreuzberg.h +1087 -0
  311. data/vendor/kreuzberg-ffi/src/lib.rs +3616 -0
  312. data/vendor/kreuzberg-ffi/src/panic_shield.rs +247 -0
  313. data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -0
  314. data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
  315. data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
  316. data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
  317. data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
  318. data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -0
  319. data/vendor/kreuzberg-tesseract/.crate-ignore +2 -0
  320. data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -0
  321. data/vendor/kreuzberg-tesseract/Cargo.toml +48 -0
  322. data/vendor/kreuzberg-tesseract/LICENSE +22 -0
  323. data/vendor/kreuzberg-tesseract/README.md +399 -0
  324. data/vendor/kreuzberg-tesseract/build.rs +1354 -0
  325. data/vendor/kreuzberg-tesseract/patches/README.md +71 -0
  326. data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -0
  327. data/vendor/kreuzberg-tesseract/src/api.rs +1371 -0
  328. data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
  329. data/vendor/kreuzberg-tesseract/src/enums.rs +297 -0
  330. data/vendor/kreuzberg-tesseract/src/error.rs +81 -0
  331. data/vendor/kreuzberg-tesseract/src/lib.rs +145 -0
  332. data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -0
  333. data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
  334. data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -0
  335. data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -0
  336. data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -0
  337. data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -0
  338. data/vendor/rb-sys/.cargo_vcs_info.json +5 -5
  339. data/vendor/rb-sys/Cargo.lock +393 -393
  340. data/vendor/rb-sys/Cargo.toml +70 -70
  341. data/vendor/rb-sys/Cargo.toml.orig +57 -57
  342. data/vendor/rb-sys/LICENSE-APACHE +190 -190
  343. data/vendor/rb-sys/LICENSE-MIT +21 -21
  344. data/vendor/rb-sys/build/features.rs +111 -111
  345. data/vendor/rb-sys/build/main.rs +286 -286
  346. data/vendor/rb-sys/build/stable_api_config.rs +155 -155
  347. data/vendor/rb-sys/build/version.rs +50 -50
  348. data/vendor/rb-sys/readme.md +36 -36
  349. data/vendor/rb-sys/src/bindings.rs +21 -21
  350. data/vendor/rb-sys/src/hidden.rs +11 -11
  351. data/vendor/rb-sys/src/lib.rs +35 -35
  352. data/vendor/rb-sys/src/macros.rs +371 -371
  353. data/vendor/rb-sys/src/memory.rs +53 -53
  354. data/vendor/rb-sys/src/ruby_abi_version.rs +38 -38
  355. data/vendor/rb-sys/src/special_consts.rs +31 -31
  356. data/vendor/rb-sys/src/stable_api/compiled.c +179 -179
  357. data/vendor/rb-sys/src/stable_api/compiled.rs +257 -257
  358. data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +324 -324
  359. data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +332 -332
  360. data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +325 -325
  361. data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +323 -323
  362. data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +339 -339
  363. data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +339 -339
  364. data/vendor/rb-sys/src/stable_api.rs +260 -260
  365. data/vendor/rb-sys/src/symbol.rs +31 -31
  366. data/vendor/rb-sys/src/tracking_allocator.rs +330 -330
  367. data/vendor/rb-sys/src/utils.rs +89 -89
  368. data/vendor/rb-sys/src/value_type.rs +7 -7
  369. metadata +44 -81
  370. data/vendor/rb-sys/bin/release.sh +0 -21
@@ -1,1300 +1,1300 @@
1
- [
2
- "'ll",
3
- "'tis",
4
- "'twas",
5
- "'ve",
6
- "10",
7
- "39",
8
- "a",
9
- "a's",
10
- "able",
11
- "ableabout",
12
- "about",
13
- "above",
14
- "abroad",
15
- "abst",
16
- "accordance",
17
- "according",
18
- "accordingly",
19
- "across",
20
- "act",
21
- "actually",
22
- "ad",
23
- "added",
24
- "adj",
25
- "adopted",
26
- "ae",
27
- "af",
28
- "affected",
29
- "affecting",
30
- "affects",
31
- "after",
32
- "afterwards",
33
- "ag",
34
- "again",
35
- "against",
36
- "ago",
37
- "ah",
38
- "ahead",
39
- "ai",
40
- "ain't",
41
- "aint",
42
- "al",
43
- "all",
44
- "allow",
45
- "allows",
46
- "almost",
47
- "alone",
48
- "along",
49
- "alongside",
50
- "already",
51
- "also",
52
- "although",
53
- "always",
54
- "am",
55
- "amid",
56
- "amidst",
57
- "among",
58
- "amongst",
59
- "amoungst",
60
- "amount",
61
- "an",
62
- "and",
63
- "announce",
64
- "another",
65
- "any",
66
- "anybody",
67
- "anyhow",
68
- "anymore",
69
- "anyone",
70
- "anything",
71
- "anyway",
72
- "anyways",
73
- "anywhere",
74
- "ao",
75
- "apart",
76
- "apparently",
77
- "appear",
78
- "appreciate",
79
- "appropriate",
80
- "approximately",
81
- "aq",
82
- "ar",
83
- "are",
84
- "area",
85
- "areas",
86
- "aren",
87
- "aren't",
88
- "arent",
89
- "arise",
90
- "around",
91
- "arpa",
92
- "as",
93
- "aside",
94
- "ask",
95
- "asked",
96
- "asking",
97
- "asks",
98
- "associated",
99
- "at",
100
- "au",
101
- "auth",
102
- "available",
103
- "aw",
104
- "away",
105
- "awfully",
106
- "az",
107
- "b",
108
- "ba",
109
- "back",
110
- "backed",
111
- "backing",
112
- "backs",
113
- "backward",
114
- "backwards",
115
- "bb",
116
- "bd",
117
- "be",
118
- "became",
119
- "because",
120
- "become",
121
- "becomes",
122
- "becoming",
123
- "been",
124
- "before",
125
- "beforehand",
126
- "began",
127
- "begin",
128
- "beginning",
129
- "beginnings",
130
- "begins",
131
- "behind",
132
- "being",
133
- "beings",
134
- "believe",
135
- "below",
136
- "beside",
137
- "besides",
138
- "best",
139
- "better",
140
- "between",
141
- "beyond",
142
- "bf",
143
- "bg",
144
- "bh",
145
- "bi",
146
- "big",
147
- "bill",
148
- "billion",
149
- "biol",
150
- "bj",
151
- "bm",
152
- "bn",
153
- "bo",
154
- "both",
155
- "bottom",
156
- "br",
157
- "brief",
158
- "briefly",
159
- "bs",
160
- "bt",
161
- "but",
162
- "buy",
163
- "bv",
164
- "bw",
165
- "by",
166
- "bz",
167
- "c",
168
- "c'mon",
169
- "c's",
170
- "ca",
171
- "call",
172
- "came",
173
- "can",
174
- "can't",
175
- "cannot",
176
- "cant",
177
- "caption",
178
- "case",
179
- "cases",
180
- "cause",
181
- "causes",
182
- "cc",
183
- "cd",
184
- "certain",
185
- "certainly",
186
- "cf",
187
- "cg",
188
- "ch",
189
- "changes",
190
- "ci",
191
- "ck",
192
- "cl",
193
- "clear",
194
- "clearly",
195
- "click",
196
- "cm",
197
- "cmon",
198
- "cn",
199
- "co",
200
- "co.",
201
- "com",
202
- "come",
203
- "comes",
204
- "computer",
205
- "con",
206
- "concerning",
207
- "consequently",
208
- "consider",
209
- "considering",
210
- "contain",
211
- "containing",
212
- "contains",
213
- "copy",
214
- "corresponding",
215
- "could",
216
- "could've",
217
- "couldn",
218
- "couldn't",
219
- "couldnt",
220
- "course",
221
- "cr",
222
- "cry",
223
- "cs",
224
- "cu",
225
- "currently",
226
- "cv",
227
- "cx",
228
- "cy",
229
- "cz",
230
- "d",
231
- "dare",
232
- "daren't",
233
- "darent",
234
- "date",
235
- "de",
236
- "dear",
237
- "definitely",
238
- "describe",
239
- "described",
240
- "despite",
241
- "detail",
242
- "did",
243
- "didn",
244
- "didn't",
245
- "didnt",
246
- "differ",
247
- "different",
248
- "differently",
249
- "directly",
250
- "dj",
251
- "dk",
252
- "dm",
253
- "do",
254
- "does",
255
- "doesn",
256
- "doesn't",
257
- "doesnt",
258
- "doing",
259
- "don",
260
- "don't",
261
- "done",
262
- "dont",
263
- "doubtful",
264
- "down",
265
- "downed",
266
- "downing",
267
- "downs",
268
- "downwards",
269
- "due",
270
- "during",
271
- "dz",
272
- "e",
273
- "each",
274
- "early",
275
- "ec",
276
- "ed",
277
- "edu",
278
- "ee",
279
- "effect",
280
- "eg",
281
- "eh",
282
- "eight",
283
- "eighty",
284
- "either",
285
- "eleven",
286
- "else",
287
- "elsewhere",
288
- "empty",
289
- "end",
290
- "ended",
291
- "ending",
292
- "ends",
293
- "enough",
294
- "entirely",
295
- "er",
296
- "es",
297
- "especially",
298
- "et",
299
- "et-al",
300
- "etc",
301
- "even",
302
- "evenly",
303
- "ever",
304
- "evermore",
305
- "every",
306
- "everybody",
307
- "everyone",
308
- "everything",
309
- "everywhere",
310
- "ex",
311
- "exactly",
312
- "example",
313
- "except",
314
- "f",
315
- "face",
316
- "faces",
317
- "fact",
318
- "facts",
319
- "fairly",
320
- "far",
321
- "farther",
322
- "felt",
323
- "few",
324
- "fewer",
325
- "ff",
326
- "fi",
327
- "fifteen",
328
- "fifth",
329
- "fifty",
330
- "fify",
331
- "fill",
332
- "find",
333
- "finds",
334
- "fire",
335
- "first",
336
- "five",
337
- "fix",
338
- "fj",
339
- "fk",
340
- "fm",
341
- "fo",
342
- "followed",
343
- "following",
344
- "follows",
345
- "for",
346
- "forever",
347
- "former",
348
- "formerly",
349
- "forth",
350
- "forty",
351
- "forward",
352
- "found",
353
- "four",
354
- "fr",
355
- "free",
356
- "from",
357
- "front",
358
- "full",
359
- "fully",
360
- "further",
361
- "furthered",
362
- "furthering",
363
- "furthermore",
364
- "furthers",
365
- "fx",
366
- "g",
367
- "ga",
368
- "gave",
369
- "gb",
370
- "gd",
371
- "ge",
372
- "general",
373
- "generally",
374
- "get",
375
- "gets",
376
- "getting",
377
- "gf",
378
- "gg",
379
- "gh",
380
- "gi",
381
- "give",
382
- "given",
383
- "gives",
384
- "giving",
385
- "gl",
386
- "gm",
387
- "gmt",
388
- "gn",
389
- "go",
390
- "goes",
391
- "going",
392
- "gone",
393
- "good",
394
- "goods",
395
- "got",
396
- "gotten",
397
- "gov",
398
- "gp",
399
- "gq",
400
- "gr",
401
- "great",
402
- "greater",
403
- "greatest",
404
- "greetings",
405
- "group",
406
- "grouped",
407
- "grouping",
408
- "groups",
409
- "gs",
410
- "gt",
411
- "gu",
412
- "gw",
413
- "gy",
414
- "h",
415
- "had",
416
- "hadn't",
417
- "hadnt",
418
- "half",
419
- "happens",
420
- "hardly",
421
- "has",
422
- "hasn",
423
- "hasn't",
424
- "hasnt",
425
- "have",
426
- "haven",
427
- "haven't",
428
- "havent",
429
- "having",
430
- "he",
431
- "he'd",
432
- "he'll",
433
- "he's",
434
- "hed",
435
- "hell",
436
- "hello",
437
- "help",
438
- "hence",
439
- "her",
440
- "here",
441
- "here's",
442
- "hereafter",
443
- "hereby",
444
- "herein",
445
- "heres",
446
- "hereupon",
447
- "hers",
448
- "herself",
449
- "herse”",
450
- "hes",
451
- "hi",
452
- "hid",
453
- "high",
454
- "higher",
455
- "highest",
456
- "him",
457
- "himself",
458
- "himse”",
459
- "his",
460
- "hither",
461
- "hk",
462
- "hm",
463
- "hn",
464
- "home",
465
- "homepage",
466
- "hopefully",
467
- "how",
468
- "how'd",
469
- "how'll",
470
- "how's",
471
- "howbeit",
472
- "however",
473
- "hr",
474
- "ht",
475
- "htm",
476
- "html",
477
- "http",
478
- "hu",
479
- "hundred",
480
- "i",
481
- "i'd",
482
- "i'll",
483
- "i'm",
484
- "i've",
485
- "i.e.",
486
- "id",
487
- "ie",
488
- "if",
489
- "ignored",
490
- "ii",
491
- "il",
492
- "ill",
493
- "im",
494
- "immediate",
495
- "immediately",
496
- "importance",
497
- "important",
498
- "in",
499
- "inasmuch",
500
- "inc",
501
- "inc.",
502
- "indeed",
503
- "index",
504
- "indicate",
505
- "indicated",
506
- "indicates",
507
- "information",
508
- "inner",
509
- "inside",
510
- "insofar",
511
- "instead",
512
- "int",
513
- "interest",
514
- "interested",
515
- "interesting",
516
- "interests",
517
- "into",
518
- "invention",
519
- "inward",
520
- "io",
521
- "iq",
522
- "ir",
523
- "is",
524
- "isn",
525
- "isn't",
526
- "isnt",
527
- "it",
528
- "it'd",
529
- "it'll",
530
- "it's",
531
- "itd",
532
- "itll",
533
- "its",
534
- "itself",
535
- "itse”",
536
- "ive",
537
- "j",
538
- "je",
539
- "jm",
540
- "jo",
541
- "join",
542
- "jp",
543
- "just",
544
- "k",
545
- "ke",
546
- "keep",
547
- "keeps",
548
- "kept",
549
- "keys",
550
- "kg",
551
- "kh",
552
- "ki",
553
- "kind",
554
- "km",
555
- "kn",
556
- "knew",
557
- "know",
558
- "known",
559
- "knows",
560
- "kp",
561
- "kr",
562
- "kw",
563
- "ky",
564
- "kz",
565
- "l",
566
- "la",
567
- "large",
568
- "largely",
569
- "last",
570
- "lately",
571
- "later",
572
- "latest",
573
- "latter",
574
- "latterly",
575
- "lb",
576
- "lc",
577
- "least",
578
- "length",
579
- "less",
580
- "lest",
581
- "let",
582
- "let's",
583
- "lets",
584
- "li",
585
- "like",
586
- "liked",
587
- "likely",
588
- "likewise",
589
- "line",
590
- "little",
591
- "lk",
592
- "ll",
593
- "long",
594
- "longer",
595
- "longest",
596
- "look",
597
- "looking",
598
- "looks",
599
- "low",
600
- "lower",
601
- "lr",
602
- "ls",
603
- "lt",
604
- "ltd",
605
- "lu",
606
- "lv",
607
- "ly",
608
- "m",
609
- "ma",
610
- "made",
611
- "mainly",
612
- "make",
613
- "makes",
614
- "making",
615
- "man",
616
- "many",
617
- "may",
618
- "maybe",
619
- "mayn't",
620
- "maynt",
621
- "mc",
622
- "md",
623
- "me",
624
- "mean",
625
- "means",
626
- "meantime",
627
- "meanwhile",
628
- "member",
629
- "members",
630
- "men",
631
- "merely",
632
- "mg",
633
- "mh",
634
- "microsoft",
635
- "might",
636
- "might've",
637
- "mightn't",
638
- "mightnt",
639
- "mil",
640
- "mill",
641
- "million",
642
- "mine",
643
- "minus",
644
- "miss",
645
- "mk",
646
- "ml",
647
- "mm",
648
- "mn",
649
- "mo",
650
- "more",
651
- "moreover",
652
- "most",
653
- "mostly",
654
- "move",
655
- "mp",
656
- "mq",
657
- "mr",
658
- "mrs",
659
- "ms",
660
- "msie",
661
- "mt",
662
- "mu",
663
- "much",
664
- "mug",
665
- "must",
666
- "must've",
667
- "mustn't",
668
- "mustnt",
669
- "mv",
670
- "mw",
671
- "mx",
672
- "my",
673
- "myself",
674
- "myse”",
675
- "mz",
676
- "n",
677
- "na",
678
- "name",
679
- "namely",
680
- "nay",
681
- "nc",
682
- "nd",
683
- "ne",
684
- "near",
685
- "nearly",
686
- "necessarily",
687
- "necessary",
688
- "need",
689
- "needed",
690
- "needing",
691
- "needn't",
692
- "neednt",
693
- "needs",
694
- "neither",
695
- "net",
696
- "netscape",
697
- "never",
698
- "neverf",
699
- "neverless",
700
- "nevertheless",
701
- "new",
702
- "newer",
703
- "newest",
704
- "next",
705
- "nf",
706
- "ng",
707
- "ni",
708
- "nine",
709
- "ninety",
710
- "nl",
711
- "no",
712
- "no-one",
713
- "nobody",
714
- "non",
715
- "none",
716
- "nonetheless",
717
- "noone",
718
- "nor",
719
- "normally",
720
- "nos",
721
- "not",
722
- "noted",
723
- "nothing",
724
- "notwithstanding",
725
- "novel",
726
- "now",
727
- "nowhere",
728
- "np",
729
- "nr",
730
- "nu",
731
- "null",
732
- "number",
733
- "numbers",
734
- "nz",
735
- "o",
736
- "obtain",
737
- "obtained",
738
- "obviously",
739
- "of",
740
- "off",
741
- "often",
742
- "oh",
743
- "ok",
744
- "okay",
745
- "old",
746
- "older",
747
- "oldest",
748
- "om",
749
- "omitted",
750
- "on",
751
- "once",
752
- "one",
753
- "one's",
754
- "ones",
755
- "only",
756
- "onto",
757
- "open",
758
- "opened",
759
- "opening",
760
- "opens",
761
- "opposite",
762
- "or",
763
- "ord",
764
- "order",
765
- "ordered",
766
- "ordering",
767
- "orders",
768
- "org",
769
- "other",
770
- "others",
771
- "otherwise",
772
- "ought",
773
- "oughtn't",
774
- "oughtnt",
775
- "our",
776
- "ours",
777
- "ourselves",
778
- "out",
779
- "outside",
780
- "over",
781
- "overall",
782
- "owing",
783
- "own",
784
- "p",
785
- "pa",
786
- "page",
787
- "pages",
788
- "part",
789
- "parted",
790
- "particular",
791
- "particularly",
792
- "parting",
793
- "parts",
794
- "past",
795
- "pe",
796
- "per",
797
- "perhaps",
798
- "pf",
799
- "pg",
800
- "ph",
801
- "pk",
802
- "pl",
803
- "place",
804
- "placed",
805
- "places",
806
- "please",
807
- "plus",
808
- "pm",
809
- "pmid",
810
- "pn",
811
- "point",
812
- "pointed",
813
- "pointing",
814
- "points",
815
- "poorly",
816
- "possible",
817
- "possibly",
818
- "potentially",
819
- "pp",
820
- "pr",
821
- "predominantly",
822
- "present",
823
- "presented",
824
- "presenting",
825
- "presents",
826
- "presumably",
827
- "previously",
828
- "primarily",
829
- "probably",
830
- "problem",
831
- "problems",
832
- "promptly",
833
- "proud",
834
- "provided",
835
- "provides",
836
- "pt",
837
- "put",
838
- "puts",
839
- "pw",
840
- "py",
841
- "q",
842
- "qa",
843
- "que",
844
- "quickly",
845
- "quite",
846
- "qv",
847
- "r",
848
- "ran",
849
- "rather",
850
- "rd",
851
- "re",
852
- "readily",
853
- "really",
854
- "reasonably",
855
- "recent",
856
- "recently",
857
- "ref",
858
- "refs",
859
- "regarding",
860
- "regardless",
861
- "regards",
862
- "related",
863
- "relatively",
864
- "research",
865
- "reserved",
866
- "respectively",
867
- "resulted",
868
- "resulting",
869
- "results",
870
- "right",
871
- "ring",
872
- "ro",
873
- "room",
874
- "rooms",
875
- "round",
876
- "ru",
877
- "run",
878
- "rw",
879
- "s",
880
- "sa",
881
- "said",
882
- "same",
883
- "saw",
884
- "say",
885
- "saying",
886
- "says",
887
- "sb",
888
- "sc",
889
- "sd",
890
- "se",
891
- "sec",
892
- "second",
893
- "secondly",
894
- "seconds",
895
- "section",
896
- "see",
897
- "seeing",
898
- "seem",
899
- "seemed",
900
- "seeming",
901
- "seems",
902
- "seen",
903
- "sees",
904
- "self",
905
- "selves",
906
- "sensible",
907
- "sent",
908
- "serious",
909
- "seriously",
910
- "seven",
911
- "seventy",
912
- "several",
913
- "sg",
914
- "sh",
915
- "shall",
916
- "shan't",
917
- "shant",
918
- "she",
919
- "she'd",
920
- "she'll",
921
- "she's",
922
- "shed",
923
- "shell",
924
- "shes",
925
- "should",
926
- "should've",
927
- "shouldn",
928
- "shouldn't",
929
- "shouldnt",
930
- "show",
931
- "showed",
932
- "showing",
933
- "shown",
934
- "showns",
935
- "shows",
936
- "si",
937
- "side",
938
- "sides",
939
- "significant",
940
- "significantly",
941
- "similar",
942
- "similarly",
943
- "since",
944
- "sincere",
945
- "site",
946
- "six",
947
- "sixty",
948
- "sj",
949
- "sk",
950
- "sl",
951
- "slightly",
952
- "sm",
953
- "small",
954
- "smaller",
955
- "smallest",
956
- "sn",
957
- "so",
958
- "some",
959
- "somebody",
960
- "someday",
961
- "somehow",
962
- "someone",
963
- "somethan",
964
- "something",
965
- "sometime",
966
- "sometimes",
967
- "somewhat",
968
- "somewhere",
969
- "soon",
970
- "sorry",
971
- "specifically",
972
- "specified",
973
- "specify",
974
- "specifying",
975
- "sr",
976
- "st",
977
- "state",
978
- "states",
979
- "still",
980
- "stop",
981
- "strongly",
982
- "su",
983
- "sub",
984
- "substantially",
985
- "successfully",
986
- "such",
987
- "sufficiently",
988
- "suggest",
989
- "sup",
990
- "sure",
991
- "sv",
992
- "sy",
993
- "system",
994
- "sz",
995
- "t",
996
- "t's",
997
- "take",
998
- "taken",
999
- "taking",
1000
- "tc",
1001
- "td",
1002
- "tell",
1003
- "ten",
1004
- "tends",
1005
- "test",
1006
- "text",
1007
- "tf",
1008
- "tg",
1009
- "th",
1010
- "than",
1011
- "thank",
1012
- "thanks",
1013
- "thanx",
1014
- "that",
1015
- "that'll",
1016
- "that's",
1017
- "that've",
1018
- "thatll",
1019
- "thats",
1020
- "thatve",
1021
- "the",
1022
- "their",
1023
- "theirs",
1024
- "them",
1025
- "themselves",
1026
- "then",
1027
- "thence",
1028
- "there",
1029
- "there'd",
1030
- "there'll",
1031
- "there're",
1032
- "there's",
1033
- "there've",
1034
- "thereafter",
1035
- "thereby",
1036
- "thered",
1037
- "therefore",
1038
- "therein",
1039
- "therell",
1040
- "thereof",
1041
- "therere",
1042
- "theres",
1043
- "thereto",
1044
- "thereupon",
1045
- "thereve",
1046
- "these",
1047
- "they",
1048
- "they'd",
1049
- "they'll",
1050
- "they're",
1051
- "they've",
1052
- "theyd",
1053
- "theyll",
1054
- "theyre",
1055
- "theyve",
1056
- "thick",
1057
- "thin",
1058
- "thing",
1059
- "things",
1060
- "think",
1061
- "thinks",
1062
- "third",
1063
- "thirty",
1064
- "this",
1065
- "thorough",
1066
- "thoroughly",
1067
- "those",
1068
- "thou",
1069
- "though",
1070
- "thoughh",
1071
- "thought",
1072
- "thoughts",
1073
- "thousand",
1074
- "three",
1075
- "throug",
1076
- "through",
1077
- "throughout",
1078
- "thru",
1079
- "thus",
1080
- "til",
1081
- "till",
1082
- "tip",
1083
- "tis",
1084
- "tj",
1085
- "tk",
1086
- "tm",
1087
- "tn",
1088
- "to",
1089
- "today",
1090
- "together",
1091
- "too",
1092
- "took",
1093
- "top",
1094
- "toward",
1095
- "towards",
1096
- "tp",
1097
- "tr",
1098
- "tried",
1099
- "tries",
1100
- "trillion",
1101
- "truly",
1102
- "try",
1103
- "trying",
1104
- "ts",
1105
- "tt",
1106
- "turn",
1107
- "turned",
1108
- "turning",
1109
- "turns",
1110
- "tv",
1111
- "tw",
1112
- "twas",
1113
- "twelve",
1114
- "twenty",
1115
- "twice",
1116
- "two",
1117
- "tz",
1118
- "u",
1119
- "ua",
1120
- "ug",
1121
- "uk",
1122
- "um",
1123
- "un",
1124
- "under",
1125
- "underneath",
1126
- "undoing",
1127
- "unfortunately",
1128
- "unless",
1129
- "unlike",
1130
- "unlikely",
1131
- "until",
1132
- "unto",
1133
- "up",
1134
- "upon",
1135
- "ups",
1136
- "upwards",
1137
- "us",
1138
- "use",
1139
- "used",
1140
- "useful",
1141
- "usefully",
1142
- "usefulness",
1143
- "uses",
1144
- "using",
1145
- "usually",
1146
- "uucp",
1147
- "uy",
1148
- "uz",
1149
- "v",
1150
- "va",
1151
- "value",
1152
- "various",
1153
- "vc",
1154
- "ve",
1155
- "versus",
1156
- "very",
1157
- "vg",
1158
- "vi",
1159
- "via",
1160
- "viz",
1161
- "vn",
1162
- "vol",
1163
- "vols",
1164
- "vs",
1165
- "vu",
1166
- "w",
1167
- "want",
1168
- "wanted",
1169
- "wanting",
1170
- "wants",
1171
- "was",
1172
- "wasn",
1173
- "wasn't",
1174
- "wasnt",
1175
- "way",
1176
- "ways",
1177
- "we",
1178
- "we'd",
1179
- "we'll",
1180
- "we're",
1181
- "we've",
1182
- "web",
1183
- "webpage",
1184
- "website",
1185
- "wed",
1186
- "welcome",
1187
- "well",
1188
- "wells",
1189
- "went",
1190
- "were",
1191
- "weren",
1192
- "weren't",
1193
- "werent",
1194
- "weve",
1195
- "wf",
1196
- "what",
1197
- "what'd",
1198
- "what'll",
1199
- "what's",
1200
- "what've",
1201
- "whatever",
1202
- "whatll",
1203
- "whats",
1204
- "whatve",
1205
- "when",
1206
- "when'd",
1207
- "when'll",
1208
- "when's",
1209
- "whence",
1210
- "whenever",
1211
- "where",
1212
- "where'd",
1213
- "where'll",
1214
- "where's",
1215
- "whereafter",
1216
- "whereas",
1217
- "whereby",
1218
- "wherein",
1219
- "wheres",
1220
- "whereupon",
1221
- "wherever",
1222
- "whether",
1223
- "which",
1224
- "whichever",
1225
- "while",
1226
- "whilst",
1227
- "whim",
1228
- "whither",
1229
- "who",
1230
- "who'd",
1231
- "who'll",
1232
- "who's",
1233
- "whod",
1234
- "whoever",
1235
- "whole",
1236
- "wholl",
1237
- "whom",
1238
- "whomever",
1239
- "whos",
1240
- "whose",
1241
- "why",
1242
- "why'd",
1243
- "why'll",
1244
- "why's",
1245
- "widely",
1246
- "width",
1247
- "will",
1248
- "willing",
1249
- "wish",
1250
- "with",
1251
- "within",
1252
- "without",
1253
- "won",
1254
- "won't",
1255
- "wonder",
1256
- "wont",
1257
- "words",
1258
- "work",
1259
- "worked",
1260
- "working",
1261
- "works",
1262
- "world",
1263
- "would",
1264
- "would've",
1265
- "wouldn",
1266
- "wouldn't",
1267
- "wouldnt",
1268
- "ws",
1269
- "www",
1270
- "x",
1271
- "y",
1272
- "ye",
1273
- "year",
1274
- "years",
1275
- "yes",
1276
- "yet",
1277
- "you",
1278
- "you'd",
1279
- "you'll",
1280
- "you're",
1281
- "you've",
1282
- "youd",
1283
- "youll",
1284
- "young",
1285
- "younger",
1286
- "youngest",
1287
- "your",
1288
- "youre",
1289
- "yours",
1290
- "yourself",
1291
- "yourselves",
1292
- "youve",
1293
- "yt",
1294
- "yu",
1295
- "z",
1296
- "za",
1297
- "zero",
1298
- "zm",
1299
- "zr"
1300
- ]
1
+ [
2
+ "'ll",
3
+ "'tis",
4
+ "'twas",
5
+ "'ve",
6
+ "10",
7
+ "39",
8
+ "a",
9
+ "a's",
10
+ "able",
11
+ "ableabout",
12
+ "about",
13
+ "above",
14
+ "abroad",
15
+ "abst",
16
+ "accordance",
17
+ "according",
18
+ "accordingly",
19
+ "across",
20
+ "act",
21
+ "actually",
22
+ "ad",
23
+ "added",
24
+ "adj",
25
+ "adopted",
26
+ "ae",
27
+ "af",
28
+ "affected",
29
+ "affecting",
30
+ "affects",
31
+ "after",
32
+ "afterwards",
33
+ "ag",
34
+ "again",
35
+ "against",
36
+ "ago",
37
+ "ah",
38
+ "ahead",
39
+ "ai",
40
+ "ain't",
41
+ "aint",
42
+ "al",
43
+ "all",
44
+ "allow",
45
+ "allows",
46
+ "almost",
47
+ "alone",
48
+ "along",
49
+ "alongside",
50
+ "already",
51
+ "also",
52
+ "although",
53
+ "always",
54
+ "am",
55
+ "amid",
56
+ "amidst",
57
+ "among",
58
+ "amongst",
59
+ "amoungst",
60
+ "amount",
61
+ "an",
62
+ "and",
63
+ "announce",
64
+ "another",
65
+ "any",
66
+ "anybody",
67
+ "anyhow",
68
+ "anymore",
69
+ "anyone",
70
+ "anything",
71
+ "anyway",
72
+ "anyways",
73
+ "anywhere",
74
+ "ao",
75
+ "apart",
76
+ "apparently",
77
+ "appear",
78
+ "appreciate",
79
+ "appropriate",
80
+ "approximately",
81
+ "aq",
82
+ "ar",
83
+ "are",
84
+ "area",
85
+ "areas",
86
+ "aren",
87
+ "aren't",
88
+ "arent",
89
+ "arise",
90
+ "around",
91
+ "arpa",
92
+ "as",
93
+ "aside",
94
+ "ask",
95
+ "asked",
96
+ "asking",
97
+ "asks",
98
+ "associated",
99
+ "at",
100
+ "au",
101
+ "auth",
102
+ "available",
103
+ "aw",
104
+ "away",
105
+ "awfully",
106
+ "az",
107
+ "b",
108
+ "ba",
109
+ "back",
110
+ "backed",
111
+ "backing",
112
+ "backs",
113
+ "backward",
114
+ "backwards",
115
+ "bb",
116
+ "bd",
117
+ "be",
118
+ "became",
119
+ "because",
120
+ "become",
121
+ "becomes",
122
+ "becoming",
123
+ "been",
124
+ "before",
125
+ "beforehand",
126
+ "began",
127
+ "begin",
128
+ "beginning",
129
+ "beginnings",
130
+ "begins",
131
+ "behind",
132
+ "being",
133
+ "beings",
134
+ "believe",
135
+ "below",
136
+ "beside",
137
+ "besides",
138
+ "best",
139
+ "better",
140
+ "between",
141
+ "beyond",
142
+ "bf",
143
+ "bg",
144
+ "bh",
145
+ "bi",
146
+ "big",
147
+ "bill",
148
+ "billion",
149
+ "biol",
150
+ "bj",
151
+ "bm",
152
+ "bn",
153
+ "bo",
154
+ "both",
155
+ "bottom",
156
+ "br",
157
+ "brief",
158
+ "briefly",
159
+ "bs",
160
+ "bt",
161
+ "but",
162
+ "buy",
163
+ "bv",
164
+ "bw",
165
+ "by",
166
+ "bz",
167
+ "c",
168
+ "c'mon",
169
+ "c's",
170
+ "ca",
171
+ "call",
172
+ "came",
173
+ "can",
174
+ "can't",
175
+ "cannot",
176
+ "cant",
177
+ "caption",
178
+ "case",
179
+ "cases",
180
+ "cause",
181
+ "causes",
182
+ "cc",
183
+ "cd",
184
+ "certain",
185
+ "certainly",
186
+ "cf",
187
+ "cg",
188
+ "ch",
189
+ "changes",
190
+ "ci",
191
+ "ck",
192
+ "cl",
193
+ "clear",
194
+ "clearly",
195
+ "click",
196
+ "cm",
197
+ "cmon",
198
+ "cn",
199
+ "co",
200
+ "co.",
201
+ "com",
202
+ "come",
203
+ "comes",
204
+ "computer",
205
+ "con",
206
+ "concerning",
207
+ "consequently",
208
+ "consider",
209
+ "considering",
210
+ "contain",
211
+ "containing",
212
+ "contains",
213
+ "copy",
214
+ "corresponding",
215
+ "could",
216
+ "could've",
217
+ "couldn",
218
+ "couldn't",
219
+ "couldnt",
220
+ "course",
221
+ "cr",
222
+ "cry",
223
+ "cs",
224
+ "cu",
225
+ "currently",
226
+ "cv",
227
+ "cx",
228
+ "cy",
229
+ "cz",
230
+ "d",
231
+ "dare",
232
+ "daren't",
233
+ "darent",
234
+ "date",
235
+ "de",
236
+ "dear",
237
+ "definitely",
238
+ "describe",
239
+ "described",
240
+ "despite",
241
+ "detail",
242
+ "did",
243
+ "didn",
244
+ "didn't",
245
+ "didnt",
246
+ "differ",
247
+ "different",
248
+ "differently",
249
+ "directly",
250
+ "dj",
251
+ "dk",
252
+ "dm",
253
+ "do",
254
+ "does",
255
+ "doesn",
256
+ "doesn't",
257
+ "doesnt",
258
+ "doing",
259
+ "don",
260
+ "don't",
261
+ "done",
262
+ "dont",
263
+ "doubtful",
264
+ "down",
265
+ "downed",
266
+ "downing",
267
+ "downs",
268
+ "downwards",
269
+ "due",
270
+ "during",
271
+ "dz",
272
+ "e",
273
+ "each",
274
+ "early",
275
+ "ec",
276
+ "ed",
277
+ "edu",
278
+ "ee",
279
+ "effect",
280
+ "eg",
281
+ "eh",
282
+ "eight",
283
+ "eighty",
284
+ "either",
285
+ "eleven",
286
+ "else",
287
+ "elsewhere",
288
+ "empty",
289
+ "end",
290
+ "ended",
291
+ "ending",
292
+ "ends",
293
+ "enough",
294
+ "entirely",
295
+ "er",
296
+ "es",
297
+ "especially",
298
+ "et",
299
+ "et-al",
300
+ "etc",
301
+ "even",
302
+ "evenly",
303
+ "ever",
304
+ "evermore",
305
+ "every",
306
+ "everybody",
307
+ "everyone",
308
+ "everything",
309
+ "everywhere",
310
+ "ex",
311
+ "exactly",
312
+ "example",
313
+ "except",
314
+ "f",
315
+ "face",
316
+ "faces",
317
+ "fact",
318
+ "facts",
319
+ "fairly",
320
+ "far",
321
+ "farther",
322
+ "felt",
323
+ "few",
324
+ "fewer",
325
+ "ff",
326
+ "fi",
327
+ "fifteen",
328
+ "fifth",
329
+ "fifty",
330
+ "fify",
331
+ "fill",
332
+ "find",
333
+ "finds",
334
+ "fire",
335
+ "first",
336
+ "five",
337
+ "fix",
338
+ "fj",
339
+ "fk",
340
+ "fm",
341
+ "fo",
342
+ "followed",
343
+ "following",
344
+ "follows",
345
+ "for",
346
+ "forever",
347
+ "former",
348
+ "formerly",
349
+ "forth",
350
+ "forty",
351
+ "forward",
352
+ "found",
353
+ "four",
354
+ "fr",
355
+ "free",
356
+ "from",
357
+ "front",
358
+ "full",
359
+ "fully",
360
+ "further",
361
+ "furthered",
362
+ "furthering",
363
+ "furthermore",
364
+ "furthers",
365
+ "fx",
366
+ "g",
367
+ "ga",
368
+ "gave",
369
+ "gb",
370
+ "gd",
371
+ "ge",
372
+ "general",
373
+ "generally",
374
+ "get",
375
+ "gets",
376
+ "getting",
377
+ "gf",
378
+ "gg",
379
+ "gh",
380
+ "gi",
381
+ "give",
382
+ "given",
383
+ "gives",
384
+ "giving",
385
+ "gl",
386
+ "gm",
387
+ "gmt",
388
+ "gn",
389
+ "go",
390
+ "goes",
391
+ "going",
392
+ "gone",
393
+ "good",
394
+ "goods",
395
+ "got",
396
+ "gotten",
397
+ "gov",
398
+ "gp",
399
+ "gq",
400
+ "gr",
401
+ "great",
402
+ "greater",
403
+ "greatest",
404
+ "greetings",
405
+ "group",
406
+ "grouped",
407
+ "grouping",
408
+ "groups",
409
+ "gs",
410
+ "gt",
411
+ "gu",
412
+ "gw",
413
+ "gy",
414
+ "h",
415
+ "had",
416
+ "hadn't",
417
+ "hadnt",
418
+ "half",
419
+ "happens",
420
+ "hardly",
421
+ "has",
422
+ "hasn",
423
+ "hasn't",
424
+ "hasnt",
425
+ "have",
426
+ "haven",
427
+ "haven't",
428
+ "havent",
429
+ "having",
430
+ "he",
431
+ "he'd",
432
+ "he'll",
433
+ "he's",
434
+ "hed",
435
+ "hell",
436
+ "hello",
437
+ "help",
438
+ "hence",
439
+ "her",
440
+ "here",
441
+ "here's",
442
+ "hereafter",
443
+ "hereby",
444
+ "herein",
445
+ "heres",
446
+ "hereupon",
447
+ "hers",
448
+ "herself",
449
+ "herse”",
450
+ "hes",
451
+ "hi",
452
+ "hid",
453
+ "high",
454
+ "higher",
455
+ "highest",
456
+ "him",
457
+ "himself",
458
+ "himse”",
459
+ "his",
460
+ "hither",
461
+ "hk",
462
+ "hm",
463
+ "hn",
464
+ "home",
465
+ "homepage",
466
+ "hopefully",
467
+ "how",
468
+ "how'd",
469
+ "how'll",
470
+ "how's",
471
+ "howbeit",
472
+ "however",
473
+ "hr",
474
+ "ht",
475
+ "htm",
476
+ "html",
477
+ "http",
478
+ "hu",
479
+ "hundred",
480
+ "i",
481
+ "i'd",
482
+ "i'll",
483
+ "i'm",
484
+ "i've",
485
+ "i.e.",
486
+ "id",
487
+ "ie",
488
+ "if",
489
+ "ignored",
490
+ "ii",
491
+ "il",
492
+ "ill",
493
+ "im",
494
+ "immediate",
495
+ "immediately",
496
+ "importance",
497
+ "important",
498
+ "in",
499
+ "inasmuch",
500
+ "inc",
501
+ "inc.",
502
+ "indeed",
503
+ "index",
504
+ "indicate",
505
+ "indicated",
506
+ "indicates",
507
+ "information",
508
+ "inner",
509
+ "inside",
510
+ "insofar",
511
+ "instead",
512
+ "int",
513
+ "interest",
514
+ "interested",
515
+ "interesting",
516
+ "interests",
517
+ "into",
518
+ "invention",
519
+ "inward",
520
+ "io",
521
+ "iq",
522
+ "ir",
523
+ "is",
524
+ "isn",
525
+ "isn't",
526
+ "isnt",
527
+ "it",
528
+ "it'd",
529
+ "it'll",
530
+ "it's",
531
+ "itd",
532
+ "itll",
533
+ "its",
534
+ "itself",
535
+ "itse”",
536
+ "ive",
537
+ "j",
538
+ "je",
539
+ "jm",
540
+ "jo",
541
+ "join",
542
+ "jp",
543
+ "just",
544
+ "k",
545
+ "ke",
546
+ "keep",
547
+ "keeps",
548
+ "kept",
549
+ "keys",
550
+ "kg",
551
+ "kh",
552
+ "ki",
553
+ "kind",
554
+ "km",
555
+ "kn",
556
+ "knew",
557
+ "know",
558
+ "known",
559
+ "knows",
560
+ "kp",
561
+ "kr",
562
+ "kw",
563
+ "ky",
564
+ "kz",
565
+ "l",
566
+ "la",
567
+ "large",
568
+ "largely",
569
+ "last",
570
+ "lately",
571
+ "later",
572
+ "latest",
573
+ "latter",
574
+ "latterly",
575
+ "lb",
576
+ "lc",
577
+ "least",
578
+ "length",
579
+ "less",
580
+ "lest",
581
+ "let",
582
+ "let's",
583
+ "lets",
584
+ "li",
585
+ "like",
586
+ "liked",
587
+ "likely",
588
+ "likewise",
589
+ "line",
590
+ "little",
591
+ "lk",
592
+ "ll",
593
+ "long",
594
+ "longer",
595
+ "longest",
596
+ "look",
597
+ "looking",
598
+ "looks",
599
+ "low",
600
+ "lower",
601
+ "lr",
602
+ "ls",
603
+ "lt",
604
+ "ltd",
605
+ "lu",
606
+ "lv",
607
+ "ly",
608
+ "m",
609
+ "ma",
610
+ "made",
611
+ "mainly",
612
+ "make",
613
+ "makes",
614
+ "making",
615
+ "man",
616
+ "many",
617
+ "may",
618
+ "maybe",
619
+ "mayn't",
620
+ "maynt",
621
+ "mc",
622
+ "md",
623
+ "me",
624
+ "mean",
625
+ "means",
626
+ "meantime",
627
+ "meanwhile",
628
+ "member",
629
+ "members",
630
+ "men",
631
+ "merely",
632
+ "mg",
633
+ "mh",
634
+ "microsoft",
635
+ "might",
636
+ "might've",
637
+ "mightn't",
638
+ "mightnt",
639
+ "mil",
640
+ "mill",
641
+ "million",
642
+ "mine",
643
+ "minus",
644
+ "miss",
645
+ "mk",
646
+ "ml",
647
+ "mm",
648
+ "mn",
649
+ "mo",
650
+ "more",
651
+ "moreover",
652
+ "most",
653
+ "mostly",
654
+ "move",
655
+ "mp",
656
+ "mq",
657
+ "mr",
658
+ "mrs",
659
+ "ms",
660
+ "msie",
661
+ "mt",
662
+ "mu",
663
+ "much",
664
+ "mug",
665
+ "must",
666
+ "must've",
667
+ "mustn't",
668
+ "mustnt",
669
+ "mv",
670
+ "mw",
671
+ "mx",
672
+ "my",
673
+ "myself",
674
+ "myse”",
675
+ "mz",
676
+ "n",
677
+ "na",
678
+ "name",
679
+ "namely",
680
+ "nay",
681
+ "nc",
682
+ "nd",
683
+ "ne",
684
+ "near",
685
+ "nearly",
686
+ "necessarily",
687
+ "necessary",
688
+ "need",
689
+ "needed",
690
+ "needing",
691
+ "needn't",
692
+ "neednt",
693
+ "needs",
694
+ "neither",
695
+ "net",
696
+ "netscape",
697
+ "never",
698
+ "neverf",
699
+ "neverless",
700
+ "nevertheless",
701
+ "new",
702
+ "newer",
703
+ "newest",
704
+ "next",
705
+ "nf",
706
+ "ng",
707
+ "ni",
708
+ "nine",
709
+ "ninety",
710
+ "nl",
711
+ "no",
712
+ "no-one",
713
+ "nobody",
714
+ "non",
715
+ "none",
716
+ "nonetheless",
717
+ "noone",
718
+ "nor",
719
+ "normally",
720
+ "nos",
721
+ "not",
722
+ "noted",
723
+ "nothing",
724
+ "notwithstanding",
725
+ "novel",
726
+ "now",
727
+ "nowhere",
728
+ "np",
729
+ "nr",
730
+ "nu",
731
+ "null",
732
+ "number",
733
+ "numbers",
734
+ "nz",
735
+ "o",
736
+ "obtain",
737
+ "obtained",
738
+ "obviously",
739
+ "of",
740
+ "off",
741
+ "often",
742
+ "oh",
743
+ "ok",
744
+ "okay",
745
+ "old",
746
+ "older",
747
+ "oldest",
748
+ "om",
749
+ "omitted",
750
+ "on",
751
+ "once",
752
+ "one",
753
+ "one's",
754
+ "ones",
755
+ "only",
756
+ "onto",
757
+ "open",
758
+ "opened",
759
+ "opening",
760
+ "opens",
761
+ "opposite",
762
+ "or",
763
+ "ord",
764
+ "order",
765
+ "ordered",
766
+ "ordering",
767
+ "orders",
768
+ "org",
769
+ "other",
770
+ "others",
771
+ "otherwise",
772
+ "ought",
773
+ "oughtn't",
774
+ "oughtnt",
775
+ "our",
776
+ "ours",
777
+ "ourselves",
778
+ "out",
779
+ "outside",
780
+ "over",
781
+ "overall",
782
+ "owing",
783
+ "own",
784
+ "p",
785
+ "pa",
786
+ "page",
787
+ "pages",
788
+ "part",
789
+ "parted",
790
+ "particular",
791
+ "particularly",
792
+ "parting",
793
+ "parts",
794
+ "past",
795
+ "pe",
796
+ "per",
797
+ "perhaps",
798
+ "pf",
799
+ "pg",
800
+ "ph",
801
+ "pk",
802
+ "pl",
803
+ "place",
804
+ "placed",
805
+ "places",
806
+ "please",
807
+ "plus",
808
+ "pm",
809
+ "pmid",
810
+ "pn",
811
+ "point",
812
+ "pointed",
813
+ "pointing",
814
+ "points",
815
+ "poorly",
816
+ "possible",
817
+ "possibly",
818
+ "potentially",
819
+ "pp",
820
+ "pr",
821
+ "predominantly",
822
+ "present",
823
+ "presented",
824
+ "presenting",
825
+ "presents",
826
+ "presumably",
827
+ "previously",
828
+ "primarily",
829
+ "probably",
830
+ "problem",
831
+ "problems",
832
+ "promptly",
833
+ "proud",
834
+ "provided",
835
+ "provides",
836
+ "pt",
837
+ "put",
838
+ "puts",
839
+ "pw",
840
+ "py",
841
+ "q",
842
+ "qa",
843
+ "que",
844
+ "quickly",
845
+ "quite",
846
+ "qv",
847
+ "r",
848
+ "ran",
849
+ "rather",
850
+ "rd",
851
+ "re",
852
+ "readily",
853
+ "really",
854
+ "reasonably",
855
+ "recent",
856
+ "recently",
857
+ "ref",
858
+ "refs",
859
+ "regarding",
860
+ "regardless",
861
+ "regards",
862
+ "related",
863
+ "relatively",
864
+ "research",
865
+ "reserved",
866
+ "respectively",
867
+ "resulted",
868
+ "resulting",
869
+ "results",
870
+ "right",
871
+ "ring",
872
+ "ro",
873
+ "room",
874
+ "rooms",
875
+ "round",
876
+ "ru",
877
+ "run",
878
+ "rw",
879
+ "s",
880
+ "sa",
881
+ "said",
882
+ "same",
883
+ "saw",
884
+ "say",
885
+ "saying",
886
+ "says",
887
+ "sb",
888
+ "sc",
889
+ "sd",
890
+ "se",
891
+ "sec",
892
+ "second",
893
+ "secondly",
894
+ "seconds",
895
+ "section",
896
+ "see",
897
+ "seeing",
898
+ "seem",
899
+ "seemed",
900
+ "seeming",
901
+ "seems",
902
+ "seen",
903
+ "sees",
904
+ "self",
905
+ "selves",
906
+ "sensible",
907
+ "sent",
908
+ "serious",
909
+ "seriously",
910
+ "seven",
911
+ "seventy",
912
+ "several",
913
+ "sg",
914
+ "sh",
915
+ "shall",
916
+ "shan't",
917
+ "shant",
918
+ "she",
919
+ "she'd",
920
+ "she'll",
921
+ "she's",
922
+ "shed",
923
+ "shell",
924
+ "shes",
925
+ "should",
926
+ "should've",
927
+ "shouldn",
928
+ "shouldn't",
929
+ "shouldnt",
930
+ "show",
931
+ "showed",
932
+ "showing",
933
+ "shown",
934
+ "showns",
935
+ "shows",
936
+ "si",
937
+ "side",
938
+ "sides",
939
+ "significant",
940
+ "significantly",
941
+ "similar",
942
+ "similarly",
943
+ "since",
944
+ "sincere",
945
+ "site",
946
+ "six",
947
+ "sixty",
948
+ "sj",
949
+ "sk",
950
+ "sl",
951
+ "slightly",
952
+ "sm",
953
+ "small",
954
+ "smaller",
955
+ "smallest",
956
+ "sn",
957
+ "so",
958
+ "some",
959
+ "somebody",
960
+ "someday",
961
+ "somehow",
962
+ "someone",
963
+ "somethan",
964
+ "something",
965
+ "sometime",
966
+ "sometimes",
967
+ "somewhat",
968
+ "somewhere",
969
+ "soon",
970
+ "sorry",
971
+ "specifically",
972
+ "specified",
973
+ "specify",
974
+ "specifying",
975
+ "sr",
976
+ "st",
977
+ "state",
978
+ "states",
979
+ "still",
980
+ "stop",
981
+ "strongly",
982
+ "su",
983
+ "sub",
984
+ "substantially",
985
+ "successfully",
986
+ "such",
987
+ "sufficiently",
988
+ "suggest",
989
+ "sup",
990
+ "sure",
991
+ "sv",
992
+ "sy",
993
+ "system",
994
+ "sz",
995
+ "t",
996
+ "t's",
997
+ "take",
998
+ "taken",
999
+ "taking",
1000
+ "tc",
1001
+ "td",
1002
+ "tell",
1003
+ "ten",
1004
+ "tends",
1005
+ "test",
1006
+ "text",
1007
+ "tf",
1008
+ "tg",
1009
+ "th",
1010
+ "than",
1011
+ "thank",
1012
+ "thanks",
1013
+ "thanx",
1014
+ "that",
1015
+ "that'll",
1016
+ "that's",
1017
+ "that've",
1018
+ "thatll",
1019
+ "thats",
1020
+ "thatve",
1021
+ "the",
1022
+ "their",
1023
+ "theirs",
1024
+ "them",
1025
+ "themselves",
1026
+ "then",
1027
+ "thence",
1028
+ "there",
1029
+ "there'd",
1030
+ "there'll",
1031
+ "there're",
1032
+ "there's",
1033
+ "there've",
1034
+ "thereafter",
1035
+ "thereby",
1036
+ "thered",
1037
+ "therefore",
1038
+ "therein",
1039
+ "therell",
1040
+ "thereof",
1041
+ "therere",
1042
+ "theres",
1043
+ "thereto",
1044
+ "thereupon",
1045
+ "thereve",
1046
+ "these",
1047
+ "they",
1048
+ "they'd",
1049
+ "they'll",
1050
+ "they're",
1051
+ "they've",
1052
+ "theyd",
1053
+ "theyll",
1054
+ "theyre",
1055
+ "theyve",
1056
+ "thick",
1057
+ "thin",
1058
+ "thing",
1059
+ "things",
1060
+ "think",
1061
+ "thinks",
1062
+ "third",
1063
+ "thirty",
1064
+ "this",
1065
+ "thorough",
1066
+ "thoroughly",
1067
+ "those",
1068
+ "thou",
1069
+ "though",
1070
+ "thoughh",
1071
+ "thought",
1072
+ "thoughts",
1073
+ "thousand",
1074
+ "three",
1075
+ "throug",
1076
+ "through",
1077
+ "throughout",
1078
+ "thru",
1079
+ "thus",
1080
+ "til",
1081
+ "till",
1082
+ "tip",
1083
+ "tis",
1084
+ "tj",
1085
+ "tk",
1086
+ "tm",
1087
+ "tn",
1088
+ "to",
1089
+ "today",
1090
+ "together",
1091
+ "too",
1092
+ "took",
1093
+ "top",
1094
+ "toward",
1095
+ "towards",
1096
+ "tp",
1097
+ "tr",
1098
+ "tried",
1099
+ "tries",
1100
+ "trillion",
1101
+ "truly",
1102
+ "try",
1103
+ "trying",
1104
+ "ts",
1105
+ "tt",
1106
+ "turn",
1107
+ "turned",
1108
+ "turning",
1109
+ "turns",
1110
+ "tv",
1111
+ "tw",
1112
+ "twas",
1113
+ "twelve",
1114
+ "twenty",
1115
+ "twice",
1116
+ "two",
1117
+ "tz",
1118
+ "u",
1119
+ "ua",
1120
+ "ug",
1121
+ "uk",
1122
+ "um",
1123
+ "un",
1124
+ "under",
1125
+ "underneath",
1126
+ "undoing",
1127
+ "unfortunately",
1128
+ "unless",
1129
+ "unlike",
1130
+ "unlikely",
1131
+ "until",
1132
+ "unto",
1133
+ "up",
1134
+ "upon",
1135
+ "ups",
1136
+ "upwards",
1137
+ "us",
1138
+ "use",
1139
+ "used",
1140
+ "useful",
1141
+ "usefully",
1142
+ "usefulness",
1143
+ "uses",
1144
+ "using",
1145
+ "usually",
1146
+ "uucp",
1147
+ "uy",
1148
+ "uz",
1149
+ "v",
1150
+ "va",
1151
+ "value",
1152
+ "various",
1153
+ "vc",
1154
+ "ve",
1155
+ "versus",
1156
+ "very",
1157
+ "vg",
1158
+ "vi",
1159
+ "via",
1160
+ "viz",
1161
+ "vn",
1162
+ "vol",
1163
+ "vols",
1164
+ "vs",
1165
+ "vu",
1166
+ "w",
1167
+ "want",
1168
+ "wanted",
1169
+ "wanting",
1170
+ "wants",
1171
+ "was",
1172
+ "wasn",
1173
+ "wasn't",
1174
+ "wasnt",
1175
+ "way",
1176
+ "ways",
1177
+ "we",
1178
+ "we'd",
1179
+ "we'll",
1180
+ "we're",
1181
+ "we've",
1182
+ "web",
1183
+ "webpage",
1184
+ "website",
1185
+ "wed",
1186
+ "welcome",
1187
+ "well",
1188
+ "wells",
1189
+ "went",
1190
+ "were",
1191
+ "weren",
1192
+ "weren't",
1193
+ "werent",
1194
+ "weve",
1195
+ "wf",
1196
+ "what",
1197
+ "what'd",
1198
+ "what'll",
1199
+ "what's",
1200
+ "what've",
1201
+ "whatever",
1202
+ "whatll",
1203
+ "whats",
1204
+ "whatve",
1205
+ "when",
1206
+ "when'd",
1207
+ "when'll",
1208
+ "when's",
1209
+ "whence",
1210
+ "whenever",
1211
+ "where",
1212
+ "where'd",
1213
+ "where'll",
1214
+ "where's",
1215
+ "whereafter",
1216
+ "whereas",
1217
+ "whereby",
1218
+ "wherein",
1219
+ "wheres",
1220
+ "whereupon",
1221
+ "wherever",
1222
+ "whether",
1223
+ "which",
1224
+ "whichever",
1225
+ "while",
1226
+ "whilst",
1227
+ "whim",
1228
+ "whither",
1229
+ "who",
1230
+ "who'd",
1231
+ "who'll",
1232
+ "who's",
1233
+ "whod",
1234
+ "whoever",
1235
+ "whole",
1236
+ "wholl",
1237
+ "whom",
1238
+ "whomever",
1239
+ "whos",
1240
+ "whose",
1241
+ "why",
1242
+ "why'd",
1243
+ "why'll",
1244
+ "why's",
1245
+ "widely",
1246
+ "width",
1247
+ "will",
1248
+ "willing",
1249
+ "wish",
1250
+ "with",
1251
+ "within",
1252
+ "without",
1253
+ "won",
1254
+ "won't",
1255
+ "wonder",
1256
+ "wont",
1257
+ "words",
1258
+ "work",
1259
+ "worked",
1260
+ "working",
1261
+ "works",
1262
+ "world",
1263
+ "would",
1264
+ "would've",
1265
+ "wouldn",
1266
+ "wouldn't",
1267
+ "wouldnt",
1268
+ "ws",
1269
+ "www",
1270
+ "x",
1271
+ "y",
1272
+ "ye",
1273
+ "year",
1274
+ "years",
1275
+ "yes",
1276
+ "yet",
1277
+ "you",
1278
+ "you'd",
1279
+ "you'll",
1280
+ "you're",
1281
+ "you've",
1282
+ "youd",
1283
+ "youll",
1284
+ "young",
1285
+ "younger",
1286
+ "youngest",
1287
+ "your",
1288
+ "youre",
1289
+ "yours",
1290
+ "yourself",
1291
+ "yourselves",
1292
+ "youve",
1293
+ "yt",
1294
+ "yu",
1295
+ "z",
1296
+ "za",
1297
+ "zero",
1298
+ "zm",
1299
+ "zr"
1300
+ ]