@kreuzberg/wasm 4.0.0-rc.21 → 4.0.0-rc.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,3 +1,29 @@
// Short aliases for the two Object reflection built-ins used by the module helpers
// (__esm, __export) that follow in this bundle.
1
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropNames = Object.getOwnPropertyNames;
// __esm(fn, res): builds a run-once initializer around `fn`, an object whose first own
// property is the module's init function.
// Returns a function __init() that, while `fn` is still truthy, looks up that first own
// property, sets `fn` to 0 (the assignment is the call argument, so it happens after the
// callee lookup and before the call), invokes the init function with that 0, and stores
// the return value in `res`. Every call returns `res`; once `fn` is 0 the init function
// is skipped and the cached `res` is returned.
3
+ var __esm = (fn, res) => function __init() {
// The `(0, callee)(...)` form invokes the init function without `fn` as its receiver.
4
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
5
+ };
// __export(target, all): for each key enumerated from `all`, defines an enumerable,
// getter-only property of the same name on `target`, using all[name] as the getter.
// Because the properties are getters, the exported value is read at access time.
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+
11
+ // typescript/pdfium.js
// Export object for the inlined typescript/pdfium.js module. Its only export is `default`,
// a getter that returns the initPdfium function declared right after this statement.
12
+ var pdfium_exports = {};
13
+ __export(pdfium_exports, {
14
+ default: () => initPdfium
15
+ });
// initPdfium(): async function that resolves to an object literal with no properties.
// NOTE(review): the in-code comment labels this a dummy implementation for testing, yet
// the initializePdfiumAsync hunk later in this diff replaces the dynamic
// import("./pdfium.js") with this inlined module and hands the resolved value to
// wasmModule.initialize_pdfium_render(...). Confirm that bundling the stub into dist is
// intended and not a build-configuration slip.
16
+ async function initPdfium() {
17
+ return {
18
+ // Dummy implementation for testing
19
+ };
20
+ }
// init_pdfium(): run-once initializer for the inlined pdfium module, built with __esm.
// The wrapped module body holds nothing but a "use strict" directive, so running it
// executes no other statements.
21
+ var init_pdfium = __esm({
22
+ "typescript/pdfium.js"() {
23
+ "use strict";
24
+ }
25
+ });
26
+
1
27
  // typescript/adapters/wasm-adapter.ts
2
28
  var MAX_FILE_SIZE = 512 * 1024 * 1024;
3
29
  function isNumberOrNull(value) {
@@ -312,93 +338,48 @@ var TesseractWasmBackend = class {
312
338
  return this.supportedLangsCache;
313
339
  }
314
340
  this.supportedLangsCache = [
315
- // Major languages
316
341
  "eng",
317
- // English
318
342
  "deu",
319
- // German
320
343
  "fra",
321
- // French
322
344
  "spa",
323
- // Spanish
324
345
  "ita",
325
- // Italian
326
346
  "por",
327
- // Portuguese
328
347
  "nld",
329
- // Dutch
330
348
  "rus",
331
- // Russian
332
349
  "jpn",
333
- // Japanese
334
350
  "kor",
335
- // Korean
336
351
  "chi_sim",
337
- // Chinese (Simplified)
338
352
  "chi_tra",
339
- // Chinese (Traditional)
340
- // Additional European languages
341
353
  "pol",
342
- // Polish
343
354
  "tur",
344
- // Turkish
345
355
  "swe",
346
- // Swedish
347
356
  "dan",
348
- // Danish
349
357
  "fin",
350
- // Finnish
351
358
  "nor",
352
- // Norwegian
353
359
  "ces",
354
- // Czech
355
360
  "slk",
356
- // Slovak
357
361
  "ron",
358
- // Romanian
359
362
  "hun",
360
- // Hungarian
361
363
  "hrv",
362
- // Croatian
363
364
  "srp",
364
- // Serbian
365
365
  "bul",
366
- // Bulgarian
367
366
  "ukr",
368
- // Ukrainian
369
367
  "ell",
370
- // Greek
371
- // Asian languages
372
368
  "ara",
373
- // Arabic
374
369
  "heb",
375
- // Hebrew
376
370
  "hin",
377
- // Hindi
378
371
  "tha",
379
- // Thai
380
372
  "vie",
381
- // Vietnamese
382
373
  "mkd",
383
- // Macedonian
384
374
  "ben",
385
- // Bengali
386
375
  "tam",
387
- // Tamil
388
376
  "tel",
389
- // Telugu
390
377
  "kan",
391
- // Kannada
392
378
  "mal",
393
- // Malayalam
394
379
  "mya",
395
- // Burmese
396
380
  "khm",
397
- // Khmer
398
381
  "lao",
399
- // Lao
400
382
  "sin"
401
- // Sinhala
402
383
  ];
403
384
  return this.supportedLangsCache;
404
385
  }
@@ -500,7 +481,6 @@ var TesseractWasmBackend = class {
500
481
  ...pageMetadata
501
482
  },
502
483
  tables: []
503
- // Tesseract-wasm doesn't provide structured table detection
504
484
  };
505
485
  } catch (error) {
506
486
  const message = error instanceof Error ? error.message : String(error);
@@ -786,7 +766,6 @@ function getRuntimeVersion() {
786
766
  switch (runtime) {
787
767
  case "node":
788
768
  return process.version?.substring(1);
789
- // Remove 'v' prefix
790
769
  case "deno": {
791
770
  const deno = globalThis.Deno;
792
771
  const version = deno?.version;
@@ -834,6 +813,168 @@ function getRuntimeInfo() {
834
813
  };
835
814
  }
836
815
 
816
+ // typescript/plugin-registry.ts
// Module-level plugin registries, both starting as empty Maps. The register* functions in
// this section store each plugin under the string returned by its name() method.
817
+ var postProcessors = /* @__PURE__ */ new Map();
818
+ var validators = /* @__PURE__ */ new Map();
// validatePostProcessor(processor): shape check for a post-processor.
// Throws Error when `processor` is null or undefined, when its `name` or `process` member
// is not a function, or when name() returns something other than a string containing at
// least one non-whitespace character. Returns true when every check passes.
// Note: name() is invoked as part of validation.
819
+ function validatePostProcessor(processor) {
820
+ if (processor === null || processor === void 0) {
821
+ throw new Error("Post-processor cannot be null or undefined");
822
+ }
823
+ const obj = processor;
824
+ if (typeof obj.name !== "function") {
825
+ throw new Error("Post-processor must implement name() method");
826
+ }
827
+ if (typeof obj.process !== "function") {
828
+ throw new Error("Post-processor must implement process() method");
829
+ }
830
+ const name = obj.name();
// Whitespace-only names are rejected along with the empty string.
831
+ if (typeof name !== "string" || name.trim() === "") {
832
+ throw new Error("Post-processor name must be a non-empty string");
833
+ }
834
+ return true;
835
+ }
// registerPostProcessor(processor): validates `processor` and stores it in the
// postProcessors map under the string returned by processor.name().
// Throws whatever validatePostProcessor throws for an invalid processor.
// If the name is already present, a console.warn is emitted and the entry is overwritten;
// the replaced processor's shutdown() is not called here.
// Note: name() runs a second time here, after the call made during validation.
836
+ function registerPostProcessor(processor) {
837
+ validatePostProcessor(processor);
838
+ const name = processor.name();
839
+ if (postProcessors.has(name)) {
840
+ console.warn(`Post-processor "${name}" already registered, overwriting with new implementation`);
841
+ }
842
+ postProcessors.set(name, processor);
843
+ }
// getPostProcessor(name): returns the post-processor stored under `name`, or undefined
// when no entry exists.
844
+ function getPostProcessor(name) {
845
+ return postProcessors.get(name);
846
+ }
// listPostProcessors(): returns a new array holding the names (map keys) of all
// registered post-processors.
847
+ function listPostProcessors() {
848
+ return Array.from(postProcessors.keys());
849
+ }
// unregisterPostProcessor(name): async removal of a registered post-processor.
// Rejects with Error when `name` is not registered; the message lists the currently
// registered names when there are any.
// If the processor has a truthy `shutdown` member it is called and awaited. Anything
// thrown during shutdown is logged with console.warn and not rethrown, so the entry is
// deleted from the map either way.
850
+ async function unregisterPostProcessor(name) {
851
+ const processor = postProcessors.get(name);
852
+ if (!processor) {
853
+ const available = Array.from(postProcessors.keys());
854
+ const availableStr = available.length > 0 ? ` Available: ${available.join(", ")}` : "";
855
+ throw new Error(`Post-processor "${name}" is not registered.${availableStr}`);
856
+ }
857
+ try {
858
+ if (processor.shutdown) {
859
+ await processor.shutdown();
860
+ }
861
+ } catch (error) {
862
+ console.warn(`Error during shutdown of post-processor "${name}":`, error);
863
+ }
864
+ postProcessors.delete(name);
865
+ }
// clearPostProcessors(): async; shuts down and removes every registered post-processor.
// Takes a snapshot of the current entries, then for each one calls and awaits shutdown()
// when that member is truthy, one processor at a time. Shutdown errors are logged with
// console.warn and do not stop the loop. The map is emptied afterwards.
// NOTE(review): the final clear() empties the live map, so an entry registered while a
// shutdown was being awaited is removed without its own shutdown() being called.
866
+ async function clearPostProcessors() {
867
+ const entries = Array.from(postProcessors.entries());
// `_name` is read in the warning below despite its underscore prefix.
868
+ for (const [_name, processor] of entries) {
869
+ try {
870
+ if (processor.shutdown) {
871
+ await processor.shutdown();
872
+ }
873
+ } catch (error) {
874
+ console.warn(`Error during shutdown of post-processor "${_name}":`, error);
875
+ }
876
+ }
877
+ postProcessors.clear();
878
+ }
// validateValidator(validator): shape check for a validator plugin.
// Throws Error when `validator` is null or undefined, when its `name` or `validate`
// member is not a function, or when name() returns something other than a string
// containing at least one non-whitespace character. Returns true when every check passes.
// Note: name() is invoked as part of validation.
879
+ function validateValidator(validator) {
880
+ if (validator === null || validator === void 0) {
881
+ throw new Error("Validator cannot be null or undefined");
882
+ }
883
+ const obj = validator;
884
+ if (typeof obj.name !== "function") {
885
+ throw new Error("Validator must implement name() method");
886
+ }
887
+ if (typeof obj.validate !== "function") {
888
+ throw new Error("Validator must implement validate() method");
889
+ }
890
+ const name = obj.name();
// Whitespace-only names are rejected along with the empty string.
891
+ if (typeof name !== "string" || name.trim() === "") {
892
+ throw new Error("Validator name must be a non-empty string");
893
+ }
894
+ return true;
895
+ }
// registerValidator(validator): validates `validator` and stores it in the validators
// map under the string returned by validator.name().
// Throws whatever validateValidator throws for an invalid validator.
// If the name is already present, a console.warn is emitted and the entry is overwritten;
// the replaced validator's shutdown() is not called here.
// Note: name() runs a second time here, after the call made during validation.
896
+ function registerValidator(validator) {
897
+ validateValidator(validator);
898
+ const name = validator.name();
899
+ if (validators.has(name)) {
900
+ console.warn(`Validator "${name}" already registered, overwriting with new implementation`);
901
+ }
902
+ validators.set(name, validator);
903
+ }
// getValidator(name): returns the validator stored under `name`, or undefined when no
// entry exists.
904
+ function getValidator(name) {
905
+ return validators.get(name);
906
+ }
// listValidators(): returns a new array holding the names (map keys) of all registered
// validators.
907
+ function listValidators() {
908
+ return Array.from(validators.keys());
909
+ }
// unregisterValidator(name): async removal of a registered validator.
// Rejects with Error when `name` is not registered; the message lists the currently
// registered names when there are any.
// If the validator has a truthy `shutdown` member it is called and awaited. Anything
// thrown during shutdown is logged with console.warn and not rethrown, so the entry is
// deleted from the map either way.
910
+ async function unregisterValidator(name) {
911
+ const validator = validators.get(name);
912
+ if (!validator) {
913
+ const available = Array.from(validators.keys());
914
+ const availableStr = available.length > 0 ? ` Available: ${available.join(", ")}` : "";
915
+ throw new Error(`Validator "${name}" is not registered.${availableStr}`);
916
+ }
917
+ try {
918
+ if (validator.shutdown) {
919
+ await validator.shutdown();
920
+ }
921
+ } catch (error) {
922
+ console.warn(`Error during shutdown of validator "${name}":`, error);
923
+ }
924
+ validators.delete(name);
925
+ }
// clearValidators(): async; shuts down and removes every registered validator.
// Takes a snapshot of the current entries, then for each one calls and awaits shutdown()
// when that member is truthy, one validator at a time. Shutdown errors are logged with
// console.warn and do not stop the loop. The map is emptied afterwards.
// NOTE(review): the final clear() empties the live map, so an entry registered while a
// shutdown was being awaited is removed without its own shutdown() being called.
926
+ async function clearValidators() {
927
+ const entries = Array.from(validators.entries());
// `_name` is read in the warning below despite its underscore prefix.
928
+ for (const [_name, validator] of entries) {
929
+ try {
930
+ if (validator.shutdown) {
931
+ await validator.shutdown();
932
+ }
933
+ } catch (error) {
934
+ console.warn(`Error during shutdown of validator "${_name}":`, error);
935
+ }
936
+ }
937
+ validators.clear();
938
+ }
// executePostProcessor(name, result): runs the post-processor registered under `name`
// on `result` and always returns a Promise.
// - Unknown name: a Promise rejected with Error.
// - process() returns a Promise instance: that same Promise is returned as-is.
// - process() returns any other value: it is wrapped with Promise.resolve.
// - process() throws synchronously: a Promise rejected with a new Error whose message
//   embeds String(error).
// NOTE(review): only synchronous throws get the "Error executing post-processor" prefix;
// a rejection of the Promise returned by process() propagates unchanged. The wrapping
// Error keeps only the stringified form of the original, not the error object itself.
939
+ function executePostProcessor(name, result) {
940
+ const processor = postProcessors.get(name);
941
+ if (!processor) {
942
+ return Promise.reject(new Error(`Post-processor "${name}" is not registered`));
943
+ }
944
+ try {
945
+ const output = processor.process(result);
946
+ if (output instanceof Promise) {
947
+ return output;
948
+ }
949
+ return Promise.resolve(output);
950
+ } catch (error) {
951
+ return Promise.reject(new Error(`Error executing post-processor "${name}": ${String(error)}`));
952
+ }
953
+ }
// executeValidator(name, result): runs the validator registered under `name` on `result`
// and always returns a Promise.
// - Unknown name: a Promise rejected with Error.
// - validate() returns a Promise instance: that same Promise is returned as-is.
// - validate() returns any other value: it is wrapped with Promise.resolve.
// - validate() throws synchronously: a Promise rejected with a new Error whose message
//   embeds String(error).
// NOTE(review): only synchronous throws get the "Error executing validator" prefix; a
// rejection of the Promise returned by validate() propagates unchanged. The wrapping
// Error keeps only the stringified form of the original, not the error object itself.
954
+ function executeValidator(name, result) {
955
+ const validator = validators.get(name);
956
+ if (!validator) {
957
+ return Promise.reject(new Error(`Validator "${name}" is not registered`));
958
+ }
959
+ try {
960
+ const output = validator.validate(result);
961
+ if (output instanceof Promise) {
962
+ return output;
963
+ }
964
+ return Promise.resolve(output);
965
+ } catch (error) {
966
+ return Promise.reject(new Error(`Error executing validator "${name}": ${String(error)}`));
967
+ }
968
+ }
// setupGlobalCallbacks(): when globalThis is defined, assigns executePostProcessor and
// executeValidator to globalThis.__kreuzberg_execute_post_processor and
// globalThis.__kreuzberg_execute_validator respectively.
// NOTE(review): the consumer of these globals is not visible in this diff — presumably
// the WASM side calls back through them; confirm before renaming or removing them.
969
+ function setupGlobalCallbacks() {
970
+ if (typeof globalThis !== "undefined") {
971
+ const callbacksObj = globalThis;
972
+ callbacksObj.__kreuzberg_execute_post_processor = executePostProcessor;
973
+ callbacksObj.__kreuzberg_execute_validator = executeValidator;
974
+ }
975
+ }
// Invoked at module evaluation, so loading this bundle installs the two globals as a
// side effect.
976
+ setupGlobalCallbacks();
977
+
837
978
  // typescript/index.ts
838
979
  var wasm = null;
839
980
  var initialized = false;
@@ -848,7 +989,7 @@ async function initializePdfiumAsync(wasmModule) {
848
989
  return;
849
990
  }
850
991
  try {
851
- const pdfiumModule = await import("./pdfium.js");
992
+ const pdfiumModule = await Promise.resolve().then(() => (init_pdfium(), pdfium_exports));
852
993
  const pdfium = typeof pdfiumModule.default === "function" ? await pdfiumModule.default() : pdfiumModule;
853
994
  const success = wasmModule.initialize_pdfium_render(pdfium, wasmModule, false);
854
995
  if (!success) {
@@ -1172,6 +1313,8 @@ export {
1172
1313
  batchExtractBytesSync,
1173
1314
  batchExtractFiles,
1174
1315
  clearOcrBackends,
1316
+ clearPostProcessors,
1317
+ clearValidators,
1175
1318
  configToJS,
1176
1319
  detectRuntime,
1177
1320
  enableOcr,
@@ -1182,8 +1325,10 @@ export {
1182
1325
  fileToUint8Array,
1183
1326
  getInitializationError,
1184
1327
  getOcrBackend,
1328
+ getPostProcessor,
1185
1329
  getRuntimeInfo,
1186
1330
  getRuntimeVersion,
1331
+ getValidator,
1187
1332
  getVersion,
1188
1333
  getWasmCapabilities,
1189
1334
  hasBigInt,
@@ -1205,8 +1350,14 @@ export {
1205
1350
  isWebEnvironment,
1206
1351
  jsToExtractionResult,
1207
1352
  listOcrBackends,
1353
+ listPostProcessors,
1354
+ listValidators,
1208
1355
  registerOcrBackend,
1356
+ registerPostProcessor,
1357
+ registerValidator,
1209
1358
  unregisterOcrBackend,
1359
+ unregisterPostProcessor,
1360
+ unregisterValidator,
1210
1361
  wrapWasmError
1211
1362
  };
1212
1363
  //# sourceMappingURL=index.js.map