hazo_pdf 1.7.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/SETUP_CHECKLIST.md +693 -0
  2. package/config/hazo_pdf_config.ini.sample +42 -0
  3. package/db_setup_postgres.sql +17 -0
  4. package/db_setup_sqlite.sql +13 -0
  5. package/dist/{chunk-NQ6KUJWG.js → chunk-7M53O3HF.js} +14 -4
  6. package/dist/chunk-7M53O3HF.js.map +1 -0
  7. package/dist/{chunk-4JJOUQ62.js → chunk-KDOQ3FIO.js} +176 -87
  8. package/dist/chunk-KDOQ3FIO.js.map +1 -0
  9. package/dist/{chunk-KHB3VZJQ.js → chunk-LFFCPDWC.js} +14 -3
  10. package/dist/chunk-LFFCPDWC.js.map +1 -0
  11. package/dist/{chunk-264BTVJT.js → chunk-TZJ5S57X.js} +18 -31
  12. package/dist/chunk-TZJ5S57X.js.map +1 -0
  13. package/dist/index.d.ts +9 -5
  14. package/dist/index.js +35 -16
  15. package/dist/index.js.map +1 -1
  16. package/dist/{pdf_saver-7FA4DAXI.js → pdf_saver-T6SEDYEE.js} +3 -3
  17. package/dist/{pdf_viewer-B6S5PJJB.js → pdf_viewer-TFCSUGWU.js} +3 -3
  18. package/dist/server/index.d.ts +5 -1
  19. package/dist/server/index.js +219 -81
  20. package/dist/server/index.js.map +1 -1
  21. package/dist/server/{text_search-2OZOVUIP.js → text_search-PVDG5Y6I.js} +14 -3
  22. package/dist/server/text_search-PVDG5Y6I.js.map +1 -0
  23. package/dist/styles/full.css +5821 -7156
  24. package/dist/styles/full.css.map +1 -1
  25. package/dist/styles/index.css +4844 -3929
  26. package/dist/styles/index.css.map +1 -1
  27. package/dist/{text_search-I2KZ7DTW.js → text_search-SO4ZOMIZ.js} +2 -2
  28. package/package.json +51 -36
  29. package/dist/chunk-264BTVJT.js.map +0 -1
  30. package/dist/chunk-4JJOUQ62.js.map +0 -1
  31. package/dist/chunk-KHB3VZJQ.js.map +0 -1
  32. package/dist/chunk-NQ6KUJWG.js.map +0 -1
  33. package/dist/server/text_search-2OZOVUIP.js.map +0 -1
  34. /package/dist/{pdf_saver-7FA4DAXI.js.map → pdf_saver-T6SEDYEE.js.map} +0 -0
  35. /package/dist/{pdf_viewer-B6S5PJJB.js.map → pdf_viewer-TFCSUGWU.js.map} +0 -0
  36. /package/dist/{text_search-I2KZ7DTW.js.map → text_search-SO4ZOMIZ.js.map} +0 -0
@@ -1,40 +1,55 @@
1
+ // src/server/index.ts
2
+ import { HazoInternalError } from "hazo_core/errors";
3
+
4
+ // src/server/extract.ts
5
+ import {
6
+ generateRequestId,
7
+ getCorrelationId,
8
+ optional_import,
9
+ withContext
10
+ } from "hazo_core";
11
+ import {
12
+ HazoExternalError,
13
+ HazoNotFoundError,
14
+ HazoUnavailableError,
15
+ HazoValidationError
16
+ } from "hazo_core/errors";
17
+
1
18
  // src/utils/logger.ts
19
+ import { createLogger } from "hazo_core";
2
20
  var console_logger = {
3
- info: (message, data) => {
4
- if (data) {
5
- console.log(`[hazo_pdf] ${message}`, data);
6
- } else {
7
- console.log(`[hazo_pdf] ${message}`);
8
- }
9
- },
10
- debug: (message, data) => {
11
- if (data) {
12
- console.debug(`[hazo_pdf] ${message}`, data);
13
- } else {
14
- console.debug(`[hazo_pdf] ${message}`);
15
- }
16
- },
17
- warn: (message, data) => {
18
- if (data) {
19
- console.warn(`[hazo_pdf] ${message}`, data);
20
- } else {
21
- console.warn(`[hazo_pdf] ${message}`);
22
- }
23
- },
24
- error: (message, data) => {
25
- if (data) {
26
- console.error(`[hazo_pdf] ${message}`, data);
27
- } else {
28
- console.error(`[hazo_pdf] ${message}`);
29
- }
30
- }
21
+ info: (message, data) => data ? console.log(`[hazo_pdf] ${message}`, data) : console.log(`[hazo_pdf] ${message}`),
22
+ debug: (message, data) => data ? console.debug(`[hazo_pdf] ${message}`, data) : console.debug(`[hazo_pdf] ${message}`),
23
+ warn: (message, data) => data ? console.warn(`[hazo_pdf] ${message}`, data) : console.warn(`[hazo_pdf] ${message}`),
24
+ error: (message, data) => data ? console.error(`[hazo_pdf] ${message}`, data) : console.error(`[hazo_pdf] ${message}`)
31
25
  };
32
- var current_logger = console_logger;
26
+ function build_default_logger() {
27
+ try {
28
+ return createLogger("hazo_pdf");
29
+ } catch {
30
+ return console_logger;
31
+ }
32
+ }
33
+ var current_logger = null;
33
34
  function get_logger() {
35
+ if (!current_logger) {
36
+ current_logger = build_default_logger();
37
+ }
34
38
  return current_logger;
35
39
  }
36
40
 
37
41
  // src/server/extract.ts
42
+ async function require_module(pkg) {
43
+ const mod = await optional_import(pkg);
44
+ if (!mod) {
45
+ throw new HazoUnavailableError({
46
+ code: "HAZO_PDF_OPTIONAL_DEP_MISSING",
47
+ pkg: "hazo_pdf",
48
+ message: `Required optional peer "${pkg}" is not installed`
49
+ });
50
+ }
51
+ return mod;
52
+ }
38
53
  var is_initialized = false;
39
54
  var hazo_files_adapter = null;
40
55
  var llm_initialized = false;
@@ -48,12 +63,14 @@ async function ensure_initialized(sqlite_path, logger = get_logger()) {
48
63
  return;
49
64
  }
50
65
  try {
51
- const { initialize_llm_api, get_current_config } = await import("hazo_llm_api/server");
52
- const { SqliteAdapter } = await import("hazo_connect/server");
66
+ const llm_mod = await require_module("hazo_llm_api/server");
67
+ const connect_mod = await require_module("hazo_connect/server");
68
+ const { initialize_llm_api, get_current_config } = llm_mod;
69
+ const { SqliteAdapter } = connect_mod;
53
70
  if (!llm_initialized) {
54
71
  await initialize_llm_api({ logger });
55
72
  llm_initialized = true;
56
- logger.info("hazo_llm_api initialized");
73
+ logger.debug("extract.llm_api.initialized");
57
74
  }
58
75
  const config = get_current_config();
59
76
  const db_path = sqlite_path || config?.sqlite_path || "prompt_library.sqlite";
@@ -61,16 +78,17 @@ async function ensure_initialized(sqlite_path, logger = get_logger()) {
61
78
  type: "sqlite",
62
79
  database_path: db_path
63
80
  });
64
- const { HAZO_FILES_TABLE_SCHEMA } = await import("hazo_files");
81
+ const files_mod = await require_module("hazo_files");
82
+ const { HAZO_FILES_TABLE_SCHEMA } = files_mod;
65
83
  const adapter = hazo_files_adapter;
66
84
  await adapter.rawQuery(HAZO_FILES_TABLE_SCHEMA.sqlite.ddl);
67
85
  for (const idx of HAZO_FILES_TABLE_SCHEMA.sqlite.indexes) {
68
86
  await adapter.rawQuery(idx);
69
87
  }
70
- logger.debug("hazo_files table initialized", { sqlite_path: db_path });
88
+ logger.debug("extract.hazo_files.table_initialized", { sqlite_path: db_path });
71
89
  is_initialized = true;
72
90
  } catch (error) {
73
- logger.error("Failed to initialize extraction", {
91
+ logger.error("extract.initialize.failed", {
74
92
  error: error instanceof Error ? error.message : String(error)
75
93
  });
76
94
  throw error;
@@ -78,21 +96,30 @@ async function ensure_initialized(sqlite_path, logger = get_logger()) {
78
96
  }
79
97
  async function load_document_as_base64(file_path, logger = get_logger()) {
80
98
  if (file_path.startsWith("http://") || file_path.startsWith("https://")) {
81
- logger.debug("Loading document from URL", { url: file_path });
99
+ logger.debug("extract.document.load_from_url", { url: file_path });
82
100
  const response = await fetch(file_path);
83
101
  if (!response.ok) {
84
- throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
102
+ throw new HazoExternalError({
103
+ code: "HAZO_PDF_EXTERNAL_FETCH_FAILED",
104
+ pkg: "hazo_pdf",
105
+ message: `Failed to fetch document: ${response.status} ${response.statusText}`,
106
+ httpStatus: 502
107
+ });
85
108
  }
86
109
  const buffer2 = await response.arrayBuffer();
87
110
  const base642 = Buffer.from(buffer2).toString("base64");
88
111
  const content_type = response.headers.get("content-type") || "application/pdf";
89
112
  return { base64: base642, mime_type: content_type };
90
113
  }
91
- logger.debug("Loading document from filesystem", { path: file_path });
114
+ logger.debug("extract.document.load_from_fs", { path: file_path });
92
115
  const fs = await import("fs");
93
116
  const path = await import("path");
94
117
  if (!fs.existsSync(file_path)) {
95
- throw new Error(`File not found: ${file_path}`);
118
+ throw new HazoNotFoundError({
119
+ code: "HAZO_PDF_FILE_NOT_FOUND",
120
+ pkg: "hazo_pdf",
121
+ message: `File not found: ${file_path}`
122
+ });
96
123
  }
97
124
  const buffer = fs.readFileSync(file_path);
98
125
  const base64 = buffer.toString("base64");
@@ -109,10 +136,16 @@ async function load_document_as_base64(file_path, logger = get_logger()) {
109
136
  return { base64, mime_type };
110
137
  }
111
138
  async function load_document_by_file_id(file_id, _storage_type, file_manager, logger = get_logger()) {
112
- const { HAZO_FILES_TABLE_SCHEMA } = await import("hazo_files");
113
- const { createCrudService } = await import("hazo_connect/server");
139
+ const files_mod = await require_module("hazo_files");
140
+ const connect_mod = await require_module("hazo_connect/server");
141
+ const { HAZO_FILES_TABLE_SCHEMA } = files_mod;
142
+ const { createCrudService } = connect_mod;
114
143
  if (!hazo_files_adapter) {
115
- throw new Error("hazo_files adapter not initialized");
144
+ throw new HazoUnavailableError({
145
+ code: "HAZO_PDF_ADAPTER_UNINITIALIZED",
146
+ pkg: "hazo_pdf",
147
+ message: "hazo_files adapter not initialized \u2014 call ensure_initialized() first"
148
+ });
116
149
  }
117
150
  const crudService = createCrudService(
118
151
  hazo_files_adapter,
@@ -122,23 +155,40 @@ async function load_document_by_file_id(file_id, _storage_type, file_manager, lo
122
155
  where: { id: file_id }
123
156
  });
124
157
  if (!files || files.length === 0) {
125
- throw new Error(`File record not found: ${file_id}`);
158
+ throw new HazoNotFoundError({
159
+ code: "HAZO_PDF_FILE_RECORD_NOT_FOUND",
160
+ pkg: "hazo_pdf",
161
+ message: `File record not found: ${file_id}`
162
+ });
126
163
  }
127
164
  const file_record = files[0];
128
165
  const file_path = file_record.file_path;
129
166
  const mime_type = file_record.file_type || "application/pdf";
130
- logger.debug("Found file record", { file_id, file_path, storage_type: file_record.storage_type });
167
+ logger.debug("extract.file_record.found", { file_id, file_path, storage_type: file_record.storage_type });
131
168
  if (file_record.storage_type === "google_drive") {
132
169
  if (!file_manager) {
133
- throw new Error("file_manager is required for Google Drive files");
170
+ throw new HazoValidationError({
171
+ code: "HAZO_PDF_FILE_MANAGER_REQUIRED",
172
+ pkg: "hazo_pdf",
173
+ message: "file_manager is required for Google Drive files"
174
+ });
134
175
  }
135
176
  if (!file_manager.isInitialized()) {
136
- throw new Error("file_manager is not initialized");
177
+ throw new HazoUnavailableError({
178
+ code: "HAZO_PDF_FILE_MANAGER_UNINITIALIZED",
179
+ pkg: "hazo_pdf",
180
+ message: "file_manager is not initialized"
181
+ });
137
182
  }
138
- logger.debug("Downloading from Google Drive", { file_path });
183
+ logger.debug("extract.google_drive.download", { file_path });
139
184
  const result = await file_manager.downloadFile(file_path);
140
185
  if (!result.success || !result.data) {
141
- throw new Error(result.error || "Failed to download file from Google Drive");
186
+ throw new HazoExternalError({
187
+ code: "HAZO_PDF_EXTERNAL_DOWNLOAD_FAILED",
188
+ pkg: "hazo_pdf",
189
+ message: result.error || "Failed to download file from Google Drive",
190
+ httpStatus: 502
191
+ });
142
192
  }
143
193
  const buffer = Buffer.from(result.data);
144
194
  const base64 = buffer.toString("base64");
@@ -150,6 +200,10 @@ async function load_document_by_file_id(file_id, _storage_type, file_manager, lo
150
200
  };
151
201
  }
152
202
  async function extract_document_data(source, options) {
203
+ const correlationId = getCorrelationId() ?? generateRequestId();
204
+ return withContext({ correlationId }, () => _extract_document_data(source, options));
205
+ }
206
+ async function _extract_document_data(source, options) {
153
207
  const logger = options.logger || get_logger();
154
208
  const storage_type = options.storage_type || "local";
155
209
  const save_to_hazo_files = options.save_to_hazo_files !== false;
@@ -192,12 +246,13 @@ async function extract_document_data(source, options) {
192
246
  size_kb: doc_size_kb,
193
247
  prompt: `${options.prompt_area}/${options.prompt_key}`
194
248
  });
249
+ const llm_mod = await require_module("hazo_llm_api/server");
195
250
  const {
196
251
  hazo_llm_dynamic_data_extract,
197
252
  get_database,
198
253
  get_prompt_by_area_and_key,
199
254
  default_logger
200
- } = await import("hazo_llm_api/server");
255
+ } = llm_mod;
201
256
  const db = get_database();
202
257
  if (!db) {
203
258
  return {
@@ -249,8 +304,10 @@ async function extract_document_data(source, options) {
249
304
  const storage_file_path = options.original_file_path || resolved_file_path;
250
305
  if (save_to_hazo_files && hazo_files_adapter) {
251
306
  try {
252
- const { createFileMetadataService, HAZO_FILES_TABLE_SCHEMA } = await import("hazo_files");
253
- const { createCrudService } = await import("hazo_connect/server");
307
+ const files_mod = await require_module("hazo_files");
308
+ const connect_mod = await require_module("hazo_connect/server");
309
+ const { createFileMetadataService, HAZO_FILES_TABLE_SCHEMA } = files_mod;
310
+ const { createCrudService } = connect_mod;
254
311
  const crudService = createCrudService(
255
312
  hazo_files_adapter,
256
313
  HAZO_FILES_TABLE_SCHEMA.tableName
@@ -326,7 +383,34 @@ async function extract_document_data(source, options) {
326
383
  }
327
384
 
328
385
  // src/server/snippet.ts
386
+ import {
387
+ generateRequestId as generateRequestId2,
388
+ getCorrelationId as getCorrelationId2,
389
+ optional_import as optional_import2,
390
+ withContext as withContext2
391
+ } from "hazo_core";
392
+ import {
393
+ HazoExternalError as HazoExternalError2,
394
+ HazoNotFoundError as HazoNotFoundError2,
395
+ HazoUnavailableError as HazoUnavailableError2,
396
+ HazoValidationError as HazoValidationError2
397
+ } from "hazo_core/errors";
398
+ async function require_module2(pkg) {
399
+ const mod = await optional_import2(pkg);
400
+ if (!mod) {
401
+ throw new HazoUnavailableError2({
402
+ code: "HAZO_PDF_OPTIONAL_DEP_MISSING",
403
+ pkg: "hazo_pdf",
404
+ message: `Required optional peer "${pkg}" is not installed`
405
+ });
406
+ }
407
+ return mod;
408
+ }
329
409
  async function extract_text_snippet(source, options) {
410
+ const correlationId = getCorrelationId2() ?? generateRequestId2();
411
+ return withContext2({ correlationId }, () => _extract_text_snippet(source, options));
412
+ }
413
+ async function _extract_text_snippet(source, options) {
330
414
  const {
331
415
  search_text,
332
416
  page_index = 0,
@@ -356,7 +440,7 @@ async function extract_text_snippet(source, options) {
356
440
  standardFontDataUrl: standard_font_data_url,
357
441
  verbosity: 0
358
442
  }).promise;
359
- const { find_all_text_in_pdf } = await import("./text_search-2OZOVUIP.js");
443
+ const { find_all_text_in_pdf } = await import("./text_search-PVDG5Y6I.js");
360
444
  const total_pages = pdf.numPages;
361
445
  const snippets = [];
362
446
  if (match_mode === "first") {
@@ -564,20 +648,33 @@ async function load_pdf_bytes(source) {
564
648
  return source.pdf_bytes;
565
649
  }
566
650
  if (!source.file_path) {
567
- throw new Error("Either file_path or pdf_bytes is required");
651
+ throw new HazoValidationError2({
652
+ code: "HAZO_PDF_VALIDATION_ERROR",
653
+ pkg: "hazo_pdf",
654
+ message: "Either file_path or pdf_bytes is required"
655
+ });
568
656
  }
569
657
  const file_path = source.file_path;
570
658
  if (file_path.startsWith("http://") || file_path.startsWith("https://")) {
571
659
  const response = await fetch(file_path);
572
660
  if (!response.ok) {
573
- throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
661
+ throw new HazoExternalError2({
662
+ code: "HAZO_PDF_EXTERNAL_FETCH_FAILED",
663
+ pkg: "hazo_pdf",
664
+ message: `Failed to fetch PDF: ${response.status} ${response.statusText}`,
665
+ httpStatus: 502
666
+ });
574
667
  }
575
668
  const buffer2 = await response.arrayBuffer();
576
669
  return new Uint8Array(buffer2);
577
670
  }
578
671
  const fs = await import("fs");
579
672
  if (!fs.existsSync(file_path)) {
580
- throw new Error(`File not found: ${file_path}`);
673
+ throw new HazoNotFoundError2({
674
+ code: "HAZO_PDF_FILE_NOT_FOUND",
675
+ pkg: "hazo_pdf",
676
+ message: `File not found: ${file_path}`
677
+ });
581
678
  }
582
679
  const buffer = fs.readFileSync(file_path);
583
680
  return new Uint8Array(buffer);
@@ -595,7 +692,8 @@ async function find_text_with_llm(page, viewport, search_text, _render_scale) {
595
692
  }).promise;
596
693
  const image_buffer = canvas.toBuffer("image/png");
597
694
  const image_base64 = image_buffer.toString("base64");
598
- const { hazo_llm_image_text } = await import("hazo_llm_api/server");
695
+ const llm_mod = await require_module2("hazo_llm_api/server");
696
+ const { hazo_llm_image_text } = llm_mod;
599
697
  const prompt = `Find the text "${search_text}" in this document image.
600
698
  If found, return ONLY a JSON object with the approximate bounding box as percentage coordinates (0-100):
601
699
  {"found": true, "x_pct": <left edge %>, "y_pct": <top edge %>, "width_pct": <width %>, "height_pct": <height %>}
@@ -642,6 +740,15 @@ Return ONLY the JSON object, nothing else.`;
642
740
  }
643
741
 
644
742
  // src/server/split.ts
743
+ import {
744
+ generateRequestId as generateRequestId3,
745
+ getCorrelationId as getCorrelationId3,
746
+ withContext as withContext3
747
+ } from "hazo_core";
748
+ import {
749
+ HazoExternalError as HazoExternalError3,
750
+ HazoValidationError as HazoValidationError3
751
+ } from "hazo_core/errors";
645
752
  function sanitize_label(label) {
646
753
  return label.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_+|_+$/g, "").slice(0, 60);
647
754
  }
@@ -653,7 +760,12 @@ function generate_filename(label, pages) {
653
760
  }
654
761
  function to_uint8array(data) {
655
762
  if (!data) {
656
- throw new Error("No data received from downloadFile");
763
+ throw new HazoExternalError3({
764
+ code: "HAZO_PDF_EXTERNAL_DOWNLOAD_EMPTY",
765
+ pkg: "hazo_pdf",
766
+ message: "No data received from downloadFile",
767
+ httpStatus: 502
768
+ });
657
769
  }
658
770
  if (data instanceof Uint8Array) {
659
771
  return data;
@@ -664,52 +776,73 @@ function to_uint8array(data) {
664
776
  if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
665
777
  return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
666
778
  }
667
- throw new Error("Unsupported data type from downloadFile");
779
+ throw new HazoValidationError3({
780
+ code: "HAZO_PDF_UNSUPPORTED_TYPE",
781
+ pkg: "hazo_pdf",
782
+ message: "Unsupported data type from downloadFile"
783
+ });
668
784
  }
669
785
  async function split_pdf(request, file_manager) {
786
+ const correlationId = getCorrelationId3() ?? generateRequestId3();
787
+ return withContext3({ correlationId }, () => _split_pdf(request, file_manager));
788
+ }
789
+ async function _split_pdf(request, file_manager) {
670
790
  const logger = get_logger();
671
- logger.info("Starting PDF split", {
791
+ logger.info("pdf.split.start", {
672
792
  source_file_id: request.source_file_id,
673
793
  split_count: request.splits.length,
674
794
  output_folder: request.output_folder
675
795
  });
676
796
  const { PDFDocument } = await import("pdf-lib");
677
- logger.debug("Downloading source PDF", { file_id: request.source_file_id });
797
+ logger.debug("split.source.download", { file_id: request.source_file_id });
678
798
  const download_result = await file_manager.downloadFile(request.source_file_id);
679
799
  if (!download_result.success || !download_result.data) {
680
- throw new Error(
681
- download_result.error || `Failed to download source file: ${request.source_file_id}`
682
- );
800
+ throw new HazoExternalError3({
801
+ code: "HAZO_PDF_EXTERNAL_DOWNLOAD_FAILED",
802
+ pkg: "hazo_pdf",
803
+ message: download_result.error || `Failed to download source file: ${request.source_file_id}`,
804
+ httpStatus: 502
805
+ });
683
806
  }
684
807
  const source_bytes = to_uint8array(download_result.data);
685
- logger.debug("Loading source PDF", { byte_size: source_bytes.byteLength });
808
+ logger.debug("split.source.load", { byte_size: source_bytes.byteLength });
686
809
  const source_pdf = await PDFDocument.load(source_bytes);
687
810
  const total_pages = source_pdf.getPageCount();
688
- logger.info("Source PDF loaded", { total_pages });
811
+ logger.debug("split.source.loaded", { total_pages });
689
812
  const page_counts = new Array(total_pages + 1).fill(0);
690
813
  for (const split of request.splits) {
691
814
  for (const page of split.pages) {
692
815
  if (page < 1 || page > total_pages) {
693
- throw new Error(
694
- `Split "${split.label}" references page ${page} which is out of range (1-${total_pages})`
695
- );
816
+ throw new HazoValidationError3({
817
+ code: "HAZO_PDF_SPLIT_PAGE_OUT_OF_RANGE",
818
+ pkg: "hazo_pdf",
819
+ message: `Split "${split.label}" references page ${page} which is out of range (1-${total_pages})`
820
+ });
696
821
  }
697
822
  page_counts[page]++;
698
823
  }
699
824
  }
700
825
  for (let p = 1; p <= total_pages; p++) {
701
826
  if (page_counts[p] === 0) {
702
- throw new Error(`Page ${p} is not included in any split`);
827
+ throw new HazoValidationError3({
828
+ code: "HAZO_PDF_SPLIT_PAGE_MISSING",
829
+ pkg: "hazo_pdf",
830
+ message: `Page ${p} is not included in any split`
831
+ });
703
832
  }
704
833
  if (page_counts[p] > 1) {
705
- throw new Error(`Page ${p} appears in multiple splits`);
834
+ throw new HazoValidationError3({
835
+ code: "HAZO_PDF_SPLIT_PAGE_DUPLICATED",
836
+ pkg: "hazo_pdf",
837
+ message: `Page ${p} appears in multiple splits`
838
+ });
706
839
  }
707
840
  }
708
- logger.debug("Split validation passed", { total_pages, split_count: request.splits.length });
841
+ logger.debug("split.validation.passed", { total_pages, split_count: request.splits.length });
709
842
  const outputs = [];
710
843
  const output_folder = request.output_folder || "";
711
844
  for (const instruction of request.splits) {
712
- logger.debug("Processing split", {
845
+ logger.debug("split.instruction.start", {
713
846
  split_id: instruction.split_id,
714
847
  label: instruction.label,
715
848
  pages: instruction.pages
@@ -724,14 +857,17 @@ async function split_pdf(request, file_manager) {
724
857
  const byte_size = pdf_bytes.byteLength;
725
858
  const file_name = instruction.output_filename || generate_filename(instruction.label, instruction.pages);
726
859
  const file_path = output_folder ? `${output_folder.replace(/\/$/, "")}/${file_name}` : file_name;
727
- logger.debug("Uploading split PDF", { file_path, byte_size });
860
+ logger.debug("split.upload", { file_path, byte_size });
728
861
  const upload_result = await file_manager.uploadFile(new Uint8Array(pdf_bytes), file_path, {
729
862
  overwrite: true
730
863
  });
731
864
  if (!upload_result.success) {
732
- throw new Error(
733
- upload_result.error || `Failed to upload split "${instruction.label}" to ${file_path}`
734
- );
865
+ throw new HazoExternalError3({
866
+ code: "HAZO_PDF_EXTERNAL_UPLOAD_FAILED",
867
+ pkg: "hazo_pdf",
868
+ message: upload_result.error || `Failed to upload split "${instruction.label}" to ${file_path}`,
869
+ httpStatus: 502
870
+ });
735
871
  }
736
872
  outputs.push({
737
873
  split_id: instruction.split_id,
@@ -743,7 +879,7 @@ async function split_pdf(request, file_manager) {
743
879
  page_count: instruction.pages.length,
744
880
  byte_size
745
881
  });
746
- logger.info("Split uploaded", {
882
+ logger.debug("split.upload.done", {
747
883
  split_id: instruction.split_id,
748
884
  label: instruction.label,
749
885
  file_path,
@@ -751,7 +887,7 @@ async function split_pdf(request, file_manager) {
751
887
  byte_size
752
888
  });
753
889
  }
754
- logger.info("PDF split complete", {
890
+ logger.info("pdf.split.complete", {
755
891
  source_file_id: request.source_file_id,
756
892
  output_count: outputs.length
757
893
  });
@@ -763,9 +899,11 @@ async function split_pdf(request, file_manager) {
763
899
 
764
900
  // src/server/index.ts
765
901
  if (typeof window !== "undefined") {
766
- throw new Error(
767
- "hazo_pdf/server cannot be imported in the browser. This module is server-only and requires Node.js runtime."
768
- );
902
+ throw new HazoInternalError({
903
+ code: "HAZO_PDF_SERVER_ONLY",
904
+ pkg: "hazo_pdf",
905
+ message: "hazo_pdf/server cannot be imported in the browser. This module is server-only and requires Node.js runtime."
906
+ });
769
907
  }
770
908
  export {
771
909
  extract_document_data,