hazo_pdf 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SETUP_CHECKLIST.md +693 -0
- package/config/hazo_pdf_config.ini.sample +42 -0
- package/db_setup_postgres.sql +17 -0
- package/db_setup_sqlite.sql +13 -0
- package/dist/{chunk-NQ6KUJWG.js → chunk-7M53O3HF.js} +14 -4
- package/dist/chunk-7M53O3HF.js.map +1 -0
- package/dist/{chunk-4JJOUQ62.js → chunk-KDOQ3FIO.js} +176 -87
- package/dist/chunk-KDOQ3FIO.js.map +1 -0
- package/dist/{chunk-KHB3VZJQ.js → chunk-LFFCPDWC.js} +14 -3
- package/dist/chunk-LFFCPDWC.js.map +1 -0
- package/dist/{chunk-264BTVJT.js → chunk-TZJ5S57X.js} +18 -31
- package/dist/chunk-TZJ5S57X.js.map +1 -0
- package/dist/index.d.ts +9 -5
- package/dist/index.js +35 -16
- package/dist/index.js.map +1 -1
- package/dist/{pdf_saver-7FA4DAXI.js → pdf_saver-T6SEDYEE.js} +3 -3
- package/dist/{pdf_viewer-B6S5PJJB.js → pdf_viewer-TFCSUGWU.js} +3 -3
- package/dist/server/index.d.ts +5 -1
- package/dist/server/index.js +219 -81
- package/dist/server/index.js.map +1 -1
- package/dist/server/{text_search-2OZOVUIP.js → text_search-PVDG5Y6I.js} +14 -3
- package/dist/server/text_search-PVDG5Y6I.js.map +1 -0
- package/dist/styles/full.css +5821 -7156
- package/dist/styles/full.css.map +1 -1
- package/dist/styles/index.css +4844 -3929
- package/dist/styles/index.css.map +1 -1
- package/dist/{text_search-I2KZ7DTW.js → text_search-SO4ZOMIZ.js} +2 -2
- package/package.json +51 -36
- package/dist/chunk-264BTVJT.js.map +0 -1
- package/dist/chunk-4JJOUQ62.js.map +0 -1
- package/dist/chunk-KHB3VZJQ.js.map +0 -1
- package/dist/chunk-NQ6KUJWG.js.map +0 -1
- package/dist/server/text_search-2OZOVUIP.js.map +0 -1
- /package/dist/{pdf_saver-7FA4DAXI.js.map → pdf_saver-T6SEDYEE.js.map} +0 -0
- /package/dist/{pdf_viewer-B6S5PJJB.js.map → pdf_viewer-TFCSUGWU.js.map} +0 -0
- /package/dist/{text_search-I2KZ7DTW.js.map → text_search-SO4ZOMIZ.js.map} +0 -0
|
@@ -0,0 +1,693 @@
|
|
|
1
|
+
# Hazo PDF Setup Checklist
|
|
2
|
+
|
|
3
|
+
This guide walks you through setting up and using the Hazo PDF library in your React application.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
Before you begin, ensure you have:
|
|
8
|
+
|
|
9
|
+
- [ ] **Node.js 18+** installed on your system
|
|
10
|
+
- [ ] **npm** or **yarn** package manager
|
|
11
|
+
- [ ] A **React 18+** application (Next.js, Vite, Create React App, etc.)
|
|
12
|
+
- [ ] **TypeScript** support (recommended but optional)
|
|
13
|
+
|
|
14
|
+
## Installation Steps
|
|
15
|
+
|
|
16
|
+
### 1. Install the Package
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install hazo_pdf
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Or with yarn:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
yarn add hazo_pdf
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
#### Optional: Install hazo_logs for Logging
|
|
29
|
+
|
|
30
|
+
For structured logging and debugging, optionally install hazo_logs:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npm install hazo_logs
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Or with yarn:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
yarn add hazo_logs
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Note: hazo_logs is an optional peer dependency. If not installed, hazo_pdf will use console-based logging.
|
|
43
|
+
|
|
44
|
+
#### Optional: Install hazo_files for Remote Storage
|
|
45
|
+
|
|
46
|
+
For file storage integration (remote providers such as Google Drive and Dropbox, or the local filesystem), optionally install hazo_files:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
npm install hazo_files
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Or with yarn:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
yarn add hazo_files
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Note: hazo_files is an optional peer dependency. When installed, it enables loading and saving PDFs from remote storage providers.
|
|
59
|
+
|
|
60
|
+
### 2. Import Styles
|
|
61
|
+
|
|
62
|
+
Choose the appropriate CSS file based on your application setup:
|
|
63
|
+
|
|
64
|
+
**Option A: For apps with existing styles (Recommended)**
|
|
65
|
+
|
|
66
|
+
Import `styles.css` - this does NOT include Tailwind preflight/base resets:
|
|
67
|
+
|
|
68
|
+
```tsx
|
|
69
|
+
import 'hazo_pdf/styles.css';
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**Option B: For standalone apps**
|
|
73
|
+
|
|
74
|
+
Import `styles-full.css` - includes Tailwind preflight/base styles:
|
|
75
|
+
|
|
76
|
+
```tsx
|
|
77
|
+
import 'hazo_pdf/styles-full.css';
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 3. Import the Component
|
|
81
|
+
|
|
82
|
+
```tsx
|
|
83
|
+
import { PdfViewer } from 'hazo_pdf';
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Or for dialog/modal usage:
|
|
87
|
+
|
|
88
|
+
```tsx
|
|
89
|
+
import { PdfViewerDialog } from 'hazo_pdf';
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
For TypeScript projects, also import types:
|
|
93
|
+
|
|
94
|
+
```tsx
|
|
95
|
+
import type {
|
|
96
|
+
PdfViewerProps,
|
|
97
|
+
PdfViewerDialogProps,
|
|
98
|
+
PdfAnnotation,
|
|
99
|
+
PdfViewerRef,
|
|
100
|
+
HighlightFieldInfo
|
|
101
|
+
} from 'hazo_pdf';
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### 4. Create a Container with Explicit Dimensions
|
|
105
|
+
|
|
106
|
+
The PDF viewer requires its parent container to have explicit width and height:
|
|
107
|
+
|
|
108
|
+
```tsx
|
|
109
|
+
<div style={{ width: '100%', height: '600px' }}>
|
|
110
|
+
<PdfViewer url="/document.pdf" />
|
|
111
|
+
</div>
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 5. Verify the Setup
|
|
115
|
+
|
|
116
|
+
Create a simple test component to verify everything works:
|
|
117
|
+
|
|
118
|
+
```tsx
|
|
119
|
+
import { PdfViewer } from 'hazo_pdf';
|
|
120
|
+
import 'hazo_pdf/styles.css';
|
|
121
|
+
|
|
122
|
+
export default function TestViewer() {
|
|
123
|
+
return (
|
|
124
|
+
<div style={{ width: '100%', height: '800px' }}>
|
|
125
|
+
<PdfViewer
|
|
126
|
+
url="/path/to/test.pdf"
|
|
127
|
+
on_load={(pdf) => console.log('PDF loaded:', pdf.numPages, 'pages')}
|
|
128
|
+
on_error={(error) => console.error('PDF error:', error)}
|
|
129
|
+
/>
|
|
130
|
+
</div>
|
|
131
|
+
);
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Configuration (Optional)
|
|
136
|
+
|
|
137
|
+
### 6. Create a Configuration File
|
|
138
|
+
|
|
139
|
+
For advanced styling customization, create a configuration INI file:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
mkdir -p config
|
|
143
|
+
touch config/hazo_pdf_config.ini
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Example configuration:
|
|
147
|
+
|
|
148
|
+
```ini
|
|
149
|
+
[viewer]
|
|
150
|
+
viewer_background_color = #f5f5f5
|
|
151
|
+
append_timestamp_to_text_edits = true
|
|
152
|
+
annotation_text_suffix_fixed_text = user_x
|
|
153
|
+
|
|
154
|
+
[freetext_annotation]
|
|
155
|
+
freetext_text_color = #0066cc
|
|
156
|
+
freetext_background_color = rgb(230, 243, 255)
|
|
157
|
+
freetext_background_opacity = 0.1
|
|
158
|
+
freetext_border_color = #003366
|
|
159
|
+
freetext_border_width = 1
|
|
160
|
+
|
|
161
|
+
[toolbar]
|
|
162
|
+
toolbar_background_color = rgb(240, 248, 255)
|
|
163
|
+
toolbar_font_color = rgb(30, 58, 138)
|
|
164
|
+
toolbar_button_background_color = rgb(219, 234, 254)
|
|
165
|
+
toolbar_button_save_background_color = rgb(34, 197, 94)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### 7. Use Configuration File
|
|
169
|
+
|
|
170
|
+
Pass the config file path to the component:
|
|
171
|
+
|
|
172
|
+
```tsx
|
|
173
|
+
<PdfViewer
|
|
174
|
+
url="/document.pdf"
|
|
175
|
+
config_file="config/hazo_pdf_config.ini"
|
|
176
|
+
/>
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Using the Ref API (Optional)
|
|
180
|
+
|
|
181
|
+
If you need programmatic control over highlights, set up a ref:
|
|
182
|
+
|
|
183
|
+
### 8. Create a Ref
|
|
184
|
+
|
|
185
|
+
```tsx
|
|
186
|
+
import { useRef } from 'react';
|
|
187
|
+
import { PdfViewer, PdfViewerRef } from 'hazo_pdf';
|
|
188
|
+
|
|
189
|
+
function MyComponent() {
|
|
190
|
+
const viewer_ref = useRef<PdfViewerRef>(null);
|
|
191
|
+
|
|
192
|
+
return <PdfViewer ref={viewer_ref} url="/document.pdf" />;
|
|
193
|
+
}
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### 9. Use Ref Methods
|
|
197
|
+
|
|
198
|
+
The ref exposes three methods for highlight management:
|
|
199
|
+
|
|
200
|
+
```tsx
|
|
201
|
+
// Create a highlight
|
|
202
|
+
const highlight_id = viewer_ref.current?.highlight_region(
|
|
203
|
+
0, // page_index (zero-based)
|
|
204
|
+
[100, 500, 300, 550], // rect in PDF coordinates [x1, y1, x2, y2]
|
|
205
|
+
{
|
|
206
|
+
border_color: '#FF0000',
|
|
207
|
+
background_color: '#FFFF00',
|
|
208
|
+
background_opacity: 0.4
|
|
209
|
+
}
|
|
210
|
+
);
|
|
211
|
+
|
|
212
|
+
// Remove a specific highlight
|
|
213
|
+
const removed = viewer_ref.current?.remove_highlight(highlight_id);
|
|
214
|
+
|
|
215
|
+
// Clear all API-created highlights
|
|
216
|
+
viewer_ref.current?.clear_all_highlights();
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### 10. Understand PDF Coordinates
|
|
220
|
+
|
|
221
|
+
Important notes about the coordinate system:
|
|
222
|
+
|
|
223
|
+
- Coordinates are in PDF space (points), not screen pixels
|
|
224
|
+
- Origin is at the **bottom-left** corner (Y increases upward)
|
|
225
|
+
- Rectangle format: `[x1, y1, x2, y2]` where (x1, y1) is bottom-left, (x2, y2) is top-right
|
|
226
|
+
|
|
227
|
+
## Troubleshooting
|
|
228
|
+
|
|
229
|
+
### PDF Not Displaying
|
|
230
|
+
|
|
231
|
+
- [ ] Verify the parent container has explicit width and height
|
|
232
|
+
- [ ] Check the PDF URL is correct and accessible
|
|
233
|
+
- [ ] Check browser console for errors
|
|
234
|
+
- [ ] Ensure CSS file is imported
|
|
235
|
+
- [ ] Verify the PDF file is not corrupted
|
|
236
|
+
|
|
237
|
+
### Styles Not Applying
|
|
238
|
+
|
|
239
|
+
- [ ] Confirm you imported either `styles.css` or `styles-full.css`
|
|
240
|
+
- [ ] Check for CSS conflicts with existing styles
|
|
241
|
+
- [ ] Try importing `styles-full.css` instead if `styles.css` doesn't work
|
|
242
|
+
- [ ] Verify configuration file path is correct (if using config file)
|
|
243
|
+
|
|
244
|
+
### TypeScript Errors
|
|
245
|
+
|
|
246
|
+
- [ ] Ensure you're using TypeScript 4.5+
|
|
247
|
+
- [ ] Import types from `hazo_pdf`: `import type { PdfViewerRef } from 'hazo_pdf';`
|
|
248
|
+
- [ ] Check that React types are installed: `npm install --save-dev @types/react`
|
|
249
|
+
|
|
250
|
+
### Ref Not Working
|
|
251
|
+
|
|
252
|
+
- [ ] Verify you imported `PdfViewerRef` type: `import type { PdfViewerRef } from 'hazo_pdf';`
|
|
253
|
+
- [ ] Ensure you're using `useRef<PdfViewerRef>(null)`
|
|
254
|
+
- [ ] Check that the ref is attached: `<PdfViewer ref={viewer_ref} ... />`
|
|
255
|
+
- [ ] Verify you're accessing ref methods after the component mounts: `viewer_ref.current?.highlight_region(...)`
|
|
256
|
+
|
|
257
|
+
### Highlights Not Appearing
|
|
258
|
+
|
|
259
|
+
- [ ] Verify coordinates are in PDF space, not screen space
|
|
260
|
+
- [ ] Check that page_index is zero-based (first page = 0)
|
|
261
|
+
- [ ] Ensure rectangle coordinates are valid (x1 < x2, y1 < y2)
|
|
262
|
+
- [ ] Verify the PDF has loaded before calling highlight methods
|
|
263
|
+
- [ ] Check colors are in valid hex format (e.g., "#FF0000")
|
|
264
|
+
|
|
265
|
+
## Post-Installation Configuration
|
|
266
|
+
|
|
267
|
+
### Enable Logging (Optional)
|
|
268
|
+
|
|
269
|
+
To enable structured logging with hazo_logs:
|
|
270
|
+
|
|
271
|
+
#### 1. Create Logger Configuration File
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
mkdir -p config
|
|
275
|
+
touch config/hazo_logs_config.ini
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
Example configuration:
|
|
279
|
+
|
|
280
|
+
```ini
|
|
281
|
+
[logging]
|
|
282
|
+
log_level = DEBUG
|
|
283
|
+
log_format = json
|
|
284
|
+
output_file = logs/hazo_pdf.log
|
|
285
|
+
|
|
286
|
+
[console]
|
|
287
|
+
enabled = true
|
|
288
|
+
log_level = INFO
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
#### 2. Initialize Logger in Your App
|
|
292
|
+
|
|
293
|
+
```tsx
|
|
294
|
+
import { PdfViewer } from 'hazo_pdf';
|
|
295
|
+
import { create_logger } from 'hazo_logs';
|
|
296
|
+
import 'hazo_pdf/styles.css';
|
|
297
|
+
|
|
298
|
+
// Create logger instance
|
|
299
|
+
const logger = create_logger('my_app', 'config/hazo_logs_config.ini');
|
|
300
|
+
|
|
301
|
+
function App() {
|
|
302
|
+
return (
|
|
303
|
+
<div style={{ width: '100%', height: '800px' }}>
|
|
304
|
+
<PdfViewer
|
|
305
|
+
url="/document.pdf"
|
|
306
|
+
logger={logger}
|
|
307
|
+
on_load={(pdf) => console.log('PDF loaded:', pdf.numPages, 'pages')}
|
|
308
|
+
/>
|
|
309
|
+
</div>
|
|
310
|
+
);
|
|
311
|
+
}
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
#### 3. Alternative: Custom Logger
|
|
315
|
+
|
|
316
|
+
You can also provide a custom logger that matches the Logger interface:
|
|
317
|
+
|
|
318
|
+
```tsx
|
|
319
|
+
import { PdfViewer } from 'hazo_pdf';
|
|
320
|
+
|
|
321
|
+
const custom_logger = {
|
|
322
|
+
info: (message, data) => console.log('[INFO]', message, data),
|
|
323
|
+
debug: (message, data) => console.debug('[DEBUG]', message, data),
|
|
324
|
+
warn: (message, data) => console.warn('[WARN]', message, data),
|
|
325
|
+
error: (message, data) => console.error('[ERROR]', message, data),
|
|
326
|
+
};
|
|
327
|
+
|
|
328
|
+
<PdfViewer
|
|
329
|
+
url="/document.pdf"
|
|
330
|
+
logger={custom_logger}
|
|
331
|
+
/>
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
**What Gets Logged:**
|
|
335
|
+
- PDF conversion operations (image, text, Excel to PDF)
|
|
336
|
+
- PDF loading and rendering events
|
|
337
|
+
- Annotation operations (create, update, delete)
|
|
338
|
+
- Error conditions and warnings
|
|
339
|
+
|
|
340
|
+
**Note:** If no logger is provided, hazo_pdf automatically falls back to console logging with a `[hazo_pdf]` prefix.
|
|
341
|
+
|
|
342
|
+
### Enable Metadata Sidepanel (Optional)
|
|
343
|
+
|
|
344
|
+
To display metadata in a sidepanel:
|
|
345
|
+
|
|
346
|
+
```tsx
|
|
347
|
+
import type { MetadataInput } from 'hazo_pdf';
|
|
348
|
+
|
|
349
|
+
const metadata: MetadataInput = {
|
|
350
|
+
header: [
|
|
351
|
+
{ style: 'h1', label: 'Document Information' }
|
|
352
|
+
],
|
|
353
|
+
data: [
|
|
354
|
+
{
|
|
355
|
+
label: 'Title',
|
|
356
|
+
style: 'h3',
|
|
357
|
+
value: 'My Document',
|
|
358
|
+
editable: true
|
|
359
|
+
}
|
|
360
|
+
],
|
|
361
|
+
footer: [
|
|
362
|
+
{ style: 'body', label: 'Version 1.0' }
|
|
363
|
+
]
|
|
364
|
+
};
|
|
365
|
+
|
|
366
|
+
<PdfViewer
|
|
367
|
+
url="/document.pdf"
|
|
368
|
+
sidepanel_metadata_enabled={true}
|
|
369
|
+
metadata_input={metadata}
|
|
370
|
+
on_metadata_change={(updatedRow, allData) => {
|
|
371
|
+
console.log('Metadata updated:', updatedRow);
|
|
372
|
+
return { updatedRow, allData };
|
|
373
|
+
}}
|
|
374
|
+
/>
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
### Add Custom Stamps (Optional)
|
|
378
|
+
|
|
379
|
+
Configure custom stamps for quick annotation:
|
|
380
|
+
|
|
381
|
+
```tsx
|
|
382
|
+
const custom_stamps = JSON.stringify([
|
|
383
|
+
{
|
|
384
|
+
name: "Approved",
|
|
385
|
+
text: "✓",
|
|
386
|
+
order: 1,
|
|
387
|
+
time_stamp_suffix_enabled: true,
|
|
388
|
+
background_color: "rgb(200, 255, 200)",
|
|
389
|
+
border_size: 1,
|
|
390
|
+
font_color: "#000000",
|
|
391
|
+
font_weight: "bold",
|
|
392
|
+
font_size: 16
|
|
393
|
+
}
|
|
394
|
+
]);
|
|
395
|
+
|
|
396
|
+
<PdfViewer
|
|
397
|
+
url="/document.pdf"
|
|
398
|
+
right_click_custom_stamps={custom_stamps}
|
|
399
|
+
/>
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
### Enable Multi-File Support (Optional)
|
|
403
|
+
|
|
404
|
+
Manage and view multiple PDF files:
|
|
405
|
+
|
|
406
|
+
```tsx
|
|
407
|
+
import type { FileItem, UploadResult } from 'hazo_pdf';
|
|
408
|
+
|
|
409
|
+
const [files, setFiles] = useState<FileItem[]>([]);
|
|
410
|
+
|
|
411
|
+
const handle_upload = async (file: File, converted_pdf?: Uint8Array): Promise<UploadResult> => {
|
|
412
|
+
// Upload to server
|
|
413
|
+
const formData = new FormData();
|
|
414
|
+
formData.append('file', converted_pdf ? new Blob([converted_pdf], { type: 'application/pdf' }) : file);
|
|
415
|
+
|
|
416
|
+
const response = await fetch('/api/upload', {
|
|
417
|
+
method: 'POST',
|
|
418
|
+
body: formData
|
|
419
|
+
});
|
|
420
|
+
|
|
421
|
+
const result = await response.json();
|
|
422
|
+
|
|
423
|
+
return result.success
|
|
424
|
+
? { success: true, file_id: result.file_id, url: result.url }
|
|
425
|
+
: { success: false, error: result.error };
|
|
426
|
+
};
|
|
427
|
+
|
|
428
|
+
<PdfViewer
|
|
429
|
+
files={files}
|
|
430
|
+
on_files_change={setFiles}
|
|
431
|
+
on_upload={handle_upload}
|
|
432
|
+
/>
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Enable hazo_files Integration (Optional)
|
|
436
|
+
|
|
437
|
+
Load and save PDFs from remote storage:
|
|
438
|
+
|
|
439
|
+
```tsx
|
|
440
|
+
import { FileManager } from 'hazo_files';
|
|
441
|
+
|
|
442
|
+
// Initialize FileManager
|
|
443
|
+
const file_manager = new FileManager({
|
|
444
|
+
storage_type: 'google_drive',
|
|
445
|
+
config_file: 'config/hazo_files_config.ini'
|
|
446
|
+
});
|
|
447
|
+
|
|
448
|
+
await file_manager.initialize();
|
|
449
|
+
|
|
450
|
+
<PdfViewer
|
|
451
|
+
url="/remote/path/document.pdf"
|
|
452
|
+
file_manager={file_manager}
|
|
453
|
+
save_path="/remote/path/document.pdf"
|
|
454
|
+
/>
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
### Enable Data Extraction (Optional)
|
|
458
|
+
|
|
459
|
+
Extract structured data using LLM prompts:
|
|
460
|
+
|
|
461
|
+
```tsx
|
|
462
|
+
<PdfViewer
|
|
463
|
+
url="/invoice.pdf"
|
|
464
|
+
show_extract_button={true}
|
|
465
|
+
extract_prompt_area="invoices"
|
|
466
|
+
extract_prompt_key="extract_invoice_data"
|
|
467
|
+
extract_api_endpoint="/api/extract"
|
|
468
|
+
on_extract_complete={(data) => console.log('Extracted:', data)}
|
|
469
|
+
on_extract_error={(error) => console.error('Error:', error)}
|
|
470
|
+
/>
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
### Use Server-Side Extraction (Optional)
|
|
474
|
+
|
|
475
|
+
For server-side extraction in API routes, use `hazo_pdf/server`:
|
|
476
|
+
|
|
477
|
+
```typescript
|
|
478
|
+
// In Next.js API route or server action
|
|
479
|
+
import { extract_document_data } from 'hazo_pdf/server';
|
|
480
|
+
|
|
481
|
+
const result = await extract_document_data(
|
|
482
|
+
{ file_path: '/path/to/document.pdf' },
|
|
483
|
+
{
|
|
484
|
+
prompt_area: 'invoices',
|
|
485
|
+
prompt_key: 'extract_invoice_data',
|
|
486
|
+
save_to_hazo_files: true,
|
|
487
|
+
}
|
|
488
|
+
);
|
|
489
|
+
|
|
490
|
+
if (result.success) {
|
|
491
|
+
console.log('Extracted:', result.data);
|
|
492
|
+
}
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
**Note:** Requires the `hazo_llm_api` package to be installed.
|
|
496
|
+
|
|
497
|
+
### Use Text Snippet Extraction (Optional)
|
|
498
|
+
|
|
499
|
+
Extract a cropped image snippet from a PDF with matching text highlighted:
|
|
500
|
+
|
|
501
|
+
```typescript
|
|
502
|
+
import { extract_text_snippet } from 'hazo_pdf/server';
|
|
503
|
+
|
|
504
|
+
const result = await extract_text_snippet(
|
|
505
|
+
{ file_path: '/path/to/document.pdf' },
|
|
506
|
+
{
|
|
507
|
+
search_text: '$16,578.20',
|
|
508
|
+
snippet_size: 'half', // 'full' | 'half' | 'quarter'
|
|
509
|
+
match_mode: 'first', // 'first' | 'all'
|
|
510
|
+
}
|
|
511
|
+
);
|
|
512
|
+
|
|
513
|
+
if (result.success) {
|
|
514
|
+
const snippet = result.snippets[0];
|
|
515
|
+
// snippet.image_buffer - PNG Buffer
|
|
516
|
+
// snippet.image_base64 - Base64 PNG
|
|
517
|
+
}
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
**Note:** Requires `@napi-rs/canvas` (usually already installed via `pdfjs-dist`). Server-only.
|
|
521
|
+
|
|
522
|
+
### Use PdfViewerDialog for Modals (Optional)
|
|
523
|
+
|
|
524
|
+
Display PDFs in a modal dialog with built-in backdrop and escape key handling:
|
|
525
|
+
|
|
526
|
+
```tsx
|
|
527
|
+
import { useState } from 'react';
|
|
528
|
+
import { PdfViewerDialog } from 'hazo_pdf';
|
|
529
|
+
|
|
530
|
+
function App() {
|
|
531
|
+
const [isOpen, setIsOpen] = useState(false);
|
|
532
|
+
|
|
533
|
+
return (
|
|
534
|
+
<>
|
|
535
|
+
<button onClick={() => setIsOpen(true)}>Open PDF</button>
|
|
536
|
+
<PdfViewerDialog
|
|
537
|
+
open={isOpen}
|
|
538
|
+
on_open_change={setIsOpen}
|
|
539
|
+
url="/document.pdf"
|
|
540
|
+
dialog_width="90vw"
|
|
541
|
+
dialog_height="90vh"
|
|
542
|
+
// All PdfViewer props also work
|
|
543
|
+
on_load={(pdf) => console.log('Loaded', pdf.numPages)}
|
|
544
|
+
/>
|
|
545
|
+
</>
|
|
546
|
+
);
|
|
547
|
+
}
|
|
548
|
+
```
|
|
549
|
+
|
|
550
|
+
### Enable Auto-Highlighting (Optional)
|
|
551
|
+
|
|
552
|
+
Automatically highlight extracted field values in the PDF:
|
|
553
|
+
|
|
554
|
+
```tsx
|
|
555
|
+
import type { HighlightFieldInfo } from 'hazo_pdf';
|
|
556
|
+
|
|
557
|
+
// Highlighted fields (automatically searches and highlights in PDF)
|
|
558
|
+
const highlight_fields_info: HighlightFieldInfo[] = [
|
|
559
|
+
{ field_name: 'invoice_number', value: 'INV-2024-001', page_index: 0 },
|
|
560
|
+
{ field_name: 'total_amount', value: '$1,250.00', page_index: 0 },
|
|
561
|
+
{ field_name: 'customer_name', value: 'Acme Corp', page_index: 0 },
|
|
562
|
+
];
|
|
563
|
+
|
|
564
|
+
<PdfViewer
|
|
565
|
+
url="/invoice.pdf"
|
|
566
|
+
highlight_fields_info={highlight_fields_info}
|
|
567
|
+
show_file_info_button={true}
|
|
568
|
+
/>
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
**Features:**
|
|
572
|
+
- Automatically searches for field values in the PDF text layer
|
|
573
|
+
- Creates visual highlight boxes at found positions
|
|
574
|
+
- Displays field names and values in File Info sidepanel
|
|
575
|
+
- Smart text matching (exact first, then partial with normalization)
|
|
576
|
+
- Configurable colors, opacity, and search behavior
|
|
577
|
+
|
|
578
|
+
**Customize highlight appearance:**
|
|
579
|
+
|
|
580
|
+
```tsx
|
|
581
|
+
<PdfViewer
|
|
582
|
+
url="/invoice.pdf"
|
|
583
|
+
highlight_fields_info={highlight_fields_info}
|
|
584
|
+
auto_highlight_options={{
|
|
585
|
+
border_color: '#0066CC',
|
|
586
|
+
background_color: '#E6F2FF',
|
|
587
|
+
background_opacity: 0.4,
|
|
588
|
+
}}
|
|
589
|
+
/>
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
### Enable File Info Sidepanel (Optional)
|
|
593
|
+
|
|
594
|
+
Display extracted document data in a sidepanel:
|
|
595
|
+
|
|
596
|
+
```tsx
|
|
597
|
+
import type { HighlightFieldInfo } from 'hazo_pdf';
|
|
598
|
+
|
|
599
|
+
// Document-level data
|
|
600
|
+
const doc_data = {
|
|
601
|
+
invoice_number: 'INV-2024-001',
|
|
602
|
+
total_amount: 1250.00,
|
|
603
|
+
customer_name: 'Acme Corp',
|
|
604
|
+
};
|
|
605
|
+
|
|
606
|
+
// Highlighted fields (auto-highlighted and shown in sidepanel)
|
|
607
|
+
const highlight_fields_info: HighlightFieldInfo[] = [
|
|
608
|
+
{ field_name: 'invoice_number', value: 'INV-2024-001', page_index: 0 },
|
|
609
|
+
{ field_name: 'total_amount', value: '$1,250.00', page_index: 0 },
|
|
610
|
+
{ field_name: 'customer_name', value: 'Acme Corp', page_index: 0 },
|
|
611
|
+
];
|
|
612
|
+
|
|
613
|
+
<PdfViewer
|
|
614
|
+
url="/invoice.pdf"
|
|
615
|
+
doc_data={doc_data}
|
|
616
|
+
highlight_fields_info={highlight_fields_info}
|
|
617
|
+
show_file_info_button={true}
|
|
618
|
+
/>
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
**Alternative: Legacy File Metadata Format**
|
|
622
|
+
|
|
623
|
+
For file-based metadata with filename matching:
|
|
624
|
+
|
|
625
|
+
```tsx
|
|
626
|
+
import type { FileMetadataInput } from 'hazo_pdf';
|
|
627
|
+
|
|
628
|
+
const file_metadata: FileMetadataInput = [
|
|
629
|
+
{
|
|
630
|
+
filename: 'invoice.pdf',
|
|
631
|
+
file_data: {
|
|
632
|
+
invoice_number: 'INV-2024-001',
|
|
633
|
+
total_amount: '$1,250.00',
|
|
634
|
+
line_items: [
|
|
635
|
+
{ item: 'Widget A', quantity: '10', price: '$50.00' },
|
|
636
|
+
{ item: 'Widget B', quantity: '15', price: '$50.00' }
|
|
637
|
+
]
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
];
|
|
641
|
+
|
|
642
|
+
<PdfViewer
|
|
643
|
+
url="/invoice.pdf"
|
|
644
|
+
file_metadata={file_metadata}
|
|
645
|
+
show_file_info_button={true}
|
|
646
|
+
/>
|
|
647
|
+
```
|
|
648
|
+
|
|
649
|
+
**Alternative: hazo_files Extraction Data**
|
|
650
|
+
|
|
651
|
+
For displaying extraction history from hazo_files `file_data.raw_data`:
|
|
652
|
+
|
|
653
|
+
```tsx
|
|
654
|
+
import type { ExtractionEntry } from 'hazo_pdf';
|
|
655
|
+
|
|
656
|
+
const extractions: ExtractionEntry[] = [
|
|
657
|
+
{
|
|
658
|
+
id: 'ext_123',
|
|
659
|
+
extracted_at: '2026-03-30T10:00:00Z',
|
|
660
|
+
source: 'autofill',
|
|
661
|
+
data: {
|
|
662
|
+
document_date: '30 June 2024',
|
|
663
|
+
total_amount: '$29,696.60',
|
|
664
|
+
},
|
|
665
|
+
},
|
|
666
|
+
];
|
|
667
|
+
|
|
668
|
+
<PdfViewer
|
|
669
|
+
url="/invoice.pdf"
|
|
670
|
+
extractions={extractions}
|
|
671
|
+
show_file_info_button={true}
|
|
672
|
+
/>
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
## Next Steps
|
|
676
|
+
|
|
677
|
+
- [ ] Read the [README.md](./README.md) for detailed API documentation
|
|
678
|
+
- [ ] Explore the [Auto-Highlighting](./README.md#auto-highlighting) feature for automatic field highlighting
|
|
679
|
+
- [ ] Explore the [Programmatic Highlight API](./README.md#programmatic-highlight-api) section for manual highlight control
|
|
680
|
+
- [ ] Review configuration options in `config/hazo_pdf_config.ini`
|
|
681
|
+
- [ ] Check out example implementations in the README
|
|
682
|
+
- [ ] Test annotation features (Square, FreeText, Stamps)
|
|
683
|
+
- [ ] Implement save functionality with `on_save` callback
|
|
684
|
+
|
|
685
|
+
## Support
|
|
686
|
+
|
|
687
|
+
If you encounter issues not covered in this checklist:
|
|
688
|
+
|
|
689
|
+
1. Check the [README.md](./README.md) for detailed documentation
|
|
690
|
+
2. Review your configuration file (`config/hazo_pdf_config.ini`) and the sample shipped with the package (`config/hazo_pdf_config.ini.sample`) for available options
|
|
691
|
+
3. Examine the test app code in the `app/` directory for working examples
|
|
692
|
+
4. Check browser console for error messages
|
|
693
|
+
5. Verify all prerequisites are met
|