okrapdf 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.d.ts +9 -0
- package/dist/browser.js +16 -0
- package/dist/browser.js.map +1 -0
- package/dist/chunk-AG3A2T3B.js +84 -0
- package/dist/chunk-AG3A2T3B.js.map +1 -0
- package/dist/chunk-C6ZT7DKX.js +113 -0
- package/dist/chunk-C6ZT7DKX.js.map +1 -0
- package/dist/chunk-HITG34US.js +626 -0
- package/dist/chunk-HITG34US.js.map +1 -0
- package/dist/chunk-SBT5T6ZK.js +817 -0
- package/dist/chunk-SBT5T6ZK.js.map +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +212 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/index.d.ts +536 -0
- package/dist/cli/index.js +73 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/client-aHzx0a5x.d.ts +58 -0
- package/dist/index.d.ts +57 -0
- package/dist/index.js +26 -0
- package/dist/index.js.map +1 -0
- package/dist/react/index.d.ts +197 -0
- package/dist/react/index.js +432 -0
- package/dist/react/index.js.map +1 -0
- package/dist/types-DEYgGUnH.d.ts +329 -0
- package/dist/url.d.ts +48 -0
- package/dist/url.js +7 -0
- package/dist/url.js.map +1 -0
- package/dist/worker.d.ts +44 -0
- package/dist/worker.js +82 -0
- package/dist/worker.js.map +1 -0
- package/package.json +82 -0
|
@@ -0,0 +1,817 @@
|
|
|
1
|
+
// src/cli/commands/tree.ts
|
|
2
|
+
/**
 * Loads the verification tree for a job and works out which page numbers pass
 * the optional filters.
 *
 * @param {{request: function(string): Promise<any>}} client - API client; only `request(path)` is used.
 * @param {string} jobId - Identifier interpolated into the request paths.
 * @param {{status?: string, entity?: string}} [options] - `status` keeps pages whose `status`
 *   equals it; `entity` keeps pages that appear in the `/nodes?type=...` response.
 * @returns {Promise<{tree: object, filteredPages: Array}>} The raw tree response plus the
 *   surviving `page` values, in the order the tree lists them.
 */
async function tree(client, jobId, options = {}) {
  const treeData = await client.request(`/document/${jobId}/verification-tree`);
  const candidates = options.status
    ? treeData.pages.filter((p) => p.status === options.status)
    : treeData.pages;
  let filteredPages = candidates.map((p) => p.page);
  if (options.entity) {
    // Encode the entity type so characters such as "&", "#" or spaces cannot
    // corrupt the query string or smuggle in extra parameters.
    const entityType = encodeURIComponent(options.entity);
    const entitiesData = await client.request(`/document/${jobId}/nodes?type=${entityType}`);
    const pagesWithEntity = new Set(entitiesData.entities.map((e) => e.page));
    filteredPages = filteredPages.filter((p) => pagesWithEntity.has(p));
  }
  return { tree: treeData, filteredPages };
}
|
|
15
|
+
/**
 * Renders the result of `tree()` as JSON, a markdown report or plain text.
 *
 * @param {{tree: object, filteredPages: Array}} result - Tree response plus the page
 *   numbers that should be listed.
 * @param {string} [format="text"] - "json", "markdown", or anything else for plain text.
 * @returns {string} The formatted report.
 */
function formatTreeOutput(result, format = "text") {
  if (format === "json") {
    return JSON.stringify(result, null, 2);
  }
  const { tree: treeData, filteredPages } = result;
  // Build the membership set once; a per-page Array#includes scan made the
  // listing quadratic in the number of pages.
  const visiblePages = new Set(filteredPages);
  const lines = [];
  if (format === "markdown") {
    lines.push(`# Verification Tree: ${treeData.jobId}`);
    lines.push("");
    lines.push(`**Total Pages:** ${treeData.totalPages}`);
    lines.push("");
    lines.push("## Summary");
    lines.push("| Status | Count |");
    lines.push("|--------|-------|");
    lines.push(`| Complete | ${treeData.summary.complete} |`);
    lines.push(`| Partial | ${treeData.summary.partial} |`);
    lines.push(`| Pending | ${treeData.summary.pending} |`);
    lines.push(`| Flagged | ${treeData.summary.flagged} |`);
    lines.push(`| Empty | ${treeData.summary.empty} |`);
    lines.push(`| Gap | ${treeData.summary.gap} |`);
    lines.push("");
    lines.push("## Pages");
    lines.push("| Page | Status | Total | Verified | Pending | Flagged |");
    lines.push("|------|--------|-------|----------|---------|---------|");
    for (const page of treeData.pages) {
      if (!visiblePages.has(page.page)) continue;
      lines.push(
        `| ${page.page} | ${page.status} | ${page.total} | ${page.verified} | ${page.pending} | ${page.flagged} |`
      );
    }
    return lines.join("\n");
  }
  lines.push(`Verification Tree: ${treeData.jobId}`);
  lines.push(`Total Pages: ${treeData.totalPages}`);
  lines.push("");
  lines.push("Summary:");
  lines.push(` Complete: ${treeData.summary.complete}`);
  lines.push(` Partial: ${treeData.summary.partial}`);
  lines.push(` Pending: ${treeData.summary.pending}`);
  lines.push(` Flagged: ${treeData.summary.flagged}`);
  lines.push(` Empty: ${treeData.summary.empty}`);
  lines.push(` Gap: ${treeData.summary.gap}`);
  lines.push("");
  lines.push("Pages:");
  for (const page of treeData.pages) {
    if (!visiblePages.has(page.page)) continue;
    const statusIcon = getStatusIcon(page.status);
    const counts = `[${page.verified}/${page.total}]`;
    const flags = page.flagged > 0 ? ` (${page.flagged} flagged)` : "";
    const gaps = page.hasCoverageGaps ? " [GAP]" : "";
    lines.push(` ${statusIcon} p${page.page.toString().padStart(3)} ${counts}${flags}${gaps}`);
  }
  return lines.join("\n");
}
|
|
71
|
+
/**
 * Maps a page status string to the single-character icon used in the text
 * tree listing.
 *
 * @param {string} status - Page status as found on a tree page entry.
 * @returns {string} The icon, or "?" for any status that is not listed.
 */
function getStatusIcon(status) {
  const iconByStatus = new Map([
    ["complete", "\u2713"],
    ["partial", "\u25D0"],
    ["pending", "\u25CB"],
    ["flagged", "\u2691"],
    ["empty", "\xB7"],
    ["gap", "!"],
    ["error", "\u2717"],
  ]);
  return iconByStatus.has(status) ? iconByStatus.get(status) : "?";
}
|
|
91
|
+
|
|
92
|
+
// src/cli/query-engine.ts
|
|
93
|
+
/**
 * Parses a CSS-like entity selector into its filter parts.
 *
 * Recognised syntax: `.type` (repeatable), `#id`, `:page(n)` / `:pages(a-b)`,
 * `[confidence<op>number]`, `[verified=...]` / `[status=...]`, `:contains(text)`
 * and `*` (all known types when no `.type` is given). A comma-separated list
 * without any `[` only merges the `.type` parts of its segments.
 *
 * The text inside `:contains(...)` is free text: it is removed before the
 * structural parts are matched, so dots, hashes, commas or `*` in the search
 * text are never mistaken for selector syntax.
 *
 * @param {string} selector - Selector string.
 * @returns {{types: string[], id?: string, pageFilter?: object, confidenceFilter?: object,
 *   verificationFilter?: string, textContains?: string}} Parsed parts; only the
 *   filters present in the selector are set.
 */
function parseSelector(selector) {
  const parts = { types: [] };
  const containsMatch = selector.match(/:contains\(([^)]+)\)/);
  // Replace the :contains(...) clause with a space (not ""), so the tokens on
  // either side of it cannot fuse into a new one.
  const structural = containsMatch ? selector.replace(containsMatch[0], " ") : selector;
  if (structural.includes(",") && !structural.includes("[")) {
    for (const segment of structural.split(",")) {
      parts.types.push(...parseSelector(segment.trim()).types);
    }
    return parts;
  }
  const typeMatches = structural.match(/\.([a-zA-Z][a-zA-Z0-9_]*)/g);
  if (typeMatches) {
    parts.types = typeMatches.map((m) => m.slice(1));
  }
  const idMatch = structural.match(/#([\w-]+)/);
  if (idMatch) {
    parts.id = idMatch[1];
  }
  const pageMatch = structural.match(/:pages?\((\d+)(?:-(\d+))?\)/);
  if (pageMatch) {
    const first = Number.parseInt(pageMatch[1], 10);
    parts.pageFilter = pageMatch[2]
      ? { type: "range", value: [first, Number.parseInt(pageMatch[2], 10)] }
      : { type: "single", value: first };
  }
  const confMatch = structural.match(/\[confidence(>=?|<=?|>|<)(\d+\.?\d*)\]/);
  if (confMatch) {
    parts.confidenceFilter = {
      op: confMatch[1],
      value: Number.parseFloat(confMatch[2]),
    };
  }
  const verifyMatch = structural.match(/\[(?:verified|status)=(\w+)\]/);
  if (verifyMatch) {
    const val = verifyMatch[1].toLowerCase();
    if (val === "true") {
      parts.verificationFilter = "verified";
    } else if (["pending", "verified", "flagged", "rejected"].includes(val)) {
      parts.verificationFilter = val;
    }
  }
  if (containsMatch) {
    parts.textContains = containsMatch[1];
  }
  if (parts.types.length === 0 && structural.includes("*")) {
    parts.types = ["table", "figure", "footnote", "summary", "signature", "paragraph"];
  }
  return parts;
}
|
|
150
|
+
/**
 * Keeps the entities that satisfy every filter present in `parts`.
 *
 * Behaviour worth knowing when reading results:
 * - an empty `parts.types` means "no type restriction";
 * - the confidence filter is skipped for entities whose `confidence` is `undefined`;
 * - the text filter is skipped for entities without a truthy `title`;
 * - any page filter whose `type` is not "single" is treated as an inclusive range.
 *
 * @param {Array<object>} entities - Entities to filter.
 * @param {object} parts - Parsed selector parts (see `parseSelector`).
 * @returns {Array<object>} New array with the matching entities, order preserved.
 */
function filterEntities(entities, parts) {
  // Unknown operators have no comparator and therefore filter nothing out.
  const comparators = new Map([
    [">", (actual, bound) => actual > bound],
    [">=", (actual, bound) => actual >= bound],
    ["<", (actual, bound) => actual < bound],
    ["<=", (actual, bound) => actual <= bound],
  ]);

  const typeOk = (entity) => parts.types.length === 0 || parts.types.includes(entity.type);

  const idOk = (entity) => !parts.id || entity.id === parts.id;

  const pageOk = (entity) => {
    const pageFilter = parts.pageFilter;
    if (!pageFilter) return true;
    if (pageFilter.type === "single") return entity.page === pageFilter.value;
    const [first, last] = pageFilter.value;
    // Written as a negation on purpose: a non-comparable page value passes.
    return !(entity.page < first || entity.page > last);
  };

  const confidenceOk = (entity) => {
    if (!parts.confidenceFilter || entity.confidence === void 0) return true;
    const { op, value } = parts.confidenceFilter;
    const satisfies = comparators.get(op);
    return satisfies === void 0 || satisfies(entity.confidence, value);
  };

  const verificationOk = (entity) =>
    !parts.verificationFilter || entity.verificationStatus === parts.verificationFilter;

  const textOk = (entity) => {
    if (!parts.textContains || !entity.title) return true;
    return entity.title.toLowerCase().includes(parts.textContains.toLowerCase());
  };

  const checks = [typeOk, idOk, pageOk, confidenceOk, verificationOk, textOk];
  return entities.filter((entity) => checks.every((check) => check(entity)));
}
|
|
194
|
+
/**
 * Runs a selector query over a list of entities.
 *
 * `options.minConfidence` and `options.pageRange`, when given, replace the
 * corresponding filters parsed from the selector. Stats are computed on the
 * filtered (and sorted) list before `topK` truncation, while `total` reflects
 * the number of entities actually returned.
 *
 * @param {Array<object>} entities - Entities to query.
 * @param {string} selector - Selector understood by `parseSelector`.
 * @param {{minConfidence?: number, pageRange?: Array, sortBy?: string, topK?: number}} [options]
 * @returns {{entities: Array<object>, total: number, stats: object}}
 */
function executeQuery(entities, selector, options = {}) {
  const { minConfidence, pageRange, sortBy, topK } = options;
  const parts = parseSelector(selector);
  if (minConfidence !== void 0) {
    parts.confidenceFilter = { op: ">=", value: minConfidence };
  }
  if (pageRange) {
    parts.pageFilter = { type: "range", value: pageRange };
  }

  let matched = filterEntities(entities, parts);

  if (sortBy) {
    const orderings = new Map([
      ["confidence", (a, b) => (b.confidence ?? 0) - (a.confidence ?? 0)],
      ["page", (a, b) => a.page - b.page],
      ["type", (a, b) => a.type.localeCompare(b.type)],
    ]);
    // An unrecognised sort key keeps the existing order (comparator always 0).
    const compare = orderings.has(sortBy) ? orderings.get(sortBy) : () => 0;
    matched = [...matched].sort(compare);
  }

  const stats = calculateStats(matched);
  const limited = topK > 0 ? matched.slice(0, topK) : matched;
  return { entities: limited, total: limited.length, stats };
}
|
|
227
|
+
/**
 * Aggregates counts per type and per page plus confidence statistics.
 *
 * Only finite numeric `confidence` values take part in the average/min/max.
 * `null`, `undefined` and other non-numeric values are ignored, so a `null`
 * confidence can no longer be counted as 0 or leak out as `minConfidence`.
 *
 * @param {Array<{type: string, page: (number|string), confidence?: (number|null)}>} entities
 * @returns {{total: number, byType: Object<string, number>, byPage: Object<string, number>,
 *   avgConfidence: number, minConfidence: number, maxConfidence: number}} Confidence
 *   figures are 0 when no entity carries a usable confidence.
 */
function calculateStats(entities) {
  const byType = {};
  const byPage = {};
  let totalConfidence = 0;
  let minConfidence = Infinity;
  let maxConfidence = -Infinity;
  let confidenceCount = 0;
  for (const { type, page, confidence } of entities) {
    byType[type] = (byType[type] || 0) + 1;
    byPage[page] = (byPage[page] || 0) + 1;
    if (!Number.isFinite(confidence)) continue;
    totalConfidence += confidence;
    confidenceCount++;
    minConfidence = Math.min(minConfidence, confidence);
    maxConfidence = Math.max(maxConfidence, confidence);
  }
  const hasConfidence = confidenceCount > 0;
  return {
    total: entities.length,
    byType,
    byPage,
    avgConfidence: hasConfidence ? totalConfidence / confidenceCount : 0,
    minConfidence: hasConfidence ? minConfidence : 0,
    maxConfidence: hasConfidence ? maxConfidence : 0,
  };
}
|
|
253
|
+
|
|
254
|
+
// src/cli/commands/find.ts
|
|
255
|
+
/**
 * Fetches all nodes of a job and runs a selector query over them.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {string} selector - Selector passed on to `executeQuery`.
 * @param {object} [options] - Query options passed on to `executeQuery`.
 * @returns {Promise<object>} The `executeQuery` result.
 */
async function find(client, jobId, selector, options = {}) {
  const nodesResponse = await client.request(`/document/${jobId}/nodes`);
  const allEntities = nodesResponse.entities;
  return executeQuery(allEntities, selector, options);
}
|
|
259
|
+
/**
 * Renders a query result.
 *
 * Formats: "json" (entities only, or the whole result when `showStats`),
 * "ids" (one id per line), "entities" (type/page/id/title per line), anything
 * else is a human-readable listing.
 *
 * Missing confidences (`null` or `undefined`) are simply omitted instead of
 * being shown as "(0%)", and missing confidence stats are printed as 0.00
 * instead of throwing.
 *
 * @param {{entities: Array<object>, total: number, stats: object}} result - Query result.
 * @param {string} [format="text"] - Output format.
 * @param {boolean} [showStats=false] - Include the stats section.
 * @returns {string} Formatted output.
 */
function formatFindOutput(result, format = "text", showStats = false) {
  if (format === "json") {
    return JSON.stringify(showStats ? result : result.entities, null, 2);
  }
  if (format === "ids") {
    return result.entities.map((e) => e.id).join("\n");
  }
  if (format === "entities") {
    return result.entities.map((e) => `${e.type} ${e.page} ${e.id} ${e.title || ""}`).join("\n");
  }
  const lines = [`Found ${result.total} entities`, ""];
  if (showStats) {
    const { byType, byPage, avgConfidence, minConfidence, maxConfidence } = result.stats;
    const twoDecimals = (value) => (value ?? 0).toFixed(2);
    lines.push("Stats:");
    lines.push(` By Type:`);
    for (const [type, count] of Object.entries(byType)) {
      lines.push(` ${type}: ${count}`);
    }
    lines.push(` Confidence: avg=${twoDecimals(avgConfidence)}, min=${twoDecimals(minConfidence)}, max=${twoDecimals(maxConfidence)}`);
    lines.push(` Pages: ${Object.keys(byPage).length}`);
    lines.push("");
  }
  lines.push("Entities:");
  for (const entity of result.entities) {
    // `!= null` covers both null and undefined.
    const conf = entity.confidence != null ? ` (${(entity.confidence * 100).toFixed(0)}%)` : "";
    const title = entity.title ? ` "${entity.title.slice(0, 40)}${entity.title.length > 40 ? "..." : ""}"` : "";
    lines.push(` [p${entity.page}] ${entity.type}${title}${conf}`);
  }
  return lines.join("\n");
}
|
|
290
|
+
/**
 * Renders query statistics as a plain-text report: totals, a per-type
 * breakdown with percentages, a bar chart for at most the first 20 pages
 * (numerically sorted) and the confidence summary.
 *
 * @param {{total: number, byType: Object<string, number>, byPage: Object<string, number>,
 *   avgConfidence: number, minConfidence: number, maxConfidence: number}} stats
 * @returns {string} The report.
 */
function formatStats(stats) {
  const MAX_PAGES_SHOWN = 20;
  const MAX_BAR_WIDTH = 40;
  const lines = [`Total: ${stats.total}`, "", "By Type:"];
  for (const [type, count] of Object.entries(stats.byType)) {
    const pct = (count / stats.total * 100).toFixed(1);
    lines.push(` ${type.padEnd(12)} ${count.toString().padStart(4)} (${pct}%)`);
  }
  lines.push("");
  lines.push("By Page:");
  const pageEntries = Object.entries(stats.byPage);
  // Explicit radix: page keys are decimal, never let a "0x.." key parse as hex.
  const byPageNumber = (a, b) => Number.parseInt(a[0], 10) - Number.parseInt(b[0], 10);
  for (const [page, count] of pageEntries.sort(byPageNumber).slice(0, MAX_PAGES_SHOWN)) {
    lines.push(` p${page.padStart(3)}: ${"\u2588".repeat(Math.min(count, MAX_BAR_WIDTH))} ${count}`);
  }
  if (pageEntries.length > MAX_PAGES_SHOWN) {
    lines.push(` ... and ${pageEntries.length - MAX_PAGES_SHOWN} more pages`);
  }
  lines.push("");
  lines.push("Confidence:");
  lines.push(` Average: ${(stats.avgConfidence * 100).toFixed(1)}%`);
  lines.push(` Min: ${(stats.minConfidence * 100).toFixed(1)}%`);
  lines.push(` Max: ${(stats.maxConfidence * 100).toFixed(1)}%`);
  return lines.join("\n");
}
|
|
315
|
+
|
|
316
|
+
// src/cli/commands/page.ts
|
|
317
|
+
/**
 * Fetches a page, or one specific version of it when `options.version` is truthy.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {number|string} pageNum - Page number interpolated into the request path.
 * @param {{version?: (number|string)}} [options]
 * @returns {Promise<any>} Whatever `client.request` resolves to.
 */
async function pageGet(client, jobId, pageNum, options = {}) {
  const pagePath = `/document/${jobId}/pages/${pageNum}`;
  const endpoint = options.version ? `${pagePath}/versions/${options.version}` : pagePath;
  return client.request(endpoint);
}
|
|
323
|
+
/**
 * Posts new content for a page.
 *
 * @param {{request: function(string, object): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {number|string} pageNum - Page number interpolated into the request path.
 * @param {*} content - Sent as `{ content }` in the JSON body.
 * @returns {Promise<{success: *, version: *}>} Only the `success` and `version`
 *   fields of the response.
 */
async function pageEdit(client, jobId, pageNum, content) {
  const requestInit = {
    method: "POST",
    body: JSON.stringify({ content }),
    headers: { "Content-Type": "application/json" },
  };
  const { success, version } = await client.request(`/document/${jobId}/pages/${pageNum}`, requestInit);
  return { success, version };
}
|
|
334
|
+
/**
 * Posts a resolution payload for a page.
 *
 * @param {{request: function(string, object): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {number|string} pageNum - Page number interpolated into the request path.
 * @param {*} options - Serialised as-is into the JSON request body.
 * @returns {Promise<any>} Whatever `client.request` resolves to.
 */
async function pageResolve(client, jobId, pageNum, options) {
  const endpoint = `/document/${jobId}/pages/${pageNum}/resolve`;
  const requestInit = {
    method: "POST",
    body: JSON.stringify(options),
    headers: { "Content-Type": "application/json" },
  };
  return client.request(endpoint, requestInit);
}
|
|
344
|
+
/**
 * Fetches the version list of a page.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {number|string} pageNum - Page number interpolated into the request path.
 * @returns {Promise<any>} Whatever `client.request` resolves to.
 */
async function pageVersions(client, jobId, pageNum) {
  const endpoint = `/document/${jobId}/pages/${pageNum}/versions`;
  return client.request(endpoint);
}
|
|
347
|
+
/**
 * Renders a page response.
 *
 * @param {{page: *, version?: *, content: string}} content - Page response.
 * @param {string} [format="markdown"] - "json" dumps the whole object, "markdown"
 *   returns the raw `content` field, anything else adds a small text header.
 * @returns {string} Formatted output.
 */
function formatPageOutput(content, format = "markdown") {
  switch (format) {
    case "json":
      return JSON.stringify(content, null, 2);
    case "markdown":
      return content.content;
    default: {
      const header = [`Page ${content.page}`];
      if (content.version) {
        header.push(`Version: ${content.version}`);
      }
      header.push(`Length: ${content.content.length} chars`);
      return [...header, "", "---", content.content].join("\n");
    }
  }
}
|
|
365
|
+
/**
 * Renders the version history of a page.
 *
 * @param {{page: *, currentVersion: *, versions: Array<object>}} versions - Versions response.
 * @param {string} [format="text"] - "json" dumps the object; anything else is a text listing
 *   in which the current version is marked with " *".
 * @returns {string} Formatted output.
 */
function formatVersionsOutput(versions, format = "text") {
  if (format === "json") {
    return JSON.stringify(versions, null, 2);
  }
  const { page, currentVersion, versions: entries } = versions;
  const body = entries.flatMap((entry) => {
    const marker = entry.version === currentVersion ? " *" : "";
    // Dates are rendered in the runtime's locale; missing dates show as "unknown".
    const when = entry.createdAt ? new Date(entry.createdAt).toLocaleString() : "unknown";
    const rows = [` v${entry.version}${marker} [${entry.editSource}] ${when}`];
    if (entry.preview) {
      rows.push(` "${entry.preview.slice(0, 60)}${entry.preview.length > 60 ? "..." : ""}"`);
    }
    return rows;
  });
  return [`Page ${page} - ${entries.length} versions`, `Current: v${currentVersion}`, "", ...body].join("\n");
}
|
|
383
|
+
|
|
384
|
+
// src/cli/commands/search.ts
|
|
385
|
+
/**
 * Runs a full-text search within a job.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {string} query - Search text; URI-encoded before it is placed in `?q=`.
 * @returns {Promise<any>} Whatever `client.request` resolves to.
 */
async function search(client, jobId, query) {
  const encodedQuery = encodeURIComponent(query);
  return client.request(`/document/${jobId}/search?q=${encodedQuery}`);
}
|
|
388
|
+
/**
 * Renders search results.
 *
 * @param {{query: string, totalMatches: number, results: Array<object>}} result - Search response.
 * @param {string} [format="text"] - "json" dumps the object; anything else is a text listing
 *   with one line per page and an optional snippet (cut at 80 characters).
 * @returns {string} Formatted output.
 */
function formatSearchOutput(result, format = "text") {
  if (format === "json") {
    return JSON.stringify(result, null, 2);
  }
  const { query, totalMatches, results } = result;
  const hitLines = results.flatMap((hit) => {
    const source = hit.matchSource ? ` [${hit.matchSource}]` : "";
    const rows = [`p${hit.page.toString().padStart(3)} (${hit.matchCount} matches)${source}`];
    if (hit.snippet) {
      rows.push(` "${hit.snippet.slice(0, 80)}${hit.snippet.length > 80 ? "..." : ""}"`);
    }
    return rows;
  });
  return [`Search: "${query}"`, `Found ${totalMatches} matches in ${results.length} pages`, "", ...hitLines].join("\n");
}
|
|
405
|
+
|
|
406
|
+
// src/cli/commands/tables.ts
|
|
407
|
+
/**
 * Fetches the tables of a job, optionally restricted to one page by the server
 * (`options.page`) and to one verification status on the client (`options.status`).
 *
 * Note: the status filter reassigns `tables` on the response object itself and
 * returns that same object.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {{page?: (number|string), status?: string}} [options]
 * @returns {Promise<{tables: Array<object>}>} The (possibly filtered) response.
 */
async function tables(client, jobId, options = {}) {
  const basePath = `/document/${jobId}/tables`;
  const response = await client.request(options.page ? `${basePath}?page=${options.page}` : basePath);
  if (!options.status) {
    return response;
  }
  response.tables = response.tables.filter((table) => table.verificationStatus === options.status);
  return response;
}
|
|
415
|
+
/**
 * Renders a tables response.
 *
 * Formats: "json" dumps the object; "markdown" emits each table's markdown under
 * a "## Table (pN)" heading, separated by horizontal rules; anything else is a
 * text listing grouped by page (ascending), with a status icon, the table id,
 * an optional confidence percentage and the first markdown line as preview.
 *
 * The confidence suffix is omitted when `confidence` is `null` or `undefined`,
 * so a missing field no longer prints "(NaN%)".
 *
 * @param {{tables: Array<object>}} result - Tables response.
 * @param {string} [format="text"] - Output format.
 * @returns {string} Formatted output.
 */
function formatTablesOutput(result, format = "text") {
  if (format === "json") {
    return JSON.stringify(result, null, 2);
  }
  if (format === "markdown") {
    return result.tables
      .map((t) => `## Table (p${t.pageNumber})\n\n${t.markdown}`)
      .join("\n\n---\n\n");
  }
  const lines = [`Tables: ${result.tables.length}`, ""];
  const byPage = new Map();
  for (const t of result.tables) {
    const pageGroup = byPage.get(t.pageNumber) || [];
    pageGroup.push(t);
    byPage.set(t.pageNumber, pageGroup);
  }
  const pagesAscending = [...byPage.entries()].sort((a, b) => a[0] - b[0]);
  for (const [page, pageTables] of pagesAscending) {
    lines.push(`Page ${page}:`);
    for (const t of pageTables) {
      const status = getStatusIcon2(t.verificationStatus);
      const conf = t.confidence != null ? ` (${(t.confidence * 100).toFixed(0)}%)` : "";
      const preview = t.markdown.split("\n")[0].slice(0, 50);
      lines.push(` ${status} ${t.id}${conf}`);
      // The ellipsis reflects the length of the whole markdown, not only of the first line.
      lines.push(` ${preview}${t.markdown.length > 50 ? "..." : ""}`);
    }
  }
  return lines.join("\n");
}

/**
 * Maps a table verification status to its one-character icon.
 *
 * @param {string} status - Verification status of a table.
 * @returns {string} The icon, or "?" for any status that is not listed.
 */
function getStatusIcon2(status) {
  const iconByStatus = new Map([
    ["verified", "\u2713"],
    ["pending", "\u25CB"],
    ["flagged", "\u2691"],
    ["rejected", "\u2717"],
  ]);
  return iconByStatus.has(status) ? iconByStatus.get(status) : "?";
}
|
|
461
|
+
|
|
462
|
+
// src/cli/commands/history.ts
|
|
463
|
+
/**
 * Fetches the history entries of a job.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {{limit?: number}} [options] - `limit` falls back to 50 when falsy (including 0).
 * @returns {Promise<any>} Whatever `client.request` resolves to.
 */
async function history(client, jobId, options = {}) {
  const DEFAULT_LIMIT = 50;
  const effectiveLimit = options.limit ? options.limit : DEFAULT_LIMIT;
  return client.request(`/document/${jobId}/history?limit=${effectiveLimit}`);
}
|
|
467
|
+
/**
 * Renders history entries.
 *
 * Each entry prints a "[date] entityType pN" header (the page part is omitted
 * when `pageNum` is `null` or `undefined`), a transition line, optional reason
 * and resolution lines, then a blank line. Dates use the runtime's locale.
 *
 * @param {{history: Array<object>}} result - History response.
 * @param {string} [format="text"] - "json" dumps the object; anything else is text.
 * @returns {string} Formatted output.
 */
function formatHistoryOutput(result, format = "text") {
  if (format === "json") {
    return JSON.stringify(result, null, 2);
  }
  const lines = [`History: ${result.history.length} entries`, ""];
  for (const entry of result.history) {
    const date = new Date(entry.createdAt).toLocaleString();
    // `!= null` so a missing pageNum does not print " pundefined".
    const page = entry.pageNum != null ? ` p${entry.pageNum}` : "";
    const transition = entry.transitionName || `${entry.previousState || "?"} -> ${entry.state}`;
    const by = entry.triggeredByName || entry.triggeredBy || "system";
    lines.push(`[${date}] ${entry.entityType}${page}`);
    lines.push(` ${transition} by ${by}`);
    if (entry.reason) {
      lines.push(` Reason: ${entry.reason}`);
    }
    if (entry.resolution) {
      lines.push(` Resolution: ${entry.resolution}`);
    }
    lines.push("");
  }
  return lines.join("\n");
}
|
|
491
|
+
|
|
492
|
+
// src/cli/commands/toc.ts
|
|
493
|
+
import WebSocket from "ws";
|
|
494
|
+
/**
 * Fetches the table of contents of a job and, when `options.watch` is set and
 * the response carries a `_replay` descriptor, streams the live events of that
 * session before returning.
 *
 * @param {{request: function(string): Promise<any>}} client - API client.
 * @param {string} jobId - Identifier interpolated into the request path.
 * @param {{maxDepth?: (number|string), watch?: boolean}} [options] - `maxDepth` is only
 *   sent when truthy.
 * @returns {Promise<any>} The TOC response.
 */
async function toc(client, jobId, options = {}) {
  const { maxDepth, watch } = options;
  const query = maxDepth ? `?maxDepth=${maxDepth}` : "";
  const result = await client.request(`/document/${jobId}/toc${query}`);
  const replay = watch ? result._replay : void 0;
  if (replay) {
    console.log(`\n\u{1F4FC} Watching live events for session ${replay.sessionId}...\n`);
    await watchLiveEvents(replay.replayUrl);
  }
  return result;
}
|
|
505
|
+
/**
 * Connects to a replay WebSocket, joins the session and prints every TOC event
 * until the session finishes, the socket closes, or the overall timeout fires.
 *
 * Resolves when watching ends (including on timeout); rejects on a socket error.
 *
 * Changes from the previous implementation:
 * - "TOC_RESPONSE_READY" is now recognised as the terminal event. It used to sit
 *   behind the generic `startsWith("TOC_")` branch and was therefore unreachable,
 *   so watching always lasted until the server closed the socket or 10 s passed.
 * - Pending timers are cleared once the socket closes or errors, so they no
 *   longer keep the process alive after the promise has settled.
 *
 * @param {string} wsUrl - WebSocket URL of the replay session.
 * @returns {Promise<void>}
 */
async function watchLiveEvents(wsUrl) {
  const OVERALL_TIMEOUT_MS = 1e4;
  // Short grace period after the terminal event so trailing events still print.
  const CLOSE_GRACE_MS = 500;
  return new Promise((resolve, reject) => {
    const ws = new WebSocket(wsUrl);
    let hasSeenEvents = false;
    let graceTimer;
    const closeAndResolve = () => {
      ws.close();
      resolve();
    };
    const overallTimer = setTimeout(closeAndResolve, OVERALL_TIMEOUT_MS);
    const clearTimers = () => {
      clearTimeout(overallTimer);
      clearTimeout(graceTimer);
    };
    ws.on("open", () => {
      ws.send(JSON.stringify({ type: "JOIN_SESSION" }));
    });
    ws.on("message", (data) => {
      try {
        const event = JSON.parse(data.toString());
        if (event.type === "EVENTS_BATCH") {
          for (const evt of event.events) {
            printEvent(evt);
            hasSeenEvents = true;
          }
        } else if (event.type && event.type.startsWith("TOC_")) {
          printEvent(event);
          hasSeenEvents = true;
          if (event.type === "TOC_RESPONSE_READY" && graceTimer === void 0) {
            graceTimer = setTimeout(closeAndResolve, CLOSE_GRACE_MS);
          }
        }
      } catch (err) {
        console.error("Failed to parse event:", err);
      }
    });
    ws.on("error", (error) => {
      clearTimers();
      console.error("WebSocket error:", error.message);
      reject(error);
    });
    ws.on("close", () => {
      clearTimers();
      if (!hasSeenEvents) {
        console.log("No events received (session may have completed).");
      }
      resolve();
    });
  });
}
|
|
551
|
+
/**
 * Prints one live event to stdout: a "[HH:MM:SS.mmm] event name (duration)"
 * line, followed by a summary of `event.data` when it has any keys and the
 * summary is non-empty.
 *
 * The event name is `event.type` with the first "TOC_" removed, underscores
 * turned into spaces, lower-cased.
 *
 * @param {{type: string, eventTimestamp?: *, cost?: {duration_ms?: number}, data?: object}} event
 * @returns {void}
 */
function printEvent(event) {
  const { eventTimestamp, cost, data } = event;
  let timestamp = "";
  if (eventTimestamp) {
    // Characters 11..22 of the ISO string are the time of day with milliseconds.
    timestamp = new Date(eventTimestamp).toISOString().slice(11, 23);
  }
  const duration = cost?.duration_ms ? ` (${cost.duration_ms}ms)` : "";
  const eventType = event.type.replace("TOC_", "").replace(/_/g, " ").toLowerCase();
  console.log(`[${timestamp}] ${eventType}${duration}`);
  if (!data || Object.keys(data).length === 0) {
    return;
  }
  const dataStr = formatEventData(data);
  if (dataStr) {
    console.log(` ${dataStr}`);
  }
}
|
|
563
|
+
/**
 * Builds a one-line, comma-separated summary of the known fields of an event
 * payload. Most fields are included only when truthy; `totalEntries` and
 * `exitCode` are included whenever they are not `undefined` (so 0 is shown).
 *
 * @param {object} data - Event payload.
 * @returns {string} The summary, or "" when no known field is present.
 */
function formatEventData(data) {
  const fragments = [
    data.fileName ? `file: ${data.fileName}` : null,
    data.gcsPath ? `path: ${data.gcsPath}` : null,
    data.sizeBytes ? `size: ${(data.sizeBytes / 1024 / 1024).toFixed(2)}MB` : null,
    data.sandboxId ? `sandbox: ${data.sandboxId.slice(0, 12)}...` : null,
    data.template ? `template: ${data.template}` : null,
    data.strategy ? `strategy: ${data.strategy}` : null,
    data.totalEntries !== void 0 ? `entries: ${data.totalEntries}` : null,
    data.exitCode !== void 0 ? `exit: ${data.exitCode}` : null,
    data.totalElapsedMs ? `total: ${data.totalElapsedMs}ms` : null,
  ];
  return fragments.filter((fragment) => fragment !== null).join(", ");
}
|
|
576
|
+
/**
 * Renders a table-of-contents response.
 *
 * Formats: "json" dumps the object; "markdown" emits a heading per entry
 * (`level + 1` hashes); anything else is a text listing with one indent step
 * per level below the first and dot leaders up to the page number.
 *
 * Entry levels are clamped, so a level of 0 or a negative level no longer makes
 * `String.prototype.repeat` throw a RangeError.
 *
 * @param {{file_name: string, strategy: string, total_entries: number, total_pages: number,
 *   elapsed_ms?: number, total_elapsed_ms?: number, toc: Array<{level: number, title: string, page: *}>}} result
 * @param {string} [format="text"] - Output format.
 * @returns {string} Formatted output.
 */
function formatTocOutput(result, format = "text") {
  if (format === "json") {
    return JSON.stringify(result, null, 2);
  }
  if (format === "markdown") {
    const lines2 = [];
    lines2.push(`# Table of Contents\n`);
    lines2.push(`_${result.file_name}_\n`);
    lines2.push(`Strategy: ${result.strategy} | Pages: ${result.total_pages} | Entries: ${result.total_entries}\n`);
    for (const entry of result.toc) {
      // Always emit at least one "#", even for out-of-range levels.
      const hashes = "#".repeat(Math.max(1, entry.level + 1));
      lines2.push(`${hashes} ${entry.title} (p. ${entry.page})`);
    }
    return lines2.join("\n");
  }
  const lines = [];
  lines.push(`File: ${result.file_name}`);
  lines.push(`Strategy: ${result.strategy}`);
  lines.push(`Entries: ${result.total_entries}`);
  lines.push(`Pages: ${result.total_pages}`);
  lines.push(`Elapsed: ${result.elapsed_ms}ms (total: ${result.total_elapsed_ms}ms)`);
  lines.push("");
  if (result.total_entries === 0) {
    lines.push("No table of contents found.");
    if (result.strategy === "none") {
      lines.push("This PDF may not have bookmarks or a printed TOC page.");
    }
  } else {
    lines.push("Table of Contents:");
    lines.push("");
    for (const entry of result.toc) {
      // Levels below 1 get no indent instead of a negative repeat count.
      const indent = " ".repeat(Math.max(0, entry.level - 1));
      const dots = ".".repeat(Math.max(1, 60 - indent.length - entry.title.length));
      lines.push(`${indent}${entry.title} ${dots} ${entry.page}`);
    }
  }
  return lines.join("\n");
}
|
|
617
|
+
|
|
618
|
+
// src/cli/config.ts
|
|
619
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
620
|
+
import { join } from "path";
|
|
621
|
+
import { homedir } from "os";
|
|
622
|
+
/**
 * Resolves the directory holding the global CLI configuration:
 * `$XDG_CONFIG_HOME/okra` when that variable is set and non-empty, otherwise
 * `~/.okra`.
 *
 * @returns {string} Directory path (not guaranteed to exist).
 */
function getGlobalConfigDir() {
  const { XDG_CONFIG_HOME: xdgRoot } = process.env;
  return xdgRoot ? join(xdgRoot, "okra") : join(homedir(), ".okra");
}
|
|
629
|
+
/**
 * Path of the global configuration file, `config.json` inside the global
 * configuration directory.
 *
 * @returns {string} File path (not guaranteed to exist).
 */
function getGlobalConfigPath() {
  const configDir = getGlobalConfigDir();
  return join(configDir, "config.json");
}
|
|
632
|
+
/**
 * Reads and parses the global configuration file.
 *
 * Best effort: a missing file, an unreadable file and malformed JSON all yield
 * `null` rather than an exception.
 *
 * @returns {*} The parsed JSON value, or `null` when it could not be read.
 */
function readGlobalConfig() {
  let parsed = null;
  try {
    const configPath = getGlobalConfigPath();
    if (existsSync(configPath)) {
      parsed = JSON.parse(readFileSync(configPath, "utf-8"));
    }
  } catch (error) {
    parsed = null;
  }
  return parsed;
}
|
|
644
|
+
/**
 * Writes the global configuration as pretty-printed JSON, creating the
 * configuration directory when needed.
 *
 * The configuration can contain the API key, so new directories are created
 * owner-only (0700) and a new file owner read/write only (0600) instead of
 * using the process defaults. The file mode only takes effect when the file is
 * created; the permissions of an already existing file are left untouched.
 *
 * @param {object} config - Configuration object to persist.
 * @returns {void}
 * @throws {Error} Any filesystem error from `mkdirSync` / `writeFileSync`.
 */
function writeGlobalConfig(config) {
  const configDir = getGlobalConfigDir();
  const configPath = getGlobalConfigPath();
  if (!existsSync(configDir)) {
    mkdirSync(configDir, { recursive: true, mode: 0o700 });
  }
  writeFileSync(configPath, JSON.stringify(config, null, 2), { encoding: "utf-8", mode: 0o600 });
}
|
|
652
|
+
/**
 * Reads the project-level configuration from the current working directory.
 *
 * Candidates are tried in order: `.okrarc`, then `.okra.json`. The first one
 * that exists and parses as JSON wins; a candidate that cannot be read or
 * parsed is skipped silently.
 *
 * @returns {*} The parsed JSON value, or `null` when no candidate is usable.
 */
function readProjectConfig() {
  for (const filename of [".okrarc", ".okra.json"]) {
    try {
      const candidate = join(process.cwd(), filename);
      if (!existsSync(candidate)) {
        continue;
      }
      return JSON.parse(readFileSync(candidate, "utf-8"));
    } catch (error) {
      // Unreadable or malformed: fall through to the next candidate.
    }
  }
  return null;
}
|
|
667
|
+
/**
 * Resolves the API key. Precedence: the `OKRA_API_KEY` environment variable,
 * then the project configuration, then the global configuration. Empty values
 * are treated as absent, and later sources are only read when earlier ones
 * yield nothing.
 *
 * @returns {string|undefined} The key, or `undefined` when none is configured.
 */
function getApiKey() {
  const fromEnv = process.env.OKRA_API_KEY;
  if (fromEnv) {
    return fromEnv;
  }
  const fromProject = readProjectConfig()?.apiKey;
  if (fromProject) {
    return fromProject;
  }
  const fromGlobal = readGlobalConfig()?.apiKey;
  return fromGlobal ? fromGlobal : void 0;
}
|
|
681
|
+
/**
 * Resolves the API base URL. Precedence: the `OKRA_BASE_URL` environment
 * variable, then the project configuration, then the global configuration,
 * then the built-in default. Empty values are treated as absent.
 *
 * @returns {string} The base URL.
 */
function getBaseUrl() {
  const fromEnv = process.env.OKRA_BASE_URL;
  if (fromEnv) {
    return fromEnv;
  }
  const fromProject = readProjectConfig()?.baseUrl;
  if (fromProject) {
    return fromProject;
  }
  const fromGlobal = readGlobalConfig()?.baseUrl;
  return fromGlobal ? fromGlobal : "https://app.okrapdf.com";
}
|
|
695
|
+
/**
 * Describes where the API key returned by `getApiKey` comes from, following
 * the same precedence (environment, project config, global config).
 *
 * NOTE(review): for the project case this reports the first candidate file that
 * exists, which is not necessarily the file the key was parsed from (for
 * example when `.okrarc` exists but is malformed and `.okra.json` holds the
 * key). Confirm whether that matters to callers.
 *
 * @returns {string} Human-readable source description, or "not found".
 */
function getApiKeySource() {
  if (process.env.OKRA_API_KEY) {
    return "environment variable (OKRA_API_KEY)";
  }
  if (readProjectConfig()?.apiKey) {
    const present = [".okrarc", ".okra.json"].find((name) => existsSync(join(process.cwd(), name)));
    if (present !== void 0) {
      return `project config (${present})`;
    }
  }
  if (readGlobalConfig()?.apiKey) {
    return `global config (${getGlobalConfigPath()})`;
  }
  return "not found";
}
|
|
714
|
+
|
|
715
|
+
// src/cli/commands/auth.ts
|
|
716
|
+
import * as readline from "readline";
|
|
717
|
+
/**
 * Asks a question on the terminal and resolves with the trimmed answer.
 * The readline interface is closed as soon as the answer arrives. The typed
 * text is echoed like any other readline input.
 *
 * @param {string} question - Prompt text written to stdout.
 * @returns {Promise<string>} The answer with surrounding whitespace removed.
 */
function prompt(question) {
  const terminal = readline.createInterface({ input: process.stdin, output: process.stdout });
  return new Promise((resolve) => {
    const onAnswer = (answer) => {
      terminal.close();
      resolve(answer.trim());
    };
    terminal.question(question, onAnswer);
  });
}
|
|
729
|
+
/**
 * Interactive login: asks for an API key and stores it in the global
 * configuration, keeping any other settings already saved there.
 *
 * An empty answer prints an error and exits the process with status 1. A key
 * that does not start with "okra_" only triggers a warning.
 *
 * @returns {Promise<void>}
 */
async function authLogin() {
  const intro = [
    "okra CLI Authentication",
    "",
    "Get your API key from: https://app.okrapdf.com/settings/api",
    "",
  ];
  for (const line of intro) {
    console.log(line);
  }
  const apiKey = await prompt("Enter your API key: ");
  if (!apiKey) {
    console.error("Error: API key cannot be empty");
    process.exit(1);
  }
  if (!apiKey.startsWith("okra_")) {
    console.warn('Warning: API key should start with "okra_"');
  }
  const config = readGlobalConfig() || {};
  config.apiKey = apiKey;
  writeGlobalConfig(config);
  const outro = [
    "",
    `\u2713 API key saved to ${getGlobalConfigPath()}`,
    "",
    "You can now use okra commands without setting OKRA_API_KEY",
  ];
  for (const line of outro) {
    console.log(line);
  }
}
|
|
750
|
+
/**
 * Prints whether an API key is configured, a masked preview of it and where it
 * comes from, or instructions for setting one.
 *
 * Masking: keys of at least 28 characters show the first 10 and last 4
 * characters. Shorter keys show at most their first 4 characters (never more
 * than half of the key). The previous "first 10 + last 4" rule printed short
 * keys in full.
 *
 * @returns {Promise<void>}
 */
async function authStatus() {
  // 28 = twice the 14 revealed characters, so at least half of the key stays hidden.
  const FULL_PREVIEW_MIN_LENGTH = 28;
  const apiKey = getApiKey();
  const source = getApiKeySource();
  console.log("okra CLI Authentication Status");
  console.log("");
  if (apiKey) {
    let maskedKey;
    if (apiKey.length >= FULL_PREVIEW_MIN_LENGTH) {
      maskedKey = `${apiKey.slice(0, 10)}...${apiKey.slice(-4)}`;
    } else {
      const visible = Math.min(4, Math.floor(apiKey.length / 2));
      maskedKey = `${apiKey.slice(0, visible)}...`;
    }
    console.log(`\u2713 Authenticated: ${maskedKey}`);
    console.log(` Source: ${source}`);
  } else {
    console.log("\u2717 Not authenticated");
    console.log("");
    console.log("Set API key via:");
    console.log(" okra auth login");
    console.log(' export OKRA_API_KEY="okra_xxx"');
  }
  console.log("");
}
|
|
768
|
+
/**
 * Removes the API key from the global configuration, leaving other settings in
 * place. Environment variables and project configuration files are not touched.
 *
 * @returns {Promise<void>}
 */
async function authLogout() {
  const config = readGlobalConfig();
  const hasStoredKey = Boolean(config && config.apiKey);
  if (!hasStoredKey) {
    console.log("No API key found in global config");
    return;
  }
  delete config.apiKey;
  writeGlobalConfig(config);
  const messages = [
    `\u2713 API key removed from ${getGlobalConfigPath()}`,
    "",
    "Note: Environment variables and project configs are not affected",
  ];
  for (const message of messages) {
    console.log(message);
  }
}
|
|
780
|
+
|
|
781
|
+
export {
|
|
782
|
+
tree,
|
|
783
|
+
formatTreeOutput,
|
|
784
|
+
parseSelector,
|
|
785
|
+
filterEntities,
|
|
786
|
+
executeQuery,
|
|
787
|
+
calculateStats,
|
|
788
|
+
find,
|
|
789
|
+
formatFindOutput,
|
|
790
|
+
formatStats,
|
|
791
|
+
pageGet,
|
|
792
|
+
pageEdit,
|
|
793
|
+
pageResolve,
|
|
794
|
+
pageVersions,
|
|
795
|
+
formatPageOutput,
|
|
796
|
+
formatVersionsOutput,
|
|
797
|
+
search,
|
|
798
|
+
formatSearchOutput,
|
|
799
|
+
tables,
|
|
800
|
+
formatTablesOutput,
|
|
801
|
+
history,
|
|
802
|
+
formatHistoryOutput,
|
|
803
|
+
toc,
|
|
804
|
+
formatTocOutput,
|
|
805
|
+
getGlobalConfigDir,
|
|
806
|
+
getGlobalConfigPath,
|
|
807
|
+
readGlobalConfig,
|
|
808
|
+
writeGlobalConfig,
|
|
809
|
+
readProjectConfig,
|
|
810
|
+
getApiKey,
|
|
811
|
+
getBaseUrl,
|
|
812
|
+
getApiKeySource,
|
|
813
|
+
authLogin,
|
|
814
|
+
authStatus,
|
|
815
|
+
authLogout
|
|
816
|
+
};
|
|
817
|
+
//# sourceMappingURL=chunk-SBT5T6ZK.js.map
|