@okrapdf/cli 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +18 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.js +0 -0
- package/dist/lib/config.d.ts +8 -0
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js +15 -0
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/output.d.ts +1 -1
- package/dist/lib/output.d.ts.map +1 -1
- package/dist/lib/output.js +53 -2
- package/dist/lib/output.js.map +1 -1
- package/package.json +1 -1
- package/dist/commands/docs.d.ts +0 -6
- package/dist/commands/docs.d.ts.map +0 -1
- package/dist/commands/docs.js +0 -157
- package/dist/commands/docs.js.map +0 -1
- package/dist/commands/step.d.ts +0 -17
- package/dist/commands/step.d.ts.map +0 -1
- package/dist/commands/step.js +0 -212
- package/dist/commands/step.js.map +0 -1
- package/dist/commands/workflow.d.ts +0 -18
- package/dist/commands/workflow.d.ts.map +0 -1
- package/dist/commands/workflow.js +0 -340
- package/dist/commands/workflow.js.map +0 -1
- package/dist/lib/dom-emitter.d.ts +0 -14
- package/dist/lib/dom-emitter.d.ts.map +0 -1
- package/dist/lib/dom-emitter.js +0 -66
- package/dist/lib/dom-emitter.js.map +0 -1
- package/dist/lib/dom-ops.d.ts +0 -55
- package/dist/lib/dom-ops.d.ts.map +0 -1
- package/dist/lib/dom-ops.js +0 -93
- package/dist/lib/dom-ops.js.map +0 -1
- package/dist/lib/pdfquery.d.ts +0 -57
- package/dist/lib/pdfquery.d.ts.map +0 -1
- package/dist/lib/pdfquery.js +0 -101
- package/dist/lib/pdfquery.js.map +0 -1
- package/dist/lib/sinks/file.d.ts +0 -8
- package/dist/lib/sinks/file.d.ts.map +0 -1
- package/dist/lib/sinks/file.js +0 -30
- package/dist/lib/sinks/file.js.map +0 -1
- package/dist/lib/sinks/index.d.ts +0 -6
- package/dist/lib/sinks/index.d.ts.map +0 -1
- package/dist/lib/sinks/index.js +0 -30
- package/dist/lib/sinks/index.js.map +0 -1
- package/dist/lib/sinks/partykit.d.ts +0 -14
- package/dist/lib/sinks/partykit.d.ts.map +0 -1
- package/dist/lib/sinks/partykit.js +0 -124
- package/dist/lib/sinks/partykit.js.map +0 -1
- package/dist/lib/sinks/stdout.d.ts +0 -6
- package/dist/lib/sinks/stdout.d.ts.map +0 -1
- package/dist/lib/sinks/stdout.js +0 -9
- package/dist/lib/sinks/stdout.js.map +0 -1
package/dist/lib/dom-emitter.js
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/**
 * Translates high-level extraction events (document, page, table, figure,
 * status and progress changes) into DOM operations and forwards each batch
 * to a sink.
 *
 * Every method awaits the sink, so a rejection from `sink.emit()` or
 * `sink.close()` propagates to the caller.
 */
export class DomEmitter {
    sink;
    pageTextLimit;
    /**
     * @param {{ emit(ops: object[]): Promise<void>, close(): Promise<void> }} sink
     *   Destination that receives each batch of operations.
     * @param {{ pageTextLimit?: number }} [options]
     *   `pageTextLimit` caps how many characters of page text are attached
     *   to a page node. Defaults to 500 (the previously hard-coded value);
     *   pass `Infinity` to keep the full text.
     */
    constructor(sink, { pageTextLimit = 500 } = {}) {
        this.sink = sink;
        this.pageTextLimit = pageTextLimit;
    }
    /**
     * Emit the root document node. The parent id is the empty string.
     * @param {string} id
     * @param {string} filename
     */
    async addDocument(id, filename) {
        await this.sink.emit([{
                op: 'ADD_NODE',
                parentId: '',
                node: { id, type: 'document', attrs: { filename } },
            }]);
    }
    /**
     * Emit a page node with id `page-<pageNum>` under the given document.
     * @param {string} docId
     * @param {number} pageNum
     * @param {string} [text] Page text; truncated to `pageTextLimit` characters.
     */
    async addPage(docId, pageNum, text) {
        await this.sink.emit([{
                op: 'ADD_NODE',
                parentId: docId,
                node: {
                    id: `page-${pageNum}`,
                    type: 'page',
                    // Optional chaining keeps `text` undefined when no text was supplied.
                    text: text?.slice(0, this.pageTextLimit),
                    attrs: { num: pageNum, page: pageNum },
                },
            }]);
    }
    /**
     * Emit a table node in 'pending' status under a page.
     * @param {string} pageId
     * @param {string} tableId
     * @param {unknown} bbox Stored as-is in the node attributes.
     */
    async addTable(pageId, tableId, bbox) {
        await this.#addPendingRegion('table', pageId, tableId, bbox);
    }
    /**
     * Emit a figure node in 'pending' status under a page.
     * @param {string} pageId
     * @param {string} figureId
     * @param {unknown} bbox Stored as-is in the node attributes.
     */
    async addFigure(pageId, figureId, bbox) {
        await this.#addPendingRegion('figure', pageId, figureId, bbox);
    }
    /**
     * Set a table's text to the given markdown and mark it 'extracted',
     * both in a single batch.
     * @param {string} tableId
     * @param {string} markdown
     */
    async setTableContent(tableId, markdown) {
        await this.sink.emit([
            { op: 'SET_TEXT', nodeId: tableId, text: markdown },
            { op: 'SET_ATTR', nodeId: tableId, key: 'status', value: 'extracted' },
        ]);
    }
    /**
     * Set the `status` attribute of a node.
     * @param {string} nodeId
     * @param {string} status
     */
    async setStatus(nodeId, status) {
        await this.sink.emit([{ op: 'SET_ATTR', nodeId, key: 'status', value: status }]);
    }
    /**
     * Set the `pagesCompleted` and `totalPages` attributes of a node in a
     * single batch.
     * @param {string} nodeId
     * @param {number} completed
     * @param {number} total
     */
    async setProgress(nodeId, completed, total) {
        await this.sink.emit([
            { op: 'SET_ATTR', nodeId, key: 'pagesCompleted', value: completed },
            { op: 'SET_ATTR', nodeId, key: 'totalPages', value: total },
        ]);
    }
    /** Close the underlying sink. */
    async close() {
        await this.sink.close();
    }
    /** Shared ADD_NODE emission for bbox-carrying regions that start as 'pending'. */
    async #addPendingRegion(type, parentId, id, bbox) {
        await this.sink.emit([{
                op: 'ADD_NODE',
                parentId,
                node: { id, type, attrs: { bbox, status: 'pending' } },
            }]);
    }
}
|
|
66
|
-
//# sourceMappingURL=dom-emitter.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"dom-emitter.js","sourceRoot":"","sources":["../../src/lib/dom-emitter.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,UAAU;IACD;IAApB,YAAoB,IAAe;QAAf,SAAI,GAAJ,IAAI,CAAW;IAAG,CAAC;IAEvC,KAAK,CAAC,WAAW,CAAC,EAAU,EAAE,QAAgB;QAC5C,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,EAAE,EAAE,UAAU;gBACd,QAAQ,EAAE,EAAE;gBACZ,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,QAAQ,EAAE,EAAE;aACpD,CAAC,CAAC,CAAC;IACN,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,KAAa,EAAE,OAAe,EAAE,IAAa;QACzD,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,EAAE,EAAE,UAAU;gBACd,QAAQ,EAAE,KAAK;gBACf,IAAI,EAAE;oBACJ,EAAE,EAAE,QAAQ,OAAO,EAAE;oBACrB,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;oBACzB,KAAK,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE;iBACvC;aACF,CAAC,CAAC,CAAC;IACN,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,OAAe,EAAE,IAAc;QAC5D,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,EAAE,EAAE,UAAU;gBACd,QAAQ,EAAE,MAAM;gBAChB,IAAI,EAAE;oBACJ,EAAE,EAAE,OAAO;oBACX,IAAI,EAAE,OAAO;oBACb,KAAK,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE;iBACnC;aACF,CAAC,CAAC,CAAC;IACN,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,QAAgB,EAAE,IAAc;QAC9D,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,EAAE,EAAE,UAAU;gBACd,QAAQ,EAAE,MAAM;gBAChB,IAAI,EAAE;oBACJ,EAAE,EAAE,QAAQ;oBACZ,IAAI,EAAE,QAAQ;oBACd,KAAK,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE;iBACnC;aACF,CAAC,CAAC,CAAC;IACN,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,OAAe,EAAE,QAAgB;QACrD,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YACnB,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE;YACnD,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,WAAW,EAAE;SACvE,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,MAAc;QAC5C,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;IACnF,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,MAAc,EAAE,SAAiB,EAAE,KAAa;QAChE,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YACnB,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,gBAAgB,EAAE,KAAK,EAAE,SAA
S,EAAE;YACnE,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,EAAE,KAAK,EAAE,KAAK,EAAE;SAC5D,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;IAC1B,CAAC;CACF"}
|
package/dist/lib/dom-ops.d.ts
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* DOM Operations for streaming document extraction
|
|
3
|
-
*
|
|
4
|
-
* Generic tree mutations (like React's virtual DOM) that can be
|
|
5
|
-
* transformed to any target format (PartyKit JdomPatch, NDJSON, etc.)
|
|
6
|
-
*/
|
|
7
|
-
export type DomNodeType = 'document' | 'page' | 'table' | 'figure' | 'paragraph' | 'heading' | 'footnote' | 'list' | 'list-item' | 'ocr-block';
|
|
8
|
-
export interface DomNode {
|
|
9
|
-
id: string;
|
|
10
|
-
type: DomNodeType;
|
|
11
|
-
text?: string;
|
|
12
|
-
attrs?: Record<string, unknown>;
|
|
13
|
-
children?: DomNode[];
|
|
14
|
-
}
|
|
15
|
-
export type DomOp = {
|
|
16
|
-
op: 'ADD_NODE';
|
|
17
|
-
parentId: string;
|
|
18
|
-
index?: number;
|
|
19
|
-
node: DomNode;
|
|
20
|
-
} | {
|
|
21
|
-
op: 'REMOVE_NODE';
|
|
22
|
-
nodeId: string;
|
|
23
|
-
} | {
|
|
24
|
-
op: 'UPDATE_NODE';
|
|
25
|
-
nodeId: string;
|
|
26
|
-
patch: Partial<DomNode>;
|
|
27
|
-
} | {
|
|
28
|
-
op: 'SET_ATTR';
|
|
29
|
-
nodeId: string;
|
|
30
|
-
key: string;
|
|
31
|
-
value: unknown;
|
|
32
|
-
} | {
|
|
33
|
-
op: 'SET_TEXT';
|
|
34
|
-
nodeId: string;
|
|
35
|
-
text: string;
|
|
36
|
-
};
|
|
37
|
-
export interface DomOpSink {
|
|
38
|
-
/** Emit one or more operations */
|
|
39
|
-
emit(ops: DomOp[]): Promise<void>;
|
|
40
|
-
/** Close the sink */
|
|
41
|
-
close(): Promise<void>;
|
|
42
|
-
}
|
|
43
|
-
export interface JdomPatch {
|
|
44
|
-
op: 'replace' | 'add' | 'remove';
|
|
45
|
-
path: (string | number)[];
|
|
46
|
-
value?: unknown;
|
|
47
|
-
}
|
|
48
|
-
/**
|
|
49
|
-
* Convert a DomOp to JdomPatch format
|
|
50
|
-
*
|
|
51
|
-
* Note: This is a simplified conversion that assumes a flat node structure.
|
|
52
|
-
* For nested trees, path resolution would need access to the full tree state.
|
|
53
|
-
*/
|
|
54
|
-
export declare function domOpToJdomPatch(op: DomOp): JdomPatch[];
|
|
55
|
-
//# sourceMappingURL=dom-ops.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"dom-ops.d.ts","sourceRoot":"","sources":["../../src/lib/dom-ops.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,MAAM,MAAM,WAAW,GACnB,UAAU,GACV,MAAM,GACN,OAAO,GACP,QAAQ,GACR,WAAW,GACX,SAAS,GACT,UAAU,GACV,MAAM,GACN,WAAW,GACX,WAAW,CAAC;AAGhB,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;CACtB;AAGD,MAAM,MAAM,KAAK,GACb;IAAE,EAAE,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,CAAA;CAAE,GACnE;IAAE,EAAE,EAAE,aAAa,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,EAAE,EAAE,aAAa,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,OAAO,CAAC,OAAO,CAAC,CAAA;CAAE,GAC9D;IAAE,EAAE,EAAE,UAAU,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,OAAO,CAAA;CAAE,GAC/D;IAAE,EAAE,EAAE,UAAU,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAGrD,MAAM,WAAW,SAAS;IACxB,kCAAkC;IAClC,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClC,qBAAqB;IACrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAGD,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,SAAS,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjC,IAAI,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,EAAE,EAAE,KAAK,GAAG,SAAS,EAAE,CAoDvD"}
|
package/dist/lib/dom-ops.js
DELETED
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* DOM Operations for streaming document extraction
|
|
3
|
-
*
|
|
4
|
-
* Generic tree mutations (like React's virtual DOM) that can be
|
|
5
|
-
* transformed to any target format (PartyKit JdomPatch, NDJSON, etc.)
|
|
6
|
-
*/
|
|
7
|
-
/**
 * Convert a DomOp to JdomPatch format
 *
 * Note: This is a simplified conversion that assumes a flat node structure.
 * For nested trees, path resolution would need access to the full tree state.
 * In particular, ADD_NODE builds its path from 'children' and the index only
 * (the op's parentId is not consulted), and REMOVE_NODE puts the node id
 * where an index lookup would be needed.
 *
 * @param {object} op Operation discriminated by its `op` field
 *   ('ADD_NODE' | 'REMOVE_NODE' | 'UPDATE_NODE' | 'SET_ATTR' | 'SET_TEXT').
 * @returns {Array<{ op: string, path: Array<string|number>, value?: unknown }>}
 *   Zero or more patches; an unrecognised `op` yields an empty array.
 */
export function domOpToJdomPatch(op) {
    // Each clause is braced so its declarations are scoped to that clause
    // instead of leaking into the whole switch.
    switch (op.op) {
        case 'ADD_NODE': {
            return [{
                    op: 'add',
                    path: ['children', op.index ?? -1], // -1 = append
                    value: domNodeToJdom(op.node),
                }];
        }
        case 'REMOVE_NODE': {
            return [{
                    op: 'remove',
                    path: ['children', op.nodeId], // Simplified - actual impl needs index lookup
                }];
        }
        case 'UPDATE_NODE': {
            const patches = [];
            // An empty string is a valid new text, so compare against undefined.
            if (op.patch.text !== undefined) {
                patches.push({
                    op: 'replace',
                    path: [op.nodeId, 'textContent'],
                    value: op.patch.text,
                });
            }
            if (op.patch.attrs) {
                // One 'replace' per attribute, in the object's own key order.
                for (const [key, value] of Object.entries(op.patch.attrs)) {
                    patches.push({
                        op: 'replace',
                        path: [op.nodeId, 'attributes', key],
                        value,
                    });
                }
            }
            return patches;
        }
        case 'SET_ATTR': {
            return [{
                    op: 'replace',
                    path: [op.nodeId, 'attributes', op.key],
                    value: op.value,
                }];
        }
        case 'SET_TEXT': {
            return [{
                    op: 'replace',
                    path: [op.nodeId, 'textContent'],
                    value: op.text,
                }];
        }
        default:
            return [];
    }
}
|
|
61
|
-
/**
 * Build the Jdom representation of a DomNode, recursing into its children.
 *
 * Missing pieces are defaulted: page falls back to 0, text to null,
 * attributes to an empty object and children to an empty array.
 */
function domNodeToJdom(node) {
    const { id, type, text, attrs, children } = node;
    const jdomChildren = children == null
        ? []
        : children.map((child) => domNodeToJdom(child));
    return {
        id,
        type,
        tagName: nodeTypeToTagName(type),
        page: attrs?.page ?? 0,
        textContent: text ?? null,
        attributes: attrs ?? {},
        children: jdomChildren,
    };
}
|
|
75
|
-
/**
 * HTML-like tag name for each known node type.
 *
 * Built on a null-prototype object so that names inherited from
 * Object.prototype ('constructor', 'toString', '__proto__', ...) can never
 * be mistaken for a mapping, and frozen because it is shared across calls.
 */
const NODE_TYPE_TAG_NAMES = Object.freeze(Object.assign(Object.create(null), {
    'document': 'article',
    'page': 'section',
    'table': 'table',
    'figure': 'figure',
    'paragraph': 'p',
    'heading': 'h1',
    'footnote': 'aside',
    'list': 'ul',
    'list-item': 'li',
    'ocr-block': 'div',
}));
/**
 * Map node type to HTML-like tag name
 *
 * @param {string} type Node type, e.g. 'page' or 'list-item'.
 * @returns {string} The mapped tag name, or 'div' for any unknown type.
 */
function nodeTypeToTagName(type) {
    return NODE_TYPE_TAG_NAMES[type] ?? 'div';
}
|
|
93
|
-
//# sourceMappingURL=dom-ops.js.map
|
package/dist/lib/dom-ops.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"dom-ops.js","sourceRoot":"","sources":["../../src/lib/dom-ops.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA+CH;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAS;IACxC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;QACd,KAAK,UAAU;YACb,OAAO,CAAC;oBACN,EAAE,EAAE,KAAK;oBACT,IAAI,EAAE,CAAC,UAAU,EAAE,EAAE,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,EAAE,cAAc;oBAClD,KAAK,EAAE,aAAa,CAAC,EAAE,CAAC,IAAI,CAAC;iBAC9B,CAAC,CAAC;QAEL,KAAK,aAAa;YAChB,OAAO,CAAC;oBACN,EAAE,EAAE,QAAQ;oBACZ,IAAI,EAAE,CAAC,UAAU,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,8CAA8C;iBAC9E,CAAC,CAAC;QAEL,KAAK,aAAa;YAChB,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAChC,OAAO,CAAC,IAAI,CAAC;oBACX,EAAE,EAAE,SAAS;oBACb,IAAI,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,aAAa,CAAC;oBAChC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI;iBACrB,CAAC,CAAC;YACL,CAAC;YACD,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;gBACnB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAC1D,OAAO,CAAC,IAAI,CAAC;wBACX,EAAE,EAAE,SAAS;wBACb,IAAI,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,CAAC;wBACpC,KAAK;qBACN,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YACD,OAAO,OAAO,CAAC;QAEjB,KAAK,UAAU;YACb,OAAO,CAAC;oBACN,EAAE,EAAE,SAAS;oBACb,IAAI,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,CAAC,GAAG,CAAC;oBACvC,KAAK,EAAE,EAAE,CAAC,KAAK;iBAChB,CAAC,CAAC;QAEL,KAAK,UAAU;YACb,OAAO,CAAC;oBACN,EAAE,EAAE,SAAS;oBACb,IAAI,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,aAAa,CAAC;oBAChC,KAAK,EAAE,EAAE,CAAC,IAAI;iBACf,CAAC,CAAC;QAEL;YACE,OAAO,EAAE,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,IAAa;IAClC,OAAO;QACL,EAAE,EAAE,IAAI,CAAC,EAAE;QACX,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,OAAO,EAAE,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QACrC,IAAI,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC;QAC3B,WAAW,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI;QAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,IAAI,EAAE;QAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,aAAa,CAAC,IAAI,EAAE;KAClD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAiB;IAC1C,MAAM,MAAM,GAAgC;QAC1C,UAAU,EAAE,SAAS;QACrB,MAAM,EAAE,SAAS;QACjB,OAAO,EAAE,OAAO;QAC
hB,QAAQ,EAAE,QAAQ;QAClB,WAAW,EAAE,GAAG;QAChB,SAAS,EAAE,IAAI;QACf,UAAU,EAAE,OAAO;QACnB,MAAM,EAAE,IAAI;QACZ,WAAW,EAAE,IAAI;QACjB,WAAW,EAAE,KAAK;KACnB,CAAC;IACF,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC;AAC/B,CAAC"}
|
package/dist/lib/pdfquery.d.ts
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* pdfquery Integration Wrapper
|
|
3
|
-
*
|
|
4
|
-
* Thin wrapper around existing pdfquery library - DO NOT reimplement
|
|
5
|
-
* Converts OkraPDF API job results to VirtualDoc format for jQuery-style queries
|
|
6
|
-
*/
|
|
7
|
-
import { type VirtualDoc, type QueryEngine, type QueryResult, type QueryConfig, type QueryResponse } from 'pdfquery';
|
|
8
|
-
import type { JobResults } from '../types.js';
|
|
9
|
-
/**
|
|
10
|
-
* Build VirtualDoc from OkraPDF job results
|
|
11
|
-
*
|
|
12
|
-
* Converts API response format to pdfquery compiler input
|
|
13
|
-
*/
|
|
14
|
-
export declare function buildVirtualDocFromJob(jobResults: JobResults): VirtualDoc;
|
|
15
|
-
/**
|
|
16
|
-
* Create query engine from job results
|
|
17
|
-
*
|
|
18
|
-
* Convenience function that builds VirtualDoc and creates query engine
|
|
19
|
-
*/
|
|
20
|
-
export declare function createQueryEngineFromJob(jobResults: JobResults): QueryEngine;
|
|
21
|
-
/**
|
|
22
|
-
* Execute a query config against job results
|
|
23
|
-
*
|
|
24
|
-
* Uses pdfquery's executeQuery for config-based queries
|
|
25
|
-
*/
|
|
26
|
-
export declare function executeJobQuery(jobResults: JobResults, config: QueryConfig): QueryResponse;
|
|
27
|
-
/**
|
|
28
|
-
* Format query result as markdown
|
|
29
|
-
*
|
|
30
|
-
* Uses pdfquery's built-in .html() method which returns markdown-like HTML
|
|
31
|
-
*/
|
|
32
|
-
export declare function formatAsMarkdown(queryResult: QueryResult): string;
|
|
33
|
-
/**
|
|
34
|
-
* Format query result as JSON entities
|
|
35
|
-
*
|
|
36
|
-
* Uses pdfquery's built-in .toArray() method
|
|
37
|
-
*/
|
|
38
|
-
export declare function formatAsEntities(queryResult: QueryResult): any[];
|
|
39
|
-
/**
|
|
40
|
-
* Format query result with statistics
|
|
41
|
-
*/
|
|
42
|
-
export declare function formatWithStats(queryResult: QueryResult): {
|
|
43
|
-
entities: any[];
|
|
44
|
-
stats: {
|
|
45
|
-
total: number;
|
|
46
|
-
verified: number;
|
|
47
|
-
flagged: number;
|
|
48
|
-
pending: number;
|
|
49
|
-
score: number;
|
|
50
|
-
avgConfidence: number;
|
|
51
|
-
};
|
|
52
|
-
};
|
|
53
|
-
/**
|
|
54
|
-
* Re-export pdfquery types and functions for convenience
|
|
55
|
-
*/
|
|
56
|
-
export { createQueryEngine, executeQuery, type QueryEngine, type QueryResult, type QueryConfig, type QueryResponse, type VirtualDoc, } from 'pdfquery';
|
|
57
|
-
//# sourceMappingURL=pdfquery.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdfquery.d.ts","sourceRoot":"","sources":["../../src/lib/pdfquery.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAIL,KAAK,UAAU,EACf,KAAK,WAAW,EAChB,KAAK,WAAW,EAEhB,KAAK,WAAW,EAChB,KAAK,aAAa,EACnB,MAAM,UAAU,CAAC;AAClB,OAAO,KAAK,EAAE,UAAU,EAAe,MAAM,aAAa,CAAC;AAE3D;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,UAAU,EAAE,UAAU,GAAG,UAAU,CAmCzE;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,UAAU,EAAE,UAAU,GAAG,WAAW,CAG5E;AAED;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,GAAG,aAAa,CAG1F;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAEjE;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,WAAW,GAAG,GAAG,EAAE,CAEhE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,WAAW,EAAE,WAAW,GAAG;IACzD,QAAQ,EAAE,GAAG,EAAE,CAAC;IAChB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC;CACH,CAeA;AAED;;GAEG;AACH,OAAO,EACL,iBAAiB,EACjB,YAAY,EACZ,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,UAAU,CAAC"}
|
package/dist/lib/pdfquery.js
DELETED
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* pdfquery Integration Wrapper
|
|
3
|
-
*
|
|
4
|
-
* Thin wrapper around existing pdfquery library - DO NOT reimplement
|
|
5
|
-
* Converts OkraPDF API job results to VirtualDoc format for jQuery-style queries
|
|
6
|
-
*/
|
|
7
|
-
import { DocCompiler, createQueryEngine, executeQuery, } from 'pdfquery';
|
|
8
|
-
/**
 * Compile OkraPDF job results into a pdfquery VirtualDoc.
 *
 * Only tables are fed to the compiler; page text is not added.
 */
export function buildVirtualDocFromJob(jobResults) {
    // Used when the API did not report a bounding box for a table.
    const fallbackBox = { xmin: 0, ymin: 0, xmax: 1, ymax: 1 };
    // Reshape one API table into the record the compiler expects.
    const toCompilerTable = (table) => {
        const { xmin, ymin, xmax, ymax } = table.bbox ? table.bbox : fallbackBox;
        return {
            id: table.id,
            page_number: table.page_number,
            // Note: field name is 'markdown' not 'content_markdown'
            markdown: table.content_markdown,
            bbox: { xmin, ymin, xmax, ymax },
            confidence: table.confidence ?? 0.9,
            verification_status: 'pending',
            verified_by: null,
            verified_at: null,
        };
    };
    const compiler = new DocCompiler({
        documentId: jobResults.job_id,
        fileName: jobResults.filename,
        autoDetectTypes: true,
    });
    const sourceTables = jobResults.tables;
    if (sourceTables && sourceTables.length > 0) {
        compiler.addTables(sourceTables.map((table) => toCompilerTable(table)));
    }
    // jobResults.pages is intentionally unused for now: OCR blocks are
    // skipped since tables are available. Support can be added later if needed.
    return compiler.compile();
}
|
|
45
|
-
/**
 * Build a VirtualDoc from the job results and wrap it in a pdfquery
 * query engine in one step.
 */
export function createQueryEngineFromJob(jobResults) {
    return createQueryEngine(buildVirtualDocFromJob(jobResults));
}
|
|
54
|
-
/**
 * Run a config-based query against job results.
 *
 * The job results are first compiled into a VirtualDoc, which is then
 * handed to pdfquery's executeQuery together with the config.
 */
export function executeJobQuery(jobResults, config) {
    const virtualDoc = buildVirtualDocFromJob(jobResults);
    const response = executeQuery(virtualDoc, config);
    return response;
}
|
|
63
|
-
/**
 * Render a query result as text.
 *
 * Delegates to the result's own .html() method and returns its output
 * unchanged.
 */
export function formatAsMarkdown(queryResult) {
    const rendered = queryResult.html();
    return rendered;
}
|
|
71
|
-
/**
 * Return the entities of a query result.
 *
 * Delegates to the result's own .toArray() method and returns its output
 * unchanged.
 */
export function formatAsEntities(queryResult) {
    const entities = queryResult.toArray();
    return entities;
}
|
|
79
|
-
/**
 * Bundle a query result's entities with a fixed-shape statistics summary.
 *
 * Entities are read before the statistics. Only the six known statistic
 * fields are copied, and a missing average confidence is reported as 0.
 */
export function formatWithStats(queryResult) {
    const entities = queryResult.toArray();
    const {
        total,
        verified,
        flagged,
        pending,
        score,
        avgConfidence,
    } = queryResult.stats();
    const stats = {
        total,
        verified,
        flagged,
        pending,
        score,
        avgConfidence: avgConfidence ?? 0,
    };
    return { entities, stats };
}
|
|
97
|
-
/**
|
|
98
|
-
* Re-export pdfquery types and functions for convenience
|
|
99
|
-
*/
|
|
100
|
-
export { createQueryEngine, executeQuery, } from 'pdfquery';
|
|
101
|
-
//# sourceMappingURL=pdfquery.js.map
|
package/dist/lib/pdfquery.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pdfquery.js","sourceRoot":"","sources":["../../src/lib/pdfquery.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,WAAW,EACX,iBAAiB,EACjB,YAAY,GAOb,MAAM,UAAU,CAAC;AAGlB;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CAAC,UAAsB;IAC3D,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC;QAC/B,UAAU,EAAE,UAAU,CAAC,MAAM;QAC7B,QAAQ,EAAE,UAAU,CAAC,QAAQ;QAC7B,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;IAEH,aAAa;IACb,IAAI,UAAU,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,MAAM,MAAM,GAAkB,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAkB,EAAE,EAAE,CAAC,CAAC;YAC3E,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,QAAQ,EAAE,KAAK,CAAC,gBAAgB,EAAE,wDAAwD;YAC1F,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;gBACjB,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI;gBACrB,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI;gBACrB,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI;gBACrB,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI;aACtB,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE;YAC1C,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,GAAG;YACnC,mBAAmB,EAAE,SAAkB;YACvC,WAAW,EAAE,IAAI;YACjB,WAAW,EAAE,IAAI;SAClB,CAAC,CAAC,CAAC;QAEJ,QAAQ,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAC7B,CAAC;IAED,2BAA2B;IAC3B,IAAI,UAAU,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,sDAAsD;QACtD,4CAA4C;IAC9C,CAAC;IAED,OAAO,QAAQ,CAAC,OAAO,EAAE,CAAC;AAC5B,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,UAAsB;IAC7D,MAAM,GAAG,GAAG,sBAAsB,CAAC,UAAU,CAAC,CAAC;IAC/C,OAAO,iBAAiB,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,UAAsB,EAAE,MAAmB;IACzE,MAAM,GAAG,GAAG,sBAAsB,CAAC,UAAU,CAAC,CAAC;IAC/C,OAAO,YAAY,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;AACnC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAwB;IACvD,OAAO,WAAW,CAAC,IAAI,EAAE,CAAC;AAC5B,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAwB;IACvD,OAAO,WAAW,CAAC,OAAO,EAAE,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,WAAwB;IAWtD,MAAM,QAAQ,GAAG,WAAW,CAAC,OAAO,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,EAAE,CAAC;IAElC,OAAO;QACL,QAAQ;QACR,KAAK,EAAE;YACL,KAAK,EAAE,KAAK,CAAC,
KAAK;YAClB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,CAAC;SACxC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,OAAO,EACL,iBAAiB,EACjB,YAAY,GAMb,MAAM,UAAU,CAAC"}
|
package/dist/lib/sinks/file.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"file.d.ts","sourceRoot":"","sources":["../../../src/lib/sinks/file.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAEtD,qBAAa,QAAS,YAAW,SAAS;IACxC,OAAO,CAAC,MAAM,CAAc;gBAEhB,QAAQ,EAAE,MAAM;IAItB,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAWjC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAQ7B"}
|
package/dist/lib/sinks/file.js
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { createWriteStream } from 'fs';
|
|
2
|
-
/**
 * Sink that appends DOM operations to a file as newline-delimited JSON
 * (one JSON document per line).
 */
export class FileSink {
    stream;
    /** First error reported by the stream, or null while it is healthy. */
    #failure = null;
    /**
     * @param {string} filePath File to append to (opened with flag 'a').
     */
    constructor(filePath) {
        this.stream = createWriteStream(filePath, { flags: 'a' });
        // A stream 'error' event with no listener is thrown as an uncaught
        // exception (e.g. when the path cannot be opened). Record it instead
        // so that emit()/close() reject and the caller can handle it.
        this.stream.on('error', (err) => {
            this.#failure ??= err;
        });
    }
    /**
     * Append the given operations, one JSON line each, in order.
     *
     * @param {object[]} ops Operations to serialise.
     * @returns {Promise<void>} Resolves once the write callback has fired.
     * @throws The stream error if the stream has failed or the write fails.
     */
    async emit(ops) {
        if (this.#failure) {
            throw this.#failure;
        }
        if (ops.length === 0) {
            return;
        }
        // One write per batch produces the same bytes as one write per op,
        // without a round trip through the event loop for every line.
        const payload = ops.map((op) => `${JSON.stringify(op)}\n`).join('');
        await new Promise((resolve, reject) => {
            this.stream.write(payload, (err) => {
                if (err)
                    reject(err);
                else
                    resolve();
            });
        });
    }
    /**
     * End the stream.
     *
     * @returns {Promise<void>} Resolves when the end callback fires without error.
     * @throws The stream error if the stream has failed or fails while ending.
     */
    async close() {
        if (this.#failure) {
            throw this.#failure;
        }
        await new Promise((resolve, reject) => {
            // Also settle on 'error' so close() cannot wait forever if the
            // end callback is never invoked for a failing stream.
            const onError = (err) => reject(err);
            this.stream.once('error', onError);
            this.stream.end((err) => {
                this.stream.off('error', onError);
                if (err)
                    reject(err);
                else
                    resolve();
            });
        });
    }
}
|
|
30
|
-
//# sourceMappingURL=file.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"file.js","sourceRoot":"","sources":["../../../src/lib/sinks/file.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAoB,MAAM,IAAI,CAAC;AAGzD,MAAM,OAAO,QAAQ;IACX,MAAM,CAAc;IAE5B,YAAY,QAAgB;QAC1B,IAAI,CAAC,MAAM,GAAG,iBAAiB,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,GAAY;QACrB,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE;oBACnD,IAAI,GAAG;wBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;wBAChB,OAAO,EAAE,CAAC;gBACjB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,GAA6B,EAAE,EAAE;gBAChD,IAAI,GAAG;oBAAE,MAAM,CAAC,GAAG,CAAC,CAAC;;oBAChB,OAAO,EAAE,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
import type { DomOpSink } from '../dom-ops.js';
|
|
2
|
-
export { PartyKitSink } from './partykit.js';
|
|
3
|
-
export { StdoutSink } from './stdout.js';
|
|
4
|
-
export { FileSink } from './file.js';
|
|
5
|
-
export declare function parseSinkUrl(url: string): DomOpSink;
|
|
6
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/sinks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAK/C,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAIrC,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CA2BnD"}
|
package/dist/lib/sinks/index.js
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { PartyKitSink } from './partykit.js';
|
|
2
|
-
import { StdoutSink } from './stdout.js';
|
|
3
|
-
import { FileSink } from './file.js';
|
|
4
|
-
export { PartyKitSink } from './partykit.js';
|
|
5
|
-
export { StdoutSink } from './stdout.js';
|
|
6
|
-
export { FileSink } from './file.js';
|
|
7
|
-
const DEFAULT_PARTYKIT_HOST = 'document-tree-party.steventsao.partykit.dev';
|
|
8
|
-
/**
 * Pick and construct the sink described by a sink URL.
 *
 * Recognised forms, checked in this order:
 *   - 'stdout' or '-'                  -> StdoutSink
 *   - partykit://host/room-id          -> PartyKitSink (an empty host falls
 *                                         back to the default PartyKit host)
 *   - file://path                      -> FileSink on whatever follows the prefix
 *   - any string ending in .ndjson or .jsonl -> FileSink on that path
 *
 * Throws an Error for a PartyKit URL without a room id and for any other
 * unrecognised string.
 */
export function parseSinkUrl(url) {
    const wantsStdout = url === 'stdout' || url === '-';
    if (wantsStdout) {
        return new StdoutSink();
    }
    if (url.startsWith('partykit://')) {
        const target = new URL(url);
        const host = target.host || DEFAULT_PARTYKIT_HOST;
        // Drop the single leading slash of the path to get the room id.
        const roomId = target.pathname.replace(/^\//, '');
        if (roomId === '') {
            throw new Error('PartyKit URL must include room ID: partykit://host/room-id');
        }
        return new PartyKitSink(host, roomId);
    }
    const filePrefix = 'file://';
    if (url.startsWith(filePrefix)) {
        // The prefix is known to sit at index 0, so slicing it off is the
        // same as removing its first occurrence.
        return new FileSink(url.slice(filePrefix.length));
    }
    const looksLikeNdjsonPath = ['.ndjson', '.jsonl'].some((suffix) => url.endsWith(suffix));
    if (looksLikeNdjsonPath) {
        return new FileSink(url);
    }
    throw new Error(`Unknown sink URL format: ${url}. Use partykit://, file://, stdout, or a .ndjson file path`);
}
|
|
30
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/sinks/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,MAAM,qBAAqB,GAAG,6CAA6C,CAAC;AAE5E,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,IAAI,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;QACpC,OAAO,IAAI,UAAU,EAAE,CAAC;IAC1B,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,IAAI,qBAAqB,CAAC;QAClD,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAElD,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;QAChF,CAAC;QAED,OAAO,IAAI,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC5C,OAAO,IAAI,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAChC,CAAC;IAED,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACtD,OAAO,IAAI,QAAQ,CAAC,GAAG,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,4DAA4D,CAAC,CAAC;AAC/G,CAAC"}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import type { DomOp, DomOpSink } from '../dom-ops.js';
|
|
2
|
-
export declare class PartyKitSink implements DomOpSink {
|
|
3
|
-
private host;
|
|
4
|
-
private roomId;
|
|
5
|
-
private nodePathCache;
|
|
6
|
-
constructor(host: string, roomId: string);
|
|
7
|
-
emit(ops: DomOp[]): Promise<void>;
|
|
8
|
-
close(): Promise<void>;
|
|
9
|
-
private toJdomPatches;
|
|
10
|
-
private resolvePath;
|
|
11
|
-
private toJdom;
|
|
12
|
-
private typeToTag;
|
|
13
|
-
}
|
|
14
|
-
//# sourceMappingURL=partykit.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"partykit.d.ts","sourceRoot":"","sources":["../../../src/lib/sinks/partykit.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,SAAS,EAAa,MAAM,eAAe,CAAC;AAEjE,qBAAa,YAAa,YAAW,SAAS;IAI1C,OAAO,CAAC,IAAI;IACZ,OAAO,CAAC,MAAM;IAJhB,OAAO,CAAC,aAAa,CAA0C;gBAGrD,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM;IAGlB,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAuBjC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B,OAAO,CAAC,aAAa;IAkErB,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,MAAM;IAYd,OAAO,CAAC,SAAS;CAelB"}
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
/**
 * Sink that translates DOM operations into patches and sends them to a
 * PartyKit document room with an HTTP PATCH request.
 *
 * The sink remembers, per node id, the tree path at which the node was added
 * so that later operations (text/attribute updates, removals, child inserts)
 * can address it.
 *
 * Known limitation: cached paths are not re-indexed when a node is inserted
 * at an explicit index in front of existing siblings or when a sibling is
 * removed; paths are only guaranteed for append-style tree construction.
 */
export class PartyKitSink {
    host;
    roomId;
    /** Node id -> path (array of segments) recorded when the node was added. */
    nodePathCache = new Map();
    /** Parent id ('' for the root) -> number of children added through this sink. */
    childCounts = new Map();

    /**
     * @param {string} host   Host name used to build the request URL.
     * @param {string} roomId Room identifier used to build the request URL.
     */
    constructor(host, roomId) {
        this.host = host;
        this.roomId = roomId;
    }

    /**
     * Converts the operations to patches and sends them in one PATCH request.
     * Does nothing when the operations produce no patches.
     *
     * @param {Array<object>} ops DOM operations to forward.
     * @returns {Promise<void>}
     * @throws {Error} When the server answers with a non-OK status.
     */
    async emit(ops) {
        const patches = ops.flatMap((op) => this.toJdomPatches(op));
        if (patches.length === 0) {
            return;
        }
        const url = `https://${this.host}/parties/document/${this.roomId}/tree`;
        const response = await fetch(url, {
            method: 'PATCH',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                patches,
                reason: 'cli-extraction',
                mutatedBy: 'cli',
                mutatedByName: 'OkraPDF CLI',
            }),
        });
        if (!response.ok) {
            const text = await response.text();
            throw new Error(`PartyKit PATCH failed: ${response.status} ${text}`);
        }
    }

    /**
     * Drops all cached path information.
     *
     * @returns {Promise<void>}
     */
    async close() {
        this.nodePathCache.clear();
        this.childCounts.clear();
    }

    /**
     * Maps a single DOM operation to zero or more patches.
     *
     * @param {object} op Operation with an `op` discriminator.
     * @returns {Array<object>} Patches; empty for unknown operation kinds.
     */
    toJdomPatches(op) {
        switch (op.op) {
            case 'ADD_NODE': {
                const parentPath = this.resolvePath(op.parentId);
                // resolvePath treats every falsy parent id as the root, so
                // they must share one counter as well.
                const parentKey = op.parentId || '';
                const siblingCount = this.childCounts.get(parentKey) ?? 0;
                // Without an explicit index the patch uses -1
                // (presumably "append" on the server side — confirm).
                const childPath = [...parentPath, 'children', op.index ?? -1];
                // The cached path needs the concrete position of the child
                // under ITS parent. The previous code used the total number
                // of cached nodes here, which is wrong for nested trees.
                this.nodePathCache.set(op.node.id, [
                    ...parentPath,
                    'children',
                    op.index ?? siblingCount,
                ]);
                this.childCounts.set(parentKey, siblingCount + 1);
                return [{
                        op: 'add',
                        path: childPath,
                        value: this.toJdom(op.node),
                    }];
            }
            case 'REMOVE_NODE': {
                const nodePath = this.resolvePath(op.nodeId);
                this.nodePathCache.delete(op.nodeId);
                return [{
                        op: 'remove',
                        path: nodePath,
                    }];
            }
            case 'UPDATE_NODE': {
                const patches = [];
                const nodePath = this.resolvePath(op.nodeId);
                // An explicit check against undefined keeps '' a valid text.
                if (op.patch.text !== undefined) {
                    patches.push({
                        op: 'replace',
                        path: [...nodePath, 'textContent'],
                        value: op.patch.text,
                    });
                }
                if (op.patch.attrs) {
                    for (const [key, value] of Object.entries(op.patch.attrs)) {
                        patches.push({
                            op: 'replace',
                            path: [...nodePath, 'attributes', key],
                            value,
                        });
                    }
                }
                return patches;
            }
            case 'SET_TEXT':
                return [{
                        op: 'replace',
                        path: [...this.resolvePath(op.nodeId), 'textContent'],
                        value: op.text,
                    }];
            case 'SET_ATTR':
                return [{
                        op: 'replace',
                        path: [...this.resolvePath(op.nodeId), 'attributes', op.key],
                        value: op.value,
                    }];
            default:
                return [];
        }
    }

    /**
     * Resolves a node id to a tree path.
     *
     * @param {string} nodeId Node id; a falsy value addresses the root.
     * @returns {Array<string|number>} `[]` for the root, the cached path for
     *   a known node, otherwise a single-segment path holding the id itself.
     */
    resolvePath(nodeId) {
        if (!nodeId || nodeId === '') {
            return [];
        }
        const cached = this.nodePathCache.get(nodeId);
        if (cached) {
            return cached;
        }
        return [nodeId];
    }

    /**
     * Builds the payload object for a newly added node.
     *
     * @param {object} node Node with `id`, `type` and optional `text`/`attrs`.
     * @returns {object} Node payload with an empty `children` array.
     */
    toJdom(node) {
        return {
            id: node.id,
            type: node.type,
            tagName: this.typeToTag(node.type),
            // Page number comes from attrs.page, then attrs.num, else 0.
            page: node.attrs?.page ?? node.attrs?.num ?? 0,
            textContent: node.text ?? null,
            attributes: node.attrs ?? {},
            children: [],
        };
    }

    /**
     * Maps a node type to an HTML tag name.
     *
     * @param {string} type Node type.
     * @returns {string} Tag name; 'div' for unmapped types.
     */
    typeToTag(type) {
        const map = {
            document: 'article',
            page: 'section',
            table: 'table',
            figure: 'figure',
            paragraph: 'p',
            heading: 'h1',
            footnote: 'aside',
            list: 'ul',
            'list-item': 'li',
            'ocr-block': 'div',
        };
        return map[type] ?? 'div';
    }
}
|
|
124
|
-
//# sourceMappingURL=partykit.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"partykit.js","sourceRoot":"","sources":["../../../src/lib/sinks/partykit.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,YAAY;IAIb;IACA;IAJF,aAAa,GAAG,IAAI,GAAG,EAA+B,CAAC;IAE/D,YACU,IAAY,EACZ,MAAc;QADd,SAAI,GAAJ,IAAI,CAAQ;QACZ,WAAM,GAAN,MAAM,CAAQ;IACrB,CAAC;IAEJ,KAAK,CAAC,IAAI,CAAC,GAAY;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC;QAE1D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEjC,MAAM,GAAG,GAAG,WAAW,IAAI,CAAC,IAAI,qBAAqB,IAAI,CAAC,MAAM,OAAO,CAAC;QACxE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,OAAO;YACf,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,OAAO;gBACP,MAAM,EAAE,gBAAgB;gBACxB,SAAS,EAAE,KAAK;gBAChB,aAAa,EAAE,aAAa;aAC7B,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,0BAA0B,QAAQ,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC;IAC7B,CAAC;IAEO,aAAa,CAAC,EAAS;QAC7B,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YACd,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC;gBACjD,MAAM,SAAS,GAAG,CAAC,GAAG,UAAU,EAAE,UAAU,EAAE,EAAE,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC;gBAE9D,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,GAAG,UAAU,EAAE,UAAU,EAAE,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC;gBAErG,OAAO,CAAC;wBACN,EAAE,EAAE,KAAK;wBACT,IAAI,EAAE,SAAS;wBACf,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC;qBAC5B,CAAC,CAAC;YACL,CAAC;YAED,KAAK,aAAa,CAAC,CAAC,CAAC;gBACnB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;gBAC7C,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;gBACrC,OAAO,CAAC;wBACN,EAAE,EAAE,QAAQ;wBACZ,IAAI,EAAE,QAAQ;qBACf,CAAC,CAAC;YACL,CAAC;YAED,KAAK,aAAa,CAAC,CAAC,CAAC;gBACnB,MAAM,OAAO,GAAgB,EAAE,CAAC;gBAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;gBAE7C,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE
,CAAC;oBAChC,OAAO,CAAC,IAAI,CAAC;wBACX,EAAE,EAAE,SAAS;wBACb,IAAI,EAAE,CAAC,GAAG,QAAQ,EAAE,aAAa,CAAC;wBAClC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI;qBACrB,CAAC,CAAC;gBACL,CAAC;gBACD,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;oBACnB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;wBAC1D,OAAO,CAAC,IAAI,CAAC;4BACX,EAAE,EAAE,SAAS;4BACb,IAAI,EAAE,CAAC,GAAG,QAAQ,EAAE,YAAY,EAAE,GAAG,CAAC;4BACtC,KAAK;yBACN,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;gBACD,OAAO,OAAO,CAAC;YACjB,CAAC;YAED,KAAK,UAAU;gBACb,OAAO,CAAC;wBACN,EAAE,EAAE,SAAS;wBACb,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,aAAa,CAAC;wBACrD,KAAK,EAAE,EAAE,CAAC,IAAI;qBACf,CAAC,CAAC;YAEL,KAAK,UAAU;gBACb,OAAO,CAAC;wBACN,EAAE,EAAE,SAAS;wBACb,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,EAAE,CAAC,GAAG,CAAC;wBAC5D,KAAK,EAAE,EAAE,CAAC,KAAK;qBAChB,CAAC,CAAC;YAEL;gBACE,OAAO,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IAEO,WAAW,CAAC,MAAc;QAChC,IAAI,CAAC,MAAM,IAAI,MAAM,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAExC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC9C,IAAI,MAAM;YAAE,OAAO,MAAM,CAAC;QAE1B,OAAO,CAAC,MAAM,CAAC,CAAC;IAClB,CAAC;IAEO,MAAM,CAAC,IAAwG;QACrH,OAAO;YACL,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;YAClC,IAAI,EAAG,IAAI,CAAC,KAAK,EAAE,IAAe,IAAK,IAAI,CAAC,KAAK,EAAE,GAAc,IAAI,CAAC;YACtE,WAAW,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI;YAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,IAAI,EAAE;YAC5B,QAAQ,EAAE,EAAE;SACb,CAAC;IACJ,CAAC;IAEO,SAAS,CAAC,IAAY;QAC5B,MAAM,GAAG,GAA2B;YAClC,QAAQ,EAAE,SAAS;YACnB,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,OAAO;YACd,MAAM,EAAE,QAAQ;YAChB,SAAS,EAAE,GAAG;YACd,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,OAAO;YACjB,IAAI,EAAE,IAAI;YACV,WAAW,EAAE,IAAI;YACjB,WAAW,EAAE,KAAK;SACnB,CAAC;QACF,OAAO,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC;IAC5B,CAAC;CACF"}
|