@okrapdf/cli 0.2.12 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/chat.d.ts +19 -1
- package/dist/commands/chat.d.ts.map +1 -1
- package/dist/commands/chat.js +185 -195
- package/dist/commands/chat.js.map +1 -1
- package/dist/commands/chat.test.d.ts +17 -0
- package/dist/commands/chat.test.d.ts.map +1 -0
- package/dist/commands/chat.test.js +199 -0
- package/dist/commands/chat.test.js.map +1 -0
- package/dist/commands/entities.d.ts.map +1 -1
- package/dist/commands/entities.js +4 -3
- package/dist/commands/entities.js.map +1 -1
- package/dist/commands/jobs.d.ts.map +1 -1
- package/dist/commands/jobs.js +250 -54
- package/dist/commands/jobs.js.map +1 -1
- package/dist/commands/schema.test.js +18 -0
- package/dist/commands/schema.test.js.map +1 -1
- package/dist/lib/agent-renderer.d.ts +27 -0
- package/dist/lib/agent-renderer.d.ts.map +1 -0
- package/dist/lib/agent-renderer.js +71 -0
- package/dist/lib/agent-renderer.js.map +1 -0
- package/dist/lib/bootstrap.d.ts +13 -0
- package/dist/lib/bootstrap.d.ts.map +1 -0
- package/dist/lib/bootstrap.js +13 -0
- package/dist/lib/bootstrap.js.map +1 -0
- package/dist/lib/bootstrap.test.d.ts +8 -0
- package/dist/lib/bootstrap.test.d.ts.map +1 -0
- package/dist/lib/bootstrap.test.js +53 -0
- package/dist/lib/bootstrap.test.js.map +1 -0
- package/dist/lib/pdfquery-adapter.d.ts +2 -1
- package/dist/lib/pdfquery-adapter.d.ts.map +1 -1
- package/dist/lib/pdfquery-adapter.js.map +1 -1
- package/dist/lib/system-prompt.d.ts +5 -0
- package/dist/lib/system-prompt.d.ts.map +1 -0
- package/dist/lib/system-prompt.js +58 -0
- package/dist/lib/system-prompt.js.map +1 -0
- package/dist/lib/validator.js +11 -11
- package/dist/lib/validator.js.map +1 -1
- package/dist/lib/ws-polyfill.d.ts +2 -0
- package/dist/lib/ws-polyfill.d.ts.map +1 -0
- package/dist/lib/ws-polyfill.js +9 -0
- package/dist/lib/ws-polyfill.js.map +1 -0
- package/dist/types.d.ts +64 -20
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -2
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fetch bootstrap data for an OCR job's sandbox
|
|
3
|
+
*/
|
|
4
|
+
import { get } from './client.js';
|
|
5
|
+
export async function fetchBootstrap(jobId) {
|
|
6
|
+
const res = await get(`api/sandbox/bootstrap/ocr/${jobId}`);
|
|
7
|
+
return {
|
|
8
|
+
archiveUrl: res.archiveUrl,
|
|
9
|
+
metadata: res.metadata,
|
|
10
|
+
pdfFilename: res.pdfFilename,
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=bootstrap.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bootstrap.js","sourceRoot":"","sources":["../../src/lib/bootstrap.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,GAAG,EAAE,MAAM,aAAa,CAAC;AAqBlC,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAAa;IAChD,MAAM,GAAG,GAAG,MAAM,GAAG,CAAoB,6BAA6B,KAAK,EAAE,CAAC,CAAC;IAC/E,OAAO;QACL,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,WAAW,EAAE,GAAG,CAAC,WAAW;KAC7B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bootstrap.test.d.ts","sourceRoot":"","sources":["../../src/lib/bootstrap.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for bootstrap data parsing
|
|
3
|
+
*
|
|
4
|
+
* apiRequest() unwraps { success, data } → returns data directly.
|
|
5
|
+
* fetchBootstrap must NOT double-unwrap (i.e. access res.data.metadata).
|
|
6
|
+
*/
|
|
7
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
8
|
+
// Mock the client module before importing bootstrap
|
|
9
|
+
vi.mock('./client.js', () => ({
|
|
10
|
+
get: vi.fn(),
|
|
11
|
+
}));
|
|
12
|
+
import { fetchBootstrap } from './bootstrap.js';
|
|
13
|
+
import { get } from './client.js';
|
|
14
|
+
const mockedGet = vi.mocked(get);
|
|
15
|
+
/** What the raw API returns (before apiRequest unwraps) */
|
|
16
|
+
const RAW_API_RESPONSE = {
|
|
17
|
+
success: true,
|
|
18
|
+
data: {
|
|
19
|
+
metadata: {
|
|
20
|
+
jobId: 'ocr-h6-OC6x3EO3AqnMLS3deA',
|
|
21
|
+
totalPages: 28,
|
|
22
|
+
completedPages: 28,
|
|
23
|
+
status: 'completed',
|
|
24
|
+
pdfUrl: 'gs://okrapdf/inbox/ocr-h6-OC6x3EO3AqnMLS3deA/2502.15840.pdf',
|
|
25
|
+
},
|
|
26
|
+
pdfFilename: '2502.15840.pdf',
|
|
27
|
+
archiveUrl: 'https://storage.googleapis.com/okrapdf/workspaces/ocr-h6-OC6x3EO3AqnMLS3deA/workspace.tar.gz?signed',
|
|
28
|
+
},
|
|
29
|
+
archiveUrl: 'https://storage.googleapis.com/okrapdf/workspaces/ocr-h6-OC6x3EO3AqnMLS3deA/workspace.tar.gz?signed',
|
|
30
|
+
};
|
|
31
|
+
/** What apiRequest returns after unwrapping { success, data } → data */
|
|
32
|
+
const UNWRAPPED = RAW_API_RESPONSE.data;
|
|
33
|
+
describe('fetchBootstrap', () => {
|
|
34
|
+
it('parses unwrapped response correctly (no double-unwrap)', async () => {
|
|
35
|
+
mockedGet.mockResolvedValueOnce(UNWRAPPED);
|
|
36
|
+
const result = await fetchBootstrap('ocr-h6-OC6x3EO3AqnMLS3deA');
|
|
37
|
+
expect(result.metadata.jobId).toBe('ocr-h6-OC6x3EO3AqnMLS3deA');
|
|
38
|
+
expect(result.metadata.totalPages).toBe(28);
|
|
39
|
+
expect(result.metadata.status).toBe('completed');
|
|
40
|
+
expect(result.pdfFilename).toBe('2502.15840.pdf');
|
|
41
|
+
expect(result.archiveUrl).toContain('workspace.tar.gz');
|
|
42
|
+
});
|
|
43
|
+
it('calls correct API endpoint', async () => {
|
|
44
|
+
mockedGet.mockResolvedValueOnce(UNWRAPPED);
|
|
45
|
+
await fetchBootstrap('ocr-abc123');
|
|
46
|
+
expect(mockedGet).toHaveBeenCalledWith('api/sandbox/bootstrap/ocr/ocr-abc123');
|
|
47
|
+
});
|
|
48
|
+
it('propagates API errors', async () => {
|
|
49
|
+
mockedGet.mockRejectedValueOnce(new Error('Not found'));
|
|
50
|
+
await expect(fetchBootstrap('ocr-bad')).rejects.toThrow('Not found');
|
|
51
|
+
});
|
|
52
|
+
});
|
|
53
|
+
//# sourceMappingURL=bootstrap.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bootstrap.test.js","sourceRoot":"","sources":["../../src/lib/bootstrap.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAElD,oDAAoD;AACpD,EAAE,CAAC,IAAI,CAAC,aAAa,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5B,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE;CACb,CAAC,CAAC,CAAC;AAEJ,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,GAAG,EAAE,MAAM,aAAa,CAAC;AAElC,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;AAEjC,2DAA2D;AAC3D,MAAM,gBAAgB,GAAG;IACvB,OAAO,EAAE,IAAI;IACb,IAAI,EAAE;QACJ,QAAQ,EAAE;YACR,KAAK,EAAE,2BAA2B;YAClC,UAAU,EAAE,EAAE;YACd,cAAc,EAAE,EAAE;YAClB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,6DAA6D;SACtE;QACD,WAAW,EAAE,gBAAgB;QAC7B,UAAU,EAAE,qGAAqG;KAClH;IACD,UAAU,EAAE,qGAAqG;CAClH,CAAC;AAEF,wEAAwE;AACxE,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC;AAExC,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;QACtE,SAAS,CAAC,qBAAqB,CAAC,SAAgB,CAAC,CAAC;QAElD,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,2BAA2B,CAAC,CAAC;QAEjE,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,SAAS,CAAC,qBAAqB,CAAC,SAAgB,CAAC,CAAC;QAElD,MAAM,cAAc,CAAC,YAAY,CAAC,CAAC;QAEnC,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAAC,sCAAsC,CAAC,CAAC;IACjF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;QACrC,SAAS,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;QAExD,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* all other CLI commands stay unblocked even if pdfquery has issues.
|
|
9
9
|
*/
|
|
10
10
|
import type { JobResults } from '../types.js';
|
|
11
|
-
export type { QueryResult,
|
|
11
|
+
export type { QueryResult, BoundingBox as PdfQueryBBox } from 'pdfquery';
|
|
12
12
|
type BBox = {
|
|
13
13
|
x: number;
|
|
14
14
|
y: number;
|
|
@@ -23,6 +23,7 @@ type Tag = {
|
|
|
23
23
|
text?: string;
|
|
24
24
|
attrs?: Record<string, unknown>;
|
|
25
25
|
};
|
|
26
|
+
export type PdfQueryTag = Tag;
|
|
26
27
|
/** Convert OkraPDF job results to Tags */
|
|
27
28
|
export declare function jobResultsToTags(jobResults: JobResults): Tag[];
|
|
28
29
|
/** Create a pdfquery session from OkraPDF job results */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdfquery-adapter.d.ts","sourceRoot":"","sources":["../../src/lib/pdfquery-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAa9C,YAAY,EAAE,WAAW,EAAE,
|
|
1
|
+
{"version":3,"file":"pdfquery-adapter.d.ts","sourceRoot":"","sources":["../../src/lib/pdfquery-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAa9C,YAAY,EAAE,WAAW,EAAE,WAAW,IAAI,YAAY,EAAE,MAAM,UAAU,CAAC;AAMzE,KAAK,IAAI,GAAG;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC;AAYpE,KAAK,GAAG,GAAG;IACT,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACjC,CAAC;AAGF,MAAM,MAAM,WAAW,GAAG,GAAG,CAAC;AAE9B,0CAA0C;AAC1C,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,UAAU,GAAG,GAAG,EAAE,CAsB9D;AAMD,yDAAyD;AACzD,wBAAsB,oBAAoB,CAAC,UAAU,EAAE,UAAU,gBAIhE;AAMD,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,GAAG,GAAG,MAAM,CAEzD;AAED,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,GAAG,GAAG,GAAG,EAAE,CAExD;AAED,wBAAgB,eAAe,CAAC,WAAW,EAAE,GAAG,GAAG;IACjD,QAAQ,EAAE,GAAG,EAAE,CAAC;IAChB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC;CACH,CAeA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdfquery-adapter.js","sourceRoot":"","sources":["../../src/lib/pdfquery-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,+BAA+B;AAC/B,IAAI,SAAS,GAAQ,IAAI,CAAC;AAE1B,KAAK,UAAU,WAAW;IACxB,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,SAAS,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAWD,6CAA6C;AAC7C,SAAS,eAAe,CAAC,CAA6D;IACpF,OAAO;QACL,CAAC,EAAE,CAAC,CAAC,IAAI;QACT,CAAC,EAAE,CAAC,CAAC,IAAI;QACT,KAAK,EAAE,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI;QACtB,MAAM,EAAE,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI;KACxB,CAAC;AACJ,CAAC;
|
|
1
|
+
{"version":3,"file":"pdfquery-adapter.js","sourceRoot":"","sources":["../../src/lib/pdfquery-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,+BAA+B;AAC/B,IAAI,SAAS,GAAQ,IAAI,CAAC;AAE1B,KAAK,UAAU,WAAW;IACxB,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,SAAS,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAWD,6CAA6C;AAC7C,SAAS,eAAe,CAAC,CAA6D;IACpF,OAAO;QACL,CAAC,EAAE,CAAC,CAAC,IAAI;QACT,CAAC,EAAE,CAAC,CAAC,IAAI;QACT,KAAK,EAAE,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI;QACtB,MAAM,EAAE,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI;KACxB,CAAC;AACJ,CAAC;AAcD,0CAA0C;AAC1C,MAAM,UAAU,gBAAgB,CAAC,UAAsB;IACrD,MAAM,IAAI,GAAU,EAAE,CAAC;IAEvB,IAAI,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAC9B,KAAK,MAAM,KAAK,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;YACtC,IAAI,CAAC,IAAI,CAAC;gBACR,EAAE,EAAE,KAAK,CAAC,EAAE;gBACZ,IAAI,EAAE,OAAO;gBACb,IAAI,EAAE,KAAK,CAAC,WAAW;gBACvB,IAAI,EAAE,KAAK,CAAC,IAAI;oBACd,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC;oBAC7B,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE;gBACvC,IAAI,EAAE,KAAK,CAAC,gBAAgB;gBAC5B,KAAK,EAAE;oBACL,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,GAAG;oBACnC,kBAAkB,EAAE,SAAS;iBAC9B;aACF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+EAA+E;AAC/E,gDAAgD;AAChD,+EAA+E;AAE/E,yDAAyD;AACzD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,UAAsB;IAC/D,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;IAC1C,OAAO,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,+EAA+E;AAC/E,wDAAwD;AACxD,+EAA+E;AAE/E,MAAM,UAAU,gBAAgB,CAAC,WAAgB;IAC/C,OAAO,WAAW,CAAC,IAAI,EAAE,CAAC;AAC5B,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,WAAgB;IAC/C,OAAO,WAAW,CAAC,OAAO,EAAE,CAAC;AAC/B,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,WAAgB;IAW9C,MAAM,QAAQ,GAAG,WAAW,CAAC,OAAO,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,EAAE,CAAC;IAElC,OAAO;QACL,QAAQ;QACR,KAAK,EAAE;YACL,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,aAAa,EAAE,
KAAK,CAAC,aAAa,IAAI,CAAC;SACxC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal system prompt for CLI agent chat (no UI/mention context)
|
|
3
|
+
*/
|
|
4
|
+
export declare const CLI_SYSTEM_PROMPT = "You are a helpful assistant that can help with document analysis and extraction. Your goal is to understand the one pdf given to you with tools like parse and whole suite of python libraries.\n\n<goal>\n - To start, list, or grep for keywords inside derived folder directly. Most likely, you will find markdown files that are page-to-page extracted from the pdf.\n - Try to cite from the derived folder when you try to answer a question or create a document.\n - If there is no relevant information returned from searching the derived/ directory, offer to parse more pages.\n</goal>\n\n<environment>\n OS: Linux (x86_64) in an ephemeral sandbox environment\n Python: 3.12.3, fully functional\n</environment>\n\n<files>\n - There is only one pdf that is in /mnt/data/*.pdf. When user talks about \"the document\", refer to this pdf.\n - All the files you need are in /mnt/data/*\n - Everything you create/extract from the source pdf, move them in the app/derived directory so we can save them for future use in this ephemeral sandbox\n</files>\n\n<derived-folder-structure>\n The derived/ folder contains two parallel OCR outputs that match page-to-page:\n\n <ocr-folder path=\"derived/ocr/\">\n - Raw character extraction from Google DocAI without layout interpretation\n - Use as GROUND TRUTH to detect hallucinations in pages/ output\n - File format: page_001.md, page_002.md, ... + index.json\n </ocr-folder>\n\n <pages-folder path=\"derived/pages/\">\n - AI-interpreted OCR with layout intelligence (VLM/OpenRouter)\n - Structured output: markdown tables, proper column alignment, organized sections\n - Use for QA, data extraction, and answering user questions\n - File format: page_001.md, page_002.md, ... + index.json\n </pages-folder>\n\n <validation-workflow>\n When answering questions or extracting data:\n 1. Use pages/ for structured extraction and user-facing answers\n 2. Cross-reference ocr/ to verify numbers/text are not hallucinated\n 3. 
If pages/ content differs significantly from ocr/, flag as potential hallucination\n </validation-workflow>\n</derived-folder-structure>\n\n<parse-cli-reference>\n The parse CLI extracts content from PDFs. Output goes to /mnt/data/parse/ directory.\n parse document.pdf # Parse entire PDF\n parse -v document.pdf # Verbose (show progress)\n For specific page ranges: use pypdf to extract pages first, then parse the extracted PDF.\n</parse-cli-reference>\n\n<constraints>\n - NEVER use the Read tool on PDF or image files directly - they will exceed buffer limits and crash.\n - When you mention a file path, always use the full path, ie /mnt/data/derived/revenue.xlsx\n</constraints>";
|
|
5
|
+
//# sourceMappingURL=system-prompt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-prompt.d.ts","sourceRoot":"","sources":["../../src/lib/system-prompt.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,iBAAiB,oqFAqDf,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal system prompt for CLI agent chat (no UI/mention context)
|
|
3
|
+
*/
|
|
4
|
+
export const CLI_SYSTEM_PROMPT = `You are a helpful assistant that can help with document analysis and extraction. Your goal is to understand the one pdf given to you with tools like parse and whole suite of python libraries.
|
|
5
|
+
|
|
6
|
+
<goal>
|
|
7
|
+
- To start, list, or grep for keywords inside derived folder directly. Most likely, you will find markdown files that are page-to-page extracted from the pdf.
|
|
8
|
+
- Try to cite from the derived folder when you try to answer a question or create a document.
|
|
9
|
+
- If there is no relevant information returned from searching the derived/ directory, offer to parse more pages.
|
|
10
|
+
</goal>
|
|
11
|
+
|
|
12
|
+
<environment>
|
|
13
|
+
OS: Linux (x86_64) in an ephemeral sandbox environment
|
|
14
|
+
Python: 3.12.3, fully functional
|
|
15
|
+
</environment>
|
|
16
|
+
|
|
17
|
+
<files>
|
|
18
|
+
- There is only one pdf that is in /mnt/data/*.pdf. When user talks about "the document", refer to this pdf.
|
|
19
|
+
- All the files you need are in /mnt/data/*
|
|
20
|
+
- Everything you create/extract from the source pdf, move them in the app/derived directory so we can save them for future use in this ephemeral sandbox
|
|
21
|
+
</files>
|
|
22
|
+
|
|
23
|
+
<derived-folder-structure>
|
|
24
|
+
The derived/ folder contains two parallel OCR outputs that match page-to-page:
|
|
25
|
+
|
|
26
|
+
<ocr-folder path="derived/ocr/">
|
|
27
|
+
- Raw character extraction from Google DocAI without layout interpretation
|
|
28
|
+
- Use as GROUND TRUTH to detect hallucinations in pages/ output
|
|
29
|
+
- File format: page_001.md, page_002.md, ... + index.json
|
|
30
|
+
</ocr-folder>
|
|
31
|
+
|
|
32
|
+
<pages-folder path="derived/pages/">
|
|
33
|
+
- AI-interpreted OCR with layout intelligence (VLM/OpenRouter)
|
|
34
|
+
- Structured output: markdown tables, proper column alignment, organized sections
|
|
35
|
+
- Use for QA, data extraction, and answering user questions
|
|
36
|
+
- File format: page_001.md, page_002.md, ... + index.json
|
|
37
|
+
</pages-folder>
|
|
38
|
+
|
|
39
|
+
<validation-workflow>
|
|
40
|
+
When answering questions or extracting data:
|
|
41
|
+
1. Use pages/ for structured extraction and user-facing answers
|
|
42
|
+
2. Cross-reference ocr/ to verify numbers/text are not hallucinated
|
|
43
|
+
3. If pages/ content differs significantly from ocr/, flag as potential hallucination
|
|
44
|
+
</validation-workflow>
|
|
45
|
+
</derived-folder-structure>
|
|
46
|
+
|
|
47
|
+
<parse-cli-reference>
|
|
48
|
+
The parse CLI extracts content from PDFs. Output goes to /mnt/data/parse/ directory.
|
|
49
|
+
parse document.pdf # Parse entire PDF
|
|
50
|
+
parse -v document.pdf # Verbose (show progress)
|
|
51
|
+
For specific page ranges: use pypdf to extract pages first, then parse the extracted PDF.
|
|
52
|
+
</parse-cli-reference>
|
|
53
|
+
|
|
54
|
+
<constraints>
|
|
55
|
+
- NEVER use the Read tool on PDF or image files directly - they will exceed buffer limits and crash.
|
|
56
|
+
- When you mention a file path, always use the full path, ie /mnt/data/derived/revenue.xlsx
|
|
57
|
+
</constraints>`;
|
|
58
|
+
//# sourceMappingURL=system-prompt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../src/lib/system-prompt.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;eAqDlB,CAAC"}
|
package/dist/lib/validator.js
CHANGED
|
@@ -101,31 +101,31 @@ export function validateJobComplete(validation) {
|
|
|
101
101
|
// Job exists but not complete
|
|
102
102
|
const statusMessages = {
|
|
103
103
|
queued: {
|
|
104
|
-
message: `Job '${validation.job.job_id
|
|
104
|
+
message: `Job '${validation.job.job_id}' is queued (status: queued)`,
|
|
105
105
|
suggestions: [
|
|
106
106
|
'Try:',
|
|
107
|
-
` okra jobs get ${validation.job.job_id
|
|
108
|
-
` okra jobs wait ${validation.job.job_id
|
|
107
|
+
` okra jobs get ${validation.job.job_id} # Check status`,
|
|
108
|
+
` okra jobs wait ${validation.job.job_id} # Wait for completion`,
|
|
109
109
|
],
|
|
110
110
|
},
|
|
111
111
|
pending: {
|
|
112
|
-
message: `Job '${validation.job.job_id
|
|
112
|
+
message: `Job '${validation.job.job_id}' is pending (status: pending)`,
|
|
113
113
|
suggestions: [
|
|
114
114
|
'Try:',
|
|
115
|
-
` okra jobs get ${validation.job.job_id
|
|
116
|
-
` okra jobs wait ${validation.job.job_id
|
|
115
|
+
` okra jobs get ${validation.job.job_id} # Check status`,
|
|
116
|
+
` okra jobs wait ${validation.job.job_id} # Wait for completion`,
|
|
117
117
|
],
|
|
118
118
|
},
|
|
119
119
|
running: {
|
|
120
|
-
message: `Job '${validation.job.job_id
|
|
120
|
+
message: `Job '${validation.job.job_id}' is still processing (status: running)`,
|
|
121
121
|
suggestions: [
|
|
122
122
|
'Try:',
|
|
123
|
-
` okra jobs get ${validation.job.job_id
|
|
124
|
-
` okra jobs wait ${validation.job.job_id
|
|
123
|
+
` okra jobs get ${validation.job.job_id} # Check status`,
|
|
124
|
+
` okra jobs wait ${validation.job.job_id} # Wait for completion`,
|
|
125
125
|
],
|
|
126
126
|
},
|
|
127
127
|
failed: {
|
|
128
|
-
message: `Job '${validation.job.job_id
|
|
128
|
+
message: `Job '${validation.job.job_id}' failed`,
|
|
129
129
|
suggestions: [
|
|
130
130
|
`Error: ${validation.job.error || 'Unknown error'}`,
|
|
131
131
|
'',
|
|
@@ -134,7 +134,7 @@ export function validateJobComplete(validation) {
|
|
|
134
134
|
],
|
|
135
135
|
},
|
|
136
136
|
cancelled: {
|
|
137
|
-
message: `Job '${validation.job.job_id
|
|
137
|
+
message: `Job '${validation.job.job_id}' was cancelled`,
|
|
138
138
|
suggestions: [
|
|
139
139
|
'Try:',
|
|
140
140
|
' okra extract myfile.pdf # Create new job',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/lib/validator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,GAAG,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAGtB;IAFlB,YACE,OAAe,EACC,WAAqB;QAErC,KAAK,CAAC,OAAO,CAAC,CAAC;QAFC,gBAAW,GAAX,WAAW,CAAU;QAGrC,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;IAED,QAAQ;QACN,MAAM,KAAK,GAAG;YACZ,KAAK,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,OAAO,EAAE,CAAC;YACrC,EAAE;YACF,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;SACvC,CAAC;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;CACF;AAYD;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,8DAA8D;IAC9D,OAAO,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;AAC5D,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAa,EACb,KAAoB;IAEpB,qBAAqB;IACrB,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,IAAI,eAAe,CACxB,2BAA2B,KAAK,GAAG,EACnC;gBACE,2DAA2D;gBAC3D,EAAE;gBACF,WAAW;gBACX,oCAAoC;gBACpC,6CAA6C;aAC9C,CACF;SACF,CAAC;IACJ,CAAC;IAED,mCAAmC;IACnC,MAAM,YAAY,GAAG,KAAK,IAAI,IAAI,YAAY,EAAE,CAAC;IAEjD,IAAI,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/B,sBAAsB;QACtB,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,SAAS,EAAE,CAAC;YACd,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,MAAM,EAAE,WAAW,EAAE,mCAAmC;gBACxD,GAAG,EAAE,SAA2B;aACjC,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,GAAG,CAAM,eAAe,KAAK,EAAE,CAAC,CAAC;QAEnD,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,GAAG;SACJ,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;YAC9D,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,IAAI,eAAe,CACxB,QAAQ,KAAK,aAAa,EAC1B;oBACE,aAAa;oBACb,qDAAqD;oBACrD,0DAA0D;iBAC3D,CACF;aACF,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,UAAyB;IAC3D,IAAI,CAAC,UAAU,CAA
C,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC;QAC1C,OAAO,IAAI,eAAe,CACxB,uBAAuB,EACvB,CAAC,oCAAoC,CAAC,CACvC,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,IAAI,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC;IAE1D,IAAI,MAAM,KAAK,WAAW,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAC,CAAC,WAAW;IAC1B,CAAC;IAED,8BAA8B;IAC9B,MAAM,cAAc,GAA+D;QACjF,MAAM,EAAE;YACN,OAAO,EAAE,QAAQ,UAAU,CAAC,GAAG,CAAC,MAAM,
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/lib/validator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,GAAG,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAGtB;IAFlB,YACE,OAAe,EACC,WAAqB;QAErC,KAAK,CAAC,OAAO,CAAC,CAAC;QAFC,gBAAW,GAAX,WAAW,CAAU;QAGrC,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;IAED,QAAQ;QACN,MAAM,KAAK,GAAG;YACZ,KAAK,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,OAAO,EAAE,CAAC;YACrC,EAAE;YACF,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;SACvC,CAAC;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;CACF;AAYD;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,8DAA8D;IAC9D,OAAO,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;AAC5D,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAa,EACb,KAAoB;IAEpB,qBAAqB;IACrB,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,IAAI,eAAe,CACxB,2BAA2B,KAAK,GAAG,EACnC;gBACE,2DAA2D;gBAC3D,EAAE;gBACF,WAAW;gBACX,oCAAoC;gBACpC,6CAA6C;aAC9C,CACF;SACF,CAAC;IACJ,CAAC;IAED,mCAAmC;IACnC,MAAM,YAAY,GAAG,KAAK,IAAI,IAAI,YAAY,EAAE,CAAC;IAEjD,IAAI,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/B,sBAAsB;QACtB,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,SAAS,EAAE,CAAC;YACd,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,MAAM,EAAE,WAAW,EAAE,mCAAmC;gBACxD,GAAG,EAAE,SAA2B;aACjC,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,GAAG,CAAM,eAAe,KAAK,EAAE,CAAC,CAAC;QAEnD,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,GAAG;SACJ,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;YAC9D,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,IAAI,eAAe,CACxB,QAAQ,KAAK,aAAa,EAC1B;oBACE,aAAa;oBACb,qDAAqD;oBACrD,0DAA0D;iBAC3D,CACF;aACF,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,UAAyB;IAC3D,IAAI,CAAC,UAAU,CAA
C,MAAM,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC;QAC1C,OAAO,IAAI,eAAe,CACxB,uBAAuB,EACvB,CAAC,oCAAoC,CAAC,CACvC,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,IAAI,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC;IAE1D,IAAI,MAAM,KAAK,WAAW,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAC,CAAC,WAAW;IAC1B,CAAC;IAED,8BAA8B;IAC9B,MAAM,cAAc,GAA+D;QACjF,MAAM,EAAE;YACN,OAAO,EAAE,QAAQ,UAAU,CAAC,GAAG,CAAC,MAAM,8BAA8B;YACpE,WAAW,EAAE;gBACX,MAAM;gBACN,mBAAmB,UAAU,CAAC,GAAG,CAAC,MAAM,0BAA0B;gBAClE,oBAAoB,UAAU,CAAC,GAAG,CAAC,MAAM,gCAAgC;aAC1E;SACF;QACD,OAAO,EAAE;YACP,OAAO,EAAE,QAAQ,UAAU,CAAC,GAAG,CAAC,MAAM,gCAAgC;YACtE,WAAW,EAAE;gBACX,MAAM;gBACN,mBAAmB,UAAU,CAAC,GAAG,CAAC,MAAM,0BAA0B;gBAClE,oBAAoB,UAAU,CAAC,GAAG,CAAC,MAAM,gCAAgC;aAC1E;SACF;QACD,OAAO,EAAE;YACP,OAAO,EAAE,QAAQ,UAAU,CAAC,GAAG,CAAC,MAAM,yCAAyC;YAC/E,WAAW,EAAE;gBACX,MAAM;gBACN,mBAAmB,UAAU,CAAC,GAAG,CAAC,MAAM,0BAA0B;gBAClE,oBAAoB,UAAU,CAAC,GAAG,CAAC,MAAM,gCAAgC;aAC1E;SACF;QACD,MAAM,EAAE;YACN,OAAO,EAAE,QAAQ,UAAU,CAAC,GAAG,CAAC,MAAM,UAAU;YAChD,WAAW,EAAE;gBACX,UAAU,UAAU,CAAC,GAAG,CAAC,KAAK,IAAI,eAAe,EAAE;gBACnD,EAAE;gBACF,MAAM;gBACN,qDAAqD;aACtD;SACF;QACD,SAAS,EAAE;YACT,OAAO,EAAE,QAAQ,UAAU,CAAC,GAAG,CAAC,MAAM,iBAAiB;YACvD,WAAW,EAAE;gBACX,MAAM;gBACN,qDAAqD;aACtD;SACF;KACF,CAAC;IAEF,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,IAAI;QACzD,OAAO,EAAE,8BAA8B,MAAM,EAAE;QAC/C,WAAW,EAAE,CAAC,wBAAwB,CAAC;KACxC,CAAC;IAEF,OAAO,IAAI,eAAe,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;AACnD,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,KAAa,EACb,KAAoB;IAEpB,sBAAsB;IACtB,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAEzD,IAAI,CAAC,UAAU,CAAC,MAAM,IAAI,UAAU,CAAC,KAAK,EAAE,CAAC;QAC3C,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,CAAC,KAAM,EAAE,CAAC;IACpD,CAAC;IAED,2BAA2B;IAC3B,MAAM,aAAa,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;IAEtD,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC;IAChD,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,UAAU,CAAC,GAAI,EAAE,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAsB;IAC1D,OAAO,KAAK,CAAC,QAAQ,EAAE
,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,KAAsB;IACzD,OAAO,CAAC,KAAK,CAAC,IAAI,GAAG,qBAAqB,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;IAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ws-polyfill.d.ts","sourceRoot":"","sources":["../../src/lib/ws-polyfill.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebSocket polyfill for Node.js — import before AgentSessionClient
|
|
3
|
+
*/
|
|
4
|
+
import WebSocket from 'ws';
|
|
5
|
+
if (!globalThis.WebSocket) {
|
|
6
|
+
// @ts-expect-error ws is compatible enough for agent-session
|
|
7
|
+
globalThis.WebSocket = WebSocket;
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=ws-polyfill.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ws-polyfill.js","sourceRoot":"","sources":["../../src/lib/ws-polyfill.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,SAAS,MAAM,IAAI,CAAC;AAE3B,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,CAAC;IAC1B,6DAA6D;IAC7D,UAAU,CAAC,SAAS,GAAG,SAAS,CAAC;AACnC,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -14,33 +14,26 @@ export interface Document {
|
|
|
14
14
|
tables_count?: number;
|
|
15
15
|
}
|
|
16
16
|
export interface Job {
|
|
17
|
-
|
|
18
|
-
job_id?: string;
|
|
19
|
-
status: JobStatus;
|
|
20
|
-
file_name?: string | null;
|
|
21
|
-
filename?: string | null;
|
|
22
|
-
pdf_url?: string | null;
|
|
23
|
-
total_pages: number | null;
|
|
24
|
-
pages_completed: number | null;
|
|
25
|
-
document_uuid?: string | null;
|
|
26
|
-
inserted_at?: string;
|
|
27
|
-
created_at?: string;
|
|
28
|
-
updated_at: string;
|
|
29
|
-
error: string | null;
|
|
30
|
-
}
|
|
31
|
-
export interface NormalizedJob {
|
|
32
|
-
id: string;
|
|
17
|
+
job_id: string;
|
|
33
18
|
status: JobStatus;
|
|
34
|
-
|
|
19
|
+
filename: string | null;
|
|
35
20
|
total_pages: number | null;
|
|
36
21
|
pages_completed: number | null;
|
|
22
|
+
pages_failed?: number;
|
|
23
|
+
progress_percent?: number | null;
|
|
24
|
+
created_at: string;
|
|
37
25
|
updated_at: string;
|
|
38
26
|
error: string | null;
|
|
27
|
+
viewer_url?: string;
|
|
28
|
+
results_url?: string;
|
|
39
29
|
}
|
|
40
30
|
export interface CreateJobResponse {
|
|
41
31
|
job_id: string;
|
|
42
32
|
status: JobStatus;
|
|
33
|
+
filename: string;
|
|
43
34
|
poll_url: string;
|
|
35
|
+
results_url: string;
|
|
36
|
+
viewer_url: string;
|
|
44
37
|
message?: string;
|
|
45
38
|
}
|
|
46
39
|
export interface JobResultsApiResponse {
|
|
@@ -147,9 +140,6 @@ export interface SignedUrlResponse {
|
|
|
147
140
|
signedUrl: string;
|
|
148
141
|
gcsPath: string;
|
|
149
142
|
gcsFileName: string;
|
|
150
|
-
upload_url?: string;
|
|
151
|
-
document_uuid?: string;
|
|
152
|
-
gcs_path?: string;
|
|
153
143
|
}
|
|
154
144
|
export interface UserInfo {
|
|
155
145
|
id: string;
|
|
@@ -164,4 +154,58 @@ export interface ExtractionOptions {
|
|
|
164
154
|
ocr_engine?: OcrEngine;
|
|
165
155
|
vlm_model?: VlmModel;
|
|
166
156
|
}
|
|
157
|
+
export type SchemaFieldType = 'string' | 'number' | 'boolean' | 'date' | 'array' | 'object';
|
|
158
|
+
export interface SchemaFieldDefinition {
|
|
159
|
+
key: string;
|
|
160
|
+
label?: string;
|
|
161
|
+
description?: string;
|
|
162
|
+
type: SchemaFieldType;
|
|
163
|
+
required?: boolean;
|
|
164
|
+
}
|
|
165
|
+
export interface SchemaDefinition {
|
|
166
|
+
name?: string;
|
|
167
|
+
fields: SchemaFieldDefinition[];
|
|
168
|
+
}
|
|
169
|
+
export interface SchemaRunRequest {
|
|
170
|
+
schema: SchemaDefinition;
|
|
171
|
+
options?: {
|
|
172
|
+
pages?: string;
|
|
173
|
+
citation_mode?: 'best' | 'all';
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
export interface SchemaCitation {
|
|
177
|
+
page: number;
|
|
178
|
+
quote: string;
|
|
179
|
+
bbox?: {
|
|
180
|
+
x: number;
|
|
181
|
+
y: number;
|
|
182
|
+
width: number;
|
|
183
|
+
height: number;
|
|
184
|
+
};
|
|
185
|
+
source: 'ocr_page' | 'table';
|
|
186
|
+
}
|
|
187
|
+
export interface SchemaFieldResult {
|
|
188
|
+
path: string;
|
|
189
|
+
type: SchemaFieldType;
|
|
190
|
+
value: unknown;
|
|
191
|
+
confidence: number | null;
|
|
192
|
+
citations: SchemaCitation[];
|
|
193
|
+
}
|
|
194
|
+
export interface SchemaRunResponse {
|
|
195
|
+
job_id: string;
|
|
196
|
+
run_id: string;
|
|
197
|
+
status: 'completed';
|
|
198
|
+
extracted_at: string;
|
|
199
|
+
values: Record<string, unknown>;
|
|
200
|
+
fields: SchemaFieldResult[];
|
|
201
|
+
}
|
|
202
|
+
export interface SchemaN8nReference {
|
|
203
|
+
recommended_columns: {
|
|
204
|
+
run_snapshot: string[];
|
|
205
|
+
field_rows: string[];
|
|
206
|
+
};
|
|
207
|
+
run_snapshot_row: Record<string, unknown>;
|
|
208
|
+
field_rows: Array<Record<string, unknown>>;
|
|
209
|
+
upsert_key_recommendation: string;
|
|
210
|
+
}
|
|
167
211
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,GAAG,WAAW,CAAC;AAGhG,MAAM,MAAM,kBAAkB,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,cAAc,CAAC;AAG9G,MAAM,MAAM,YAAY,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,GAAG,UAAU,CAAC;AAG3E,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAGD,MAAM,WAAW,GAAG;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,GAAG,WAAW,CAAC;AAGhG,MAAM,MAAM,kBAAkB,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,cAAc,CAAC;AAG9G,MAAM,MAAM,YAAY,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,GAAG,UAAU,CAAC;AAG3E,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAGD,MAAM,WAAW,GAAG;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,CAAC;IAClB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAGD,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAGD,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE;QACP,MAAM,EAAE,WAAW,EAAE,CAAC;QACtB,IAAI,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KAChD,CAAC;CACH;AAGD,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,MAAM,EAAE,WAAW,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,UAAU;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,WAAW,CAAC;CAC5B;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,GAAG
,IAAI,CAAC;IAC1B,IAAI,CAAC,EAAE,WAAW,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,IAAI,EAAE,WAAW,CAAC;IAClB,mBAAmB,CAAC,EAAE,kBAAkB,CAAC;IACzC,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAGD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,UAAU,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,WAAW,CAAC,CAAC;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,CAAC,CAAC;IACT,KAAK,CAAC,EAAE,QAAQ,CAAC;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAGD,MAAM,WAAW,iBAAiB,CAAC,CAAC;IAClC,KAAK,EAAE,CAAC,EAAE,CAAC;IACX,UAAU,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,OAAO,CAAC;QACrB,WAAW,EAAE,OAAO,CAAC;KACtB,CAAC;CACH;AAGD,MAAM,WAAW,SAAS;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,YAAY,CAAC;IAC7B,WAAW,CAAC,EAAE,SAAS,CAAC;IACxB,WAAW,CAAC,EAAE,QAAQ,CAAC;CACxB;AAGD,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAGD,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAGD,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,MAAM,CAAC;AAMpE,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,WAAW,GAAG,UAAU,GAAG,YAAY,CAAC;AAE1E,MAAM,MAAM,QAAQ,GAAG,MAA
M,CAAC;AAE9B,MAAM,WAAW,iBAAiB;IAChC,UAAU,CAAC,EAAE,SAAS,CAAC;IACvB,SAAS,CAAC,EAAE,QAAQ,CAAC;CACtB;AAMD,MAAM,MAAM,eAAe,GACvB,QAAQ,GACR,QAAQ,GACR,SAAS,GACT,MAAM,GACN,OAAO,GACP,QAAQ,CAAC;AAEb,MAAM,WAAW,qBAAqB;IACpC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,eAAe,CAAC;IACtB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,qBAAqB,EAAE,CAAC;CACjC;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,gBAAgB,CAAC;IACzB,OAAO,CAAC,EAAE;QACR,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,aAAa,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;KAChC,CAAC;CACH;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAC/D,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC;CAC9B;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,eAAe,CAAC;IACtB,KAAK,EAAE,OAAO,CAAC;IACf,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,EAAE,cAAc,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,MAAM,EAAE,iBAAiB,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,kBAAkB;IACjC,mBAAmB,EAAE;QACnB,YAAY,EAAE,MAAM,EAAE,CAAC;QACvB,UAAU,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;IACF,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC3C,yBAAyB,EAAE,MAAM,CAAC;CACnC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@okrapdf/cli",
|
|
3
|
-
"version": "0.2.12",
|
|
3
|
+
"version": "0.2.16",
|
|
4
4
|
"description": "OkraPDF command-line interface for PDF extraction and document chat",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
"node": ">=18.0.0"
|
|
47
47
|
},
|
|
48
48
|
"dependencies": {
|
|
49
|
+
"@steventsao/agent-session": "^0.1.21",
|
|
49
50
|
"chalk": "^5.3.0",
|
|
50
51
|
"cli-table3": "^0.6.5",
|
|
51
52
|
"commander": "^12.1.0",
|
|
@@ -55,7 +56,7 @@
|
|
|
55
56
|
"form-data": "^4.0.0",
|
|
56
57
|
"got": "^14.4.2",
|
|
57
58
|
"ora": "^8.0.1",
|
|
58
|
-
"pdfquery": "
|
|
59
|
+
"pdfquery": "^0.1.2",
|
|
59
60
|
"ws": "^8.18.0"
|
|
60
61
|
},
|
|
61
62
|
"optionalDependencies": {
|