@apart-tech/intelligence-core 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/ability.d.ts +1 -1
- package/dist/auth/ability.d.ts.map +1 -1
- package/dist/auth/ability.js +4 -0
- package/dist/auth/ability.js.map +1 -1
- package/dist/auth/ability.test.js +12 -1
- package/dist/auth/ability.test.js.map +1 -1
- package/dist/db/tenant.d.ts.map +1 -1
- package/dist/db/tenant.js +2 -0
- package/dist/db/tenant.js.map +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/services/cleaning-service.d.ts +1 -1
- package/dist/services/cleaning-service.d.ts.map +1 -1
- package/dist/services/cleaning-service.js +46 -20
- package/dist/services/cleaning-service.js.map +1 -1
- package/dist/services/document-service.d.ts +91 -0
- package/dist/services/document-service.d.ts.map +1 -0
- package/dist/services/document-service.js +273 -0
- package/dist/services/document-service.js.map +1 -0
- package/dist/services/document-service.test.d.ts +2 -0
- package/dist/services/document-service.test.d.ts.map +1 -0
- package/dist/services/document-service.test.js +289 -0
- package/dist/services/document-service.test.js.map +1 -0
- package/dist/services/ocr-service.d.ts +21 -0
- package/dist/services/ocr-service.d.ts.map +1 -0
- package/dist/services/ocr-service.js +61 -0
- package/dist/services/ocr-service.js.map +1 -0
- package/dist/services/ocr-service.test.d.ts +2 -0
- package/dist/services/ocr-service.test.d.ts.map +1 -0
- package/dist/services/ocr-service.test.js +66 -0
- package/dist/services/ocr-service.test.js.map +1 -0
- package/package.json +1 -1
- package/prisma/schema.prisma +29 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
2
|
+
import { DocumentService } from "./document-service.js";
|
|
3
|
+
// ── Mock factories ──────────────────────────────────────────────────────────
|
|
4
|
+
/**
 * Builds a minimal stand-in for the Prisma client.
 *
 * Only the `document` delegate is provided, with `create`, `update`,
 * `findFirst` and `findMany` as bare `vi.fn()` stubs; each test configures
 * the return values it needs.
 *
 * @returns {{ document: Record<string, import("vitest").Mock> }}
 */
function mockPrisma() {
  const document = {
    create: vi.fn(),
    update: vi.fn(),
    findFirst: vi.fn(),
    findMany: vi.fn(),
  };
  return { document };
}
|
|
14
|
+
/**
 * Builds a fake storage client.
 *
 * `bucket()` always returns the same bucket object, whose `file()` always
 * returns the same file stub. The file stub resolves:
 *   - `getSignedUrl` to a one-element array holding a fixed URL,
 *   - `exists` to `[true]`,
 *   - `download` to a one-element array holding a Buffer of "file-content".
 *
 * The file stub and the fixed URL are also exposed as `_file` and
 * `_signedUrl` so tests can reconfigure or assert against them.
 */
function mockStorage() {
  const signedUrl = "https://storage.googleapis.com/signed-url";

  const fileObj = {
    getSignedUrl: vi.fn().mockResolvedValue([signedUrl]),
    exists: vi.fn().mockResolvedValue([true]),
    download: vi.fn().mockResolvedValue([Buffer.from("file-content")]),
  };

  // One shared bucket object, so every bucket().file() resolves to fileObj.
  const bucketObj = { file: vi.fn().mockReturnValue(fileObj) };
  const bucket = vi.fn().mockReturnValue(bucketObj);

  return { bucket, _file: fileObj, _signedUrl: signedUrl };
}
|
|
29
|
+
/**
 * Builds a fake OCR service whose `extractText` resolves to a canned result:
 * markdown containing a single `---` separator plus fixed token counts and
 * model name.
 */
function mockOcrService() {
  const cannedResult = {
    text: "# Doc Title\n\nExtracted content\n\n---\n\nPage 2 content",
    tokensIn: 100,
    tokensOut: 50,
    model: "mistral-ocr-latest",
  };
  const extractText = vi.fn().mockResolvedValue(cannedResult);
  return { extractText };
}
|
|
39
|
+
/**
 * Builds a fake node service whose `create` resolves to a fixed node record
 * with id "node-1".
 */
function mockNodeService() {
  const createdNode = {
    id: "node-1",
    type: "document",
    title: "test.pdf",
    content: "extracted text",
  };
  const create = vi.fn().mockResolvedValue(createdNode);
  return { create };
}
|
|
49
|
+
/**
 * Builds a fake chunk service whose `chunkAndEmbed` resolves to `undefined`.
 */
function mockChunkService() {
  const chunkAndEmbed = vi.fn().mockResolvedValue(undefined);
  return { chunkAndEmbed };
}
|
|
54
|
+
/**
 * Builds a fake tag service whose `applyTags` resolves to an empty array.
 */
function mockTagService() {
  const applyTags = vi.fn().mockResolvedValue([]);
  return { applyTags };
}
|
|
59
|
+
// Tenant context passed as the last DocumentService constructor argument.
const TENANT = { organizationId: "org-123" };

// Bucket name passed to the DocumentService constructor.
const BUCKET_NAME = "test-bucket";
|
|
61
|
+
describe("DocumentService", () => {
|
|
62
|
+
let db;
|
|
63
|
+
let storage;
|
|
64
|
+
let ocrService;
|
|
65
|
+
let nodeService;
|
|
66
|
+
let chunkService;
|
|
67
|
+
let tagService;
|
|
68
|
+
let service;
|
|
69
|
+
// Rebuild every collaborator and the service under test before each case so
// that mock call history never leaks between tests.
beforeEach(() => {
  db = mockPrisma();
  storage = mockStorage();
  ocrService = mockOcrService();
  nodeService = mockNodeService();
  chunkService = mockChunkService();
  tagService = mockTagService();

  // Positional constructor arguments; the two `undefined` slots are left
  // unset here (NOTE(review): presumably optional collaborators — confirm
  // against the DocumentService constructor).
  const constructorArgs = [
    db,
    storage,
    BUCKET_NAME,
    ocrService,
    nodeService,
    chunkService,
    tagService,
    undefined,
    undefined,
    TENANT,
  ];
  service = new DocumentService(...constructorArgs);
});
|
|
78
|
+
// ── createUploadUrls ──────────────────────────────────────────────────────
|
|
79
|
+
describe("createUploadUrls", () => {
  it("creates document records and returns signed URLs", async () => {
    // Record the fake DB hands back when the service creates the document.
    const createdRecord = {
      id: "doc-1",
      fileName: "test.pdf",
      mimeType: "application/pdf",
      fileSizeBytes: 1024,
      storagePath: "",
      status: "uploading",
    };
    db.document.create.mockResolvedValue(createdRecord);
    db.document.update.mockResolvedValue({});

    const uploadRequest = {
      fileName: "test.pdf",
      mimeType: "application/pdf",
      sizeBytes: 1024,
    };
    const results = await service.createUploadUrls([uploadRequest], "user@example.com");

    expect(results).toHaveLength(1);
    const first = results[0];
    expect(first.id).toBe("doc-1");
    expect(first.uploadUrl).toBe(storage._signedUrl);
    expect(first.fileName).toBe("test.pdf");

    // Exactly one create and one update should have hit the DB.
    expect(db.document.create).toHaveBeenCalledTimes(1);
    expect(db.document.update).toHaveBeenCalledTimes(1);

    // The update must carry a storage path built from org id and doc id.
    const firstUpdateArg = db.document.update.mock.calls[0][0];
    expect(firstUpdateArg.data.storagePath).toContain("org-123/doc-1/original.pdf");
  });

  it("maps mime types to correct extensions", async () => {
    // [mime type, expected file extension] pairs, exercised one at a time.
    const cases = [
      ["application/pdf", "pdf"],
      ["image/png", "png"],
      ["image/jpeg", "jpeg"],
      ["image/avif", "avif"],
    ];

    for (const [mimeType, extension] of cases) {
      const fileName = `test.${extension}`;
      db.document.create.mockResolvedValue({
        id: `doc-${extension}`,
        fileName,
        mimeType,
      });
      db.document.update.mockResolvedValue({});

      await service.createUploadUrls([{ fileName, mimeType, sizeBytes: 100 }], "user");

      // Inspect the update issued by this iteration (the most recent call).
      const latestUpdateArg = db.document.update.mock.calls.at(-1)[0];
      expect(latestUpdateArg.data.storagePath).toContain(`original.${extension}`);
    }
  });
});
|
|
128
|
+
// ── processDocuments ──────────────────────────────────────────────────────
|
|
129
|
+
describe("processDocuments", () => {
  it("sets status to processing and fires background work", async () => {
    const uploadingDoc = {
      id: "doc-1",
      status: "uploading",
      storagePath: "org-123/doc-1/original.pdf",
      fileName: "test.pdf",
      mimeType: "application/pdf",
      createdBy: "user",
    };
    db.document.findFirst.mockResolvedValue(uploadingDoc);
    db.document.update.mockResolvedValue({});

    const { documents, warnings } = await service.processDocuments(["doc-1"]);

    expect(documents).toHaveLength(1);
    expect(documents[0].status).toBe("processing");
    expect(warnings).toHaveLength(0);
  });

  it("warns on missing documents", async () => {
    // A null lookup result stands for "no such document".
    db.document.findFirst.mockResolvedValue(null);

    const outcome = await service.processDocuments(["nonexistent"]);

    expect(outcome.warnings).toContain("Document nonexistent not found");
  });

  it("warns on documents not in uploading status", async () => {
    db.document.findFirst.mockResolvedValue({ id: "doc-1", status: "ready" });

    const { documents, warnings } = await service.processDocuments(["doc-1"]);

    expect(warnings[0]).toContain("expected \"uploading\"");
    // The document is still reported, with its existing status untouched.
    expect(documents[0].status).toBe("ready");
  });
});
|
|
160
|
+
// ── processDocument ───────────────────────────────────────────────────────
|
|
161
|
+
describe("processDocument", () => {
  // Record returned by the first findFirst call (the initial lookup).
  const mockDoc = {
    id: "doc-1",
    status: "processing",
    storagePath: "org-123/doc-1/original.pdf",
    fileName: "test.pdf",
    mimeType: "application/pdf",
    createdBy: "user",
    fileSizeBytes: 1024,
  };

  // Argument object of the most recent db.document.update call.
  const latestUpdateArg = () => db.document.update.mock.calls.at(-1)[0];

  it("runs full OCR → node → chunk → tag → update pipeline", async () => {
    db.document.findFirst.mockResolvedValueOnce(mockDoc); // initial lookup
    db.document.findFirst.mockResolvedValueOnce(null); // dedup check
    db.document.update.mockResolvedValue({});

    await service.processDocument("doc-1");

    // OCR ran exactly once.
    expect(ocrService.extractText).toHaveBeenCalledTimes(1);

    // A node was created for the document.
    const expectedNode = expect.objectContaining({
      type: "document",
      title: "test.pdf",
      status: "approved",
    });
    expect(nodeService.create).toHaveBeenCalledWith(expectedNode);

    // The node content was chunked and embedded.
    expect(chunkService.chunkAndEmbed).toHaveBeenCalledWith(
      "node-1",
      "test.pdf",
      expect.any(String),
    );

    // Source tags were applied to the node.
    const expectedTags = expect.arrayContaining([
      expect.objectContaining({ tagName: "source_document" }),
      expect.objectContaining({ tagName: "source_file_format" }),
    ]);
    expect(tagService.applyTags).toHaveBeenCalledWith("node-1", expectedTags, "document-service");

    // The final update marks the document ready and records OCR metadata.
    const { data } = latestUpdateArg();
    expect(data.status).toBe("ready");
    expect(data.nodeId).toBe("node-1");
    expect(data.contentHash).toBeDefined();
    expect(data.ocrModel).toBe("mistral-ocr-latest");
    expect(data.pageCount).toBe(2); // two pages separated by ---
  });

  it("deduplicates by content hash", async () => {
    const existingMatch = { id: "existing-doc", nodeId: "existing-node", status: "ready" };
    db.document.findFirst.mockResolvedValueOnce(mockDoc); // initial lookup
    db.document.findFirst.mockResolvedValueOnce(existingMatch); // dedup match
    db.document.update.mockResolvedValue({});

    await service.processDocument("doc-1");

    // A dedup hit must short-circuit before OCR.
    expect(ocrService.extractText).not.toHaveBeenCalled();

    // The document is linked to the node of the already-processed match.
    const { data } = latestUpdateArg();
    expect(data.nodeId).toBe("existing-node");
    expect(data.status).toBe("ready");
  });

  it("marks as failed when GCS file missing", async () => {
    storage._file.exists.mockResolvedValue([false]);
    db.document.findFirst.mockResolvedValueOnce(mockDoc);
    db.document.update.mockResolvedValue({});

    const attempt = service.processDocument("doc-1");
    await expect(attempt).rejects.toThrow("File not found in GCS");

    // The first update issued must record the failure.
    const { data } = db.document.update.mock.calls[0][0];
    expect(data.status).toBe("failed");
    expect(data.errorMessage).toContain("File not found in GCS");
  });

  it("marks as failed when OCR fails", async () => {
    db.document.findFirst.mockResolvedValueOnce(mockDoc);
    db.document.findFirst.mockResolvedValueOnce(null); // no dedup
    db.document.update.mockResolvedValue({});
    ocrService.extractText.mockRejectedValueOnce(new Error("OCR timeout"));

    const attempt = service.processDocument("doc-1");
    await expect(attempt).rejects.toThrow("OCR timeout");

    const { data } = latestUpdateArg();
    expect(data.status).toBe("failed");
    expect(data.errorMessage).toBe("OCR timeout");
  });

  it("throws when document not found", async () => {
    db.document.findFirst.mockResolvedValue(null);

    const attempt = service.processDocument("nonexistent");
    await expect(attempt).rejects.toThrow("Document nonexistent not found");
  });
});
|
|
240
|
+
// ── listDocuments ─────────────────────────────────────────────────────────
|
|
241
|
+
describe("listDocuments", () => {
  // Every query is expected to sort newest-first.
  const newestFirst = { createdAt: "desc" };

  it("lists all documents without filters", async () => {
    db.document.findMany.mockResolvedValue([{ id: "doc-1" }]);

    const docs = await service.listDocuments();

    const expectedQuery = { where: {}, orderBy: newestFirst };
    expect(db.document.findMany).toHaveBeenCalledWith(expectedQuery);
    expect(docs).toHaveLength(1);
  });

  it("filters by IDs", async () => {
    db.document.findMany.mockResolvedValue([]);
    const ids = ["doc-1", "doc-2"];

    await service.listDocuments({ ids });

    // The id list must be translated into an `in` filter.
    const expectedQuery = {
      where: { id: { in: ["doc-1", "doc-2"] } },
      orderBy: newestFirst,
    };
    expect(db.document.findMany).toHaveBeenCalledWith(expectedQuery);
  });

  it("filters by status", async () => {
    db.document.findMany.mockResolvedValue([]);

    await service.listDocuments({ status: "ready" });

    const expectedQuery = { where: { status: "ready" }, orderBy: newestFirst };
    expect(db.document.findMany).toHaveBeenCalledWith(expectedQuery);
  });
});
|
|
268
|
+
// ── getDownloadUrl ────────────────────────────────────────────────────────
|
|
269
|
+
describe("getDownloadUrl", () => {
  it("generates a signed read URL", async () => {
    const storedDoc = {
      id: "doc-1",
      fileName: "test.pdf",
      mimeType: "application/pdf",
      storagePath: "org-123/doc-1/original.pdf",
    };
    db.document.findFirst.mockResolvedValue(storedDoc);

    const { downloadUrl, fileName, mimeType, expiresIn } = await service.getDownloadUrl("doc-1");

    expect(downloadUrl).toBe(storage._signedUrl);
    expect(fileName).toBe("test.pdf");
    expect(mimeType).toBe("application/pdf");
    expect(expiresIn).toBe(900); // 15 minutes
  });

  it("throws when document not found", async () => {
    db.document.findFirst.mockResolvedValue(null);

    const attempt = service.getDownloadUrl("nonexistent");
    await expect(attempt).rejects.toThrow("Document nonexistent not found");
  });
});
|
|
288
|
+
});
|
|
289
|
+
//# sourceMappingURL=document-service.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-service.test.js","sourceRoot":"","sources":["../../src/services/document-service.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAMxD,+EAA+E;AAE/E,SAAS,UAAU;IACjB,OAAO;QACL,QAAQ,EAAE;YACR,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;YACf,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;YACf,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;YAClB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE;SAClB;KACK,CAAC;AACX,CAAC;AAED,SAAS,WAAW;IAClB,MAAM,SAAS,GAAG,2CAA2C,CAAC;IAC9D,MAAM,OAAO,GAAG;QACd,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,SAAS,CAAC,CAAC;QACpD,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC;QACzC,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC;KACnE,CAAC;IACF,OAAO;QACL,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC;YAC9B,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,OAAO,CAAC;SACvC,CAAC;QACF,KAAK,EAAE,OAAO;QACd,UAAU,EAAE,SAAS;KACtB,CAAC;AACJ,CAAC;AAED,SAAS,cAAc;IACrB,OAAO;QACL,WAAW,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YACrC,IAAI,EAAE,2DAA2D;YACjE,QAAQ,EAAE,GAAG;YACb,SAAS,EAAE,EAAE;YACb,KAAK,EAAE,oBAAoB;SAC5B,CAAC;KACI,CAAC;AACX,CAAC;AAED,SAAS,eAAe;IACtB,OAAO;QACL,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAChC,EAAE,EAAE,QAAQ;YACZ,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,UAAU;YACjB,OAAO,EAAE,gBAAgB;SAC1B,CAAC;KACI,CAAC;AACX,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO;QACL,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;KAC7C,CAAC;AACX,CAAC;AAED,SAAS,cAAc;IACrB,OAAO;QACL,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAE,CAAC;KAClC,CAAC;AACX,CAAC;AAED,MAAM,MAAM,GAAG,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC;AAC7C,MAAM,WAAW,GAAG,aAAa,CAAC;AAElC,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,IAAI,EAAiC,CAAC;IACtC,IAAI,OAAuC,CAAC;IAC5C,IAAI,UAA6C,CAAC;IAClD,IAAI,WAA+C,CAAC;IACpD,IAAI,YAAiD,CAAC;IACtD,IAAI,UAA6C,CAAC;IAClD,IAAI,OAAwB,CAAC;IAE7B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,GAAG,UAAU,EAAE,CAAC;QAClB,OAAO,GAAG,WAAW,EAAE,CAAC;QACxB,UAAU,GAAG,cAAc,EAAE,
CAAC;QAC9B,WAAW,GAAG,eAAe,EAAE,CAAC;QAChC,YAAY,GAAG,gBAAgB,EAAE,CAAC;QAClC,UAAU,GAAG,cAAc,EAAE,CAAC;QAC9B,OAAO,GAAG,IAAI,eAAe,CAC3B,EAAE,EACF,OAAc,EACd,WAAW,EACX,UAAU,EACV,WAAW,EACX,YAAY,EACZ,UAAU,EACV,SAAS,EACT,SAAS,EACT,MAAM,CACP,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YAChE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC;gBACnC,EAAE,EAAE,OAAO;gBACX,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,iBAAiB;gBAC3B,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,EAAE;gBACf,MAAM,EAAE,WAAW;aACpB,CAAC,CAAC;YACH,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAC5C;gBACE;oBACE,QAAQ,EAAE,UAAU;oBACpB,QAAQ,EAAE,iBAAiB;oBAC3B,SAAS,EAAE,IAAI;iBAChB;aACF,EACD,kBAAkB,CACnB,CAAC;YAEF,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YACtD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAE7C,kBAAkB;YAClB,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,oBAAoB,EAAE,CAAC;YAClD,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,oBAAoB,EAAE,CAAC;YAElD,8CAA8C;YAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACvD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QAC9E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,OAAO,GAAG;gBACd,iBAAiB,EAAE,KAAK;gBACxB,WAAW,EAAE,KAAK;gBAClB,YAAY,EAAE,MAAM;gBACpB,YAAY,EAAE,MAAM;aACrB,CAAC;YAEF,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC;oBACnC,EAAE,EAAE,OAAO,GAAG,EAAE;oBAChB,QAAQ,EAAE,QAAQ,GAAG,EAAE;oBACvB,QAAQ,EAAE,IAAI;iBACf,CAAC,CAAC;gBACH,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;gBAEzC,MAAM,OAAO,CAAC,gBAAgB,CAC5B,CAAC,EAAE,QAAQ,EAAE,QAAQ,GAAG,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAA
E,GAAG,EAAE,CAAC,EAC7D,MAAM,CACP,CAAC;gBAEF,MAAM,UAAU,GACd,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAC3B,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CACzC,CAAC,CAAC,CAAC,CAAC;gBACP,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;YACnE,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;YACnE,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACtC,EAAE,EAAE,OAAO;gBACX,MAAM,EAAE,WAAW;gBACnB,WAAW,EAAE,4BAA4B;gBACzC,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,iBAAiB;gBAC3B,SAAS,EAAE,MAAM;aAClB,CAAC,CAAC;YACH,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAEzD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACzC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;YAC1C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC;YAE/D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,gCAAgC,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACtC,EAAE,EAAE,OAAO;gBACX,MAAM,EAAE,OAAO;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAEzD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;YAC/D,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,MAAM,OAAO,GAAG;YACd,EAAE,EAAE,OAAO;YACX,MAAM,EAAE,YAAY;YACpB,WAAW,EAAE,4BAA4B;YACzC,QAAQ,EAAE,UAAU;YACpB,QAAQ,EAAE,iBAAiB;YAC3B,SAAS,EAAE,MAAM;YACjB,aAAa,EAAE,IAAI;SACpB,CAAC;QAEF,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;YACpE,EAAE,CAAC
,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC,CAAC,iBAAiB;iBAChD,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc;YAC9C,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;YAEvC,iBAAiB;YACjB,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,oBAAoB,EAAE,CAAC;YAEtD,mBAAmB;YACnB,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,oBAAoB,CAC7C,MAAM,CAAC,gBAAgB,CAAC;gBACtB,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,UAAU;gBACjB,MAAM,EAAE,UAAU;aACnB,CAAC,CACH,CAAC;YAEF,sBAAsB;YACtB,MAAM,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,oBAAoB,CACrD,QAAQ,EACR,UAAU,EACV,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CACnB,CAAC;YAEF,oBAAoB;YACpB,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAC/C,QAAQ,EACR,MAAM,CAAC,eAAe,CAAC;gBACrB,MAAM,CAAC,gBAAgB,CAAC,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC;gBACvD,MAAM,CAAC,gBAAgB,CAAC,EAAE,OAAO,EAAE,oBAAoB,EAAE,CAAC;aAC3D,CAAC,EACF,kBAAkB,CACnB,CAAC;YAEF,4BAA4B;YAC5B,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9F,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC7C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;YAClD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC5D,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,6BAA6B;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAC5C,EAAE,CAAC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC,CAAC,iBAAiB;iBAChD,qBAAqB,CAAC,EAAE,EAAE,EAAE,cAAc,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,cAAc;YAC1G,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;YAEvC,kCAAkC;YAClC,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;YAEtD,wCAAwC;YACxC,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9
F,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACrD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAChD,EAAE,CAAC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC,CAAC;YAClC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC5D,uBAAuB,CACxB,CAAC;YAEF,uCAAuC;YACvC,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACvD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,EAAE,CAAC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC;iBAC9B,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW;YAC3C,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YACxC,UAAU,CAAC,WAAmB,CAAC,qBAAqB,CACnD,IAAI,KAAK,CAAC,aAAa,CAAC,CACzB,CAAC;YAEF,MAAM,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC5D,aAAa,CACd,CAAC;YAEF,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9F,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAClE,gCAAgC,CACjC,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;QAC7B,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;YACnD,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC;YAE1D,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,aAAa,EAAE,CAAC;YAE3C,
MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,oBAAoB,CAAC;gBAChD,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE;aAC/B,CAAC,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gBAAgB,EAAE,KAAK,IAAI,EAAE;YAC9B,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAE3C,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;YAEzD,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,oBAAoB,CAAC;gBAChD,KAAK,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE;gBACzC,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE;aAC/B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mBAAmB,EAAE,KAAK,IAAI,EAAE;YACjC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAE3C,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;YAEjD,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,oBAAoB,CAAC;gBAChD,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;gBAC1B,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE;aAC/B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC3C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACtC,EAAE,EAAE,OAAO;gBACX,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,iBAAiB;gBAC3B,WAAW,EAAE,4BAA4B;aAC1C,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAErD,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACzC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;YAChD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa;QACnD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACjE,gCAAgC,CACjC,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR text extraction via the AIShield proxy (Mistral OCR model).
|
|
3
|
+
*
|
|
4
|
+
* Calls the OpenAI-compatible chat completions endpoint at AIShield,
|
|
5
|
+
* passing a signed GCS document URL as a `document_url` content part.
|
|
6
|
+
* The model returns extracted text as structured markdown.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Outcome of a single OCR extraction request.
 */
export interface OcrResult {
    /** Extracted text, taken from `choices[0].message.content` of the response. */
    text: string;
    /** `usage.prompt_tokens` from the response, or 0 when not reported. */
    tokensIn: number;
    /** `usage.completion_tokens` from the response, or 0 when not reported. */
    tokensOut: number;
    /** Model name reported by the response, falling back to the configured model. */
    model: string;
}
|
|
14
|
+
/**
 * Client for the chat-completions endpoint used for OCR text extraction.
 */
export declare class OcrService {
    private baseUrl;
    private apiKey;
    private model;
    /**
     * @param baseUrl Base URL of the service; requests go to `<baseUrl>/v1/chat/completions`.
     * @param apiKey  Sent as a `Bearer` token in the `Authorization` header.
     * @param model   Model name placed in the request body (optional).
     */
    constructor(baseUrl: string, apiKey: string, model?: string);
    /**
     * Requests text extraction for the document at `documentUrl`.
     *
     * @param documentUrl    URL of the document, sent as a `document_url` content part.
     * @param organizationId Sent in the `Organisation` request header.
     * @returns The extracted text together with token usage and model name.
     */
    extractText(documentUrl: string, organizationId: string): Promise<OcrResult>;
}
|
|
21
|
+
//# sourceMappingURL=ocr-service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-service.d.ts","sourceRoot":"","sources":["../../src/services/ocr-service.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;CACf;AAQD,qBAAa,UAAU;IAEnB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,KAAK;gBAFL,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,KAAK,GAAE,MAA6B;IAGxC,WAAW,CACf,WAAW,EAAE,MAAM,EACnB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,SAAS,CAAC;CAiDtB"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR text extraction via the AIShield proxy (Mistral OCR model).
|
|
3
|
+
*
|
|
4
|
+
* Calls the OpenAI-compatible chat completions endpoint at AIShield,
|
|
5
|
+
* passing a signed GCS document URL as a `document_url` content part.
|
|
6
|
+
* The model returns extracted text as structured markdown.
|
|
7
|
+
*/
|
|
8
|
+
// Instruction sent alongside the document; asks for markdown output with
// pages separated by a horizontal rule.
const OCR_PROMPT = `Extract all text content from this document. Format the output as clean markdown:
- Preserve document structure with headings (#, ##, ###).
- Render tables as markdown tables.
- Separate pages with --- (horizontal rule).
- Do not add any commentary — output only the extracted text.`;

/**
 * Thin client that asks a chat-completions endpoint to extract the text of a
 * document referenced by URL.
 */
export class OcrService {
  baseUrl;
  apiKey;
  model;

  /**
   * @param {string} baseUrl Base URL of the service; a trailing slash is tolerated.
   * @param {string} apiKey  Sent as a Bearer token in the Authorization header.
   * @param {string} [model="mistral-ocr-latest"] Model name placed in the request body.
   */
  constructor(baseUrl, apiKey, model = "mistral-ocr-latest") {
    this.baseUrl = baseUrl;
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * POSTs an OCR request for `documentUrl` and returns the extracted text.
   *
   * @param {string} documentUrl    URL of the document, sent as a `document_url` content part.
   * @param {string} organizationId Sent in the `Organisation` request header.
   * @returns {Promise<{text: string, tokensIn: number, tokensOut: number, model: string}>}
   *   Extracted text plus token usage (0 when the response omits it) and the
   *   model name (the configured model when the response omits it).
   * @throws {Error} When the HTTP status is not OK, when the body is not valid
   *   JSON (original parse error attached as `cause`), or when the response
   *   has no `choices[0].message.content`.
   */
  async extractText(documentUrl, organizationId) {
    const body = {
      model: this.model,
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: OCR_PROMPT },
            { type: "document_url", document_url: documentUrl },
          ],
        },
      ],
    };

    // Strip trailing slashes so "https://host/" does not yield "//v1/...".
    const endpoint = `${`${this.baseUrl}`.replace(/\/+$/, "")}/v1/chat/completions`;

    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`,
        Organisation: organizationId,
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      // Reading the error body is best-effort; an unreadable body becomes "".
      const errorBody = await response.text().catch(() => "");
      throw new Error(`OCR request failed (${response.status}): ${errorBody}`);
    }

    let result;
    try {
      result = await response.json();
    } catch (err) {
      // A 2xx with a non-JSON body (e.g. an HTML error page from a proxy)
      // would otherwise surface as a context-free SyntaxError.
      throw new Error(`OCR response was not valid JSON (${response.status})`, { cause: err });
    }

    // `result?.` guards against a literal JSON `null` body.
    const text = result?.choices?.[0]?.message?.content;
    if (text == null) {
      throw new Error("OCR response missing choices[0].message.content");
    }

    return {
      text,
      tokensIn: result.usage?.prompt_tokens ?? 0,
      tokensOut: result.usage?.completion_tokens ?? 0,
      model: result.model ?? this.model,
    };
  }
}
|
|
61
|
+
//# sourceMappingURL=ocr-service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-service.js","sourceRoot":"","sources":["../../src/services/ocr-service.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AASH,MAAM,UAAU,GAAG;;;;8DAI2C,CAAC;AAE/D,MAAM,OAAO,UAAU;IAEX;IACA;IACA;IAHV,YACU,OAAe,EACf,MAAc,EACd,QAAgB,oBAAoB;QAFpC,YAAO,GAAP,OAAO,CAAQ;QACf,WAAM,GAAN,MAAM,CAAQ;QACd,UAAK,GAAL,KAAK,CAA+B;IAC3C,CAAC;IAEJ,KAAK,CAAC,WAAW,CACf,WAAmB,EACnB,cAAsB;QAEtB,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE;wBAClC,EAAE,IAAI,EAAE,cAAc,EAAE,YAAY,EAAE,WAAW,EAAE;qBACpD;iBACF;aACF;SACF,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;gBACtC,YAAY,EAAE,cAAc;aAC7B;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACxD,MAAM,IAAI,KAAK,CACb,uBAAuB,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CACxD,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAIjC,CAAC;QAEF,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QACnD,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACrE,CAAC;QAED,OAAO;YACL,IAAI;YACJ,QAAQ,EAAE,MAAM,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;YAC1C,SAAS,EAAE,MAAM,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;YAC/C,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK;SAClC,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-service.test.d.ts","sourceRoot":"","sources":["../../src/services/ocr-service.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { describe, expect, it, vi, afterEach, beforeEach } from "vitest";
|
|
2
|
+
import { OcrService } from "./ocr-service.js";
|
|
3
|
+
/**
 * Unit tests for OcrService.extractText().
 *
 * The global `fetch` is swapped for a vitest mock in every test, so each case
 * controls the exact HTTP response the service sees and no real request is made.
 */
describe("OcrService", () => {
    const BASE_URL = "https://aishield.apart.tech";
    const API_KEY = "test-api-key";
    const MODEL = "mistral-ocr-latest";
    const ORG_ID = "org-123";
    const DOC_URL = "https://storage.googleapis.com/bucket/doc.pdf?X-Goog-Signature=...";
    let service;
    const mockFetch = vi.fn();

    beforeEach(() => {
        service = new OcrService(BASE_URL, API_KEY, MODEL);
        // Drop queued responses and recorded calls left over from the previous test.
        mockFetch.mockReset();
        vi.stubGlobal("fetch", mockFetch);
    });

    afterEach(() => {
        // vi.restoreAllMocks() does not undo vi.stubGlobal(); without
        // vi.unstubAllGlobals() the mocked `fetch` would stay installed
        // on globalThis after the test has finished.
        vi.unstubAllGlobals();
        vi.restoreAllMocks();
    });

    it("sends correct request format and parses response", async () => {
        mockFetch.mockResolvedValueOnce(new Response(JSON.stringify({
            choices: [{ message: { content: "# Title\n\nExtracted text" } }],
            usage: { prompt_tokens: 100, completion_tokens: 50 },
            model: "mistral-ocr-latest",
        }), { status: 200 }));

        const result = await service.extractText(DOC_URL, ORG_ID);

        // Parsed result: text plus token accounting and the model echoed by the API.
        expect(result.text).toBe("# Title\n\nExtracted text");
        expect(result.tokensIn).toBe(100);
        expect(result.tokensOut).toBe(50);
        expect(result.model).toBe("mistral-ocr-latest");

        // Outgoing request: endpoint, method and headers.
        expect(mockFetch).toHaveBeenCalledOnce();
        const [url, opts] = mockFetch.mock.calls[0];
        expect(url).toBe(`${BASE_URL}/v1/chat/completions`);
        expect(opts.method).toBe("POST");
        const headers = opts.headers;
        expect(headers["Authorization"]).toBe(`Bearer ${API_KEY}`);
        expect(headers["Organisation"]).toBe(ORG_ID);
        expect(headers["Content-Type"]).toBe("application/json");

        // Outgoing request: one message with a text part followed by the document URL part.
        const body = JSON.parse(opts.body);
        expect(body.model).toBe(MODEL);
        expect(body.messages[0].content).toHaveLength(2);
        expect(body.messages[0].content[0].type).toBe("text");
        expect(body.messages[0].content[1].type).toBe("document_url");
        expect(body.messages[0].content[1].document_url).toBe(DOC_URL);
    });

    it("throws on non-200 response", async () => {
        mockFetch.mockResolvedValueOnce(new Response("Rate limited", { status: 429 }));
        // The error message carries both the HTTP status and the response body.
        await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("OCR request failed (429): Rate limited");
    });

    it("throws when response missing content", async () => {
        mockFetch.mockResolvedValueOnce(new Response(JSON.stringify({ choices: [{ message: {} }], usage: {} }), { status: 200 }));
        await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("OCR response missing choices[0].message.content");
    });

    it("handles missing usage gracefully", async () => {
        mockFetch.mockResolvedValueOnce(new Response(JSON.stringify({
            choices: [{ message: { content: "text" } }],
        }), { status: 200 }));

        const result = await service.extractText(DOC_URL, ORG_ID);

        // Token counts fall back to 0 when the response has no `usage` object.
        expect(result.tokensIn).toBe(0);
        expect(result.tokensOut).toBe(0);
    });

    it("handles network error", async () => {
        mockFetch.mockRejectedValueOnce(new Error("Network error"));
        // A rejected fetch propagates to the caller with its original message.
        await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("Network error");
    });
});
|
|
66
|
+
//# sourceMappingURL=ocr-service.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-service.test.js","sourceRoot":"","sources":["../../src/services/ocr-service.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACzE,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,MAAM,QAAQ,GAAG,6BAA6B,CAAC;IAC/C,MAAM,OAAO,GAAG,cAAc,CAAC;IAC/B,MAAM,KAAK,GAAG,oBAAoB,CAAC;IACnC,MAAM,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,OAAO,GAAG,oEAAoE,CAAC;IAErF,IAAI,OAAmB,CAAC;IACxB,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAE1B,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;QACnD,SAAS,CAAC,SAAS,EAAE,CAAC;QACtB,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC;YACb,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,2BAA2B,EAAE,EAAE,CAAC;YAChE,KAAK,EAAE,EAAE,aAAa,EAAE,GAAG,EAAE,iBAAiB,EAAE,EAAE,EAAE;YACpD,KAAK,EAAE,oBAAoB;SAC5B,CAAC,EACF,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAE1D,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAEhD,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAA0B,CAAC;QACrE,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,QAAQ,sBAAsB,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEjC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAiC,CAAC;QACvD,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,OAAO,EAAE,CAAC,CAAC;QAC3D,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7C,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAE
zD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAc,CAAC,CAAC;QAC7C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CAAC,cAAc,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAC9C,CAAC;QAEF,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,wCAAwC,CACzC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,EACzD,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,iDAAiD,CAClD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC;YACb,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;SAC5C,CAAC,EACF,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;QACrC,SAAS,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;QAE5D,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
package/prisma/schema.prisma
CHANGED
|
@@ -39,6 +39,7 @@ model Organization {
|
|
|
39
39
|
nodeTags NodeTag[]
|
|
40
40
|
nodeChunks NodeChunk[]
|
|
41
41
|
piiQueryLogs PiiQueryLog[]
|
|
42
|
+
documents Document[]
|
|
42
43
|
|
|
43
44
|
@@map("organizations")
|
|
44
45
|
}
|
|
@@ -608,3 +609,31 @@ model NodeTagAudit {
|
|
|
608
609
|
@@index([orgId, createdAt], map: "idx_tag_audit_org_time")
|
|
609
610
|
@@map("node_tag_audit")
|
|
610
611
|
}
|
|
612
|
+
|
|
613
|
+
// ── Document Archive ────────────────────────────────────────────────────────
|
|
614
|
+
|
|
615
|
+
/// Record of a document file belonging to an organization; persisted in the "documents" table.
/// Holds file metadata, a processing status, optional OCR accounting fields and audit timestamps.
model Document {
|
|
616
|
+
id String @id @default(dbgenerated("gen_random_uuid()")) @db.Uuid // primary key, generated by the database via gen_random_uuid()
|
|
617
|
+
organizationId String @map("organization_id") @db.Uuid // owning organization; foreign key used by the `organization` relation below
|
|
618
|
+
fileName String @map("file_name") @db.VarChar(500)
|
|
619
|
+
mimeType String @map("mime_type") @db.VarChar(100)
|
|
620
|
+
fileSizeBytes Int @map("file_size_bytes") // NOTE(review): assuming PostgreSQL, Int is a 32-bit column, so sizes above ~2 GiB will not fit — confirm against upload limits
|
|
621
|
+
storagePath String @map("storage_path") @db.VarChar(1000)
|
|
622
|
+
contentHash String? @map("content_hash") @db.VarChar(64) // optional; presumably a 64-char hex digest (e.g. SHA-256) — TODO confirm
|
|
623
|
+
status String @default("uploading") @db.VarChar(20) // new rows start as "uploading"; the other allowed values are not defined in this schema
|
|
624
|
+
ocrModel String? @map("ocr_model") @db.VarChar(100) // presumably the model reported by the OCR call — verify against the document service
|
|
625
|
+
ocrTokensIn Int? @map("ocr_tokens_in") // presumably the OCR result's tokensIn — verify against the document service
|
|
626
|
+
ocrTokensOut Int? @map("ocr_tokens_out") // presumably the OCR result's tokensOut — verify against the document service
|
|
627
|
+
pageCount Int? @map("page_count")
|
|
628
|
+
nodeId String? @map("node_id") @db.Uuid // NOTE(review): no relation is declared for this column in this model — confirm whether a foreign key is intended
|
|
629
|
+
errorMessage String? @map("error_message") // optional, no length limit declared
|
|
630
|
+
createdBy String @map("created_by") @db.VarChar(255)
|
|
631
|
+
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz
|
|
632
|
+
updatedAt DateTime @updatedAt @map("updated_at") @db.Timestamptz // maintained automatically by Prisma via @updatedAt
|
|
633
|
+
|
|
634
|
+
organization Organization @relation(fields: [organizationId], references: [id], onDelete: Cascade) // deleting the organization also deletes its documents
|
|
635
|
+
|
|
636
|
+
@@index([organizationId, contentHash], map: "idx_documents_org_hash") // non-unique lookup by organization + content hash
|
|
637
|
+
@@index([organizationId, status], map: "idx_documents_org_status") // lookup by organization + status
|
|
638
|
+
@@map("documents")
|
|
639
|
+
}
|