@apart-tech/intelligence-core 1.12.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist/auth/ability.d.ts +1 -1
  2. package/dist/auth/ability.d.ts.map +1 -1
  3. package/dist/auth/ability.js +4 -0
  4. package/dist/auth/ability.js.map +1 -1
  5. package/dist/auth/ability.test.js +12 -1
  6. package/dist/auth/ability.test.js.map +1 -1
  7. package/dist/db/tenant.d.ts.map +1 -1
  8. package/dist/db/tenant.js +2 -0
  9. package/dist/db/tenant.js.map +1 -1
  10. package/dist/index.d.ts +4 -0
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +2 -0
  13. package/dist/index.js.map +1 -1
  14. package/dist/services/cleaning-service.d.ts +1 -1
  15. package/dist/services/cleaning-service.d.ts.map +1 -1
  16. package/dist/services/cleaning-service.js +46 -20
  17. package/dist/services/cleaning-service.js.map +1 -1
  18. package/dist/services/document-service.d.ts +91 -0
  19. package/dist/services/document-service.d.ts.map +1 -0
  20. package/dist/services/document-service.js +273 -0
  21. package/dist/services/document-service.js.map +1 -0
  22. package/dist/services/document-service.test.d.ts +2 -0
  23. package/dist/services/document-service.test.d.ts.map +1 -0
  24. package/dist/services/document-service.test.js +289 -0
  25. package/dist/services/document-service.test.js.map +1 -0
  26. package/dist/services/ocr-service.d.ts +21 -0
  27. package/dist/services/ocr-service.d.ts.map +1 -0
  28. package/dist/services/ocr-service.js +61 -0
  29. package/dist/services/ocr-service.js.map +1 -0
  30. package/dist/services/ocr-service.test.d.ts +2 -0
  31. package/dist/services/ocr-service.test.d.ts.map +1 -0
  32. package/dist/services/ocr-service.test.js +66 -0
  33. package/dist/services/ocr-service.test.js.map +1 -0
  34. package/package.json +1 -1
  35. package/prisma/schema.prisma +29 -0
@@ -0,0 +1,289 @@
1
+ import { describe, expect, it, vi, beforeEach } from "vitest";
2
+ import { DocumentService } from "./document-service.js";
3
+ // ── Mock factories ──────────────────────────────────────────────────────────
4
/**
 * Builds a stand-in for the database client.
 *
 * Only the `document` delegate is provided, and each of its four methods is
 * a bare `vi.fn()` so individual tests can program return values.
 */
function mockPrisma() {
    const document = {
        create: vi.fn(),
        update: vi.fn(),
        findFirst: vi.fn(),
        findMany: vi.fn(),
    };
    return { document };
}
14
/**
 * Builds a stand-in for the storage client.
 *
 * `bucket(...)` returns an object whose `file(...)` always yields the same
 * fake file handle. The handle and the canned signed URL are also exposed as
 * `_file` and `_signedUrl` so tests can re-program or assert against them.
 */
function mockStorage() {
    const cannedUrl = "https://storage.googleapis.com/signed-url";

    // Each method resolves to a one-element array, the shape the tests rely on.
    const fileHandle = {
        getSignedUrl: vi.fn().mockResolvedValue([cannedUrl]),
        exists: vi.fn().mockResolvedValue([true]),
        download: vi.fn().mockResolvedValue([Buffer.from("file-content")]),
    };

    const bucket = vi.fn();
    bucket.mockReturnValue({
        file: vi.fn().mockReturnValue(fileHandle),
    });

    return {
        bucket,
        _file: fileHandle,
        _signedUrl: cannedUrl,
    };
}
29
/**
 * Builds a stand-in OCR service whose `extractText` resolves to a fixed
 * result: markdown containing one `---` separator plus token counts and a
 * model name.
 */
function mockOcrService() {
    const extractText = vi.fn();
    extractText.mockResolvedValue({
        text: "# Doc Title\n\nExtracted content\n\n---\n\nPage 2 content",
        tokensIn: 100,
        tokensOut: 50,
        model: "mistral-ocr-latest",
    });
    return { extractText };
}
39
/**
 * Builds a stand-in node service whose `create` resolves to a fixed
 * "document" node with id "node-1".
 */
function mockNodeService() {
    const create = vi.fn();
    create.mockResolvedValue({
        id: "node-1",
        type: "document",
        title: "test.pdf",
        content: "extracted text",
    });
    return { create };
}
49
/**
 * Builds a stand-in chunk service whose `chunkAndEmbed` resolves to
 * `undefined`.
 */
function mockChunkService() {
    const chunkAndEmbed = vi.fn();
    chunkAndEmbed.mockResolvedValue(undefined);
    return { chunkAndEmbed };
}
54
/**
 * Builds a stand-in tag service whose `applyTags` resolves to an empty array.
 */
function mockTagService() {
    const applyTags = vi.fn();
    applyTags.mockResolvedValue([]);
    return { applyTags };
}
59
+ const TENANT = { organizationId: "org-123" }; // Tenant context passed as the last DocumentService constructor argument; "org-123" is the prefix the storage-path assertions expect.
60
+ const BUCKET_NAME = "test-bucket"; // Bucket name passed as the third DocumentService constructor argument.
61
describe("DocumentService", () => {
    // Collaborators and the service under test are rebuilt before every test.
    let db;
    let storage;
    let ocrService;
    let nodeService;
    let chunkService;
    let tagService;
    let service;

    // First argument of the most recent db.document.update() call.
    const latestUpdateArgs = () => db.document.update.mock.calls.at(-1)[0];

    beforeEach(() => {
        db = mockPrisma();
        storage = mockStorage();
        ocrService = mockOcrService();
        nodeService = mockNodeService();
        chunkService = mockChunkService();
        tagService = mockTagService();
        // The eighth and ninth constructor arguments are intentionally left unset.
        service = new DocumentService(
            db,
            storage,
            BUCKET_NAME,
            ocrService,
            nodeService,
            chunkService,
            tagService,
            undefined,
            undefined,
            TENANT,
        );
    });

    // ── createUploadUrls ──────────────────────────────────────────────────────
    describe("createUploadUrls", () => {
        it("creates document records and returns signed URLs", async () => {
            const createdRecord = {
                id: "doc-1",
                fileName: "test.pdf",
                mimeType: "application/pdf",
                fileSizeBytes: 1024,
                storagePath: "",
                status: "uploading",
            };
            db.document.create.mockResolvedValue(createdRecord);
            db.document.update.mockResolvedValue({});

            const uploadRequest = {
                fileName: "test.pdf",
                mimeType: "application/pdf",
                sizeBytes: 1024,
            };
            const results = await service.createUploadUrls([uploadRequest], "user@example.com");

            expect(results).toHaveLength(1);
            const [firstResult] = results;
            expect(firstResult.id).toBe("doc-1");
            expect(firstResult.uploadUrl).toBe(storage._signedUrl);
            expect(firstResult.fileName).toBe("test.pdf");

            // One insert followed by one update.
            expect(db.document.create).toHaveBeenCalledOnce();
            expect(db.document.update).toHaveBeenCalledOnce();

            // The stored path is built from the organization id and the document id.
            const [firstUpdateArgs] = db.document.update.mock.calls[0];
            expect(firstUpdateArgs.data.storagePath).toContain("org-123/doc-1/original.pdf");
        });

        it("maps mime types to correct extensions", async () => {
            const expectedExtensions = [
                ["application/pdf", "pdf"],
                ["image/png", "png"],
                ["image/jpeg", "jpeg"],
                ["image/avif", "avif"],
            ];

            for (const [mimeType, extension] of expectedExtensions) {
                const fileName = `test.${extension}`;
                db.document.create.mockResolvedValue({
                    id: `doc-${extension}`,
                    fileName,
                    mimeType,
                });
                db.document.update.mockResolvedValue({});

                await service.createUploadUrls([{ fileName, mimeType, sizeBytes: 100 }], "user");

                expect(latestUpdateArgs().data.storagePath).toContain(`original.${extension}`);
            }
        });
    });

    // ── processDocuments ──────────────────────────────────────────────────────
    describe("processDocuments", () => {
        it("sets status to processing and fires background work", async () => {
            db.document.findFirst.mockResolvedValue({
                id: "doc-1",
                status: "uploading",
                storagePath: "org-123/doc-1/original.pdf",
                fileName: "test.pdf",
                mimeType: "application/pdf",
                createdBy: "user",
            });
            db.document.update.mockResolvedValue({});

            const { documents, warnings } = await service.processDocuments(["doc-1"]);

            expect(documents).toHaveLength(1);
            expect(documents[0].status).toBe("processing");
            expect(warnings).toHaveLength(0);
        });

        it("warns on missing documents", async () => {
            db.document.findFirst.mockResolvedValue(null);

            const outcome = await service.processDocuments(["nonexistent"]);

            expect(outcome.warnings).toContain("Document nonexistent not found");
        });

        it("warns on documents not in uploading status", async () => {
            db.document.findFirst.mockResolvedValue({
                id: "doc-1",
                status: "ready",
            });

            const outcome = await service.processDocuments(["doc-1"]);

            expect(outcome.warnings[0]).toContain("expected \"uploading\"");
            expect(outcome.documents[0].status).toBe("ready");
        });
    });

    // ── processDocument ───────────────────────────────────────────────────────
    describe("processDocument", () => {
        // Record returned by the first findFirst() lookup in each test below.
        const mockDoc = {
            id: "doc-1",
            status: "processing",
            storagePath: "org-123/doc-1/original.pdf",
            fileName: "test.pdf",
            mimeType: "application/pdf",
            createdBy: "user",
            fileSizeBytes: 1024,
        };

        it("runs full OCR → node → chunk → tag → update pipeline", async () => {
            db.document.findFirst.mockResolvedValueOnce(mockDoc); // initial lookup
            db.document.findFirst.mockResolvedValueOnce(null); // dedup check
            db.document.update.mockResolvedValue({});

            await service.processDocument("doc-1");

            // OCR ran exactly once.
            expect(ocrService.extractText).toHaveBeenCalledOnce();

            // A document node was created.
            const expectedNode = expect.objectContaining({
                type: "document",
                title: "test.pdf",
                status: "approved",
            });
            expect(nodeService.create).toHaveBeenCalledWith(expectedNode);

            // The node content was chunked.
            expect(chunkService.chunkAndEmbed).toHaveBeenCalledWith("node-1", "test.pdf", expect.any(String));

            // Source tags were applied.
            const expectedTags = expect.arrayContaining([
                expect.objectContaining({ tagName: "source_document" }),
                expect.objectContaining({ tagName: "source_file_format" }),
            ]);
            expect(tagService.applyTags).toHaveBeenCalledWith("node-1", expectedTags, "document-service");

            // The final update marks the document ready.
            const { data } = latestUpdateArgs();
            expect(data.status).toBe("ready");
            expect(data.nodeId).toBe("node-1");
            expect(data.contentHash).toBeDefined();
            expect(data.ocrModel).toBe("mistral-ocr-latest");
            expect(data.pageCount).toBe(2); // two pages separated by ---
        });

        it("deduplicates by content hash", async () => {
            const duplicate = { id: "existing-doc", nodeId: "existing-node", status: "ready" };
            db.document.findFirst.mockResolvedValueOnce(mockDoc); // initial lookup
            db.document.findFirst.mockResolvedValueOnce(duplicate); // dedup match
            db.document.update.mockResolvedValue({});

            await service.processDocument("doc-1");

            // OCR is skipped on a dedup match.
            expect(ocrService.extractText).not.toHaveBeenCalled();

            // The document is linked to the already existing node.
            const { data } = latestUpdateArgs();
            expect(data.nodeId).toBe("existing-node");
            expect(data.status).toBe("ready");
        });

        it("marks as failed when GCS file missing", async () => {
            storage._file.exists.mockResolvedValue([false]);
            db.document.findFirst.mockResolvedValueOnce(mockDoc);
            db.document.update.mockResolvedValue({});

            const attempt = service.processDocument("doc-1");
            await expect(attempt).rejects.toThrow("File not found in GCS");

            // The first update call records the failure.
            const [firstUpdateArgs] = db.document.update.mock.calls[0];
            expect(firstUpdateArgs.data.status).toBe("failed");
            expect(firstUpdateArgs.data.errorMessage).toContain("File not found in GCS");
        });

        it("marks as failed when OCR fails", async () => {
            db.document.findFirst.mockResolvedValueOnce(mockDoc);
            db.document.findFirst.mockResolvedValueOnce(null); // no dedup
            db.document.update.mockResolvedValue({});
            ocrService.extractText.mockRejectedValueOnce(new Error("OCR timeout"));

            const attempt = service.processDocument("doc-1");
            await expect(attempt).rejects.toThrow("OCR timeout");

            const { data } = latestUpdateArgs();
            expect(data.status).toBe("failed");
            expect(data.errorMessage).toBe("OCR timeout");
        });

        it("throws when document not found", async () => {
            db.document.findFirst.mockResolvedValue(null);

            const attempt = service.processDocument("nonexistent");
            await expect(attempt).rejects.toThrow("Document nonexistent not found");
        });
    });

    // ── listDocuments ─────────────────────────────────────────────────────────
    describe("listDocuments", () => {
        it("lists all documents without filters", async () => {
            db.document.findMany.mockResolvedValue([{ id: "doc-1" }]);

            const docs = await service.listDocuments();

            const expectedQuery = {
                where: {},
                orderBy: { createdAt: "desc" },
            };
            expect(db.document.findMany).toHaveBeenCalledWith(expectedQuery);
            expect(docs).toHaveLength(1);
        });

        it("filters by IDs", async () => {
            db.document.findMany.mockResolvedValue([]);

            await service.listDocuments({ ids: ["doc-1", "doc-2"] });

            const expectedQuery = {
                where: { id: { in: ["doc-1", "doc-2"] } },
                orderBy: { createdAt: "desc" },
            };
            expect(db.document.findMany).toHaveBeenCalledWith(expectedQuery);
        });

        it("filters by status", async () => {
            db.document.findMany.mockResolvedValue([]);

            await service.listDocuments({ status: "ready" });

            const expectedQuery = {
                where: { status: "ready" },
                orderBy: { createdAt: "desc" },
            };
            expect(db.document.findMany).toHaveBeenCalledWith(expectedQuery);
        });
    });

    // ── getDownloadUrl ────────────────────────────────────────────────────────
    describe("getDownloadUrl", () => {
        it("generates a signed read URL", async () => {
            db.document.findFirst.mockResolvedValue({
                id: "doc-1",
                fileName: "test.pdf",
                mimeType: "application/pdf",
                storagePath: "org-123/doc-1/original.pdf",
            });

            const download = await service.getDownloadUrl("doc-1");

            expect(download.downloadUrl).toBe(storage._signedUrl);
            expect(download.fileName).toBe("test.pdf");
            expect(download.mimeType).toBe("application/pdf");
            expect(download.expiresIn).toBe(900); // 15 minutes
        });

        it("throws when document not found", async () => {
            db.document.findFirst.mockResolvedValue(null);

            const attempt = service.getDownloadUrl("nonexistent");
            await expect(attempt).rejects.toThrow("Document nonexistent not found");
        });
    });
});
289
+ //# sourceMappingURL=document-service.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document-service.test.js","sourceRoot":"","sources":["../../src/services/document-service.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAMxD,+EAA+E;AAE/E,SAAS,UAAU;IACjB,OAAO;QACL,QAAQ,EAAE;YACR,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;YACf,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE;YACf,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;YAClB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE;SAClB;KACK,CAAC;AACX,CAAC;AAED,SAAS,WAAW;IAClB,MAAM,SAAS,GAAG,2CAA2C,CAAC;IAC9D,MAAM,OAAO,GAAG;QACd,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,SAAS,CAAC,CAAC;QACpD,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC;QACzC,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC;KACnE,CAAC;IACF,OAAO;QACL,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC;YAC9B,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,OAAO,CAAC;SACvC,CAAC;QACF,KAAK,EAAE,OAAO;QACd,UAAU,EAAE,SAAS;KACtB,CAAC;AACJ,CAAC;AAED,SAAS,cAAc;IACrB,OAAO;QACL,WAAW,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YACrC,IAAI,EAAE,2DAA2D;YACjE,QAAQ,EAAE,GAAG;YACb,SAAS,EAAE,EAAE;YACb,KAAK,EAAE,oBAAoB;SAC5B,CAAC;KACI,CAAC;AACX,CAAC;AAED,SAAS,eAAe;IACtB,OAAO;QACL,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAChC,EAAE,EAAE,QAAQ;YACZ,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,UAAU;YACjB,OAAO,EAAE,gBAAgB;SAC1B,CAAC;KACI,CAAC;AACX,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO;QACL,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;KAC7C,CAAC;AACX,CAAC;AAED,SAAS,cAAc;IACrB,OAAO;QACL,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,EAAE,CAAC;KAClC,CAAC;AACX,CAAC;AAED,MAAM,MAAM,GAAG,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC;AAC7C,MAAM,WAAW,GAAG,aAAa,CAAC;AAElC,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,IAAI,EAAiC,CAAC;IACtC,IAAI,OAAuC,CAAC;IAC5C,IAAI,UAA6C,CAAC;IAClD,IAAI,WAA+C,CAAC;IACpD,IAAI,YAAiD,CAAC;IACtD,IAAI,UAA6C,CAAC;IAClD,IAAI,OAAwB,CAAC;IAE7B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,GAAG,UAAU,EAAE,CAAC;QAClB,OAAO,GAAG,WAAW,EAAE,CAAC;QACxB,UAAU,GAAG,cAAc,EAA
E,CAAC;QAC9B,WAAW,GAAG,eAAe,EAAE,CAAC;QAChC,YAAY,GAAG,gBAAgB,EAAE,CAAC;QAClC,UAAU,GAAG,cAAc,EAAE,CAAC;QAC9B,OAAO,GAAG,IAAI,eAAe,CAC3B,EAAE,EACF,OAAc,EACd,WAAW,EACX,UAAU,EACV,WAAW,EACX,YAAY,EACZ,UAAU,EACV,SAAS,EACT,SAAS,EACT,MAAM,CACP,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YAChE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC;gBACnC,EAAE,EAAE,OAAO;gBACX,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,iBAAiB;gBAC3B,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,EAAE;gBACf,MAAM,EAAE,WAAW;aACpB,CAAC,CAAC;YACH,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAC5C;gBACE;oBACE,QAAQ,EAAE,UAAU;oBACpB,QAAQ,EAAE,iBAAiB;oBAC3B,SAAS,EAAE,IAAI;iBAChB;aACF,EACD,kBAAkB,CACnB,CAAC;YAEF,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YACtD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAE7C,kBAAkB;YAClB,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,oBAAoB,EAAE,CAAC;YAClD,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,oBAAoB,EAAE,CAAC;YAElD,8CAA8C;YAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACvD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QAC9E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,OAAO,GAAG;gBACd,iBAAiB,EAAE,KAAK;gBACxB,WAAW,EAAE,KAAK;gBAClB,YAAY,EAAE,MAAM;gBACpB,YAAY,EAAE,MAAM;aACrB,CAAC;YAEF,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC;oBACnC,EAAE,EAAE,OAAO,GAAG,EAAE;oBAChB,QAAQ,EAAE,QAAQ,GAAG,EAAE;oBACvB,QAAQ,EAAE,IAAI;iBACf,CAAC,CAAC;gBACH,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;gBAEzC,MAAM,OAAO,CAAC,gBAAgB,CAC5B,CAAC,EAAE,QAAQ,EAAE,QAAQ,GAAG,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,E
AAE,GAAG,EAAE,CAAC,EAC7D,MAAM,CACP,CAAC;gBAEF,MAAM,UAAU,GACd,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAC3B,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CACzC,CAAC,CAAC,CAAC,CAAC;gBACP,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;YACnE,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;YACnE,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACtC,EAAE,EAAE,OAAO;gBACX,MAAM,EAAE,WAAW;gBACnB,WAAW,EAAE,4BAA4B;gBACzC,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,iBAAiB;gBAC3B,SAAS,EAAE,MAAM;aAClB,CAAC,CAAC;YACH,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAEzD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACzC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;YAC1C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC;YAE/D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,gCAAgC,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACtC,EAAE,EAAE,OAAO;gBACX,MAAM,EAAE,OAAO;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAEzD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;YAC/D,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,MAAM,OAAO,GAAG;YACd,EAAE,EAAE,OAAO;YACX,MAAM,EAAE,YAAY;YACpB,WAAW,EAAE,4BAA4B;YACzC,QAAQ,EAAE,UAAU;YACpB,QAAQ,EAAE,iBAAiB;YAC3B,SAAS,EAAE,MAAM;YACjB,aAAa,EAAE,IAAI;SACpB,CAAC;QAEF,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;YACpE,EAAE,CA
AC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC,CAAC,iBAAiB;iBAChD,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc;YAC9C,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;YAEvC,iBAAiB;YACjB,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,oBAAoB,EAAE,CAAC;YAEtD,mBAAmB;YACnB,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,oBAAoB,CAC7C,MAAM,CAAC,gBAAgB,CAAC;gBACtB,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,UAAU;gBACjB,MAAM,EAAE,UAAU;aACnB,CAAC,CACH,CAAC;YAEF,sBAAsB;YACtB,MAAM,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,oBAAoB,CACrD,QAAQ,EACR,UAAU,EACV,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CACnB,CAAC;YAEF,oBAAoB;YACpB,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAC/C,QAAQ,EACR,MAAM,CAAC,eAAe,CAAC;gBACrB,MAAM,CAAC,gBAAgB,CAAC,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC;gBACvD,MAAM,CAAC,gBAAgB,CAAC,EAAE,OAAO,EAAE,oBAAoB,EAAE,CAAC;aAC3D,CAAC,EACF,kBAAkB,CACnB,CAAC;YAEF,4BAA4B;YAC5B,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9F,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC7C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;YAClD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC5D,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,6BAA6B;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAC5C,EAAE,CAAC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC,CAAC,iBAAiB;iBAChD,qBAAqB,CAAC,EAAE,EAAE,EAAE,cAAc,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,cAAc;YAC1G,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;YAEvC,kCAAkC;YAClC,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;YAEtD,wCAAwC;YACxC,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YA
C9F,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACrD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAChD,EAAE,CAAC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC,CAAC;YAClC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAEzC,MAAM,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC5D,uBAAuB,CACxB,CAAC;YAEF,uCAAuC;YACvC,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACvD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,EAAE,CAAC,QAAQ,CAAC,SAAS;iBAClB,qBAAqB,CAAC,OAAO,CAAC;iBAC9B,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW;YAC3C,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YACxC,UAAU,CAAC,WAAmB,CAAC,qBAAqB,CACnD,IAAI,KAAK,CAAC,aAAa,CAAC,CACzB,CAAC;YAEF,MAAM,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC5D,aAAa,CACd,CAAC;YAEF,MAAM,UAAU,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9F,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAClE,gCAAgC,CACjC,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;QAC7B,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;YACnD,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC;YAE1D,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,aAAa,EAAE,CAAC;YAE3
C,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,oBAAoB,CAAC;gBAChD,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE;aAC/B,CAAC,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gBAAgB,EAAE,KAAK,IAAI,EAAE;YAC9B,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAE3C,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;YAEzD,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,oBAAoB,CAAC;gBAChD,KAAK,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE;gBACzC,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE;aAC/B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mBAAmB,EAAE,KAAK,IAAI,EAAE;YACjC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAE3C,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;YAEjD,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,oBAAoB,CAAC;gBAChD,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;gBAC1B,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE;aAC/B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAE7E,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC3C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACtC,EAAE,EAAE,OAAO;gBACX,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,iBAAiB;gBAC3B,WAAW,EAAE,4BAA4B;aAC1C,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAErD,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACzC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;YAChD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa;QACnD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACjE,gCAAgC,CACjC,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * OCR text extraction via the AIShield proxy (Mistral OCR model).
3
+ *
4
+ * Calls the OpenAI-compatible chat completions endpoint at AIShield,
5
+ * passing a signed GCS document URL as a `document_url` content part.
6
+ * The model returns extracted text as structured markdown.
7
+ */
8
+ export interface OcrResult { // Value resolved by OcrService.extractText().
9
+ text: string; // Extracted text, taken from choices[0].message.content of the response.
10
+ tokensIn: number; // usage.prompt_tokens from the response, or 0 when absent.
11
+ tokensOut: number; // usage.completion_tokens from the response, or 0 when absent.
12
+ model: string; // Model reported by the response, falling back to the configured model.
13
+ }
14
+ export declare class OcrService { // Client that requests OCR text extraction over a chat-completions endpoint.
15
+ private baseUrl; // Prefix of the `/v1/chat/completions` request URL.
16
+ private apiKey; // Sent as the `Authorization: Bearer` credential.
17
+ private model; // Model name placed in the request body.
18
+ constructor(baseUrl: string, apiKey: string, model?: string); // `model` defaults to "mistral-ocr-latest" in the implementation.
19
+ extractText(documentUrl: string, organizationId: string): Promise<OcrResult>; // POSTs the document URL for extraction; rejects on a non-OK response or when the response has no content.
20
+ }
21
+ //# sourceMappingURL=ocr-service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.d.ts","sourceRoot":"","sources":["../../src/services/ocr-service.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;CACf;AAQD,qBAAa,UAAU;IAEnB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,KAAK;gBAFL,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,KAAK,GAAE,MAA6B;IAGxC,WAAW,CACf,WAAW,EAAE,MAAM,EACnB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,SAAS,CAAC;CAiDtB"}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * OCR text extraction via the AIShield proxy (Mistral OCR model).
3
+ *
4
+ * Calls the OpenAI-compatible chat completions endpoint at AIShield,
5
+ * passing a signed GCS document URL as a `document_url` content part.
6
+ * The model returns extracted text as structured markdown.
7
+ */
8
/** Instruction sent as the text part of every OCR request. */
const OCR_PROMPT = `Extract all text content from this document. Format the output as clean markdown:
- Preserve document structure with headings (#, ##, ###).
- Render tables as markdown tables.
- Separate pages with --- (horizontal rule).
- Do not add any commentary — output only the extracted text.`;

/**
 * Maximum number of characters of an upstream error body that is copied into
 * a thrown error message. Keeps messages bounded when the upstream answers
 * with a large payload (for example an HTML error page).
 */
const MAX_ERROR_BODY_CHARS = 2000;

/**
 * Client for OCR text extraction through an OpenAI-compatible
 * chat-completions endpoint.
 */
export class OcrService {
    baseUrl;
    apiKey;
    model;

    /**
     * @param {string} baseUrl - Origin (and optional path prefix) of the proxy;
     *   a trailing slash is tolerated.
     * @param {string} apiKey - Credential sent as a Bearer token.
     * @param {string} [model="mistral-ocr-latest"] - Model name placed in the request body.
     */
    constructor(baseUrl, apiKey, model = "mistral-ocr-latest") {
        this.baseUrl = baseUrl;
        this.apiKey = apiKey;
        this.model = model;
    }

    /**
     * Requests text extraction for one document.
     *
     * @param {string} documentUrl - URL of the document, sent as a
     *   `document_url` content part.
     * @param {string} organizationId - Sent in the `Organisation` request header.
     * @returns {Promise<{text: string, tokensIn: number, tokensOut: number, model: string}>}
     *   Extracted text plus token usage; usage counts default to 0 and the
     *   model defaults to the configured one when the response omits them.
     * @throws {Error} When the response status is not OK, or when the response
     *   carries no `choices[0].message.content`.
     */
    async extractText(documentUrl, organizationId) {
        // Drop trailing slashes so "https://host/" does not produce
        // "https://host//v1/chat/completions".
        const endpoint = `${String(this.baseUrl).replace(/\/+$/, "")}/v1/chat/completions`;

        const payload = {
            model: this.model,
            messages: [
                {
                    role: "user",
                    content: [
                        { type: "text", text: OCR_PROMPT },
                        { type: "document_url", document_url: documentUrl },
                    ],
                },
            ],
        };

        const response = await fetch(endpoint, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
                // Header name is spelled "Organisation" on purpose; do not "correct" it.
                Organisation: organizationId,
            },
            body: JSON.stringify(payload),
        });

        if (!response.ok) {
            // Best effort: an unreadable body must not mask the status code.
            const errorBody = await response.text().catch(() => "");
            const detail = errorBody.length > MAX_ERROR_BODY_CHARS
                ? `${errorBody.slice(0, MAX_ERROR_BODY_CHARS)}… [truncated, ${errorBody.length} chars total]`
                : errorBody;
            throw new Error(`OCR request failed (${response.status}): ${detail}`);
        }

        const result = await response.json();
        const text = result.choices?.[0]?.message?.content;
        // `== null` matches both null and undefined; an empty string is a valid result.
        if (text == null) {
            throw new Error("OCR response missing choices[0].message.content");
        }

        return {
            text,
            tokensIn: result.usage?.prompt_tokens ?? 0,
            tokensOut: result.usage?.completion_tokens ?? 0,
            model: result.model ?? this.model,
        };
    }
}
61
+ //# sourceMappingURL=ocr-service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.js","sourceRoot":"","sources":["../../src/services/ocr-service.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AASH,MAAM,UAAU,GAAG;;;;8DAI2C,CAAC;AAE/D,MAAM,OAAO,UAAU;IAEX;IACA;IACA;IAHV,YACU,OAAe,EACf,MAAc,EACd,QAAgB,oBAAoB;QAFpC,YAAO,GAAP,OAAO,CAAQ;QACf,WAAM,GAAN,MAAM,CAAQ;QACd,UAAK,GAAL,KAAK,CAA+B;IAC3C,CAAC;IAEJ,KAAK,CAAC,WAAW,CACf,WAAmB,EACnB,cAAsB;QAEtB,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE;wBAClC,EAAE,IAAI,EAAE,cAAc,EAAE,YAAY,EAAE,WAAW,EAAE;qBACpD;iBACF;aACF;SACF,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;gBACtC,YAAY,EAAE,cAAc;aAC7B;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACxD,MAAM,IAAI,KAAK,CACb,uBAAuB,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CACxD,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAIjC,CAAC;QAEF,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QACnD,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACrE,CAAC;QAED,OAAO;YACL,IAAI;YACJ,QAAQ,EAAE,MAAM,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;YAC1C,SAAS,EAAE,MAAM,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;YAC/C,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK;SAClC,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=ocr-service.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.test.d.ts","sourceRoot":"","sources":["../../src/services/ocr-service.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,66 @@
1
+ import { describe, expect, it, vi, afterEach, beforeEach } from "vitest";
2
+ import { OcrService } from "./ocr-service.js";
3
+ describe("OcrService", () => {
4
+ const BASE_URL = "https://aishield.apart.tech";
5
+ const API_KEY = "test-api-key";
6
+ const MODEL = "mistral-ocr-latest";
7
+ const ORG_ID = "org-123";
8
+ const DOC_URL = "https://storage.googleapis.com/bucket/doc.pdf?X-Goog-Signature=...";
9
+ let service;
10
+ const mockFetch = vi.fn();
11
+ beforeEach(() => {
12
+ service = new OcrService(BASE_URL, API_KEY, MODEL);
13
+ mockFetch.mockReset();
14
+ vi.stubGlobal("fetch", mockFetch);
15
+ });
16
+ afterEach(() => {
17
+ vi.restoreAllMocks();
18
+ });
19
+ it("sends correct request format and parses response", async () => {
20
+ mockFetch.mockResolvedValueOnce(new Response(JSON.stringify({
21
+ choices: [{ message: { content: "# Title\n\nExtracted text" } }],
22
+ usage: { prompt_tokens: 100, completion_tokens: 50 },
23
+ model: "mistral-ocr-latest",
24
+ }), { status: 200 }));
25
+ const result = await service.extractText(DOC_URL, ORG_ID);
26
+ expect(result.text).toBe("# Title\n\nExtracted text");
27
+ expect(result.tokensIn).toBe(100);
28
+ expect(result.tokensOut).toBe(50);
29
+ expect(result.model).toBe("mistral-ocr-latest");
30
+ expect(mockFetch).toHaveBeenCalledOnce();
31
+ const [url, opts] = mockFetch.mock.calls[0];
32
+ expect(url).toBe(`${BASE_URL}/v1/chat/completions`);
33
+ expect(opts.method).toBe("POST");
34
+ const headers = opts.headers;
35
+ expect(headers["Authorization"]).toBe(`Bearer ${API_KEY}`);
36
+ expect(headers["Organisation"]).toBe(ORG_ID);
37
+ expect(headers["Content-Type"]).toBe("application/json");
38
+ const body = JSON.parse(opts.body);
39
+ expect(body.model).toBe(MODEL);
40
+ expect(body.messages[0].content).toHaveLength(2);
41
+ expect(body.messages[0].content[0].type).toBe("text");
42
+ expect(body.messages[0].content[1].type).toBe("document_url");
43
+ expect(body.messages[0].content[1].document_url).toBe(DOC_URL);
44
+ });
45
+ it("throws on non-200 response", async () => {
46
+ mockFetch.mockResolvedValueOnce(new Response("Rate limited", { status: 429 }));
47
+ await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("OCR request failed (429): Rate limited");
48
+ });
49
+ it("throws when response missing content", async () => {
50
+ mockFetch.mockResolvedValueOnce(new Response(JSON.stringify({ choices: [{ message: {} }], usage: {} }), { status: 200 }));
51
+ await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("OCR response missing choices[0].message.content");
52
+ });
53
+ it("handles missing usage gracefully", async () => {
54
+ mockFetch.mockResolvedValueOnce(new Response(JSON.stringify({
55
+ choices: [{ message: { content: "text" } }],
56
+ }), { status: 200 }));
57
+ const result = await service.extractText(DOC_URL, ORG_ID);
58
+ expect(result.tokensIn).toBe(0);
59
+ expect(result.tokensOut).toBe(0);
60
+ });
61
+ it("handles network error", async () => {
62
+ mockFetch.mockRejectedValueOnce(new Error("Network error"));
63
+ await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("Network error");
64
+ });
65
+ });
66
+ //# sourceMappingURL=ocr-service.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.test.js","sourceRoot":"","sources":["../../src/services/ocr-service.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACzE,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,MAAM,QAAQ,GAAG,6BAA6B,CAAC;IAC/C,MAAM,OAAO,GAAG,cAAc,CAAC;IAC/B,MAAM,KAAK,GAAG,oBAAoB,CAAC;IACnC,MAAM,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,OAAO,GAAG,oEAAoE,CAAC;IAErF,IAAI,OAAmB,CAAC;IACxB,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAE1B,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;QACnD,SAAS,CAAC,SAAS,EAAE,CAAC;QACtB,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC;YACb,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,2BAA2B,EAAE,EAAE,CAAC;YAChE,KAAK,EAAE,EAAE,aAAa,EAAE,GAAG,EAAE,iBAAiB,EAAE,EAAE,EAAE;YACpD,KAAK,EAAE,oBAAoB;SAC5B,CAAC,EACF,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAE1D,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAEhD,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAA0B,CAAC;QACrE,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,QAAQ,sBAAsB,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEjC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAiC,CAAC;QACvD,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,OAAO,EAAE,CAAC,CAAC;QAC3D,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7C,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;Q
AEzD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAc,CAAC,CAAC;QAC7C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CAAC,cAAc,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAC9C,CAAC;QAEF,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,wCAAwC,CACzC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,EACzD,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,iDAAiD,CAClD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC;YACb,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;SAC5C,CAAC,EACF,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;QACrC,SAAS,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;QAE5D,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@apart-tech/intelligence-core",
3
- "version": "1.12.0",
3
+ "version": "1.13.0",
4
4
  "description": "Core library: database, services, and providers for Apart Intelligence",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -39,6 +39,7 @@ model Organization {
39
39
  nodeTags NodeTag[]
40
40
  nodeChunks NodeChunk[]
41
41
  piiQueryLogs PiiQueryLog[]
42
+ documents Document[]
42
43
 
43
44
  @@map("organizations")
44
45
  }
@@ -608,3 +609,31 @@ model NodeTagAudit {
608
609
  @@index([orgId, createdAt], map: "idx_tag_audit_org_time")
609
610
  @@map("node_tag_audit")
610
611
  }
612
+
613
+ // ── Document Archive ────────────────────────────────────────────────────────
614
+
615
+ model Document { // One row per archived document; mapped to the "documents" table and owned by an Organization.
616
+ id String @id @default(dbgenerated("gen_random_uuid()")) @db.Uuid // Primary key; UUID generated by the database via gen_random_uuid().
617
+ organizationId String @map("organization_id") @db.Uuid // Foreign key to Organization.id (see the relation below).
618
+ fileName String @map("file_name") @db.VarChar(500)
619
+ mimeType String @map("mime_type") @db.VarChar(100)
620
+ fileSizeBytes Int @map("file_size_bytes") // NOTE(review): Prisma Int is a 32-bit integer on PostgreSQL, so sizes above ~2 GiB would not fit — confirm upload limits.
621
+ storagePath String @map("storage_path") @db.VarChar(1000)
622
+ contentHash String? @map("content_hash") @db.VarChar(64) // Optional; 64 characters would fit a hex SHA-256 digest — TODO confirm the algorithm.
623
+ status String @default("uploading") @db.VarChar(20) // Plain string defaulting to "uploading"; the other allowed values are not declared in this model.
624
+ ocrModel String? @map("ocr_model") @db.VarChar(100) // ocr* fields are optional; presumably filled from the OCR result (model / tokensIn / tokensOut) — confirm.
625
+ ocrTokensIn Int? @map("ocr_tokens_in")
626
+ ocrTokensOut Int? @map("ocr_tokens_out")
627
+ pageCount Int? @map("page_count")
628
+ nodeId String? @map("node_id") @db.Uuid // Optional UUID; no @relation is declared for it in this model.
629
+ errorMessage String? @map("error_message") // Optional; no length limit is declared.
630
+ createdBy String @map("created_by") @db.VarChar(255) // presumably a user identifier — confirm against callers.
631
+ createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz
632
+ updatedAt DateTime @updatedAt @map("updated_at") @db.Timestamptz // Set automatically by Prisma on every update (@updatedAt).
633
+
634
+ organization Organization @relation(fields: [organizationId], references: [id], onDelete: Cascade) // Deleting the organization deletes its documents.
635
+
636
+ @@index([organizationId, contentHash], map: "idx_documents_org_hash") // presumably for per-organization lookups by content hash (e.g. duplicate detection) — confirm.
637
+ @@index([organizationId, status], map: "idx_documents_org_status") // Composite index on (organization, status).
638
+ @@map("documents")
639
+ }