@apart-tech/intelligence-core 1.12.1 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
/**
 * OCR text extraction via the AIShield proxy (Mistral OCR model).
 *
 * Calls the OpenAI-compatible chat completions endpoint at AIShield,
 * passing a signed GCS document URL as a `document_url` content part.
 * The model returns extracted text as structured markdown.
 */

/** Instruction sent as the `text` content part of every OCR request. */
const OCR_PROMPT = `Extract all text content from this document. Format the output as clean markdown:
- Preserve document structure with headings (#, ##, ###).
- Render tables as markdown tables.
- Separate pages with --- (horizontal rule).
- Do not add any commentary — output only the extracted text.`;

/**
 * Thin client for the OCR chat-completions endpoint.
 */
export class OcrService {
  baseUrl;
  apiKey;
  model;

  /**
   * @param {string} baseUrl - Origin of the proxy; trailing slashes are tolerated.
   * @param {string} apiKey - Sent as a Bearer token in the `Authorization` header.
   * @param {string} [model="mistral-ocr-latest"] - Model name placed in the request body.
   */
  constructor(baseUrl, apiKey, model = "mistral-ocr-latest") {
    this.baseUrl = baseUrl;
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * Requests OCR for one document and returns the extracted text with usage counters.
   *
   * @param {string} documentUrl - URL placed in the `document_url` content part.
   * @param {string} organizationId - Sent verbatim in the `Organisation` header.
   * @param {{ signal?: AbortSignal }} [options] - Optional; `signal` is forwarded to
   *   `fetch` so callers can cancel or time out the request.
   * @returns {Promise<{ text: *, tokensIn: number, tokensOut: number, model: string }>}
   *   `text` is `choices[0].message.content`; token counts default to 0 and `model`
   *   falls back to the configured model when the response omits them.
   * @throws {Error} On a non-2xx status, a body that is not valid JSON, or a
   *   response without `choices[0].message.content`. Errors raised by `fetch`
   *   itself (network failure, abort) propagate unchanged.
   */
  async extractText(documentUrl, organizationId, options = {}) {
    // Strip trailing slashes so "https://host/" does not produce "https://host//v1/...".
    const endpoint = `${String(this.baseUrl).replace(/\/+$/, "")}/v1/chat/completions`;

    const body = {
      model: this.model,
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: OCR_PROMPT },
            { type: "document_url", document_url: documentUrl },
          ],
        },
      ],
    };

    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`,
        Organisation: organizationId,
      },
      body: JSON.stringify(body),
      signal: options?.signal,
    });

    if (!response.ok) {
      // Best effort: the error body is diagnostic only, so a failed read becomes "".
      const errorBody = await response.text().catch(() => "");
      throw new Error(`OCR request failed (${response.status}): ${errorBody}`);
    }

    let result;
    try {
      result = await response.json();
    } catch (err) {
      // A 2xx with a non-JSON body (e.g. an HTML error page) would otherwise
      // surface as a bare SyntaxError with no hint that OCR was involved.
      throw new Error(`OCR response was not valid JSON (${response.status})`, {
        cause: err,
      });
    }

    // `result?.` guards against a literal `null` JSON body.
    const text = result?.choices?.[0]?.message?.content;
    if (text === undefined || text === null) {
      throw new Error("OCR response missing choices[0].message.content");
    }

    return {
      text,
      tokensIn: result.usage?.prompt_tokens ?? 0,
      tokensOut: result.usage?.completion_tokens ?? 0,
      model: result.model ?? this.model,
    };
  }
}
61
+ //# sourceMappingURL=ocr-service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.js","sourceRoot":"","sources":["../../src/services/ocr-service.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AASH,MAAM,UAAU,GAAG;;;;8DAI2C,CAAC;AAE/D,MAAM,OAAO,UAAU;IAEX;IACA;IACA;IAHV,YACU,OAAe,EACf,MAAc,EACd,QAAgB,oBAAoB;QAFpC,YAAO,GAAP,OAAO,CAAQ;QACf,WAAM,GAAN,MAAM,CAAQ;QACd,UAAK,GAAL,KAAK,CAA+B;IAC3C,CAAC;IAEJ,KAAK,CAAC,WAAW,CACf,WAAmB,EACnB,cAAsB;QAEtB,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE;wBAClC,EAAE,IAAI,EAAE,cAAc,EAAE,YAAY,EAAE,WAAW,EAAE;qBACpD;iBACF;aACF;SACF,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;gBACtC,YAAY,EAAE,cAAc;aAC7B;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACxD,MAAM,IAAI,KAAK,CACb,uBAAuB,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CACxD,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAIjC,CAAC;QAEF,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QACnD,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACrE,CAAC;QAED,OAAO;YACL,IAAI;YACJ,QAAQ,EAAE,MAAM,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;YAC1C,SAAS,EAAE,MAAM,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;YAC/C,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK;SAClC,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=ocr-service.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.test.d.ts","sourceRoot":"","sources":["../../src/services/ocr-service.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,66 @@
1
+ import { describe, expect, it, vi, afterEach, beforeEach } from "vitest";
2
+ import { OcrService } from "./ocr-service.js";
3
// Unit tests for OcrService.extractText. The global `fetch` is replaced with a
// Vitest mock so no network traffic is generated.
describe("OcrService", () => {
  const BASE_URL = "https://aishield.apart.tech";
  const API_KEY = "test-api-key";
  const MODEL = "mistral-ocr-latest";
  const ORG_ID = "org-123";
  const DOC_URL = "https://storage.googleapis.com/bucket/doc.pdf?X-Goog-Signature=...";

  let service;
  const mockFetch = vi.fn();

  beforeEach(() => {
    service = new OcrService(BASE_URL, API_KEY, MODEL);
    mockFetch.mockReset();
    vi.stubGlobal("fetch", mockFetch);
  });

  afterEach(() => {
    vi.restoreAllMocks();
    // restoreAllMocks() does not undo vi.stubGlobal(); without this the stubbed
    // `fetch` stays installed on globalThis after the suite finishes.
    vi.unstubAllGlobals();
  });

  it("sends correct request format and parses response", async () => {
    mockFetch.mockResolvedValueOnce(
      new Response(
        JSON.stringify({
          choices: [{ message: { content: "# Title\n\nExtracted text" } }],
          usage: { prompt_tokens: 100, completion_tokens: 50 },
          model: "mistral-ocr-latest",
        }),
        { status: 200 },
      ),
    );

    const result = await service.extractText(DOC_URL, ORG_ID);

    // Parsed result.
    expect(result.text).toBe("# Title\n\nExtracted text");
    expect(result.tokensIn).toBe(100);
    expect(result.tokensOut).toBe(50);
    expect(result.model).toBe("mistral-ocr-latest");

    // Outgoing request: URL and method.
    expect(mockFetch).toHaveBeenCalledOnce();
    const [url, opts] = mockFetch.mock.calls[0];
    expect(url).toBe(`${BASE_URL}/v1/chat/completions`);
    expect(opts.method).toBe("POST");

    // Outgoing request: headers.
    const headers = opts.headers;
    expect(headers["Authorization"]).toBe(`Bearer ${API_KEY}`);
    expect(headers["Organisation"]).toBe(ORG_ID);
    expect(headers["Content-Type"]).toBe("application/json");

    // Outgoing request: JSON body.
    const body = JSON.parse(opts.body);
    expect(body.model).toBe(MODEL);
    expect(body.messages[0].content).toHaveLength(2);
    expect(body.messages[0].content[0].type).toBe("text");
    expect(body.messages[0].content[1].type).toBe("document_url");
    expect(body.messages[0].content[1].document_url).toBe(DOC_URL);
  });

  it("throws on non-200 response", async () => {
    mockFetch.mockResolvedValueOnce(new Response("Rate limited", { status: 429 }));

    await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow(
      "OCR request failed (429): Rate limited",
    );
  });

  it("throws when response missing content", async () => {
    mockFetch.mockResolvedValueOnce(
      new Response(JSON.stringify({ choices: [{ message: {} }], usage: {} }), { status: 200 }),
    );

    await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow(
      "OCR response missing choices[0].message.content",
    );
  });

  it("handles missing usage gracefully", async () => {
    mockFetch.mockResolvedValueOnce(
      new Response(
        JSON.stringify({
          choices: [{ message: { content: "text" } }],
        }),
        { status: 200 },
      ),
    );

    const result = await service.extractText(DOC_URL, ORG_ID);
    expect(result.tokensIn).toBe(0);
    expect(result.tokensOut).toBe(0);
  });

  it("handles network error", async () => {
    mockFetch.mockRejectedValueOnce(new Error("Network error"));

    await expect(service.extractText(DOC_URL, ORG_ID)).rejects.toThrow("Network error");
  });
});
66
+ //# sourceMappingURL=ocr-service.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ocr-service.test.js","sourceRoot":"","sources":["../../src/services/ocr-service.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACzE,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,MAAM,QAAQ,GAAG,6BAA6B,CAAC;IAC/C,MAAM,OAAO,GAAG,cAAc,CAAC;IAC/B,MAAM,KAAK,GAAG,oBAAoB,CAAC;IACnC,MAAM,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,OAAO,GAAG,oEAAoE,CAAC;IAErF,IAAI,OAAmB,CAAC;IACxB,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;IAE1B,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;QACnD,SAAS,CAAC,SAAS,EAAE,CAAC;QACtB,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC;YACb,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,2BAA2B,EAAE,EAAE,CAAC;YAChE,KAAK,EAAE,EAAE,aAAa,EAAE,GAAG,EAAE,iBAAiB,EAAE,EAAE,EAAE;YACpD,KAAK,EAAE,oBAAoB;SAC5B,CAAC,EACF,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAE1D,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAEhD,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAA0B,CAAC;QACrE,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,QAAQ,sBAAsB,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEjC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAiC,CAAC;QACvD,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,OAAO,EAAE,CAAC,CAAC;QAC3D,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7C,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;Q
AEzD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAc,CAAC,CAAC;QAC7C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CAAC,cAAc,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAC9C,CAAC;QAEF,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,wCAAwC,CACzC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,EACzD,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,iDAAiD,CAClD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,SAAS,CAAC,qBAAqB,CAC7B,IAAI,QAAQ,CACV,IAAI,CAAC,SAAS,CAAC;YACb,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;SAC5C,CAAC,EACF,EAAE,MAAM,EAAE,GAAG,EAAE,CAChB,CACF,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;QACrC,SAAS,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;QAE5D,MAAM,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@apart-tech/intelligence-core",
3
- "version": "1.12.1",
3
+ "version": "1.13.0",
4
4
  "description": "Core library: database, services, and providers for Apart Intelligence",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -39,6 +39,7 @@ model Organization {
39
39
  nodeTags NodeTag[]
40
40
  nodeChunks NodeChunk[]
41
41
  piiQueryLogs PiiQueryLog[]
42
+ documents Document[]
42
43
 
43
44
  @@map("organizations")
44
45
  }
@@ -608,3 +609,31 @@ model NodeTagAudit {
608
609
  @@index([orgId, createdAt], map: "idx_tag_audit_org_time")
609
610
  @@map("node_tag_audit")
610
611
  }
612
+
613
+ // ── Document Archive ────────────────────────────────────────────────────────
614
+
615
// One row per archived document, scoped to an organization and stored in the
// "documents" table.
model Document {
  id             String   @id @default(dbgenerated("gen_random_uuid()")) @db.Uuid
  organizationId String   @map("organization_id") @db.Uuid

  // File metadata.
  fileName       String   @map("file_name") @db.VarChar(500)
  mimeType       String   @map("mime_type") @db.VarChar(100)
  // NOTE(review): Int is a 32-bit column, which caps recordable sizes at ~2 GiB — confirm that is acceptable.
  fileSizeBytes  Int      @map("file_size_bytes")
  storagePath    String   @map("storage_path") @db.VarChar(1000)
  // Presumably a hex digest (64 chars fits SHA-256) — verify against the writer.
  contentHash    String?  @map("content_hash") @db.VarChar(64)

  // Processing state; new rows start as "uploading". Other values are not visible here.
  status         String   @default("uploading") @db.VarChar(20)

  // OCR bookkeeping; all optional.
  ocrModel       String?  @map("ocr_model") @db.VarChar(100)
  ocrTokensIn    Int?     @map("ocr_tokens_in")
  ocrTokensOut   Int?     @map("ocr_tokens_out")
  pageCount      Int?     @map("page_count")

  // Optional UUID; no relation is declared for it in this model.
  nodeId         String?  @map("node_id") @db.Uuid
  errorMessage   String?  @map("error_message")

  // Audit columns.
  createdBy      String   @map("created_by") @db.VarChar(255)
  createdAt      DateTime @default(now()) @map("created_at") @db.Timestamptz
  updatedAt      DateTime @updatedAt @map("updated_at") @db.Timestamptz

  // Deleting the organization cascades to its documents.
  organization Organization @relation(fields: [organizationId], references: [id], onDelete: Cascade)

  @@index([organizationId, contentHash], map: "idx_documents_org_hash")
  @@index([organizationId, status], map: "idx_documents_org_status")
  @@map("documents")
}