@tstdl/base 0.93.61 → 0.93.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai/genkit/helpers.d.ts +10 -0
- package/ai/genkit/helpers.js +14 -0
- package/ai/genkit/index.d.ts +2 -0
- package/ai/genkit/index.js +2 -0
- package/ai/genkit/module.d.ts +35 -0
- package/ai/genkit/module.js +56 -0
- package/ai/index.d.ts +1 -0
- package/ai/index.js +1 -0
- package/ai/prompts/format.d.ts +15 -0
- package/ai/prompts/format.js +17 -0
- package/ai/prompts/index.d.ts +3 -0
- package/ai/prompts/index.js +3 -0
- package/ai/prompts/instructions-formatter.d.ts +25 -0
- package/ai/prompts/instructions-formatter.js +166 -0
- package/ai/prompts/instructions.d.ts +3 -0
- package/ai/prompts/instructions.js +8 -0
- package/document-management/api/document-management.api.d.ts +8 -8
- package/document-management/models/document-assignment-scope.model.d.ts +3 -4
- package/document-management/models/document-assignment-scope.model.js +5 -9
- package/document-management/models/document-assignment-task.model.d.ts +3 -4
- package/document-management/models/document-assignment-task.model.js +4 -8
- package/document-management/models/document-category.model.d.ts +3 -4
- package/document-management/models/document-category.model.js +4 -8
- package/document-management/models/document-collection-assignment.model.d.ts +2 -3
- package/document-management/models/document-collection-assignment.model.js +4 -9
- package/document-management/models/document-collection.model.d.ts +3 -4
- package/document-management/models/document-collection.model.js +4 -8
- package/document-management/models/document-property-value.model.d.ts +3 -4
- package/document-management/models/document-property-value.model.js +5 -9
- package/document-management/models/document-property.model.d.ts +3 -4
- package/document-management/models/document-property.model.js +3 -7
- package/document-management/models/document-request-collection-assignment.model.d.ts +3 -4
- package/document-management/models/document-request-collection-assignment.model.js +5 -9
- package/document-management/models/document-request-template.d.ts +2 -3
- package/document-management/models/document-request-template.js +4 -9
- package/document-management/models/document-request.model.d.ts +3 -4
- package/document-management/models/document-request.model.js +5 -9
- package/document-management/models/document-requests-template.d.ts +2 -3
- package/document-management/models/document-requests-template.js +2 -7
- package/document-management/models/document-tag-assignment.model.d.ts +3 -4
- package/document-management/models/document-tag-assignment.model.js +5 -9
- package/document-management/models/document-tag.model.d.ts +3 -4
- package/document-management/models/document-tag.model.js +3 -7
- package/document-management/models/document-type-property.model.d.ts +2 -3
- package/document-management/models/document-type-property.model.js +4 -9
- package/document-management/models/document-type-validation.model.d.ts +2 -3
- package/document-management/models/document-type-validation.model.js +4 -9
- package/document-management/models/document-type.model.d.ts +3 -4
- package/document-management/models/document-type.model.js +4 -8
- package/document-management/models/document-validation-definition.model.d.ts +3 -4
- package/document-management/models/document-validation-definition.model.js +3 -7
- package/document-management/models/document-validation-execution-related-document.model.d.ts +3 -4
- package/document-management/models/document-validation-execution-related-document.model.js +5 -9
- package/document-management/models/document-validation-execution.model.d.ts +3 -4
- package/document-management/models/document-validation-execution.model.js +5 -9
- package/document-management/models/document-workflow.model.d.ts +3 -4
- package/document-management/models/document-workflow.model.js +4 -8
- package/document-management/models/document.model.d.ts +3 -4
- package/document-management/models/document.model.js +4 -8
- package/document-management/server/drizzle/0001_lyrical_wong.sql +123 -0
- package/document-management/server/drizzle/meta/0001_snapshot.json +2728 -0
- package/document-management/server/drizzle/meta/_journal.json +7 -0
- package/document-management/server/services/document-category-type.service.d.ts +6 -6
- package/document-management/server/services/document-category-type.service.js +6 -6
- package/document-management/server/services/document-file.service.d.ts +2 -0
- package/document-management/server/services/document-file.service.js +10 -9
- package/document-management/server/services/document-management-ai.service.d.ts +1 -0
- package/document-management/server/services/document-management-ai.service.js +268 -135
- package/document-management/server/services/document-management.service.d.ts +2 -2
- package/document-management/server/services/document-property.service.d.ts +4 -4
- package/document-management/server/services/document-property.service.js +3 -3
- package/document-management/server/services/document-request.service.d.ts +2 -2
- package/document-management/server/services/document-request.service.js +1 -1
- package/document-management/server/services/document-tag.service.d.ts +1 -1
- package/document-management/server/services/document-tag.service.js +2 -2
- package/document-management/server/services/document-validation.service.js +6 -6
- package/document-management/server/services/document.service.js +2 -7
- package/document-management/service-models/document-management.view-model.d.ts +1 -1
- package/document-management/service-models/document-management.view-model.js +1 -1
- package/document-management/service-models/document.service-model.d.ts +4 -4
- package/document-management/service-models/enriched/enriched-document-category.view.d.ts +1 -1
- package/document-management/service-models/enriched/enriched-document-type.view.d.ts +1 -1
- package/examples/document-management/main.js +6 -0
- package/json-path/json-path.js +1 -1
- package/orm/decorators.d.ts +18 -7
- package/orm/decorators.js +10 -2
- package/orm/entity.d.ts +2 -2
- package/orm/entity.js +2 -0
- package/orm/server/drizzle/schema-converter.d.ts +3 -3
- package/orm/server/drizzle/schema-converter.js +22 -5
- package/orm/server/repository.js +4 -6
- package/package.json +11 -7
- package/pdf/utils.js +1 -1
- package/schema/converters/zod-converter.d.ts +1 -1
- package/schema/converters/zod-converter.js +2 -13
- package/schema/converters/zod-v3-converter.d.ts +3 -3
- package/utils/file-reader.d.ts +0 -1
- package/utils/file-reader.js +4 -7
- package/utils/object/object.d.ts +4 -2
- package/utils/object/object.js +30 -21
- package/utils/stream/from-promise.js +2 -2
|
@@ -58,9 +58,11 @@ var __disposeResources = (this && this.__disposeResources) || (function (Suppres
|
|
|
58
58
|
});
|
|
59
59
|
var _a;
|
|
60
60
|
var DocumentManagementAiService_1;
|
|
61
|
+
import { readFile } from 'node:fs/promises';
|
|
61
62
|
import { and, isNull as drizzleIsNull, eq, inArray } from 'drizzle-orm';
|
|
62
63
|
import { P, match } from 'ts-pattern';
|
|
63
|
-
import {
|
|
64
|
+
import { convertToGenkitSchema, genkitGenerationOptions, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
|
|
65
|
+
import { formatData, formatInstructions, orderedList } from '../../../ai/prompts/index.js';
|
|
64
66
|
import { TemporaryFile } from '../../../file/server/index.js';
|
|
65
67
|
import { inject } from '../../../injector/inject.js';
|
|
66
68
|
import { Logger } from '../../../logger/logger.js';
|
|
@@ -68,7 +70,7 @@ import { arrayAgg } from '../../../orm/index.js';
|
|
|
68
70
|
import { injectRepository } from '../../../orm/server/index.js';
|
|
69
71
|
import { array, boolean, enumeration, integer, nullable, number, object, string } from '../../../schema/index.js';
|
|
70
72
|
import { distinct } from '../../../utils/array/index.js';
|
|
71
|
-
import {
|
|
73
|
+
import { numericDateToDateTime, tryDateObjectToNumericDate } from '../../../utils/date-time.js';
|
|
72
74
|
import { fromEntries, objectEntries } from '../../../utils/object/object.js';
|
|
73
75
|
import { assertDefined, assertDefinedPass, assertNotNull, isNotNull, isNull, isUndefined } from '../../../utils/type-guards.js';
|
|
74
76
|
import { Document, DocumentProperty, DocumentRequestState, DocumentTypeProperty } from '../../models/index.js';
|
|
@@ -79,84 +81,232 @@ import { DocumentFileService } from './document-file.service.js';
|
|
|
79
81
|
import { DocumentPropertyService } from './document-property.service.js';
|
|
80
82
|
import { DocumentTagService } from './document-tag.service.js';
|
|
81
83
|
import { DocumentManagementSingleton } from './singleton.js';
|
|
82
|
-
|
|
83
|
-
const
|
|
84
|
-
|
|
84
|
+
// --- Prompts ---
|
|
85
|
+
const ocrSystemPrompt = `
|
|
86
|
+
You are an expert OCR and Document Digitization engine.
|
|
87
|
+
|
|
88
|
+
${formatInstructions({
|
|
89
|
+
'Primary Objective': 'Convert the provided document into semantically structured, clean Markdown.',
|
|
90
|
+
'Critical Constraints': orderedList([
|
|
91
|
+
'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
|
|
92
|
+
'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
|
|
93
|
+
]),
|
|
94
|
+
'Formatting Rules': orderedList({
|
|
95
|
+
'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
|
|
96
|
+
'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
|
|
97
|
+
'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
|
|
98
|
+
'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
|
|
99
|
+
'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
|
|
100
|
+
'Columns': 'Read multi-column text as a single continuous flow.',
|
|
101
|
+
}),
|
|
102
|
+
'Complex Elements': {
|
|
103
|
+
'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
|
|
104
|
+
'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
|
|
105
|
+
'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
|
|
106
|
+
'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
|
|
107
|
+
},
|
|
108
|
+
'Page Handling': [
|
|
109
|
+
'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
|
|
110
|
+
'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
|
|
111
|
+
],
|
|
112
|
+
'Error Handling': [
|
|
113
|
+
'Mark illegible text as `[Illegible]`.',
|
|
114
|
+
'Mark cut-off text as `[Cut off]`.',
|
|
115
|
+
],
|
|
116
|
+
})}
|
|
117
|
+
`.trim();
|
|
118
|
+
const ocrUserPrompt = 'Transcribe the attached document into Markdown following the system instructions.';
|
|
119
|
+
const classifySystemPrompt = `
|
|
120
|
+
You are a Document Taxonomy Specialist.
|
|
121
|
+
|
|
122
|
+
${formatInstructions({
|
|
123
|
+
'Task': `Analyze the visual layout and text content of the document to categorize it into exactly one of the provided hierarchical types.`,
|
|
124
|
+
'Input Context': 'You will be provided with a list of valid category labels (e.g., "Finance -> Invoice").',
|
|
125
|
+
'Analysis Strategy': orderedList([
|
|
126
|
+
'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
|
|
127
|
+
'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
|
|
128
|
+
'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
|
|
129
|
+
]),
|
|
130
|
+
'Selection Logic': orderedList([
|
|
131
|
+
'Exact Match: If the document explicitly states its type, select the corresponding category.',
|
|
132
|
+
'Content Match: If implicit, match the intent.',
|
|
133
|
+
'Specificity: Always choose the most specific leaf-node category available.',
|
|
134
|
+
'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
|
|
135
|
+
]),
|
|
136
|
+
})}
|
|
137
|
+
`.trim();
|
|
138
|
+
const classifyUserPrompt = 'Determine the single most accurate document type from the provided list based on the document following the system instructions.';
|
|
139
|
+
const extractSystemPrompt = `
|
|
140
|
+
You are a Structured Data Extraction Analyst.
|
|
141
|
+
|
|
142
|
+
${formatInstructions({
|
|
143
|
+
'Task': 'Analyze the document and extract metadata into the defined JSON schema.',
|
|
144
|
+
'General Guidelines': orderedList({
|
|
145
|
+
'Language': 'Ensure all generated text (titles, summaries) matches the primary language of the document.',
|
|
146
|
+
'Null Handling': 'If a specific field or property is not present in the document, return null. Do not guess or hallucinate values.',
|
|
147
|
+
}),
|
|
148
|
+
'Field Specific Instructions': {
|
|
149
|
+
'Title': 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
|
|
150
|
+
'Subtitle': 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
|
|
151
|
+
'Summary': 'Write a 2-3 sentence executive summary. Mention the what type of information can be found in the document and its purpose.',
|
|
152
|
+
'Tags': 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
|
|
153
|
+
'Date': 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date)',
|
|
154
|
+
},
|
|
155
|
+
'Property Extraction': [
|
|
156
|
+
'You will be given a list of specific dynamic properties to look for.',
|
|
157
|
+
'Extract values *exactly* as they appear for strings.',
|
|
158
|
+
'Normalize numbers and dates to standard formats.',
|
|
159
|
+
'If a property is ambiguous, favor the value most prominent in the document layout.',
|
|
160
|
+
'If a property is missing, set its value to null.',
|
|
161
|
+
],
|
|
162
|
+
})}`.trim();
|
|
163
|
+
const extractUserPrompt = 'Analyze the document and extract metadata and specific properties defined in the output schema following the system instructions.';
|
|
164
|
+
const assignCollectionSystemPrompt = `
|
|
165
|
+
You are a Digital Filing Assistant.
|
|
166
|
+
|
|
167
|
+
${formatInstructions({
|
|
168
|
+
'Task': `Assign the document to relevant collections based on its metadata and content.`,
|
|
169
|
+
'Input': 'Document Metadata and a list of Available Collections.',
|
|
170
|
+
'Matching Logic': orderedList([
|
|
171
|
+
'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
|
|
172
|
+
'Semantic Fit: Determine if the document functionally belongs to a group.',
|
|
173
|
+
'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
|
|
174
|
+
]),
|
|
175
|
+
'Output': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
|
|
176
|
+
})}
|
|
177
|
+
`.trim();
|
|
178
|
+
const assignCollectionUserPrompt = 'Select the most appropriate collections for this document from the provided list following the system instructions.';
|
|
179
|
+
const assignRequestSystemPrompt = `
|
|
180
|
+
You are a Workflow Routing Agent.
|
|
181
|
+
|
|
182
|
+
${formatInstructions({
|
|
183
|
+
'Task': 'Match the provided document to an existing Open Document Request.',
|
|
184
|
+
'Input': 'Document Metadata and a list of Open Requests.',
|
|
185
|
+
'Matching Rules': orderedList({
|
|
186
|
+
'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
|
|
187
|
+
'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
|
|
188
|
+
'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
|
|
189
|
+
}),
|
|
190
|
+
'Output': 'The ID of the matching request, or null if no request matches.',
|
|
191
|
+
})}
|
|
192
|
+
`.trim();
|
|
193
|
+
const assignRequestUserPrompt = 'Evaluate the document against the list of open requests and find the best match following the system instructions.';
|
|
85
194
|
let DocumentManagementAiService = DocumentManagementAiService_1 = class DocumentManagementAiService {
|
|
195
|
+
#genkit = injectGenkit();
|
|
196
|
+
#ocrModel = injectModel('gemini-2.5-flash-lite').withConfig({ temperature: 0.25, topP: 0.75, topK: 8 });
|
|
197
|
+
#classifyModel = injectModel('gemini-2.5-flash').withConfig({ temperature: 0.25, topP: 0.75, topK: 8 });
|
|
198
|
+
#extractModel = injectModel('gemini-2.5-flash').withConfig({ temperature: 0.25, topP: 0.75, topK: 8 });
|
|
199
|
+
#assignModel = injectModel('gemini-2.5-flash').withConfig({ temperature: 0.25, topP: 0.75, topK: 8 });
|
|
86
200
|
#documentCollectionService = inject(DocumentCollectionService);
|
|
87
201
|
#documentTagService = inject(DocumentTagService);
|
|
88
202
|
#documentCategoryTypeService = inject(DocumentCategoryTypeService);
|
|
89
203
|
#documentFileService = inject(DocumentFileService);
|
|
90
204
|
#documentPropertyService = inject(DocumentPropertyService);
|
|
91
|
-
#aiService = inject(AiService);
|
|
92
205
|
#documentPropertyRepository = injectRepository(DocumentProperty);
|
|
93
206
|
#documentRepository = injectRepository(Document);
|
|
94
207
|
#documentTypePropertyRepository = injectRepository(DocumentTypeProperty);
|
|
95
208
|
#logger = inject(Logger, DocumentManagementAiService_1.name);
|
|
96
|
-
async
|
|
209
|
+
async extractDocumentContent(tenantId, documentId) {
|
|
97
210
|
const env_1 = { stack: [], error: void 0, hasError: false };
|
|
211
|
+
try {
|
|
212
|
+
const document = await this.#documentRepository.loadByQuery({ tenantId, id: documentId });
|
|
213
|
+
const fileContentStream = this.#documentFileService.getContentStream(document);
|
|
214
|
+
const tmpFile = __addDisposableResource(env_1, await TemporaryFile.from(fileContentStream), true);
|
|
215
|
+
const buffer = await readFile(tmpFile.path);
|
|
216
|
+
const base64Data = buffer.toString('base64');
|
|
217
|
+
const dataUrl = `data:${document.mimeType};base64,${base64Data}`;
|
|
218
|
+
this.#logger.trace(`Extracting content from document ${document.id}`);
|
|
219
|
+
const result = await this.#genkit.generate({
|
|
220
|
+
model: this.#ocrModel,
|
|
221
|
+
output: { schema: convertToGenkitSchema(object({ content: string() })) },
|
|
222
|
+
system: ocrSystemPrompt,
|
|
223
|
+
prompt: [
|
|
224
|
+
{ media: { url: dataUrl } },
|
|
225
|
+
{ text: ocrUserPrompt },
|
|
226
|
+
],
|
|
227
|
+
});
|
|
228
|
+
if (isNull(result.output)) {
|
|
229
|
+
throw new Error(`AI returned null output for document "${document.id}".`);
|
|
230
|
+
}
|
|
231
|
+
return result.output.content;
|
|
232
|
+
}
|
|
233
|
+
catch (e_1) {
|
|
234
|
+
env_1.error = e_1;
|
|
235
|
+
env_1.hasError = true;
|
|
236
|
+
}
|
|
237
|
+
finally {
|
|
238
|
+
const result_1 = __disposeResources(env_1);
|
|
239
|
+
if (result_1)
|
|
240
|
+
await result_1;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
async classifyDocumentType(tenantId, documentId) {
|
|
244
|
+
const env_2 = { stack: [], error: void 0, hasError: false };
|
|
98
245
|
try {
|
|
99
246
|
const document = await this.#documentRepository.loadByQuery({ tenantId, id: documentId });
|
|
100
247
|
if (isNotNull(document.typeId)) {
|
|
101
248
|
return document.typeId;
|
|
102
249
|
}
|
|
103
250
|
const fileContentStream = this.#documentFileService.getContentStream(document);
|
|
104
|
-
const tmpFile = __addDisposableResource(
|
|
105
|
-
const
|
|
251
|
+
const tmpFile = __addDisposableResource(env_2, await TemporaryFile.from(fileContentStream), true);
|
|
252
|
+
const buffer = await readFile(tmpFile.path);
|
|
253
|
+
const base64Data = buffer.toString('base64');
|
|
254
|
+
const dataUrl = `data:${document.mimeType};base64,${base64Data}`;
|
|
106
255
|
const categories = await this.#documentCategoryTypeService.loadCategoryViews(tenantId);
|
|
107
256
|
const typeLabelEntries = getDescriptiveTypeLabels(categories);
|
|
108
257
|
const typeLabels = typeLabelEntries.map(({ label }) => label);
|
|
109
258
|
this.#logger.trace(`Classifying document ${document.id}`);
|
|
110
|
-
const
|
|
111
|
-
model:
|
|
112
|
-
|
|
259
|
+
const result = await this.#genkit.generate(genkitGenerationOptions({
|
|
260
|
+
model: this.#classifyModel,
|
|
261
|
+
config: {
|
|
113
262
|
maxOutputTokens: 128,
|
|
114
|
-
|
|
115
|
-
topP: 0.75,
|
|
116
|
-
topK: 4,
|
|
117
|
-
thinkingBudget: 0,
|
|
263
|
+
thinkingConfig: { thinkingBudget: 0 },
|
|
118
264
|
},
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
],
|
|
129
|
-
},
|
|
265
|
+
output: {
|
|
266
|
+
schema: object({
|
|
267
|
+
documentType: enumeration(typeLabels),
|
|
268
|
+
}),
|
|
269
|
+
},
|
|
270
|
+
system: classifySystemPrompt,
|
|
271
|
+
prompt: [
|
|
272
|
+
{ media: { url: dataUrl } },
|
|
273
|
+
{ text: classifyUserPrompt },
|
|
130
274
|
],
|
|
131
|
-
});
|
|
132
|
-
|
|
275
|
+
}));
|
|
276
|
+
if (isNull(result.output)) {
|
|
277
|
+
throw new Error(`AI returned null output for document classification "${document.id}".`);
|
|
278
|
+
}
|
|
279
|
+
const output = result.output;
|
|
280
|
+
const typeId = typeLabelEntries.find((entry) => entry.label == output.documentType)?.id;
|
|
133
281
|
assertDefined(typeId, `Could not classify document ${document.id}`);
|
|
134
282
|
return typeId;
|
|
135
283
|
}
|
|
136
|
-
catch (
|
|
137
|
-
|
|
138
|
-
|
|
284
|
+
catch (e_2) {
|
|
285
|
+
env_2.error = e_2;
|
|
286
|
+
env_2.hasError = true;
|
|
139
287
|
}
|
|
140
288
|
finally {
|
|
141
|
-
const
|
|
142
|
-
if (
|
|
143
|
-
await
|
|
289
|
+
const result_2 = __disposeResources(env_2);
|
|
290
|
+
if (result_2)
|
|
291
|
+
await result_2;
|
|
144
292
|
}
|
|
145
293
|
}
|
|
146
294
|
async extractDocumentInformation(tenantId, documentId) {
|
|
147
|
-
const
|
|
295
|
+
const env_3 = { stack: [], error: void 0, hasError: false };
|
|
148
296
|
try {
|
|
149
297
|
const document = await this.#documentRepository.loadByQuery({ tenantId, id: documentId });
|
|
150
298
|
const existingTags = await this.#documentTagService.loadTags(tenantId);
|
|
151
299
|
const fileContentStream = this.#documentFileService.getContentStream(document);
|
|
152
|
-
const tmpFile = __addDisposableResource(
|
|
153
|
-
const
|
|
300
|
+
const tmpFile = __addDisposableResource(env_3, await TemporaryFile.from(fileContentStream), true);
|
|
301
|
+
const buffer = await readFile(tmpFile.path);
|
|
302
|
+
const base64Data = buffer.toString('base64');
|
|
303
|
+
const dataUrl = `data:${document.mimeType};base64,${base64Data}`;
|
|
154
304
|
if (isNull(document.typeId)) {
|
|
155
305
|
throw new Error(`Document ${document.id} has no type`);
|
|
156
306
|
}
|
|
157
|
-
const typeProperties = await this.#documentTypePropertyRepository.loadManyByQuery({ tenantId
|
|
307
|
+
const typeProperties = await this.#documentTypePropertyRepository.loadManyByQuery({ tenantId, typeId: document.typeId });
|
|
158
308
|
const propertyIds = typeProperties.map((property) => property.propertyId);
|
|
159
|
-
const properties = (propertyIds.length > 0) ? await this.#documentPropertyRepository.loadManyByQuery({ tenantId
|
|
309
|
+
const properties = (propertyIds.length > 0) ? await this.#documentPropertyRepository.loadManyByQuery({ tenantId, id: { $in: propertyIds } }) : undefined;
|
|
160
310
|
const propertiesSchemaEntries = properties?.map((property) => {
|
|
161
311
|
const schema = match(property.dataType)
|
|
162
312
|
.with('text', () => nullable(string()))
|
|
@@ -177,39 +327,31 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
177
327
|
? {}
|
|
178
328
|
: { documentProperties: object(fromEntries(propertiesSchemaEntries)) }),
|
|
179
329
|
});
|
|
180
|
-
const
|
|
330
|
+
const tagLabels = existingTags.map((tag) => tag.label);
|
|
181
331
|
this.#logger.trace(`Extracting document ${document.id}`);
|
|
182
|
-
const
|
|
183
|
-
model:
|
|
184
|
-
|
|
332
|
+
const result = await this.#genkit.generate(genkitGenerationOptions({
|
|
333
|
+
model: this.#extractModel,
|
|
334
|
+
output: { schema: generationSchema },
|
|
335
|
+
config: {
|
|
185
336
|
maxOutputTokens: 2048,
|
|
186
|
-
|
|
187
|
-
topP: 0.5,
|
|
188
|
-
topK: 16,
|
|
189
|
-
thinkingBudget: 0,
|
|
337
|
+
thinkingConfig: { thinkingBudget: 0 },
|
|
190
338
|
},
|
|
191
|
-
|
|
192
|
-
|
|
339
|
+
system: extractSystemPrompt,
|
|
340
|
+
prompt: [
|
|
341
|
+
{ media: { url: dataUrl } },
|
|
193
342
|
{
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
{ file: filePart.file },
|
|
197
|
-
{
|
|
198
|
-
text: `<context>
|
|
199
|
-
${JSON.stringify(context, null, 2)}
|
|
200
|
-
</context>
|
|
201
|
-
Extrahiere den Inhalt des Dokuments in das angegebenen JSON Schema.
|
|
343
|
+
text: `
|
|
344
|
+
${formatData({ existingTags: tagLabels })}
|
|
202
345
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
Erstelle bis zu 5 Tags. Verwende vorhandene Tags, wenn sie passen. Erstelle neue Tags, wenn es keine passenden gibt.
|
|
206
|
-
Vermeide es, den Titel oder Untertitel als Tag zu verwenden.
|
|
207
|
-
Antworte auf deutsch.`,
|
|
208
|
-
},
|
|
209
|
-
],
|
|
346
|
+
${extractUserPrompt}
|
|
347
|
+
`.trim(),
|
|
210
348
|
},
|
|
211
349
|
],
|
|
212
|
-
});
|
|
350
|
+
}));
|
|
351
|
+
if (isNull(result.output)) {
|
|
352
|
+
throw new Error(`AI returned null output for document extraction "${document.id}".`);
|
|
353
|
+
}
|
|
354
|
+
const extraction = result.output;
|
|
213
355
|
const filteredDocumentTags = extraction.documentTags.filter((tag) => (tag != extraction.documentTitle) && (tag != extraction.documentSubtitle));
|
|
214
356
|
const date = isNotNull(extraction.documentDate) ? tryAiOutputDateObjectToNumericDate(extraction.documentDate) : null;
|
|
215
357
|
const parsedProperties = isUndefined(extraction.documentProperties)
|
|
@@ -238,14 +380,14 @@ Antworte auf deutsch.`,
|
|
|
238
380
|
properties: parsedProperties,
|
|
239
381
|
};
|
|
240
382
|
}
|
|
241
|
-
catch (
|
|
242
|
-
|
|
243
|
-
|
|
383
|
+
catch (e_3) {
|
|
384
|
+
env_3.error = e_3;
|
|
385
|
+
env_3.hasError = true;
|
|
244
386
|
}
|
|
245
387
|
finally {
|
|
246
|
-
const
|
|
247
|
-
if (
|
|
248
|
-
await
|
|
388
|
+
const result_3 = __disposeResources(env_3);
|
|
389
|
+
if (result_3)
|
|
390
|
+
await result_3;
|
|
249
391
|
}
|
|
250
392
|
}
|
|
251
393
|
async findSuitableCollectionsForDocument(document, collectionIds) {
|
|
@@ -261,41 +403,36 @@ Antworte auf deutsch.`,
|
|
|
261
403
|
}));
|
|
262
404
|
const documentTagLabels = documentTags.map((tag) => tag.label);
|
|
263
405
|
const propertyEntries = documentProperties.map((property) => [property.label, property.value]);
|
|
264
|
-
const
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
tags: (documentTagLabels.length > 0) ? documentTagLabels : undefined,
|
|
271
|
-
properties: fromEntries(propertyEntries),
|
|
272
|
-
},
|
|
273
|
-
collections,
|
|
406
|
+
const documentData = {
|
|
407
|
+
title: document.title ?? null,
|
|
408
|
+
subtitle: document.subtitle ?? null,
|
|
409
|
+
date: isNotNull(document.date) ? numericDateToDateTime(document.date).toISODate() : null,
|
|
410
|
+
tags: (documentTagLabels.length > 0) ? documentTagLabels : null,
|
|
411
|
+
summary: document.summary ?? null,
|
|
274
412
|
};
|
|
275
|
-
const result = await this.#
|
|
276
|
-
model:
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
413
|
+
const result = await this.#genkit.generate(genkitGenerationOptions({
|
|
414
|
+
model: this.#assignModel,
|
|
415
|
+
output: { schema: object({ collectionIds: array(string()) }) },
|
|
416
|
+
config: {
|
|
417
|
+
maxOutputTokens: 512,
|
|
418
|
+
thinkingConfig: {
|
|
419
|
+
thinkingBudget: 0,
|
|
420
|
+
},
|
|
283
421
|
},
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
${JSON.stringify(context, null, 2)}
|
|
291
|
-
</context>
|
|
422
|
+
system: assignCollectionSystemPrompt,
|
|
423
|
+
prompt: [{
|
|
424
|
+
text: `
|
|
425
|
+
${formatData({ document: documentData, documentProperties: fromEntries(propertyEntries) })}
|
|
426
|
+
|
|
427
|
+
${formatData({ collections })}
|
|
292
428
|
|
|
293
|
-
|
|
294
|
-
},
|
|
295
|
-
],
|
|
429
|
+
${assignCollectionUserPrompt}`,
|
|
296
430
|
}],
|
|
297
|
-
});
|
|
298
|
-
|
|
431
|
+
}));
|
|
432
|
+
if (isNull(result.output)) {
|
|
433
|
+
throw new Error(`AI returned null output for collection assignment "${document.id}".`);
|
|
434
|
+
}
|
|
435
|
+
return result.output.collectionIds;
|
|
299
436
|
}
|
|
300
437
|
async findSuitableRequestForDocument(document, collectionIds) {
|
|
301
438
|
const session = this.#documentPropertyRepository.session;
|
|
@@ -322,44 +459,40 @@ Ordne das Dokument unter "document" einer oder mehreren passenden Collection unt
|
|
|
322
459
|
const requests = openRequestsWithoutDocument.map((request) => ({
|
|
323
460
|
id: request.id,
|
|
324
461
|
collections: request.collectionIds.map((collectionId) => assertDefinedPass(collectionNamesMap[collectionId]).name),
|
|
325
|
-
comment: request.comment ??
|
|
462
|
+
comment: request.comment ?? null,
|
|
326
463
|
}));
|
|
327
464
|
const propertyEntries = documentProperties.map((property) => [property.label, property.value]);
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
tags: (documentTagLabels.length > 0) ? documentTagLabels : undefined,
|
|
335
|
-
properties: fromEntries(propertyEntries),
|
|
336
|
-
},
|
|
337
|
-
requests,
|
|
465
|
+
const documentData = {
|
|
466
|
+
title: document.title ?? null,
|
|
467
|
+
subtitle: document.subtitle ?? null,
|
|
468
|
+
date: isNotNull(document.date) ? numericDateToDateTime(document.date).toISODate() : null,
|
|
469
|
+
tags: (documentTagLabels.length > 0) ? documentTagLabels : null,
|
|
470
|
+
summary: document.summary ?? null,
|
|
338
471
|
};
|
|
339
|
-
const result = await this.#
|
|
340
|
-
model:
|
|
341
|
-
|
|
342
|
-
maxOutputTokens:
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
thinkingBudget: 0,
|
|
472
|
+
const result = await this.#genkit.generate(genkitGenerationOptions({
|
|
473
|
+
model: this.#assignModel,
|
|
474
|
+
config: {
|
|
475
|
+
maxOutputTokens: 128,
|
|
476
|
+
thinkingConfig: {
|
|
477
|
+
thinkingBudget: 0,
|
|
478
|
+
},
|
|
347
479
|
},
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
${
|
|
355
|
-
</context>
|
|
480
|
+
output: { schema: object({ requestId: nullable(string()) }) },
|
|
481
|
+
system: assignRequestSystemPrompt,
|
|
482
|
+
prompt: [{
|
|
483
|
+
text: `
|
|
484
|
+
${formatData({ document: documentData, documentProperties: fromEntries(propertyEntries) })}
|
|
485
|
+
|
|
486
|
+
${formatData({ requests })}
|
|
356
487
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
],
|
|
488
|
+
${assignRequestUserPrompt}
|
|
489
|
+
`.trim(),
|
|
360
490
|
}],
|
|
361
|
-
});
|
|
362
|
-
|
|
491
|
+
}));
|
|
492
|
+
if (isNull(result.output)) {
|
|
493
|
+
throw new Error(`AI returned null output for request assignment "${document.id}".`);
|
|
494
|
+
}
|
|
495
|
+
return result.output.requestId;
|
|
363
496
|
}
|
|
364
497
|
};
|
|
365
498
|
DocumentManagementAiService = DocumentManagementAiService_1 = __decorate([
|
|
@@ -96,8 +96,8 @@ export declare class DocumentManagementService extends Transactional {
|
|
|
96
96
|
getRelevantDocumentCollectionIds(tenantId: string, documentId: string): Promise<string[]>;
|
|
97
97
|
loadDataStream(tenantId: string, collectionIds: string[], cancellationSignal: CancellationSignal): AsyncGenerator<DocumentManagementData>;
|
|
98
98
|
loadData(tenantId: string, collectionIds: string[]): Promise<DocumentManagementData>;
|
|
99
|
-
loadDocumentRequestsTemplateData(tenantId: string
|
|
100
|
-
initializeCategoriesAndTypes<CategoryKey extends string, TypeKey extends string, DocumentPropertyKey extends string>(tenantId: string
|
|
99
|
+
loadDocumentRequestsTemplateData(tenantId: string): Promise<DocumentRequestsTemplateData>;
|
|
100
|
+
initializeCategoriesAndTypes<CategoryKey extends string, TypeKey extends string, DocumentPropertyKey extends string>(tenantId: string, data: CategoriesAndTypesInitializationData<CategoryKey, TypeKey, DocumentPropertyKey>): Promise<{
|
|
101
101
|
categories: Record<CategoryKey, DocumentCategory>;
|
|
102
102
|
types: Record<TypeKey, DocumentType>;
|
|
103
103
|
properties: Record<DocumentPropertyKey, DocumentProperty>;
|
|
@@ -46,18 +46,18 @@ export declare class DocumentPropertyService extends Transactional {
|
|
|
46
46
|
}, {}, {}>;
|
|
47
47
|
value: import("drizzle-orm").SQL.Aliased<string | number | boolean | null>;
|
|
48
48
|
}, "documentProperties">;
|
|
49
|
-
loadViews(tenantId: string
|
|
49
|
+
loadViews(tenantId: string): Promise<DocumentPropertyView[]>;
|
|
50
50
|
createProperty(data: {
|
|
51
|
-
tenantId: string
|
|
51
|
+
tenantId: string;
|
|
52
52
|
label: string;
|
|
53
53
|
dataType: DocumentPropertyDataType;
|
|
54
54
|
enumKey?: string;
|
|
55
55
|
}): Promise<DocumentProperty>;
|
|
56
|
-
updateProperty(tenantId: string
|
|
56
|
+
updateProperty(tenantId: string, id: string, update: {
|
|
57
57
|
label?: string;
|
|
58
58
|
dataType?: DocumentPropertyDataType;
|
|
59
59
|
}): Promise<DocumentProperty>;
|
|
60
|
-
assignPropertyToType(tenantId: string
|
|
60
|
+
assignPropertyToType(tenantId: string, typeId: string, propertyId: string): Promise<void>;
|
|
61
61
|
loadDocumentPropertyValues(tenantId: string, documentId: OneOrMany<string>, includeNulls?: boolean): Promise<DocumentPropertyValueView[]>;
|
|
62
62
|
setPropertyValues(document: Document, propertyValues: SetDocumentPropertyParameters[]): Promise<void>;
|
|
63
63
|
}
|
|
@@ -46,8 +46,8 @@ let DocumentPropertyService = class DocumentPropertyService extends Transactiona
|
|
|
46
46
|
.innerJoin(documentProperty, and(eq(documentProperty.id, documentTypeProperty.propertyId), or(drizzleIsNull(documentProperty.tenantId), eq(documentProperty.tenantId, document.tenantId))))
|
|
47
47
|
.leftJoin(documentPropertyValue, and(eq(documentPropertyValue.tenantId, document.tenantId), eq(documentPropertyValue.documentId, document.id), eq(documentPropertyValue.propertyId, documentProperty.id))));
|
|
48
48
|
async loadViews(tenantId) {
|
|
49
|
-
const properties = await this.#documentPropertyRepository.loadManyByQuery({ tenantId
|
|
50
|
-
const typeProperties = await this.#documentTypePropertyRepository.loadManyByQuery({ tenantId
|
|
49
|
+
const properties = await this.#documentPropertyRepository.loadManyByQuery({ tenantId });
|
|
50
|
+
const typeProperties = await this.#documentTypePropertyRepository.loadManyByQuery({ tenantId });
|
|
51
51
|
return properties.map((property) => {
|
|
52
52
|
const typeIds = typeProperties
|
|
53
53
|
.filter((typeProperty) => typeProperty.propertyId == property.id)
|
|
@@ -94,7 +94,7 @@ let DocumentPropertyService = class DocumentPropertyService extends Transactiona
|
|
|
94
94
|
}
|
|
95
95
|
await this.transaction(async (tx) => {
|
|
96
96
|
const propertyIds = propertyValues.map((property) => property.propertyId);
|
|
97
|
-
const properties = await this.#documentPropertyRepository.withTransaction(tx).loadManyByQuery({ tenantId:
|
|
97
|
+
const properties = await this.#documentPropertyRepository.withTransaction(tx).loadManyByQuery({ tenantId: document.tenantId, id: { $in: propertyIds } });
|
|
98
98
|
const propertiesMap = getEntityMap(properties);
|
|
99
99
|
const upserts = propertyValues.filter((value) => isNotNull(value.value)).map(({ propertyId, value, metadata }) => {
|
|
100
100
|
const property = propertiesMap.get(propertyId);
|
|
@@ -5,8 +5,8 @@ import type { RequestStats } from '../../service-models/index.js';
|
|
|
5
5
|
export declare class DocumentRequestService extends Transactional {
|
|
6
6
|
#private;
|
|
7
7
|
getRequestStats(tenantId: string, collectionIds: OneOrMany<string>): Promise<RequestStats>;
|
|
8
|
-
createRequestsTemplate(tenantId: string
|
|
9
|
-
updateRequestsTemplate(tenantId: string
|
|
8
|
+
createRequestsTemplate(tenantId: string, parameters: Pick<DocumentRequestsTemplate, 'label' | 'description'>): Promise<DocumentRequestsTemplate>;
|
|
9
|
+
updateRequestsTemplate(tenantId: string, id: string, parameters: Pick<DocumentRequestsTemplate, 'label' | 'description' | 'metadata'>): Promise<DocumentRequestsTemplate>;
|
|
10
10
|
applyRequestsTemplate(tenantId: string, id: string, collectionIds: string[]): Promise<void>;
|
|
11
11
|
deleteRequestsTemplate(tenantId: string, id: string): Promise<DocumentRequestsTemplate>;
|
|
12
12
|
createRequestTemplate(tenantId: string, requestsTemplateId: string, typeId: string, comment: string): Promise<DocumentRequestTemplate>;
|
|
@@ -46,7 +46,7 @@ let DocumentRequestService = class DocumentRequestService extends Transactional
|
|
|
46
46
|
return await this.#documentRequestsTemplateRepository.updateByQuery({ tenantId, id }, parameters);
|
|
47
47
|
}
|
|
48
48
|
async applyRequestsTemplate(tenantId, id, collectionIds) {
|
|
49
|
-
const requestTemplates = await this.#documentRequestTemplateRepository.loadManyByQuery({ tenantId
|
|
49
|
+
const requestTemplates = await this.#documentRequestTemplateRepository.loadManyByQuery({ tenantId, requestsTemplateId: id });
|
|
50
50
|
await this.transaction(async (tx) => {
|
|
51
51
|
for (const { typeId, comment } of requestTemplates) {
|
|
52
52
|
await this.withTransaction(tx).createRequest(tenantId, typeId, collectionIds, comment);
|
|
@@ -3,7 +3,7 @@ import { DocumentTag, DocumentTagAssignment, type Document } from '../../models/
|
|
|
3
3
|
export declare class DocumentTagService extends Transactional {
|
|
4
4
|
readonly tagRepository: import("../../../orm/server/index.js").EntityRepository<DocumentTag>;
|
|
5
5
|
readonly tagAssignmentRepository: import("../../../orm/server/index.js").EntityRepository<DocumentTagAssignment>;
|
|
6
|
-
loadTags(tenantId: string
|
|
6
|
+
loadTags(tenantId: string): Promise<DocumentTag[]>;
|
|
7
7
|
loadOrCreate(tenantId: string, labels: string[]): Promise<DocumentTag[]>;
|
|
8
8
|
loadDocumentTags(tenantId: string, documentId: string): Promise<DocumentTag[]>;
|
|
9
9
|
assignTags(document: Document, labels: string[]): Promise<void>;
|
|
@@ -14,7 +14,7 @@ let DocumentTagService = class DocumentTagService extends Transactional {
|
|
|
14
14
|
tagRepository = injectRepository(DocumentTag);
|
|
15
15
|
tagAssignmentRepository = injectRepository(DocumentTagAssignment);
|
|
16
16
|
async loadTags(tenantId) {
|
|
17
|
-
return await this.tagRepository.loadManyByQuery({ tenantId
|
|
17
|
+
return await this.tagRepository.loadManyByQuery({ tenantId }, { order: { label: 'asc' } });
|
|
18
18
|
}
|
|
19
19
|
async loadOrCreate(tenantId, labels) {
|
|
20
20
|
if (labels.length === 0) {
|
|
@@ -22,7 +22,7 @@ let DocumentTagService = class DocumentTagService extends Transactional {
|
|
|
22
22
|
}
|
|
23
23
|
return await this.tagRepository.transaction(async (tx) => {
|
|
24
24
|
const existingTags = await this.tagRepository.withTransaction(tx).loadManyByQuery({
|
|
25
|
-
tenantId
|
|
25
|
+
tenantId,
|
|
26
26
|
label: { $in: labels },
|
|
27
27
|
});
|
|
28
28
|
const newLabels = labels.filter((label) => !existingTags.some((tag) => tag.label == label));
|