@tstdl/base 0.93.168 → 0.93.170
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai/prompts/prompt-builder.d.ts +6 -2
- package/ai/prompts/prompt-builder.js +44 -5
- package/ai/prompts/steering.d.ts +2 -1
- package/ai/prompts/steering.js +3 -0
- package/document-management/server/services/document-management-ai.prompts.d.ts +27 -0
- package/document-management/server/services/document-management-ai.prompts.js +158 -0
- package/document-management/server/services/document-management-ai.service.d.ts +4 -17
- package/document-management/server/services/document-management-ai.service.js +49 -138
- package/document-management/server/validators/ai-validation-executor.d.ts +2 -0
- package/document-management/server/validators/ai-validation-executor.js +37 -28
- package/package.json +4 -11
|
@@ -2,6 +2,7 @@ import type { Part } from 'genkit';
|
|
|
2
2
|
import type { SchemaTestable } from '../../schema/schema.js';
|
|
3
3
|
import type { ObjectLiteral } from '../../types/index.js';
|
|
4
4
|
import { type Instructions } from './instructions-formatter.js';
|
|
5
|
+
import { type FewShotExample } from './steering.js';
|
|
5
6
|
export type PromptBuilderInstructions = Record<string, Instructions>;
|
|
6
7
|
export type PromptBuilderContext = Record<string, PromptBuilderContextItem>;
|
|
7
8
|
export type PromptBuilderContextItem = ObjectLiteral;
|
|
@@ -11,8 +12,11 @@ export declare class PromptBuilder {
|
|
|
11
12
|
setRole(role: string): this;
|
|
12
13
|
setSystemTask(task: string): this;
|
|
13
14
|
setTask(task: string): this;
|
|
14
|
-
setSystemOutputSchema(schema: SchemaTestable): this;
|
|
15
|
-
setOutputSchema(schema: SchemaTestable): this;
|
|
15
|
+
setSystemOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
|
|
16
|
+
setOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
|
|
17
|
+
setSystemInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
|
|
18
|
+
setInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
|
|
19
|
+
setLanguage(language: string): this;
|
|
16
20
|
addSystemMedia(content: Uint8Array, mimeType: string): this;
|
|
17
21
|
addMedia(content: Uint8Array, mimeType: string): this;
|
|
18
22
|
addSystemInstructions(instructions: Record<string, Instructions>): this;
|
|
@@ -3,7 +3,8 @@ import { encodeBase64 } from '../../utils/base64.js';
|
|
|
3
3
|
import { fromEntries, objectEntries, objectKeys } from '../../utils/object/index.js';
|
|
4
4
|
import { assertObjectPass, isDefined, isString, isUndefined } from '../../utils/type-guards.js';
|
|
5
5
|
import { formatData } from './format.js';
|
|
6
|
-
import { formatInstructions, sections } from './instructions-formatter.js';
|
|
6
|
+
import { formatInstructions, sections, unorderedList } from './instructions-formatter.js';
|
|
7
|
+
import { fewShotPrompt, languagePrompt } from './steering.js';
|
|
7
8
|
export class PromptBuilder {
|
|
8
9
|
#systemMedia = [];
|
|
9
10
|
#media = [];
|
|
@@ -12,11 +13,16 @@ export class PromptBuilder {
|
|
|
12
13
|
#systemTask;
|
|
13
14
|
#task;
|
|
14
15
|
#systemOutputSchema;
|
|
16
|
+
#systemOutputExamples;
|
|
15
17
|
#outputSchema;
|
|
18
|
+
#outputExamples;
|
|
16
19
|
#systemInstructions = {};
|
|
17
20
|
#instructions = {};
|
|
18
21
|
#systemContextParts = {};
|
|
19
22
|
#contextParts = {};
|
|
23
|
+
#language;
|
|
24
|
+
#systemInstructionsOverride;
|
|
25
|
+
#instructionsOverride;
|
|
20
26
|
setSystemRole(role) {
|
|
21
27
|
this.#systemRole = role;
|
|
22
28
|
return this;
|
|
@@ -33,12 +39,26 @@ export class PromptBuilder {
|
|
|
33
39
|
this.#task = task;
|
|
34
40
|
return this;
|
|
35
41
|
}
|
|
36
|
-
setSystemOutputSchema(schema) {
|
|
42
|
+
setSystemOutputSchema(schema, examples) {
|
|
37
43
|
this.#systemOutputSchema = schema;
|
|
44
|
+
this.#systemOutputExamples = examples;
|
|
38
45
|
return this;
|
|
39
46
|
}
|
|
40
|
-
setOutputSchema(schema) {
|
|
47
|
+
setOutputSchema(schema, examples) {
|
|
41
48
|
this.#outputSchema = schema;
|
|
49
|
+
this.#outputExamples = examples;
|
|
50
|
+
return this;
|
|
51
|
+
}
|
|
52
|
+
setSystemInstructionsOverride(override) {
|
|
53
|
+
this.#systemInstructionsOverride = override;
|
|
54
|
+
return this;
|
|
55
|
+
}
|
|
56
|
+
setInstructionsOverride(override) {
|
|
57
|
+
this.#instructionsOverride = override;
|
|
58
|
+
return this;
|
|
59
|
+
}
|
|
60
|
+
setLanguage(language) {
|
|
61
|
+
this.#language = language;
|
|
42
62
|
return this;
|
|
43
63
|
}
|
|
44
64
|
addSystemMedia(content, mimeType) {
|
|
@@ -79,8 +99,11 @@ export class PromptBuilder {
|
|
|
79
99
|
context: this.#systemContextParts,
|
|
80
100
|
instructions: this.#systemInstructions,
|
|
81
101
|
outputSchema: this.#systemOutputSchema,
|
|
102
|
+
outputExamples: this.#systemOutputExamples,
|
|
82
103
|
task: this.#systemTask,
|
|
83
104
|
media: this.#systemMedia,
|
|
105
|
+
language: this.#language,
|
|
106
|
+
instructionsOverride: this.#systemInstructionsOverride,
|
|
84
107
|
});
|
|
85
108
|
}
|
|
86
109
|
buildUserPrompt() {
|
|
@@ -89,8 +112,11 @@ export class PromptBuilder {
|
|
|
89
112
|
context: this.#contextParts,
|
|
90
113
|
instructions: this.#instructions,
|
|
91
114
|
outputSchema: this.#outputSchema,
|
|
115
|
+
outputExamples: this.#outputExamples,
|
|
92
116
|
task: this.#task,
|
|
93
117
|
media: this.#media,
|
|
118
|
+
language: this.#language,
|
|
119
|
+
instructionsOverride: this.#instructionsOverride,
|
|
94
120
|
});
|
|
95
121
|
}
|
|
96
122
|
}
|
|
@@ -119,12 +145,25 @@ function buildPrompt(data) {
|
|
|
119
145
|
}
|
|
120
146
|
if (isDefined(data.outputSchema)) {
|
|
121
147
|
const schema = convertToOpenApiSchema(data.outputSchema);
|
|
122
|
-
|
|
148
|
+
const schemaJson = JSON.stringify(schema, null, 2);
|
|
149
|
+
instructions['**Output Schema**'] = `\`\`\`json\n${schemaJson}\n\`\`\``;
|
|
150
|
+
instructions['**Output Schema Instructions**'] = unorderedList({
|
|
151
|
+
'Schema Compliance': 'Generate valid JSON that strictly matches the provided schema.',
|
|
152
|
+
'Nullable fields with missing data': 'Must be set to literal `null`, not the string "null".',
|
|
153
|
+
'Optional fields with missing data': 'Omit the key entirely (sparse JSON).',
|
|
154
|
+
});
|
|
155
|
+
if (isDefined(data.outputExamples) && (data.outputExamples.length > 0)) {
|
|
156
|
+
instructions['**Output Examples**'] = fewShotPrompt(data.outputExamples);
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
if (isDefined(data.language)) {
|
|
160
|
+
instructions['**Output Language**'] = languagePrompt(data.language);
|
|
123
161
|
}
|
|
124
162
|
if (isDefined(data.task)) {
|
|
125
163
|
instructions['**Task**'] = data.task;
|
|
126
164
|
}
|
|
127
|
-
const
|
|
165
|
+
const instructionsWithOverride = data.instructionsOverride?.(instructions) ?? instructions;
|
|
166
|
+
const formattedInstructions = isString(instructionsWithOverride) ? instructionsWithOverride : formatInstructions(instructionsWithOverride);
|
|
128
167
|
return [
|
|
129
168
|
...(data.media ?? []),
|
|
130
169
|
{ text: formattedInstructions },
|
package/ai/prompts/steering.d.ts
CHANGED
|
@@ -7,12 +7,13 @@ export type FewShotExample<Input = ObjectLiteral, Output = ObjectLiteral> = {
|
|
|
7
7
|
/** Optional reason explaining why this example is positive or negative. */
|
|
8
8
|
reason?: string;
|
|
9
9
|
};
|
|
10
|
+
export declare function fewShotExamples<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): FewShotExample<Input, Output>[];
|
|
10
11
|
/**
|
|
11
12
|
* Creates a prompt addition for few-shot learning.
|
|
12
13
|
* @param examples An array of input/output pairs.
|
|
13
14
|
* @returns A formatted few-shot prompt.
|
|
14
15
|
*/
|
|
15
|
-
export declare function fewShotPrompt<Input = ObjectLiteral, Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
|
|
16
|
+
export declare function fewShotPrompt<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
|
|
16
17
|
Examples: string;
|
|
17
18
|
} | {
|
|
18
19
|
Examples: import("./instructions-formatter.js").InstructionsList;
|
package/ai/prompts/steering.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { fromEntries } from '../../utils/object/object.js';
|
|
2
2
|
import { isDefined, isString } from '../../utils/type-guards.js';
|
|
3
3
|
import { formatInstructions, orderedList, unorderedList } from './instructions-formatter.js';
|
|
4
|
+
export function fewShotExamples(examples) {
|
|
5
|
+
return examples;
|
|
6
|
+
}
|
|
4
7
|
/**
|
|
5
8
|
* Creates a prompt addition for few-shot learning.
|
|
6
9
|
* @param examples An array of input/output pairs.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { type PromptBuilder } from '../../../ai/prompts/index.js';
|
|
2
|
+
import { type SchemaTestable } from '../../../schema/index.js';
|
|
3
|
+
export declare const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = "You are a highly precise, analytical Document Management Specialist.";
|
|
4
|
+
export declare const contentExtractionSchema: import("../../../schema/index.js").ObjectSchema<{
|
|
5
|
+
content: string;
|
|
6
|
+
}>;
|
|
7
|
+
export declare function createContentExtractionPrompt(): PromptBuilder;
|
|
8
|
+
export declare function createClassifySchema(validTypes: string[]): SchemaTestable<{
|
|
9
|
+
documentType: string;
|
|
10
|
+
}>;
|
|
11
|
+
export declare function createClassifyPrompt(validTypes: string[]): PromptBuilder;
|
|
12
|
+
export declare const dataExtractionFields: {
|
|
13
|
+
Title: string;
|
|
14
|
+
Subtitle: string;
|
|
15
|
+
Summary: string;
|
|
16
|
+
Tags: string;
|
|
17
|
+
Date: string;
|
|
18
|
+
};
|
|
19
|
+
export declare function createDataExtractionPrompt(schema: SchemaTestable): PromptBuilder;
|
|
20
|
+
export declare const assignCollectionSchema: import("../../../schema/index.js").ObjectSchema<{
|
|
21
|
+
collectionIds: string[];
|
|
22
|
+
}>;
|
|
23
|
+
export declare function createAssignCollectionPrompt(): PromptBuilder;
|
|
24
|
+
export declare const assignRequestSchema: import("../../../schema/index.js").ObjectSchema<{
|
|
25
|
+
requestId: string | null;
|
|
26
|
+
}>;
|
|
27
|
+
export declare function createAssignRequestPrompt(): PromptBuilder;
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { orderedList, promptBuilder, unorderedList } from '../../../ai/prompts/index.js';
|
|
2
|
+
import { fewShotExamples } from '../../../ai/prompts/steering.js';
|
|
3
|
+
import { array, enumeration, nullable, object, string } from '../../../schema/index.js';
|
|
4
|
+
export const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = 'You are a highly precise, analytical Document Management Specialist.';
|
|
5
|
+
// --- Content Extraction ---
|
|
6
|
+
export const contentExtractionSchema = object({ content: string() });
|
|
7
|
+
export function createContentExtractionPrompt() {
|
|
8
|
+
return promptBuilder()
|
|
9
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
10
|
+
.setTask('Transcribe the attached document into Markdown following the instructions.')
|
|
11
|
+
.setOutputSchema(contentExtractionSchema)
|
|
12
|
+
.addInstructions({
|
|
13
|
+
'Objective': 'Convert the provided document into semantically structured, clean Markdown.',
|
|
14
|
+
'Critical Constraints': orderedList([
|
|
15
|
+
'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
|
|
16
|
+
'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
|
|
17
|
+
]),
|
|
18
|
+
'Formatting Rules': orderedList({
|
|
19
|
+
'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
|
|
20
|
+
'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
|
|
21
|
+
'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
|
|
22
|
+
'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
|
|
23
|
+
'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
|
|
24
|
+
'Columns': 'Read multi-column text as a single continuous flow.',
|
|
25
|
+
}),
|
|
26
|
+
'Complex Elements': {
|
|
27
|
+
'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
|
|
28
|
+
'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
|
|
29
|
+
'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
|
|
30
|
+
'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
|
|
31
|
+
},
|
|
32
|
+
'Page Handling': [
|
|
33
|
+
'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
|
|
34
|
+
'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
|
|
35
|
+
],
|
|
36
|
+
'Error Handling': [
|
|
37
|
+
'Mark illegible text as `[Illegible]`.',
|
|
38
|
+
'Mark cut-off text as `[Cut off]`.',
|
|
39
|
+
],
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
// --- Classification ---
|
|
43
|
+
const CLASSIFY_FEW_SHOT = fewShotExamples([
|
|
44
|
+
{
|
|
45
|
+
input: 'Document that contains "Invoice", a table with items, and a "Total Due" amount.',
|
|
46
|
+
output: { documentType: 'Finance -> Invoice' },
|
|
47
|
+
reason: 'Explicit keyword and layout match.',
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
input: 'Document that contains "Rental Agreement", multiple paragraphs about obligations, and signatures at the end.',
|
|
51
|
+
output: { documentType: 'Legal -> Contract' },
|
|
52
|
+
reason: 'Structural and entity-based match.',
|
|
53
|
+
},
|
|
54
|
+
]);
|
|
55
|
+
export function createClassifySchema(validTypes) {
|
|
56
|
+
return object({ documentType: enumeration(validTypes) });
|
|
57
|
+
}
|
|
58
|
+
export function createClassifyPrompt(validTypes) {
|
|
59
|
+
const schema = createClassifySchema(validTypes);
|
|
60
|
+
return promptBuilder()
|
|
61
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
62
|
+
.setRole('Document Taxonomy Specialist')
|
|
63
|
+
.setTask('Determine the single most accurate document type from the provided list based on the document.')
|
|
64
|
+
.setOutputSchema(schema, CLASSIFY_FEW_SHOT)
|
|
65
|
+
.addInstructions({
|
|
66
|
+
'Analysis Strategy': orderedList([
|
|
67
|
+
'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
|
|
68
|
+
'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
|
|
69
|
+
'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
|
|
70
|
+
]),
|
|
71
|
+
'Selection Logic': orderedList([
|
|
72
|
+
'Exact Match: If the document explicitly states its type, select the corresponding category.',
|
|
73
|
+
'Content Match: If implicit, match the intent.',
|
|
74
|
+
'Specificity: Always choose the most specific leaf-node category available.',
|
|
75
|
+
'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
|
|
76
|
+
]),
|
|
77
|
+
'Valid category labels': unorderedList(validTypes),
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
// --- Data Extraction ---
|
|
81
|
+
export const dataExtractionFields = {
|
|
82
|
+
Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
|
|
83
|
+
Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
|
|
84
|
+
Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
|
|
85
|
+
Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
|
|
86
|
+
Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
|
|
87
|
+
};
|
|
88
|
+
export function createDataExtractionPrompt(schema) {
|
|
89
|
+
return promptBuilder()
|
|
90
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
91
|
+
.setRole('Structured Data Extraction Analyst')
|
|
92
|
+
.setTask('Analyze the document and extract metadata and specific properties defined in the output schema following the instructions.')
|
|
93
|
+
.setOutputSchema(schema)
|
|
94
|
+
.addInstructions({
|
|
95
|
+
'Field Specific Instructions': dataExtractionFields,
|
|
96
|
+
'Property Extraction': orderedList([
|
|
97
|
+
'You will be given a list of specific dynamic properties to look for.',
|
|
98
|
+
'Extract values *exactly* as they appear for strings.',
|
|
99
|
+
'Normalize numbers and dates to standard formats.',
|
|
100
|
+
'If a property is ambiguous, favor the value most prominent in the document layout.',
|
|
101
|
+
'If a property is missing, set its value to null.',
|
|
102
|
+
]),
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
// --- Collection Assignment ---
|
|
106
|
+
const ASSIGN_COLLECTION_FEW_SHOT = fewShotExamples([
|
|
107
|
+
{
|
|
108
|
+
input: {
|
|
109
|
+
document: { title: 'Invoice - Project Alpha', summary: 'Invoice for consulting services in Project Alpha.' },
|
|
110
|
+
collections: [{ id: 'col-1', name: 'Project Alpha' }, { id: 'col-2', name: 'General Finance' }],
|
|
111
|
+
},
|
|
112
|
+
output: { collectionIds: ['col-1'] },
|
|
113
|
+
reason: 'Direct match on project name.',
|
|
114
|
+
},
|
|
115
|
+
]);
|
|
116
|
+
export const assignCollectionSchema = object({ collectionIds: array(string()) });
|
|
117
|
+
export function createAssignCollectionPrompt() {
|
|
118
|
+
return promptBuilder()
|
|
119
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
120
|
+
.setRole('Digital Filing Assistant')
|
|
121
|
+
.setTask('Select the most appropriate collections for this document from the provided list following the instructions.')
|
|
122
|
+
.setOutputSchema(assignCollectionSchema, ASSIGN_COLLECTION_FEW_SHOT)
|
|
123
|
+
.addInstructions({
|
|
124
|
+
'Matching Logic': orderedList([
|
|
125
|
+
'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
|
|
126
|
+
'Semantic Fit: Determine if the document functionally belongs to a group.',
|
|
127
|
+
'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
|
|
128
|
+
]),
|
|
129
|
+
'Output Constraints': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
// --- Request Assignment ---
|
|
133
|
+
const ASSIGN_REQUEST_FEW_SHOT = fewShotExamples([
|
|
134
|
+
{
|
|
135
|
+
input: {
|
|
136
|
+
document: { title: 'Medical Certificate - John Doe', date: '2023-11-01' },
|
|
137
|
+
requests: [{ id: 'req-1', comment: 'Need medical certificate from November', collections: ['HR'] }],
|
|
138
|
+
},
|
|
139
|
+
output: { requestId: 'req-1' },
|
|
140
|
+
reason: 'Document satisfies the specific request criteria.',
|
|
141
|
+
},
|
|
142
|
+
]);
|
|
143
|
+
export const assignRequestSchema = object({ requestId: nullable(string()) });
|
|
144
|
+
export function createAssignRequestPrompt() {
|
|
145
|
+
return promptBuilder()
|
|
146
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
147
|
+
.setRole('Workflow Routing Agent')
|
|
148
|
+
.setTask('Evaluate the document against the list of open requests and find the best match following the instructions.')
|
|
149
|
+
.setOutputSchema(assignRequestSchema, ASSIGN_REQUEST_FEW_SHOT)
|
|
150
|
+
.addInstructions({
|
|
151
|
+
'Matching Rules': orderedList({
|
|
152
|
+
'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
|
|
153
|
+
'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
|
|
154
|
+
'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
|
|
155
|
+
}),
|
|
156
|
+
'Output Constraints': 'The ID of the matching request, or null if no request matches.',
|
|
157
|
+
});
|
|
158
|
+
}
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { type TstdlGenkitGenerationOptions } from '../../../ai/genkit/index.js';
|
|
2
|
-
import { type
|
|
2
|
+
import { type PromptBuilder } from '../../../ai/prompts/index.js';
|
|
3
3
|
import { type SchemaTestable } from '../../../schema/index.js';
|
|
4
4
|
import type { ObjectLiteral } from '../../../types/types.js';
|
|
5
5
|
import type { ModelReference } from 'genkit';
|
|
6
6
|
import type { AiConfiguration, DocumentPropertyDataType, InstructionOverride } from '../../models/index.js';
|
|
7
7
|
import { Document, DocumentWorkflowStep } from '../../models/index.js';
|
|
8
8
|
import { type AiConfigurationResolveDataMap } from './document-management-ai-provider.service.js';
|
|
9
|
+
import { dataExtractionFields } from './document-management-ai.prompts.js';
|
|
9
10
|
type DocumentDataExtractionPropertyResult = {
|
|
10
11
|
propertyId: string;
|
|
11
12
|
dataType: DocumentPropertyDataType;
|
|
@@ -19,19 +20,6 @@ export type DocumentDataExtractionResult = {
|
|
|
19
20
|
tags: string[];
|
|
20
21
|
properties: DocumentDataExtractionPropertyResult[];
|
|
21
22
|
};
|
|
22
|
-
declare const dataExtractionInstructionsRaw: {
|
|
23
|
-
'JSON Output': import("../../../ai/prompts/index.js").InstructionsList;
|
|
24
|
-
Role: string;
|
|
25
|
-
Task: string;
|
|
26
|
-
'Field Specific Instructions': {
|
|
27
|
-
Title: string;
|
|
28
|
-
Subtitle: string;
|
|
29
|
-
Summary: string;
|
|
30
|
-
Tags: string;
|
|
31
|
-
Date: string;
|
|
32
|
-
};
|
|
33
|
-
'Property Extraction': string[];
|
|
34
|
-
};
|
|
35
23
|
export declare class DocumentManagementAiService {
|
|
36
24
|
#private;
|
|
37
25
|
extractContent(tenantId: string, documentId: string): Promise<string>;
|
|
@@ -42,8 +30,7 @@ export declare class DocumentManagementAiService {
|
|
|
42
30
|
protected runAi<T, Step extends DocumentWorkflowStep>(tenantId: string, step: Step, stepData: AiConfigurationResolveDataMap[Step], options: {
|
|
43
31
|
targetId?: string;
|
|
44
32
|
defaultModel: ModelReference<any>;
|
|
45
|
-
|
|
46
|
-
user: Instructions;
|
|
33
|
+
promptBuilder: PromptBuilder;
|
|
47
34
|
data?: ObjectLiteral;
|
|
48
35
|
schema: SchemaTestable<T>;
|
|
49
36
|
document?: Document;
|
|
@@ -54,7 +41,7 @@ export declare class DocumentManagementAiService {
|
|
|
54
41
|
private mergeExtractionConfigs;
|
|
55
42
|
private resolveContextKeys;
|
|
56
43
|
}
|
|
57
|
-
export declare function mergeFieldInstructions(instructionsKey: keyof typeof
|
|
44
|
+
export declare function mergeFieldInstructions(instructionsKey: keyof typeof dataExtractionFields, field: keyof NonNullable<AiConfiguration['extraction']>, aiConfig: AiConfiguration): string;
|
|
58
45
|
export declare function mergeInstructions(base: string, overrides: (InstructionOverride | undefined)[], options?: {
|
|
59
46
|
formatTemplate?: (format: string) => string;
|
|
60
47
|
}): string;
|
|
@@ -9,12 +9,12 @@ var DocumentManagementAiService_1;
|
|
|
9
9
|
import { and, isNull as drizzleIsNull, eq, inArray } from 'drizzle-orm';
|
|
10
10
|
import { P, match } from 'ts-pattern';
|
|
11
11
|
import { genkitGenerationOptions, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
|
|
12
|
-
import {
|
|
12
|
+
import { formatInstructions, languagePrompt } from '../../../ai/prompts/index.js';
|
|
13
13
|
import { inject } from '../../../injector/inject.js';
|
|
14
14
|
import { Logger } from '../../../logger/logger.js';
|
|
15
15
|
import { arrayAgg } from '../../../orm/index.js';
|
|
16
16
|
import { injectRepository } from '../../../orm/server/index.js';
|
|
17
|
-
import { array, boolean,
|
|
17
|
+
import { array, boolean, integer, nullable, number, object, string } from '../../../schema/index.js';
|
|
18
18
|
import { distinct } from '../../../utils/array/index.js';
|
|
19
19
|
import { numericDateToDateTime, tryDateObjectToNumericDate } from '../../../utils/date-time.js';
|
|
20
20
|
import { fromEntries, hasOwnProperty, objectEntries, objectKeys } from '../../../utils/object/object.js';
|
|
@@ -26,104 +26,10 @@ import { DocumentCategoryTypeService } from './document-category-type.service.js
|
|
|
26
26
|
import { DocumentCollectionService } from './document-collection.service.js';
|
|
27
27
|
import { DocumentFileService } from './document-file.service.js';
|
|
28
28
|
import { DocumentManagementAiProviderService } from './document-management-ai-provider.service.js';
|
|
29
|
+
import { assignCollectionSchema, assignRequestSchema, contentExtractionSchema, createAssignCollectionPrompt, createAssignRequestPrompt, createClassifyPrompt, createClassifySchema, createContentExtractionPrompt, createDataExtractionPrompt, dataExtractionFields } from './document-management-ai.prompts.js';
|
|
29
30
|
import { DocumentPropertyService } from './document-property.service.js';
|
|
30
31
|
import { DocumentTagService } from './document-tag.service.js';
|
|
31
32
|
import { DocumentManagementSingleton } from './singleton.js';
|
|
32
|
-
// --- Instructions ---
|
|
33
|
-
const contentExtractionSystemInstructions = {
|
|
34
|
-
'Role': 'You are an expert OCR and Document Digitization engine.',
|
|
35
|
-
'Primary Objective': 'Convert the provided document into semantically structured, clean Markdown.',
|
|
36
|
-
'Critical Constraints': orderedList([
|
|
37
|
-
'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
|
|
38
|
-
'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
|
|
39
|
-
]),
|
|
40
|
-
'Formatting Rules': orderedList({
|
|
41
|
-
'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
|
|
42
|
-
'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
|
|
43
|
-
'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
|
|
44
|
-
'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
|
|
45
|
-
'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
|
|
46
|
-
'Columns': 'Read multi-column text as a single continuous flow.',
|
|
47
|
-
}),
|
|
48
|
-
'Complex Elements': {
|
|
49
|
-
'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
|
|
50
|
-
'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
|
|
51
|
-
'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
|
|
52
|
-
'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
|
|
53
|
-
},
|
|
54
|
-
'Page Handling': [
|
|
55
|
-
'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
|
|
56
|
-
'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
|
|
57
|
-
],
|
|
58
|
-
'Error Handling': [
|
|
59
|
-
'Mark illegible text as `[Illegible]`.',
|
|
60
|
-
'Mark cut-off text as `[Cut off]`.',
|
|
61
|
-
],
|
|
62
|
-
};
|
|
63
|
-
const contentExtractionUserInstructions = { Task: 'Transcribe the attached document into Markdown following the system instructions.' };
|
|
64
|
-
const classifySystemInstructions = {
|
|
65
|
-
'Role': 'You are a Document Taxonomy Specialist.',
|
|
66
|
-
'Task': `Analyze the visual layout and text content of the document to categorize it into exactly one of the provided hierarchical types.`,
|
|
67
|
-
'Input Context': 'You will be provided with a list of valid category labels (e.g., "Finance -> Invoice").',
|
|
68
|
-
'Analysis Strategy': orderedList([
|
|
69
|
-
'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
|
|
70
|
-
'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
|
|
71
|
-
'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
|
|
72
|
-
]),
|
|
73
|
-
'Selection Logic': orderedList([
|
|
74
|
-
'Exact Match: If the document explicitly states its type, select the corresponding category.',
|
|
75
|
-
'Content Match: If implicit, match the intent.',
|
|
76
|
-
'Specificity: Always choose the most specific leaf-node category available.',
|
|
77
|
-
'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
|
|
78
|
-
]),
|
|
79
|
-
...jsonOutputInstructions,
|
|
80
|
-
};
|
|
81
|
-
const classifyUserInstructions = { Task: 'Determine the single most accurate document type from the provided list based on the document following the system instructions.' };
|
|
82
|
-
const dataExtractionInstructionsRaw = {
|
|
83
|
-
'Role': 'You are a Structured Data Extraction Analyst.',
|
|
84
|
-
'Task': 'Analyze the document and extract metadata into the defined JSON schema.',
|
|
85
|
-
'Field Specific Instructions': {
|
|
86
|
-
Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
|
|
87
|
-
Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
|
|
88
|
-
Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
|
|
89
|
-
Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
|
|
90
|
-
Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
|
|
91
|
-
},
|
|
92
|
-
'Property Extraction': [
|
|
93
|
-
'You will be given a list of specific dynamic properties to look for.',
|
|
94
|
-
'Extract values *exactly* as they appear for strings.',
|
|
95
|
-
'Normalize numbers and dates to standard formats.',
|
|
96
|
-
'If a property is ambiguous, favor the value most prominent in the document layout.',
|
|
97
|
-
'If a property is missing, set its value to null.',
|
|
98
|
-
],
|
|
99
|
-
...jsonOutputInstructions,
|
|
100
|
-
};
|
|
101
|
-
const dataExtractionSystemInstructions = dataExtractionInstructionsRaw;
|
|
102
|
-
const dataExtractionUserInstructions = { Task: 'Analyze the document and extract metadata and specific properties defined in the output schema following the system instructions.' };
|
|
103
|
-
const assignCollectionSystemInstructions = {
|
|
104
|
-
'Role': 'You are a Digital Filing Assistant.',
|
|
105
|
-
'Task': `Assign the document to relevant collections based on its metadata and content.`,
|
|
106
|
-
'Input': 'Document Metadata and a list of Available Collections.',
|
|
107
|
-
'Matching Logic': orderedList([
|
|
108
|
-
'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
|
|
109
|
-
'Semantic Fit: Determine if the document functionally belongs to a group.',
|
|
110
|
-
'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
|
|
111
|
-
]),
|
|
112
|
-
'Output': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
|
|
113
|
-
};
|
|
114
|
-
const assignCollectionUserInstructions = { Task: 'Select the most appropriate collections for this document from the provided list following the system instructions.' };
|
|
115
|
-
const assignRequestSystemInstructions = {
|
|
116
|
-
'Role': 'You are a Workflow Routing Agent.',
|
|
117
|
-
'Task': 'Match the provided document to an existing Open Document Request.',
|
|
118
|
-
'Input': 'Document Metadata and a list of Open Requests.',
|
|
119
|
-
'Matching Rules': orderedList({
|
|
120
|
-
'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
|
|
121
|
-
'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
|
|
122
|
-
'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
|
|
123
|
-
}),
|
|
124
|
-
'Output': 'The ID of the matching request, or null if no request matches.',
|
|
125
|
-
};
|
|
126
|
-
const assignRequestUserInstructions = { Task: 'Evaluate the document against the list of open requests and find the best match following the system instructions.' };
|
|
127
33
|
let DocumentManagementAiService = DocumentManagementAiService_1 = class DocumentManagementAiService {
|
|
128
34
|
#genkit = injectGenkit();
|
|
129
35
|
#contentExtractionModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
|
|
@@ -147,9 +53,8 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
147
53
|
this.#logger.trace(`Extracting content from document ${document.id}`);
|
|
148
54
|
const result = await this.runAi(tenantId, DocumentWorkflowStep.ContentExtraction, { document }, {
|
|
149
55
|
defaultModel: this.#contentExtractionModel,
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
schema: object({ content: string() }),
|
|
56
|
+
promptBuilder: createContentExtractionPrompt(),
|
|
57
|
+
schema: contentExtractionSchema,
|
|
153
58
|
document,
|
|
154
59
|
});
|
|
155
60
|
const markdownBlockStripped = result.content.trim().replaceAll(/^```\w*\s*|```$/gi, '').trim();
|
|
@@ -166,19 +71,17 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
166
71
|
this.#logger.trace(`Classifying document ${document.id}`);
|
|
167
72
|
const stepData = { document, categories };
|
|
168
73
|
const aiConfig = await this.resolveAiConfiguration(tenantId, DocumentWorkflowStep.Classification, stepData);
|
|
169
|
-
const
|
|
170
|
-
|
|
171
|
-
|
|
74
|
+
const schema = createClassifySchema(typeLabels);
|
|
75
|
+
const promptBuilder = createClassifyPrompt(typeLabels);
|
|
76
|
+
if (isDefined(aiConfig.classification)) {
|
|
77
|
+
promptBuilder.addSystemInstructions({
|
|
172
78
|
'Classification Overrides': mergeInstructions('Follow these additional classification rules.', [aiConfig.classification]),
|
|
173
|
-
}
|
|
174
|
-
|
|
79
|
+
});
|
|
80
|
+
}
|
|
175
81
|
const result = await this.runAi(tenantId, DocumentWorkflowStep.Classification, stepData, {
|
|
176
82
|
defaultModel: this.#classifyModel,
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
schema: object({
|
|
180
|
-
documentType: enumeration(typeLabels),
|
|
181
|
-
}),
|
|
83
|
+
promptBuilder,
|
|
84
|
+
schema,
|
|
182
85
|
document,
|
|
183
86
|
config: { maxOutputTokens: 128 },
|
|
184
87
|
aiConfig,
|
|
@@ -232,18 +135,17 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
232
135
|
const override = (isNotNull(property.key) ? aiConfig.extraction?.properties?.[property.key] : undefined) ?? aiConfig.extraction?.properties?.[property.label];
|
|
233
136
|
return isDefined(override) ? mergeInstructions(`Extract value for property "${property.label}".`, [override]) : undefined;
|
|
234
137
|
}).filter(isDefined);
|
|
235
|
-
const
|
|
236
|
-
|
|
138
|
+
const promptBuilder = createDataExtractionPrompt(generationSchema);
|
|
139
|
+
promptBuilder.addInstructions({
|
|
237
140
|
'Field Specific Instructions': mergedFieldInstructions,
|
|
238
|
-
'Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
|
|
239
|
-
?
|
|
240
|
-
:
|
|
241
|
-
};
|
|
141
|
+
'Additional Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
|
|
142
|
+
? mergedPropertyInstructions
|
|
143
|
+
: [],
|
|
144
|
+
});
|
|
242
145
|
const extraction = await this.runAi(tenantId, DocumentWorkflowStep.DataExtraction, stepData, {
|
|
243
146
|
targetId: documentTypeEntity.key ?? undefined,
|
|
244
147
|
defaultModel: this.#dataExtractionModel,
|
|
245
|
-
|
|
246
|
-
user: dataExtractionUserInstructions,
|
|
148
|
+
promptBuilder,
|
|
247
149
|
data: { existingTags: tagLabels },
|
|
248
150
|
schema: generationSchema,
|
|
249
151
|
document,
|
|
@@ -302,10 +204,9 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
302
204
|
const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds }, {
|
|
303
205
|
targetId: documentTypeEntity.key ?? undefined,
|
|
304
206
|
defaultModel: this.#assignModel,
|
|
305
|
-
|
|
306
|
-
user: assignCollectionUserInstructions,
|
|
207
|
+
promptBuilder: createAssignCollectionPrompt(),
|
|
307
208
|
data: { document: documentData, documentProperties: fromEntries(propertyEntries), collections },
|
|
308
|
-
schema:
|
|
209
|
+
schema: assignCollectionSchema,
|
|
309
210
|
config: { maxOutputTokens: 512 },
|
|
310
211
|
});
|
|
311
212
|
return result.collectionIds;
|
|
@@ -349,10 +250,9 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
349
250
|
const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds: requestsCollectionIds }, {
|
|
350
251
|
targetId: documentTypeEntity.key ?? undefined,
|
|
351
252
|
defaultModel: this.#assignModel,
|
|
352
|
-
|
|
353
|
-
user: assignRequestUserInstructions,
|
|
253
|
+
promptBuilder: createAssignRequestPrompt(),
|
|
354
254
|
data: { document: documentData, documentProperties: fromEntries(propertyEntries), requests },
|
|
355
|
-
schema:
|
|
255
|
+
schema: assignRequestSchema,
|
|
356
256
|
config: { maxOutputTokens: 128 },
|
|
357
257
|
});
|
|
358
258
|
return result.requestId;
|
|
@@ -360,23 +260,34 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
360
260
|
async runAi(tenantId, step, stepData, options) {
|
|
361
261
|
const config = options.aiConfig ?? await this.resolveAiConfiguration(tenantId, step, stepData);
|
|
362
262
|
const model = config.model ?? options.defaultModel;
|
|
363
|
-
const
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
263
|
+
const builder = options.promptBuilder;
|
|
264
|
+
if (isDefined(config.language)) {
|
|
265
|
+
builder.addInstructions({ 'Output Language': languagePrompt(config.language) });
|
|
266
|
+
}
|
|
267
|
+
if (isDefined(options.data)) {
|
|
268
|
+
builder.addContext('Data', options.data);
|
|
269
|
+
}
|
|
270
|
+
if (isDefined(config.prompt?.systemAddition)) {
|
|
271
|
+
builder.addSystemInstructions({ 'Additional Instructions': config.prompt.systemAddition });
|
|
272
|
+
}
|
|
273
|
+
if (isDefined(config.prompt?.userAddition)) {
|
|
274
|
+
builder.addInstructions({ 'Additional Instructions': config.prompt.userAddition });
|
|
275
|
+
}
|
|
276
|
+
if (isDefined(config.prompt?.systemOverride)) {
|
|
277
|
+
builder.setSystemInstructionsOverride(config.prompt.systemOverride);
|
|
278
|
+
}
|
|
279
|
+
if (isDefined(config.prompt?.userOverride)) {
|
|
280
|
+
builder.setInstructionsOverride(config.prompt.userOverride);
|
|
281
|
+
}
|
|
282
|
+
if (isDefined(options.document)) {
|
|
283
|
+
builder.addMedia(await this.#documentFileService.getContent(options.document), options.document.mimeType);
|
|
284
|
+
}
|
|
374
285
|
const result = await this.#genkit.generate(genkitGenerationOptions({
|
|
375
286
|
model,
|
|
376
287
|
config: options.config,
|
|
377
288
|
output: { schema: options.schema },
|
|
378
|
-
system:
|
|
379
|
-
prompt:
|
|
289
|
+
system: builder.buildSystemPrompt(),
|
|
290
|
+
prompt: builder.buildUserPrompt(),
|
|
380
291
|
}));
|
|
381
292
|
if (isNull(result.output)) {
|
|
382
293
|
throw new Error(`AI returned null output for ${step} ${options.targetId ?? ''}`);
|
|
@@ -479,7 +390,7 @@ function tryAiOutputDateObjectToNumericDate(dateObject) {
|
|
|
479
390
|
return date;
|
|
480
391
|
}
|
|
481
392
|
export function mergeFieldInstructions(instructionsKey, field, aiConfig) {
|
|
482
|
-
return mergeInstructions(
|
|
393
|
+
return mergeInstructions(dataExtractionFields[instructionsKey], [aiConfig.extraction?.[field]], { formatTemplate: getFormatTemplate(field) });
|
|
483
394
|
}
|
|
484
395
|
export function mergeInstructions(base, overrides, options = {}) {
|
|
485
396
|
let result = base;
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { type Instructions } from '../../../ai/prompts/index.js';
|
|
2
2
|
import type { SchemaTestable } from '../../../schema/schema.js';
|
|
3
3
|
import type { AiConfiguration } from '../../models/index.js';
|
|
4
|
+
import { DocumentFileService } from '../services/document-file.service.js';
|
|
4
5
|
import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
|
|
5
6
|
import { DocumentValidationExecutor, type DocumentValidationExecutorContext, type DocumentValidationExecutorResult } from './validator.js';
|
|
6
7
|
export declare abstract class AiValidationExecutor<R> extends DocumentValidationExecutor {
|
|
8
|
+
protected readonly documentFileService: DocumentFileService;
|
|
7
9
|
protected readonly genkit: import("genkit").Genkit;
|
|
8
10
|
protected readonly baseModel: import("genkit").ModelReference<import("zod").ZodObject<{
|
|
9
11
|
version: import("zod").ZodOptional<import("zod").ZodString>;
|
|
@@ -1,16 +1,12 @@
|
|
|
1
1
|
import { convertToGenkitSchema, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
|
|
2
|
-
import {
|
|
2
|
+
import { promptBuilder } from '../../../ai/prompts/index.js';
|
|
3
3
|
import { inject } from '../../../injector/inject.js';
|
|
4
4
|
import { isDefined, isNull } from '../../../utils/type-guards.js';
|
|
5
|
+
import { DocumentFileService } from '../services/document-file.service.js';
|
|
5
6
|
import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
|
|
6
7
|
import { DocumentValidationExecutor } from './validator.js';
|
|
7
|
-
const systemPromptBase = {
|
|
8
|
-
Role: 'You are an expert in document validation.',
|
|
9
|
-
Task: 'Validate a document based on the provided validation instructions and document content.',
|
|
10
|
-
Objective: 'Analyze the document carefully and provide a structured validation result according to the defined schema.',
|
|
11
|
-
...jsonOutputInstructions,
|
|
12
|
-
};
|
|
13
8
|
export class AiValidationExecutor extends DocumentValidationExecutor {
|
|
9
|
+
documentFileService = inject(DocumentFileService);
|
|
14
10
|
genkit = injectGenkit();
|
|
15
11
|
baseModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
|
|
16
12
|
aiProvider = inject(DocumentManagementAiProviderService, undefined, { optional: true });
|
|
@@ -24,29 +20,42 @@ export class AiValidationExecutor extends DocumentValidationExecutor {
|
|
|
24
20
|
const validationInstructions = await this.getValidationInstructions(context);
|
|
25
21
|
const model = providerValidationConfig?.model ?? executorConfig.model ?? providerGlobalConfig?.defaults?.model ?? this.baseModel;
|
|
26
22
|
const language = providerValidationConfig?.language ?? executorConfig.language ?? providerGlobalConfig?.defaults?.language;
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
23
|
+
const documentContent = await this.documentFileService.getContent(context.document);
|
|
24
|
+
const builder = promptBuilder()
|
|
25
|
+
.setSystemRole('You are an expert in document validation.')
|
|
26
|
+
.setSystemTask('Validate a document based on the provided validation instructions and document content.')
|
|
27
|
+
.setTask('Validate the document based on the provided system and validation instructions and the document content.')
|
|
28
|
+
.addInstructions({ 'Validation Instructions': validationInstructions })
|
|
29
|
+
.setOutputSchema(this.schema)
|
|
30
|
+
.addMedia(documentContent, context.document.mimeType);
|
|
31
|
+
if (isDefined(language)) {
|
|
32
|
+
builder.setLanguage(language);
|
|
33
|
+
}
|
|
34
|
+
const systemAdditions = [
|
|
35
|
+
providerGlobalConfig?.defaults?.prompt?.systemAddition,
|
|
36
|
+
executorConfig.prompt?.systemAddition,
|
|
37
|
+
providerValidationConfig?.prompt?.systemAddition,
|
|
38
|
+
].filter(isDefined);
|
|
39
|
+
if (systemAdditions.length > 0) {
|
|
40
|
+
builder.addSystemInstructions({ 'Additional Instructions': systemAdditions });
|
|
41
|
+
}
|
|
42
|
+
const userAdditions = [
|
|
43
|
+
providerGlobalConfig?.defaults?.prompt?.userAddition,
|
|
44
|
+
executorConfig.prompt?.userAddition,
|
|
45
|
+
providerValidationConfig?.prompt?.userAddition,
|
|
46
|
+
].filter(isDefined);
|
|
47
|
+
if (userAdditions.length > 0) {
|
|
48
|
+
builder.addInstructions({ 'Additional Instructions': userAdditions });
|
|
49
|
+
}
|
|
50
|
+
const systemOverride = providerValidationConfig?.prompt?.systemOverride ?? executorConfig.prompt?.systemOverride ?? providerGlobalConfig?.defaults?.prompt?.systemOverride;
|
|
51
|
+
builder.setSystemInstructionsOverride(systemOverride);
|
|
52
|
+
const userOverride = providerValidationConfig?.prompt?.userOverride ?? executorConfig.prompt?.userOverride ?? providerGlobalConfig?.defaults?.prompt?.userOverride;
|
|
53
|
+
builder.setInstructionsOverride(userOverride);
|
|
45
54
|
const generation = await this.genkit.generate({
|
|
46
|
-
model
|
|
55
|
+
model,
|
|
47
56
|
output: { schema: convertToGenkitSchema(this.schema) },
|
|
48
|
-
system:
|
|
49
|
-
prompt:
|
|
57
|
+
system: builder.buildSystemPrompt(),
|
|
58
|
+
prompt: builder.buildUserPrompt(),
|
|
50
59
|
});
|
|
51
60
|
if (isNull(generation.output)) {
|
|
52
61
|
throw new Error('AI returned null output');
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tstdl/base",
|
|
3
|
-
"version": "0.93.
|
|
3
|
+
"version": "0.93.170",
|
|
4
4
|
"author": "Patrick Hein",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -152,8 +152,8 @@
|
|
|
152
152
|
"type-fest": "^5.5"
|
|
153
153
|
},
|
|
154
154
|
"peerDependencies": {
|
|
155
|
-
"@aws-sdk/client-s3": "^3.
|
|
156
|
-
"@aws-sdk/s3-request-presigner": "^3.
|
|
155
|
+
"@aws-sdk/client-s3": "^3.1014",
|
|
156
|
+
"@aws-sdk/s3-request-presigner": "^3.1014",
|
|
157
157
|
"@genkit-ai/google-genai": "^1.30",
|
|
158
158
|
"@google-cloud/storage": "^7.19",
|
|
159
159
|
"@toon-format/toon": "^2.1.0",
|
|
@@ -190,7 +190,7 @@
|
|
|
190
190
|
"@types/mjml": "4.7",
|
|
191
191
|
"@types/node": "25",
|
|
192
192
|
"@types/nodemailer": "7.0",
|
|
193
|
-
"@types/pg": "8.
|
|
193
|
+
"@types/pg": "8.20",
|
|
194
194
|
"@vitest/coverage-v8": "4.1",
|
|
195
195
|
"@vitest/ui": "4.1",
|
|
196
196
|
"concurrently": "9.2",
|
|
@@ -205,12 +205,5 @@
|
|
|
205
205
|
"typescript-eslint": "8.57",
|
|
206
206
|
"vite-tsconfig-paths": "6.1",
|
|
207
207
|
"vitest": "4.1"
|
|
208
|
-
},
|
|
209
|
-
"overrides": {
|
|
210
|
-
"drizzle-kit": {
|
|
211
|
-
"@esbuild-kit/esm-loader": "^2.6",
|
|
212
|
-
"esbuild": "^0.25",
|
|
213
|
-
"esbuild-register": "^3.6"
|
|
214
|
-
}
|
|
215
208
|
}
|
|
216
209
|
}
|