@tstdl/base 0.93.168 → 0.93.171
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai/prompts/prompt-builder.d.ts +6 -2
- package/ai/prompts/prompt-builder.js +44 -5
- package/ai/prompts/steering.d.ts +2 -1
- package/ai/prompts/steering.js +3 -0
- package/document-management/models/ai-configuration.d.ts +4 -0
- package/document-management/server/services/document-management-ai.prompts.d.ts +27 -0
- package/document-management/server/services/document-management-ai.prompts.js +158 -0
- package/document-management/server/services/document-management-ai.service.d.ts +5 -18
- package/document-management/server/services/document-management-ai.service.js +75 -152
- package/document-management/server/validators/ai-validation-executor.d.ts +2 -0
- package/document-management/server/validators/ai-validation-executor.js +37 -28
- package/examples/document-management/ai-provider.d.ts +1 -0
- package/examples/document-management/ai-provider.js +5 -4
- package/examples/document-management/main.js +1 -1
- package/package.json +4 -11
|
@@ -2,6 +2,7 @@ import type { Part } from 'genkit';
|
|
|
2
2
|
import type { SchemaTestable } from '../../schema/schema.js';
|
|
3
3
|
import type { ObjectLiteral } from '../../types/index.js';
|
|
4
4
|
import { type Instructions } from './instructions-formatter.js';
|
|
5
|
+
import { type FewShotExample } from './steering.js';
|
|
5
6
|
export type PromptBuilderInstructions = Record<string, Instructions>;
|
|
6
7
|
export type PromptBuilderContext = Record<string, PromptBuilderContextItem>;
|
|
7
8
|
export type PromptBuilderContextItem = ObjectLiteral;
|
|
@@ -11,8 +12,11 @@ export declare class PromptBuilder {
|
|
|
11
12
|
setRole(role: string): this;
|
|
12
13
|
setSystemTask(task: string): this;
|
|
13
14
|
setTask(task: string): this;
|
|
14
|
-
setSystemOutputSchema(schema: SchemaTestable): this;
|
|
15
|
-
setOutputSchema(schema: SchemaTestable): this;
|
|
15
|
+
setSystemOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
|
|
16
|
+
setOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
|
|
17
|
+
setSystemInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
|
|
18
|
+
setInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
|
|
19
|
+
setLanguage(language: string): this;
|
|
16
20
|
addSystemMedia(content: Uint8Array, mimeType: string): this;
|
|
17
21
|
addMedia(content: Uint8Array, mimeType: string): this;
|
|
18
22
|
addSystemInstructions(instructions: Record<string, Instructions>): this;
|
|
@@ -3,7 +3,8 @@ import { encodeBase64 } from '../../utils/base64.js';
|
|
|
3
3
|
import { fromEntries, objectEntries, objectKeys } from '../../utils/object/index.js';
|
|
4
4
|
import { assertObjectPass, isDefined, isString, isUndefined } from '../../utils/type-guards.js';
|
|
5
5
|
import { formatData } from './format.js';
|
|
6
|
-
import { formatInstructions, sections } from './instructions-formatter.js';
|
|
6
|
+
import { formatInstructions, sections, unorderedList } from './instructions-formatter.js';
|
|
7
|
+
import { fewShotPrompt, languagePrompt } from './steering.js';
|
|
7
8
|
export class PromptBuilder {
|
|
8
9
|
#systemMedia = [];
|
|
9
10
|
#media = [];
|
|
@@ -12,11 +13,16 @@ export class PromptBuilder {
|
|
|
12
13
|
#systemTask;
|
|
13
14
|
#task;
|
|
14
15
|
#systemOutputSchema;
|
|
16
|
+
#systemOutputExamples;
|
|
15
17
|
#outputSchema;
|
|
18
|
+
#outputExamples;
|
|
16
19
|
#systemInstructions = {};
|
|
17
20
|
#instructions = {};
|
|
18
21
|
#systemContextParts = {};
|
|
19
22
|
#contextParts = {};
|
|
23
|
+
#language;
|
|
24
|
+
#systemInstructionsOverride;
|
|
25
|
+
#instructionsOverride;
|
|
20
26
|
setSystemRole(role) {
|
|
21
27
|
this.#systemRole = role;
|
|
22
28
|
return this;
|
|
@@ -33,12 +39,26 @@ export class PromptBuilder {
|
|
|
33
39
|
this.#task = task;
|
|
34
40
|
return this;
|
|
35
41
|
}
|
|
36
|
-
setSystemOutputSchema(schema) {
|
|
42
|
+
setSystemOutputSchema(schema, examples) {
|
|
37
43
|
this.#systemOutputSchema = schema;
|
|
44
|
+
this.#systemOutputExamples = examples;
|
|
38
45
|
return this;
|
|
39
46
|
}
|
|
40
|
-
setOutputSchema(schema) {
|
|
47
|
+
setOutputSchema(schema, examples) {
|
|
41
48
|
this.#outputSchema = schema;
|
|
49
|
+
this.#outputExamples = examples;
|
|
50
|
+
return this;
|
|
51
|
+
}
|
|
52
|
+
setSystemInstructionsOverride(override) {
|
|
53
|
+
this.#systemInstructionsOverride = override;
|
|
54
|
+
return this;
|
|
55
|
+
}
|
|
56
|
+
setInstructionsOverride(override) {
|
|
57
|
+
this.#instructionsOverride = override;
|
|
58
|
+
return this;
|
|
59
|
+
}
|
|
60
|
+
setLanguage(language) {
|
|
61
|
+
this.#language = language;
|
|
42
62
|
return this;
|
|
43
63
|
}
|
|
44
64
|
addSystemMedia(content, mimeType) {
|
|
@@ -79,8 +99,11 @@ export class PromptBuilder {
|
|
|
79
99
|
context: this.#systemContextParts,
|
|
80
100
|
instructions: this.#systemInstructions,
|
|
81
101
|
outputSchema: this.#systemOutputSchema,
|
|
102
|
+
outputExamples: this.#systemOutputExamples,
|
|
82
103
|
task: this.#systemTask,
|
|
83
104
|
media: this.#systemMedia,
|
|
105
|
+
language: this.#language,
|
|
106
|
+
instructionsOverride: this.#systemInstructionsOverride,
|
|
84
107
|
});
|
|
85
108
|
}
|
|
86
109
|
buildUserPrompt() {
|
|
@@ -89,8 +112,11 @@ export class PromptBuilder {
|
|
|
89
112
|
context: this.#contextParts,
|
|
90
113
|
instructions: this.#instructions,
|
|
91
114
|
outputSchema: this.#outputSchema,
|
|
115
|
+
outputExamples: this.#outputExamples,
|
|
92
116
|
task: this.#task,
|
|
93
117
|
media: this.#media,
|
|
118
|
+
language: this.#language,
|
|
119
|
+
instructionsOverride: this.#instructionsOverride,
|
|
94
120
|
});
|
|
95
121
|
}
|
|
96
122
|
}
|
|
@@ -119,12 +145,25 @@ function buildPrompt(data) {
|
|
|
119
145
|
}
|
|
120
146
|
if (isDefined(data.outputSchema)) {
|
|
121
147
|
const schema = convertToOpenApiSchema(data.outputSchema);
|
|
122
|
-
|
|
148
|
+
const schemaJson = JSON.stringify(schema, null, 2);
|
|
149
|
+
instructions['**Output Schema**'] = `\`\`\`json\n${schemaJson}\n\`\`\``;
|
|
150
|
+
instructions['**Output Schema Instructions**'] = unorderedList({
|
|
151
|
+
'Schema Compliance': 'Generate valid JSON that strictly matches the provided schema.',
|
|
152
|
+
'Nullable fields with missing data': 'Must be set to literal `null`, not the string "null".',
|
|
153
|
+
'Optional fields with missing data': 'Omit the key entirely (sparse JSON).',
|
|
154
|
+
});
|
|
155
|
+
if (isDefined(data.outputExamples) && (data.outputExamples.length > 0)) {
|
|
156
|
+
instructions['**Output Examples**'] = fewShotPrompt(data.outputExamples);
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
if (isDefined(data.language)) {
|
|
160
|
+
instructions['**Output Language**'] = languagePrompt(data.language);
|
|
123
161
|
}
|
|
124
162
|
if (isDefined(data.task)) {
|
|
125
163
|
instructions['**Task**'] = data.task;
|
|
126
164
|
}
|
|
127
|
-
const
|
|
165
|
+
const instructionsWithOverride = data.instructionsOverride?.(instructions) ?? instructions;
|
|
166
|
+
const formattedInstructions = isString(instructionsWithOverride) ? instructionsWithOverride : formatInstructions(instructionsWithOverride);
|
|
128
167
|
return [
|
|
129
168
|
...(data.media ?? []),
|
|
130
169
|
{ text: formattedInstructions },
|
package/ai/prompts/steering.d.ts
CHANGED
|
@@ -7,12 +7,13 @@ export type FewShotExample<Input = ObjectLiteral, Output = ObjectLiteral> = {
|
|
|
7
7
|
/** Optional reason explaining why this example is positive or negative. */
|
|
8
8
|
reason?: string;
|
|
9
9
|
};
|
|
10
|
+
export declare function fewShotExamples<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): FewShotExample<Input, Output>[];
|
|
10
11
|
/**
|
|
11
12
|
* Creates a prompt addition for few-shot learning.
|
|
12
13
|
* @param examples An array of input/output pairs.
|
|
13
14
|
* @returns A formatted few-shot prompt.
|
|
14
15
|
*/
|
|
15
|
-
export declare function fewShotPrompt<Input = ObjectLiteral, Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
|
|
16
|
+
export declare function fewShotPrompt<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
|
|
16
17
|
Examples: string;
|
|
17
18
|
} | {
|
|
18
19
|
Examples: import("./instructions-formatter.js").InstructionsList;
|
package/ai/prompts/steering.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { fromEntries } from '../../utils/object/object.js';
|
|
2
2
|
import { isDefined, isString } from '../../utils/type-guards.js';
|
|
3
3
|
import { formatInstructions, orderedList, unorderedList } from './instructions-formatter.js';
|
|
4
|
+
export function fewShotExamples(examples) {
|
|
5
|
+
return examples;
|
|
6
|
+
}
|
|
4
7
|
/**
|
|
5
8
|
* Creates a prompt addition for few-shot learning.
|
|
6
9
|
* @param examples An array of input/output pairs.
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import type { ModelReference } from 'genkit';
|
|
2
2
|
import type { Instructions } from '../../ai/prompts/instructions-formatter.js';
|
|
3
3
|
import type { DocumentWorkflowStep } from './document-workflow.model.js';
|
|
4
|
+
import type { GeminiModelConfig } from '../../ai/index.js';
|
|
4
5
|
export type InstructionStrategy = 'replace' | 'append';
|
|
6
|
+
export type DocumentManagementThinkingLevel = NonNullable<GeminiModelConfig['thinkingConfig']>['thinkingLevel'];
|
|
5
7
|
export type InstructionOverride = string | {
|
|
6
8
|
/**
|
|
7
9
|
* Simple way: Provide a specific format pattern.
|
|
@@ -20,6 +22,8 @@ export type InstructionOverride = string | {
|
|
|
20
22
|
export type AiConfiguration = {
|
|
21
23
|
/** The model to use. */
|
|
22
24
|
model?: ModelReference<any>;
|
|
25
|
+
/** The thinking level to use for thinking models. */
|
|
26
|
+
thinkingLevel?: DocumentManagementThinkingLevel;
|
|
23
27
|
/** The language to use for AI outputs. */
|
|
24
28
|
language?: string;
|
|
25
29
|
/** Prompt overrides. */
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { type PromptBuilder } from '../../../ai/prompts/index.js';
|
|
2
|
+
import { type SchemaTestable } from '../../../schema/index.js';
|
|
3
|
+
export declare const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = "You are a highly precise, analytical Document Management Specialist.";
|
|
4
|
+
export declare const contentExtractionSchema: import("../../../schema/index.js").ObjectSchema<{
|
|
5
|
+
content: string;
|
|
6
|
+
}>;
|
|
7
|
+
export declare function createContentExtractionPrompt(): PromptBuilder;
|
|
8
|
+
export declare function createClassifySchema(validTypes: string[]): SchemaTestable<{
|
|
9
|
+
documentType: string;
|
|
10
|
+
}>;
|
|
11
|
+
export declare function createClassifyPrompt(validTypes: string[]): PromptBuilder;
|
|
12
|
+
export declare const dataExtractionFields: {
|
|
13
|
+
Title: string;
|
|
14
|
+
Subtitle: string;
|
|
15
|
+
Summary: string;
|
|
16
|
+
Tags: string;
|
|
17
|
+
Date: string;
|
|
18
|
+
};
|
|
19
|
+
export declare function createDataExtractionPrompt(schema: SchemaTestable): PromptBuilder;
|
|
20
|
+
export declare const assignCollectionSchema: import("../../../schema/index.js").ObjectSchema<{
|
|
21
|
+
collectionIds: string[];
|
|
22
|
+
}>;
|
|
23
|
+
export declare function createAssignCollectionPrompt(): PromptBuilder;
|
|
24
|
+
export declare const assignRequestSchema: import("../../../schema/index.js").ObjectSchema<{
|
|
25
|
+
requestId: string | null;
|
|
26
|
+
}>;
|
|
27
|
+
export declare function createAssignRequestPrompt(): PromptBuilder;
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { orderedList, promptBuilder, unorderedList } from '../../../ai/prompts/index.js';
|
|
2
|
+
import { fewShotExamples } from '../../../ai/prompts/steering.js';
|
|
3
|
+
import { array, enumeration, nullable, object, string } from '../../../schema/index.js';
|
|
4
|
+
export const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = 'You are a highly precise, analytical Document Management Specialist.';
|
|
5
|
+
// --- Content Extraction ---
|
|
6
|
+
export const contentExtractionSchema = object({ content: string() });
|
|
7
|
+
export function createContentExtractionPrompt() {
|
|
8
|
+
return promptBuilder()
|
|
9
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
10
|
+
.setTask('Transcribe the attached document into Markdown following the instructions.')
|
|
11
|
+
.setOutputSchema(contentExtractionSchema)
|
|
12
|
+
.addInstructions({
|
|
13
|
+
'Objective': 'Convert the provided document into semantically structured, clean Markdown.',
|
|
14
|
+
'Critical Constraints': orderedList([
|
|
15
|
+
'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
|
|
16
|
+
'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
|
|
17
|
+
]),
|
|
18
|
+
'Formatting Rules': orderedList({
|
|
19
|
+
'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
|
|
20
|
+
'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
|
|
21
|
+
'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
|
|
22
|
+
'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
|
|
23
|
+
'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
|
|
24
|
+
'Columns': 'Read multi-column text as a single continuous flow.',
|
|
25
|
+
}),
|
|
26
|
+
'Complex Elements': {
|
|
27
|
+
'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
|
|
28
|
+
'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
|
|
29
|
+
'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
|
|
30
|
+
'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
|
|
31
|
+
},
|
|
32
|
+
'Page Handling': [
|
|
33
|
+
'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
|
|
34
|
+
'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
|
|
35
|
+
],
|
|
36
|
+
'Error Handling': [
|
|
37
|
+
'Mark illegible text as `[Illegible]`.',
|
|
38
|
+
'Mark cut-off text as `[Cut off]`.',
|
|
39
|
+
],
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
// --- Classification ---
|
|
43
|
+
const CLASSIFY_FEW_SHOT = fewShotExamples([
|
|
44
|
+
{
|
|
45
|
+
input: 'Document that contains "Invoice", a table with items, and a "Total Due" amount.',
|
|
46
|
+
output: { documentType: 'Finance -> Invoice' },
|
|
47
|
+
reason: 'Explicit keyword and layout match.',
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
input: 'Document that contains "Rental Agreement", multiple paragraphs about obligations, and signatures at the end.',
|
|
51
|
+
output: { documentType: 'Legal -> Contract' },
|
|
52
|
+
reason: 'Structural and entity-based match.',
|
|
53
|
+
},
|
|
54
|
+
]);
|
|
55
|
+
export function createClassifySchema(validTypes) {
|
|
56
|
+
return object({ documentType: enumeration(validTypes) });
|
|
57
|
+
}
|
|
58
|
+
export function createClassifyPrompt(validTypes) {
|
|
59
|
+
const schema = createClassifySchema(validTypes);
|
|
60
|
+
return promptBuilder()
|
|
61
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
62
|
+
.setRole('Document Taxonomy Specialist')
|
|
63
|
+
.setTask('Determine the single most accurate document type from the provided list based on the document.')
|
|
64
|
+
.setOutputSchema(schema, CLASSIFY_FEW_SHOT)
|
|
65
|
+
.addInstructions({
|
|
66
|
+
'Analysis Strategy': orderedList([
|
|
67
|
+
'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
|
|
68
|
+
'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
|
|
69
|
+
'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
|
|
70
|
+
]),
|
|
71
|
+
'Selection Logic': orderedList([
|
|
72
|
+
'Exact Match: If the document explicitly states its type, select the corresponding category.',
|
|
73
|
+
'Content Match: If implicit, match the intent.',
|
|
74
|
+
'Specificity: Always choose the most specific leaf-node category available.',
|
|
75
|
+
'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
|
|
76
|
+
]),
|
|
77
|
+
'Valid category labels': unorderedList(validTypes),
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
// --- Data Extraction ---
|
|
81
|
+
export const dataExtractionFields = {
|
|
82
|
+
Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
|
|
83
|
+
Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
|
|
84
|
+
Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
|
|
85
|
+
Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
|
|
86
|
+
Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
|
|
87
|
+
};
|
|
88
|
+
export function createDataExtractionPrompt(schema) {
|
|
89
|
+
return promptBuilder()
|
|
90
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
91
|
+
.setRole('Structured Data Extraction Analyst')
|
|
92
|
+
.setTask('Analyze the document and extract metadata and specific properties defined in the output schema following the instructions.')
|
|
93
|
+
.setOutputSchema(schema)
|
|
94
|
+
.addInstructions({
|
|
95
|
+
'Field Specific Instructions': dataExtractionFields,
|
|
96
|
+
'Property Extraction': orderedList([
|
|
97
|
+
'You will be given a list of specific dynamic properties to look for.',
|
|
98
|
+
'Extract values *exactly* as they appear for strings.',
|
|
99
|
+
'Normalize numbers and dates to standard formats.',
|
|
100
|
+
'If a property is ambiguous, favor the value most prominent in the document layout.',
|
|
101
|
+
'If a property is missing, set its value to null.',
|
|
102
|
+
]),
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
// --- Collection Assignment ---
|
|
106
|
+
const ASSIGN_COLLECTION_FEW_SHOT = fewShotExamples([
|
|
107
|
+
{
|
|
108
|
+
input: {
|
|
109
|
+
document: { title: 'Invoice - Project Alpha', summary: 'Invoice for consulting services in Project Alpha.' },
|
|
110
|
+
collections: [{ id: 'col-1', name: 'Project Alpha' }, { id: 'col-2', name: 'General Finance' }],
|
|
111
|
+
},
|
|
112
|
+
output: { collectionIds: ['col-1'] },
|
|
113
|
+
reason: 'Direct match on project name.',
|
|
114
|
+
},
|
|
115
|
+
]);
|
|
116
|
+
export const assignCollectionSchema = object({ collectionIds: array(string()) });
|
|
117
|
+
export function createAssignCollectionPrompt() {
|
|
118
|
+
return promptBuilder()
|
|
119
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
120
|
+
.setRole('Digital Filing Assistant')
|
|
121
|
+
.setTask('Select the most appropriate collections for this document from the provided list following the instructions.')
|
|
122
|
+
.setOutputSchema(assignCollectionSchema, ASSIGN_COLLECTION_FEW_SHOT)
|
|
123
|
+
.addInstructions({
|
|
124
|
+
'Matching Logic': orderedList([
|
|
125
|
+
'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
|
|
126
|
+
'Semantic Fit: Determine if the document functionally belongs to a group.',
|
|
127
|
+
'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
|
|
128
|
+
]),
|
|
129
|
+
'Output Constraints': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
// --- Request Assignment ---
|
|
133
|
+
const ASSIGN_REQUEST_FEW_SHOT = fewShotExamples([
|
|
134
|
+
{
|
|
135
|
+
input: {
|
|
136
|
+
document: { title: 'Medical Certificate - John Doe', date: '2023-11-01' },
|
|
137
|
+
requests: [{ id: 'req-1', comment: 'Need medical certificate from November', collections: ['HR'] }],
|
|
138
|
+
},
|
|
139
|
+
output: { requestId: 'req-1' },
|
|
140
|
+
reason: 'Document satisfies the specific request criteria.',
|
|
141
|
+
},
|
|
142
|
+
]);
|
|
143
|
+
export const assignRequestSchema = object({ requestId: nullable(string()) });
|
|
144
|
+
export function createAssignRequestPrompt() {
|
|
145
|
+
return promptBuilder()
|
|
146
|
+
.setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
|
|
147
|
+
.setRole('Workflow Routing Agent')
|
|
148
|
+
.setTask('Evaluate the document against the list of open requests and find the best match following the instructions.')
|
|
149
|
+
.setOutputSchema(assignRequestSchema, ASSIGN_REQUEST_FEW_SHOT)
|
|
150
|
+
.addInstructions({
|
|
151
|
+
'Matching Rules': orderedList({
|
|
152
|
+
'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
|
|
153
|
+
'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
|
|
154
|
+
'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
|
|
155
|
+
}),
|
|
156
|
+
'Output Constraints': 'The ID of the matching request, or null if no request matches.',
|
|
157
|
+
});
|
|
158
|
+
}
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { type TstdlGenkitGenerationOptions } from '../../../ai/genkit/index.js';
|
|
2
|
-
import { type
|
|
2
|
+
import { type PromptBuilder } from '../../../ai/prompts/index.js';
|
|
3
3
|
import { type SchemaTestable } from '../../../schema/index.js';
|
|
4
4
|
import type { ObjectLiteral } from '../../../types/types.js';
|
|
5
|
-
import type
|
|
5
|
+
import { type ModelReference } from 'genkit';
|
|
6
6
|
import type { AiConfiguration, DocumentPropertyDataType, InstructionOverride } from '../../models/index.js';
|
|
7
7
|
import { Document, DocumentWorkflowStep } from '../../models/index.js';
|
|
8
8
|
import { type AiConfigurationResolveDataMap } from './document-management-ai-provider.service.js';
|
|
9
|
+
import { dataExtractionFields } from './document-management-ai.prompts.js';
|
|
9
10
|
type DocumentDataExtractionPropertyResult = {
|
|
10
11
|
propertyId: string;
|
|
11
12
|
dataType: DocumentPropertyDataType;
|
|
@@ -19,19 +20,6 @@ export type DocumentDataExtractionResult = {
|
|
|
19
20
|
tags: string[];
|
|
20
21
|
properties: DocumentDataExtractionPropertyResult[];
|
|
21
22
|
};
|
|
22
|
-
declare const dataExtractionInstructionsRaw: {
|
|
23
|
-
'JSON Output': import("../../../ai/prompts/index.js").InstructionsList;
|
|
24
|
-
Role: string;
|
|
25
|
-
Task: string;
|
|
26
|
-
'Field Specific Instructions': {
|
|
27
|
-
Title: string;
|
|
28
|
-
Subtitle: string;
|
|
29
|
-
Summary: string;
|
|
30
|
-
Tags: string;
|
|
31
|
-
Date: string;
|
|
32
|
-
};
|
|
33
|
-
'Property Extraction': string[];
|
|
34
|
-
};
|
|
35
23
|
export declare class DocumentManagementAiService {
|
|
36
24
|
#private;
|
|
37
25
|
extractContent(tenantId: string, documentId: string): Promise<string>;
|
|
@@ -42,8 +30,7 @@ export declare class DocumentManagementAiService {
|
|
|
42
30
|
protected runAi<T, Step extends DocumentWorkflowStep>(tenantId: string, step: Step, stepData: AiConfigurationResolveDataMap[Step], options: {
|
|
43
31
|
targetId?: string;
|
|
44
32
|
defaultModel: ModelReference<any>;
|
|
45
|
-
|
|
46
|
-
user: Instructions;
|
|
33
|
+
promptBuilder: PromptBuilder;
|
|
47
34
|
data?: ObjectLiteral;
|
|
48
35
|
schema: SchemaTestable<T>;
|
|
49
36
|
document?: Document;
|
|
@@ -54,7 +41,7 @@ export declare class DocumentManagementAiService {
|
|
|
54
41
|
private mergeExtractionConfigs;
|
|
55
42
|
private resolveContextKeys;
|
|
56
43
|
}
|
|
57
|
-
export declare function mergeFieldInstructions(instructionsKey: keyof typeof
|
|
44
|
+
export declare function mergeFieldInstructions(instructionsKey: keyof typeof dataExtractionFields, field: keyof NonNullable<AiConfiguration['extraction']>, aiConfig: AiConfiguration): string;
|
|
58
45
|
export declare function mergeInstructions(base: string, overrides: (InstructionOverride | undefined)[], options?: {
|
|
59
46
|
formatTemplate?: (format: string) => string;
|
|
60
47
|
}): string;
|
|
@@ -9,121 +9,27 @@ var DocumentManagementAiService_1;
|
|
|
9
9
|
import { and, isNull as drizzleIsNull, eq, inArray } from 'drizzle-orm';
|
|
10
10
|
import { P, match } from 'ts-pattern';
|
|
11
11
|
import { genkitGenerationOptions, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
|
|
12
|
-
import {
|
|
12
|
+
import { formatInstructions, languagePrompt } from '../../../ai/prompts/index.js';
|
|
13
13
|
import { inject } from '../../../injector/inject.js';
|
|
14
14
|
import { Logger } from '../../../logger/logger.js';
|
|
15
15
|
import { arrayAgg } from '../../../orm/index.js';
|
|
16
16
|
import { injectRepository } from '../../../orm/server/index.js';
|
|
17
|
-
import { array, boolean,
|
|
17
|
+
import { array, boolean, integer, nullable, number, object, string } from '../../../schema/index.js';
|
|
18
18
|
import { distinct } from '../../../utils/array/index.js';
|
|
19
19
|
import { numericDateToDateTime, tryDateObjectToNumericDate } from '../../../utils/date-time.js';
|
|
20
20
|
import { fromEntries, hasOwnProperty, objectEntries, objectKeys } from '../../../utils/object/object.js';
|
|
21
21
|
import { assertDefined, assertDefinedPass, assertNotNull, isDefined, isNotNull, isNotNullOrUndefined, isNull, isString, isUndefined } from '../../../utils/type-guards.js';
|
|
22
|
+
import { GenkitError } from 'genkit';
|
|
22
23
|
import { Document, DocumentProperty, DocumentRequestState, DocumentType, DocumentTypeProperty, DocumentWorkflowStep } from '../../models/index.js';
|
|
23
|
-
import { DocumentManagementConfiguration } from '../module.js';
|
|
24
24
|
import { documentCategory, documentRequest, documentRequestCollectionAssignment, documentType } from '../schemas.js';
|
|
25
25
|
import { DocumentCategoryTypeService } from './document-category-type.service.js';
|
|
26
26
|
import { DocumentCollectionService } from './document-collection.service.js';
|
|
27
27
|
import { DocumentFileService } from './document-file.service.js';
|
|
28
28
|
import { DocumentManagementAiProviderService } from './document-management-ai-provider.service.js';
|
|
29
|
+
import { assignCollectionSchema, assignRequestSchema, contentExtractionSchema, createAssignCollectionPrompt, createAssignRequestPrompt, createClassifyPrompt, createClassifySchema, createContentExtractionPrompt, createDataExtractionPrompt, dataExtractionFields } from './document-management-ai.prompts.js';
|
|
29
30
|
import { DocumentPropertyService } from './document-property.service.js';
|
|
30
31
|
import { DocumentTagService } from './document-tag.service.js';
|
|
31
32
|
import { DocumentManagementSingleton } from './singleton.js';
|
|
32
|
-
// --- Instructions ---
|
|
33
|
-
const contentExtractionSystemInstructions = {
|
|
34
|
-
'Role': 'You are an expert OCR and Document Digitization engine.',
|
|
35
|
-
'Primary Objective': 'Convert the provided document into semantically structured, clean Markdown.',
|
|
36
|
-
'Critical Constraints': orderedList([
|
|
37
|
-
'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
|
|
38
|
-
'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
|
|
39
|
-
]),
|
|
40
|
-
'Formatting Rules': orderedList({
|
|
41
|
-
'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
|
|
42
|
-
'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
|
|
43
|
-
'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
|
|
44
|
-
'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
|
|
45
|
-
'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
|
|
46
|
-
'Columns': 'Read multi-column text as a single continuous flow.',
|
|
47
|
-
}),
|
|
48
|
-
'Complex Elements': {
|
|
49
|
-
'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
|
|
50
|
-
'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
|
|
51
|
-
'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
|
|
52
|
-
'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
|
|
53
|
-
},
|
|
54
|
-
'Page Handling': [
|
|
55
|
-
'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
|
|
56
|
-
'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
|
|
57
|
-
],
|
|
58
|
-
'Error Handling': [
|
|
59
|
-
'Mark illegible text as `[Illegible]`.',
|
|
60
|
-
'Mark cut-off text as `[Cut off]`.',
|
|
61
|
-
],
|
|
62
|
-
};
|
|
63
|
-
const contentExtractionUserInstructions = { Task: 'Transcribe the attached document into Markdown following the system instructions.' };
|
|
64
|
-
const classifySystemInstructions = {
|
|
65
|
-
'Role': 'You are a Document Taxonomy Specialist.',
|
|
66
|
-
'Task': `Analyze the visual layout and text content of the document to categorize it into exactly one of the provided hierarchical types.`,
|
|
67
|
-
'Input Context': 'You will be provided with a list of valid category labels (e.g., "Finance -> Invoice").',
|
|
68
|
-
'Analysis Strategy': orderedList([
|
|
69
|
-
'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
|
|
70
|
-
'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
|
|
71
|
-
'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
|
|
72
|
-
]),
|
|
73
|
-
'Selection Logic': orderedList([
|
|
74
|
-
'Exact Match: If the document explicitly states its type, select the corresponding category.',
|
|
75
|
-
'Content Match: If implicit, match the intent.',
|
|
76
|
-
'Specificity: Always choose the most specific leaf-node category available.',
|
|
77
|
-
'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
|
|
78
|
-
]),
|
|
79
|
-
...jsonOutputInstructions,
|
|
80
|
-
};
|
|
81
|
-
const classifyUserInstructions = { Task: 'Determine the single most accurate document type from the provided list based on the document following the system instructions.' };
|
|
82
|
-
const dataExtractionInstructionsRaw = {
|
|
83
|
-
'Role': 'You are a Structured Data Extraction Analyst.',
|
|
84
|
-
'Task': 'Analyze the document and extract metadata into the defined JSON schema.',
|
|
85
|
-
'Field Specific Instructions': {
|
|
86
|
-
Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
|
|
87
|
-
Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
|
|
88
|
-
Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
|
|
89
|
-
Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
|
|
90
|
-
Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
|
|
91
|
-
},
|
|
92
|
-
'Property Extraction': [
|
|
93
|
-
'You will be given a list of specific dynamic properties to look for.',
|
|
94
|
-
'Extract values *exactly* as they appear for strings.',
|
|
95
|
-
'Normalize numbers and dates to standard formats.',
|
|
96
|
-
'If a property is ambiguous, favor the value most prominent in the document layout.',
|
|
97
|
-
'If a property is missing, set its value to null.',
|
|
98
|
-
],
|
|
99
|
-
...jsonOutputInstructions,
|
|
100
|
-
};
|
|
101
|
-
const dataExtractionSystemInstructions = dataExtractionInstructionsRaw;
|
|
102
|
-
const dataExtractionUserInstructions = { Task: 'Analyze the document and extract metadata and specific properties defined in the output schema following the system instructions.' };
|
|
103
|
-
const assignCollectionSystemInstructions = {
|
|
104
|
-
'Role': 'You are a Digital Filing Assistant.',
|
|
105
|
-
'Task': `Assign the document to relevant collections based on its metadata and content.`,
|
|
106
|
-
'Input': 'Document Metadata and a list of Available Collections.',
|
|
107
|
-
'Matching Logic': orderedList([
|
|
108
|
-
'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
|
|
109
|
-
'Semantic Fit: Determine if the document functionally belongs to a group.',
|
|
110
|
-
'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
|
|
111
|
-
]),
|
|
112
|
-
'Output': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
|
|
113
|
-
};
|
|
114
|
-
const assignCollectionUserInstructions = { Task: 'Select the most appropriate collections for this document from the provided list following the system instructions.' };
|
|
115
|
-
const assignRequestSystemInstructions = {
|
|
116
|
-
'Role': 'You are a Workflow Routing Agent.',
|
|
117
|
-
'Task': 'Match the provided document to an existing Open Document Request.',
|
|
118
|
-
'Input': 'Document Metadata and a list of Open Requests.',
|
|
119
|
-
'Matching Rules': orderedList({
|
|
120
|
-
'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
|
|
121
|
-
'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
|
|
122
|
-
'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
|
|
123
|
-
}),
|
|
124
|
-
'Output': 'The ID of the matching request, or null if no request matches.',
|
|
125
|
-
};
|
|
126
|
-
const assignRequestUserInstructions = { Task: 'Evaluate the document against the list of open requests and find the best match following the system instructions.' };
|
|
127
33
|
let DocumentManagementAiService = DocumentManagementAiService_1 = class DocumentManagementAiService {
|
|
128
34
|
#genkit = injectGenkit();
|
|
129
35
|
#contentExtractionModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
|
|
@@ -135,7 +41,6 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
135
41
|
#documentCategoryTypeService = inject(DocumentCategoryTypeService);
|
|
136
42
|
#documentFileService = inject(DocumentFileService);
|
|
137
43
|
#documentPropertyService = inject(DocumentPropertyService);
|
|
138
|
-
#documentManagementConfiguration = inject(DocumentManagementConfiguration, undefined, { optional: true });
|
|
139
44
|
#aiProvider = inject(DocumentManagementAiProviderService, undefined, { optional: true });
|
|
140
45
|
#documentPropertyRepository = injectRepository(DocumentProperty);
|
|
141
46
|
#documentRepository = injectRepository(Document);
|
|
@@ -147,9 +52,8 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
147
52
|
this.#logger.trace(`Extracting content from document ${document.id}`);
|
|
148
53
|
const result = await this.runAi(tenantId, DocumentWorkflowStep.ContentExtraction, { document }, {
|
|
149
54
|
defaultModel: this.#contentExtractionModel,
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
schema: object({ content: string() }),
|
|
55
|
+
promptBuilder: createContentExtractionPrompt(),
|
|
56
|
+
schema: contentExtractionSchema,
|
|
153
57
|
document,
|
|
154
58
|
});
|
|
155
59
|
const markdownBlockStripped = result.content.trim().replaceAll(/^```\w*\s*|```$/gi, '').trim();
|
|
@@ -166,21 +70,19 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
166
70
|
this.#logger.trace(`Classifying document ${document.id}`);
|
|
167
71
|
const stepData = { document, categories };
|
|
168
72
|
const aiConfig = await this.resolveAiConfiguration(tenantId, DocumentWorkflowStep.Classification, stepData);
|
|
169
|
-
const
|
|
170
|
-
|
|
171
|
-
|
|
73
|
+
const schema = createClassifySchema(typeLabels);
|
|
74
|
+
const promptBuilder = createClassifyPrompt(typeLabels);
|
|
75
|
+
if (isDefined(aiConfig.classification)) {
|
|
76
|
+
promptBuilder.addSystemInstructions({
|
|
172
77
|
'Classification Overrides': mergeInstructions('Follow these additional classification rules.', [aiConfig.classification]),
|
|
173
|
-
}
|
|
174
|
-
|
|
78
|
+
});
|
|
79
|
+
}
|
|
175
80
|
const result = await this.runAi(tenantId, DocumentWorkflowStep.Classification, stepData, {
|
|
176
81
|
defaultModel: this.#classifyModel,
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
schema: object({
|
|
180
|
-
documentType: enumeration(typeLabels),
|
|
181
|
-
}),
|
|
82
|
+
promptBuilder,
|
|
83
|
+
schema,
|
|
182
84
|
document,
|
|
183
|
-
config: { maxOutputTokens:
|
|
85
|
+
config: { maxOutputTokens: 1024 },
|
|
184
86
|
aiConfig,
|
|
185
87
|
});
|
|
186
88
|
const typeId = typeLabelEntries.find((entry) => entry.label == result.documentType)?.id;
|
|
@@ -232,22 +134,21 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
232
134
|
const override = (isNotNull(property.key) ? aiConfig.extraction?.properties?.[property.key] : undefined) ?? aiConfig.extraction?.properties?.[property.label];
|
|
233
135
|
return isDefined(override) ? mergeInstructions(`Extract value for property "${property.label}".`, [override]) : undefined;
|
|
234
136
|
}).filter(isDefined);
|
|
235
|
-
const
|
|
236
|
-
|
|
137
|
+
const promptBuilder = createDataExtractionPrompt(generationSchema);
|
|
138
|
+
promptBuilder.addInstructions({
|
|
237
139
|
'Field Specific Instructions': mergedFieldInstructions,
|
|
238
|
-
'Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
|
|
239
|
-
?
|
|
240
|
-
:
|
|
241
|
-
};
|
|
140
|
+
'Additional Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
|
|
141
|
+
? mergedPropertyInstructions
|
|
142
|
+
: [],
|
|
143
|
+
});
|
|
242
144
|
const extraction = await this.runAi(tenantId, DocumentWorkflowStep.DataExtraction, stepData, {
|
|
243
145
|
targetId: documentTypeEntity.key ?? undefined,
|
|
244
146
|
defaultModel: this.#dataExtractionModel,
|
|
245
|
-
|
|
246
|
-
user: dataExtractionUserInstructions,
|
|
147
|
+
promptBuilder,
|
|
247
148
|
data: { existingTags: tagLabels },
|
|
248
149
|
schema: generationSchema,
|
|
249
150
|
document,
|
|
250
|
-
config: { maxOutputTokens:
|
|
151
|
+
config: { maxOutputTokens: 4096 },
|
|
251
152
|
aiConfig,
|
|
252
153
|
});
|
|
253
154
|
const filteredDocumentTags = extraction.documentTags.filter((tag) => (tag != extraction.documentTitle) && (tag != extraction.documentSubtitle));
|
|
@@ -302,11 +203,10 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
302
203
|
const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds }, {
|
|
303
204
|
targetId: documentTypeEntity.key ?? undefined,
|
|
304
205
|
defaultModel: this.#assignModel,
|
|
305
|
-
|
|
306
|
-
user: assignCollectionUserInstructions,
|
|
206
|
+
promptBuilder: createAssignCollectionPrompt(),
|
|
307
207
|
data: { document: documentData, documentProperties: fromEntries(propertyEntries), collections },
|
|
308
|
-
schema:
|
|
309
|
-
config: { maxOutputTokens:
|
|
208
|
+
schema: assignCollectionSchema,
|
|
209
|
+
config: { maxOutputTokens: 2048 },
|
|
310
210
|
});
|
|
311
211
|
return result.collectionIds;
|
|
312
212
|
}
|
|
@@ -349,39 +249,60 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
349
249
|
const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds: requestsCollectionIds }, {
|
|
350
250
|
targetId: documentTypeEntity.key ?? undefined,
|
|
351
251
|
defaultModel: this.#assignModel,
|
|
352
|
-
|
|
353
|
-
user: assignRequestUserInstructions,
|
|
252
|
+
promptBuilder: createAssignRequestPrompt(),
|
|
354
253
|
data: { document: documentData, documentProperties: fromEntries(propertyEntries), requests },
|
|
355
|
-
schema:
|
|
356
|
-
config: { maxOutputTokens:
|
|
254
|
+
schema: assignRequestSchema,
|
|
255
|
+
config: { maxOutputTokens: 1024 },
|
|
357
256
|
});
|
|
358
257
|
return result.requestId;
|
|
359
258
|
}
|
|
360
259
|
async runAi(tenantId, step, stepData, options) {
|
|
361
260
|
const config = options.aiConfig ?? await this.resolveAiConfiguration(tenantId, step, stepData);
|
|
362
261
|
const model = config.model ?? options.defaultModel;
|
|
363
|
-
const
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
prompt
|
|
380
|
-
}
|
|
381
|
-
if (
|
|
382
|
-
|
|
262
|
+
const thinkingLevel = config.thinkingLevel ?? 'LOW';
|
|
263
|
+
const configuredModel = isString(model) ? model : model.withConfig({ thinkingConfig: { thinkingLevel } });
|
|
264
|
+
const builder = options.promptBuilder;
|
|
265
|
+
if (isDefined(config.language)) {
|
|
266
|
+
builder.addInstructions({ 'Output Language': languagePrompt(config.language) });
|
|
267
|
+
}
|
|
268
|
+
if (isDefined(options.data)) {
|
|
269
|
+
builder.addContext('Data', options.data);
|
|
270
|
+
}
|
|
271
|
+
if (isDefined(config.prompt?.systemAddition)) {
|
|
272
|
+
builder.addSystemInstructions({ 'Additional Instructions': config.prompt.systemAddition });
|
|
273
|
+
}
|
|
274
|
+
if (isDefined(config.prompt?.userAddition)) {
|
|
275
|
+
builder.addInstructions({ 'Additional Instructions': config.prompt.userAddition });
|
|
276
|
+
}
|
|
277
|
+
if (isDefined(config.prompt?.systemOverride)) {
|
|
278
|
+
builder.setSystemInstructionsOverride(config.prompt.systemOverride);
|
|
279
|
+
}
|
|
280
|
+
if (isDefined(config.prompt?.userOverride)) {
|
|
281
|
+
builder.setInstructionsOverride(config.prompt.userOverride);
|
|
282
|
+
}
|
|
283
|
+
if (isDefined(options.document)) {
|
|
284
|
+
builder.addMedia(await this.#documentFileService.getContent(options.document), options.document.mimeType);
|
|
285
|
+
}
|
|
286
|
+
try {
|
|
287
|
+
const result = await this.#genkit.generate(genkitGenerationOptions({
|
|
288
|
+
model: configuredModel,
|
|
289
|
+
config: options.config,
|
|
290
|
+
output: { schema: options.schema },
|
|
291
|
+
system: builder.buildSystemPrompt(),
|
|
292
|
+
prompt: builder.buildUserPrompt(),
|
|
293
|
+
}));
|
|
294
|
+
this.#logger.trace(`AI result for ${step} ${options.targetId ?? ''}: usage=${JSON.stringify(result.usage)}, finishReason=${result.finishReason}`);
|
|
295
|
+
if (isNull(result.output)) {
|
|
296
|
+
throw new Error(`AI returned null output for ${step} ${options.targetId ?? ''}. Finish reason: ${result.finishReason}`);
|
|
297
|
+
}
|
|
298
|
+
return result.output;
|
|
299
|
+
}
|
|
300
|
+
catch (error) {
|
|
301
|
+
if (error instanceof GenkitError) {
|
|
302
|
+
this.#logger.error(`AI ${step} ${options.targetId ?? ''} failed: status=${error.status}, message=${error.originalMessage}, details=${JSON.stringify(error.detail)}`);
|
|
303
|
+
}
|
|
304
|
+
throw error;
|
|
383
305
|
}
|
|
384
|
-
return result.output;
|
|
385
306
|
}
|
|
386
307
|
async resolveAiConfiguration(tenantId, step, data) {
|
|
387
308
|
const globalConfig = await this.#aiProvider?.getGlobalConfiguration(tenantId);
|
|
@@ -401,6 +322,7 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
401
322
|
stepConfig,
|
|
402
323
|
].filter(isDefined);
|
|
403
324
|
const model = mergeMostSpecific(configs, 'model');
|
|
325
|
+
const thinkingLevel = mergeMostSpecific(configs, 'thinkingLevel');
|
|
404
326
|
const language = mergeMostSpecific(configs, 'language');
|
|
405
327
|
const systemAddition = resolveAdditions(configs.map((c) => c.prompt?.systemAddition));
|
|
406
328
|
const userAddition = resolveAdditions(configs.map((c) => c.prompt?.userAddition));
|
|
@@ -410,6 +332,7 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
|
|
|
410
332
|
const classification = mergeMostSpecific(configs, 'classification');
|
|
411
333
|
return {
|
|
412
334
|
model,
|
|
335
|
+
thinkingLevel,
|
|
413
336
|
language,
|
|
414
337
|
prompt: {
|
|
415
338
|
systemAddition,
|
|
@@ -479,7 +402,7 @@ function tryAiOutputDateObjectToNumericDate(dateObject) {
|
|
|
479
402
|
return date;
|
|
480
403
|
}
|
|
481
404
|
export function mergeFieldInstructions(instructionsKey, field, aiConfig) {
|
|
482
|
-
return mergeInstructions(
|
|
405
|
+
return mergeInstructions(dataExtractionFields[instructionsKey], [aiConfig.extraction?.[field]], { formatTemplate: getFormatTemplate(field) });
|
|
483
406
|
}
|
|
484
407
|
export function mergeInstructions(base, overrides, options = {}) {
|
|
485
408
|
let result = base;
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { type Instructions } from '../../../ai/prompts/index.js';
|
|
2
2
|
import type { SchemaTestable } from '../../../schema/schema.js';
|
|
3
3
|
import type { AiConfiguration } from '../../models/index.js';
|
|
4
|
+
import { DocumentFileService } from '../services/document-file.service.js';
|
|
4
5
|
import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
|
|
5
6
|
import { DocumentValidationExecutor, type DocumentValidationExecutorContext, type DocumentValidationExecutorResult } from './validator.js';
|
|
6
7
|
export declare abstract class AiValidationExecutor<R> extends DocumentValidationExecutor {
|
|
8
|
+
protected readonly documentFileService: DocumentFileService;
|
|
7
9
|
protected readonly genkit: import("genkit").Genkit;
|
|
8
10
|
protected readonly baseModel: import("genkit").ModelReference<import("zod").ZodObject<{
|
|
9
11
|
version: import("zod").ZodOptional<import("zod").ZodString>;
|
|
@@ -1,16 +1,12 @@
|
|
|
1
1
|
import { convertToGenkitSchema, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
|
|
2
|
-
import {
|
|
2
|
+
import { promptBuilder } from '../../../ai/prompts/index.js';
|
|
3
3
|
import { inject } from '../../../injector/inject.js';
|
|
4
4
|
import { isDefined, isNull } from '../../../utils/type-guards.js';
|
|
5
|
+
import { DocumentFileService } from '../services/document-file.service.js';
|
|
5
6
|
import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
|
|
6
7
|
import { DocumentValidationExecutor } from './validator.js';
|
|
7
|
-
const systemPromptBase = {
|
|
8
|
-
Role: 'You are an expert in document validation.',
|
|
9
|
-
Task: 'Validate a document based on the provided validation instructions and document content.',
|
|
10
|
-
Objective: 'Analyze the document carefully and provide a structured validation result according to the defined schema.',
|
|
11
|
-
...jsonOutputInstructions,
|
|
12
|
-
};
|
|
13
8
|
export class AiValidationExecutor extends DocumentValidationExecutor {
|
|
9
|
+
documentFileService = inject(DocumentFileService);
|
|
14
10
|
genkit = injectGenkit();
|
|
15
11
|
baseModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
|
|
16
12
|
aiProvider = inject(DocumentManagementAiProviderService, undefined, { optional: true });
|
|
@@ -24,29 +20,42 @@ export class AiValidationExecutor extends DocumentValidationExecutor {
|
|
|
24
20
|
const validationInstructions = await this.getValidationInstructions(context);
|
|
25
21
|
const model = providerValidationConfig?.model ?? executorConfig.model ?? providerGlobalConfig?.defaults?.model ?? this.baseModel;
|
|
26
22
|
const language = providerValidationConfig?.language ?? executorConfig.language ?? providerGlobalConfig?.defaults?.language;
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
23
|
+
const documentContent = await this.documentFileService.getContent(context.document);
|
|
24
|
+
const builder = promptBuilder()
|
|
25
|
+
.setSystemRole('You are an expert in document validation.')
|
|
26
|
+
.setSystemTask('Validate a document based on the provided validation instructions and document content.')
|
|
27
|
+
.setTask('Validate the document based on the provided system and validation instructions and the document content.')
|
|
28
|
+
.addInstructions({ 'Validation Instructions': validationInstructions })
|
|
29
|
+
.setOutputSchema(this.schema)
|
|
30
|
+
.addMedia(documentContent, context.document.mimeType);
|
|
31
|
+
if (isDefined(language)) {
|
|
32
|
+
builder.setLanguage(language);
|
|
33
|
+
}
|
|
34
|
+
const systemAdditions = [
|
|
35
|
+
providerGlobalConfig?.defaults?.prompt?.systemAddition,
|
|
36
|
+
executorConfig.prompt?.systemAddition,
|
|
37
|
+
providerValidationConfig?.prompt?.systemAddition,
|
|
38
|
+
].filter(isDefined);
|
|
39
|
+
if (systemAdditions.length > 0) {
|
|
40
|
+
builder.addSystemInstructions({ 'Additional Instructions': systemAdditions });
|
|
41
|
+
}
|
|
42
|
+
const userAdditions = [
|
|
43
|
+
providerGlobalConfig?.defaults?.prompt?.userAddition,
|
|
44
|
+
executorConfig.prompt?.userAddition,
|
|
45
|
+
providerValidationConfig?.prompt?.userAddition,
|
|
46
|
+
].filter(isDefined);
|
|
47
|
+
if (userAdditions.length > 0) {
|
|
48
|
+
builder.addInstructions({ 'Additional Instructions': userAdditions });
|
|
49
|
+
}
|
|
50
|
+
const systemOverride = providerValidationConfig?.prompt?.systemOverride ?? executorConfig.prompt?.systemOverride ?? providerGlobalConfig?.defaults?.prompt?.systemOverride;
|
|
51
|
+
builder.setSystemInstructionsOverride(systemOverride);
|
|
52
|
+
const userOverride = providerValidationConfig?.prompt?.userOverride ?? executorConfig.prompt?.userOverride ?? providerGlobalConfig?.defaults?.prompt?.userOverride;
|
|
53
|
+
builder.setInstructionsOverride(userOverride);
|
|
45
54
|
const generation = await this.genkit.generate({
|
|
46
|
-
model
|
|
55
|
+
model,
|
|
47
56
|
output: { schema: convertToGenkitSchema(this.schema) },
|
|
48
|
-
system:
|
|
49
|
-
prompt:
|
|
57
|
+
system: builder.buildSystemPrompt(),
|
|
58
|
+
prompt: builder.buildUserPrompt(),
|
|
50
59
|
});
|
|
51
60
|
if (isNull(generation.output)) {
|
|
52
61
|
throw new Error('AI returned null output');
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { AiConfiguration, DocumentManagementAiConfiguration, DocumentWorkflowStep } from '../../document-management/index.js';
|
|
2
2
|
import { DocumentManagementAiProviderService, type AiConfigurationResolveDataMap } from '../../document-management/server/index.js';
|
|
3
3
|
export declare class ExampleAiProviderService extends DocumentManagementAiProviderService {
|
|
4
|
+
#private;
|
|
4
5
|
/**
|
|
5
6
|
* Providing global defaults for the entire Document Management module.
|
|
6
7
|
*/
|
|
@@ -9,6 +9,7 @@ import { fewShotPrompt, stylePrompt } from '../../ai/prompts/index.js';
|
|
|
9
9
|
import { DocumentManagementAiProviderService } from '../../document-management/server/index.js';
|
|
10
10
|
import { Singleton } from '../../injector/index.js';
|
|
11
11
|
let ExampleAiProviderService = class ExampleAiProviderService extends DocumentManagementAiProviderService {
|
|
12
|
+
#model = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
|
|
12
13
|
/**
|
|
13
14
|
* Providing global defaults for the entire Document Management module.
|
|
14
15
|
*/
|
|
@@ -16,9 +17,9 @@ let ExampleAiProviderService = class ExampleAiProviderService extends DocumentMa
|
|
|
16
17
|
return {
|
|
17
18
|
defaults: {
|
|
18
19
|
language: 'German', // Steer all AI outputs to German
|
|
19
|
-
model:
|
|
20
|
+
model: this.#model,
|
|
20
21
|
prompt: {
|
|
21
|
-
|
|
22
|
+
// systemAddition: 'Additional global instructions for all AI steps.',
|
|
22
23
|
},
|
|
23
24
|
},
|
|
24
25
|
};
|
|
@@ -32,9 +33,9 @@ let ExampleAiProviderService = class ExampleAiProviderService extends DocumentMa
|
|
|
32
33
|
getClassificationConfiguration() {
|
|
33
34
|
return {
|
|
34
35
|
// Use a faster/cheaper model for classification
|
|
35
|
-
model:
|
|
36
|
+
model: this.#model,
|
|
36
37
|
prompt: {
|
|
37
|
-
systemAddition: 'For classification, focus strictly on
|
|
38
|
+
systemAddition: 'For classification, focus strictly on document content.',
|
|
38
39
|
},
|
|
39
40
|
};
|
|
40
41
|
}
|
|
@@ -49,7 +49,7 @@ const config = {
|
|
|
49
49
|
},
|
|
50
50
|
},
|
|
51
51
|
s3: {
|
|
52
|
-
endpoint: string('S3_ENDPOINT', 'http://localhost:
|
|
52
|
+
endpoint: string('S3_ENDPOINT', 'http://localhost:19552'),
|
|
53
53
|
accessKey: string('S3_ACCESS_KEY', 'tstdl-dev'),
|
|
54
54
|
secretKey: string('S3_SECRET_KEY', 'tstdl-dev'),
|
|
55
55
|
bucket: string('S3_BUCKET', undefined),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tstdl/base",
|
|
3
|
-
"version": "0.93.
|
|
3
|
+
"version": "0.93.171",
|
|
4
4
|
"author": "Patrick Hein",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -152,8 +152,8 @@
|
|
|
152
152
|
"type-fest": "^5.5"
|
|
153
153
|
},
|
|
154
154
|
"peerDependencies": {
|
|
155
|
-
"@aws-sdk/client-s3": "^3.
|
|
156
|
-
"@aws-sdk/s3-request-presigner": "^3.
|
|
155
|
+
"@aws-sdk/client-s3": "^3.1014",
|
|
156
|
+
"@aws-sdk/s3-request-presigner": "^3.1014",
|
|
157
157
|
"@genkit-ai/google-genai": "^1.30",
|
|
158
158
|
"@google-cloud/storage": "^7.19",
|
|
159
159
|
"@toon-format/toon": "^2.1.0",
|
|
@@ -190,7 +190,7 @@
|
|
|
190
190
|
"@types/mjml": "4.7",
|
|
191
191
|
"@types/node": "25",
|
|
192
192
|
"@types/nodemailer": "7.0",
|
|
193
|
-
"@types/pg": "8.
|
|
193
|
+
"@types/pg": "8.20",
|
|
194
194
|
"@vitest/coverage-v8": "4.1",
|
|
195
195
|
"@vitest/ui": "4.1",
|
|
196
196
|
"concurrently": "9.2",
|
|
@@ -205,12 +205,5 @@
|
|
|
205
205
|
"typescript-eslint": "8.57",
|
|
206
206
|
"vite-tsconfig-paths": "6.1",
|
|
207
207
|
"vitest": "4.1"
|
|
208
|
-
},
|
|
209
|
-
"overrides": {
|
|
210
|
-
"drizzle-kit": {
|
|
211
|
-
"@esbuild-kit/esm-loader": "^2.6",
|
|
212
|
-
"esbuild": "^0.25",
|
|
213
|
-
"esbuild-register": "^3.6"
|
|
214
|
-
}
|
|
215
208
|
}
|
|
216
209
|
}
|