@tstdl/base 0.93.168 → 0.93.170

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import type { Part } from 'genkit';
2
2
  import type { SchemaTestable } from '../../schema/schema.js';
3
3
  import type { ObjectLiteral } from '../../types/index.js';
4
4
  import { type Instructions } from './instructions-formatter.js';
5
+ import { type FewShotExample } from './steering.js';
5
6
  export type PromptBuilderInstructions = Record<string, Instructions>;
6
7
  export type PromptBuilderContext = Record<string, PromptBuilderContextItem>;
7
8
  export type PromptBuilderContextItem = ObjectLiteral;
@@ -11,8 +12,11 @@ export declare class PromptBuilder {
11
12
  setRole(role: string): this;
12
13
  setSystemTask(task: string): this;
13
14
  setTask(task: string): this;
14
- setSystemOutputSchema(schema: SchemaTestable): this;
15
- setOutputSchema(schema: SchemaTestable): this;
15
+ setSystemOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
16
+ setOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
17
+ setSystemInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
18
+ setInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
19
+ setLanguage(language: string): this;
16
20
  addSystemMedia(content: Uint8Array, mimeType: string): this;
17
21
  addMedia(content: Uint8Array, mimeType: string): this;
18
22
  addSystemInstructions(instructions: Record<string, Instructions>): this;
@@ -3,7 +3,8 @@ import { encodeBase64 } from '../../utils/base64.js';
3
3
  import { fromEntries, objectEntries, objectKeys } from '../../utils/object/index.js';
4
4
  import { assertObjectPass, isDefined, isString, isUndefined } from '../../utils/type-guards.js';
5
5
  import { formatData } from './format.js';
6
- import { formatInstructions, sections } from './instructions-formatter.js';
6
+ import { formatInstructions, sections, unorderedList } from './instructions-formatter.js';
7
+ import { fewShotPrompt, languagePrompt } from './steering.js';
7
8
  export class PromptBuilder {
8
9
  #systemMedia = [];
9
10
  #media = [];
@@ -12,11 +13,16 @@ export class PromptBuilder {
12
13
  #systemTask;
13
14
  #task;
14
15
  #systemOutputSchema;
16
+ #systemOutputExamples;
15
17
  #outputSchema;
18
+ #outputExamples;
16
19
  #systemInstructions = {};
17
20
  #instructions = {};
18
21
  #systemContextParts = {};
19
22
  #contextParts = {};
23
+ #language;
24
+ #systemInstructionsOverride;
25
+ #instructionsOverride;
20
26
  setSystemRole(role) {
21
27
  this.#systemRole = role;
22
28
  return this;
@@ -33,12 +39,26 @@ export class PromptBuilder {
33
39
  this.#task = task;
34
40
  return this;
35
41
  }
36
- setSystemOutputSchema(schema) {
42
+ setSystemOutputSchema(schema, examples) {
37
43
  this.#systemOutputSchema = schema;
44
+ this.#systemOutputExamples = examples;
38
45
  return this;
39
46
  }
40
- setOutputSchema(schema) {
47
+ setOutputSchema(schema, examples) {
41
48
  this.#outputSchema = schema;
49
+ this.#outputExamples = examples;
50
+ return this;
51
+ }
52
+ setSystemInstructionsOverride(override) {
53
+ this.#systemInstructionsOverride = override;
54
+ return this;
55
+ }
56
+ setInstructionsOverride(override) {
57
+ this.#instructionsOverride = override;
58
+ return this;
59
+ }
60
+ setLanguage(language) {
61
+ this.#language = language;
42
62
  return this;
43
63
  }
44
64
  addSystemMedia(content, mimeType) {
@@ -79,8 +99,11 @@ export class PromptBuilder {
79
99
  context: this.#systemContextParts,
80
100
  instructions: this.#systemInstructions,
81
101
  outputSchema: this.#systemOutputSchema,
102
+ outputExamples: this.#systemOutputExamples,
82
103
  task: this.#systemTask,
83
104
  media: this.#systemMedia,
105
+ language: this.#language,
106
+ instructionsOverride: this.#systemInstructionsOverride,
84
107
  });
85
108
  }
86
109
  buildUserPrompt() {
@@ -89,8 +112,11 @@ export class PromptBuilder {
89
112
  context: this.#contextParts,
90
113
  instructions: this.#instructions,
91
114
  outputSchema: this.#outputSchema,
115
+ outputExamples: this.#outputExamples,
92
116
  task: this.#task,
93
117
  media: this.#media,
118
+ language: this.#language,
119
+ instructionsOverride: this.#instructionsOverride,
94
120
  });
95
121
  }
96
122
  }
@@ -119,12 +145,25 @@ function buildPrompt(data) {
119
145
  }
120
146
  if (isDefined(data.outputSchema)) {
121
147
  const schema = convertToOpenApiSchema(data.outputSchema);
122
- instructions['**Output Schema**'] = `\`\`\`json\n${JSON.stringify(schema, null, 2)}\n\`\`\``;
148
+ const schemaJson = JSON.stringify(schema, null, 2);
149
+ instructions['**Output Schema**'] = `\`\`\`json\n${schemaJson}\n\`\`\``;
150
+ instructions['**Output Schema Instructions**'] = unorderedList({
151
+ 'Schema Compliance': 'Generate valid JSON that strictly matches the provided schema.',
152
+ 'Nullable fields with missing data': 'Must be set to literal `null`, not the string "null".',
153
+ 'Optional fields with missing data': 'Omit the key entirely (sparse JSON).',
154
+ });
155
+ if (isDefined(data.outputExamples) && (data.outputExamples.length > 0)) {
156
+ instructions['**Output Examples**'] = fewShotPrompt(data.outputExamples);
157
+ }
158
+ }
159
+ if (isDefined(data.language)) {
160
+ instructions['**Output Language**'] = languagePrompt(data.language);
123
161
  }
124
162
  if (isDefined(data.task)) {
125
163
  instructions['**Task**'] = data.task;
126
164
  }
127
- const formattedInstructions = formatInstructions(instructions);
165
+ const instructionsWithOverride = data.instructionsOverride?.(instructions) ?? instructions;
166
+ const formattedInstructions = isString(instructionsWithOverride) ? instructionsWithOverride : formatInstructions(instructionsWithOverride);
128
167
  return [
129
168
  ...(data.media ?? []),
130
169
  { text: formattedInstructions },
@@ -7,12 +7,13 @@ export type FewShotExample<Input = ObjectLiteral, Output = ObjectLiteral> = {
7
7
  /** Optional reason explaining why this example is positive or negative. */
8
8
  reason?: string;
9
9
  };
10
+ export declare function fewShotExamples<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): FewShotExample<Input, Output>[];
10
11
  /**
11
12
  * Creates a prompt addition for few-shot learning.
12
13
  * @param examples An array of input/output pairs.
13
14
  * @returns A formatted few-shot prompt.
14
15
  */
15
- export declare function fewShotPrompt<Input = ObjectLiteral, Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
16
+ export declare function fewShotPrompt<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
16
17
  Examples: string;
17
18
  } | {
18
19
  Examples: import("./instructions-formatter.js").InstructionsList;
@@ -1,6 +1,9 @@
1
1
  import { fromEntries } from '../../utils/object/object.js';
2
2
  import { isDefined, isString } from '../../utils/type-guards.js';
3
3
  import { formatInstructions, orderedList, unorderedList } from './instructions-formatter.js';
4
+ export function fewShotExamples(examples) {
5
+ return examples;
6
+ }
4
7
  /**
5
8
  * Creates a prompt addition for few-shot learning.
6
9
  * @param examples An array of input/output pairs.
@@ -0,0 +1,27 @@
1
+ import { type PromptBuilder } from '../../../ai/prompts/index.js';
2
+ import { type SchemaTestable } from '../../../schema/index.js';
3
+ export declare const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = "You are a highly precise, analytical Document Management Specialist.";
4
+ export declare const contentExtractionSchema: import("../../../schema/index.js").ObjectSchema<{
5
+ content: string;
6
+ }>;
7
+ export declare function createContentExtractionPrompt(): PromptBuilder;
8
+ export declare function createClassifySchema(validTypes: string[]): SchemaTestable<{
9
+ documentType: string;
10
+ }>;
11
+ export declare function createClassifyPrompt(validTypes: string[]): PromptBuilder;
12
+ export declare const dataExtractionFields: {
13
+ Title: string;
14
+ Subtitle: string;
15
+ Summary: string;
16
+ Tags: string;
17
+ Date: string;
18
+ };
19
+ export declare function createDataExtractionPrompt(schema: SchemaTestable): PromptBuilder;
20
+ export declare const assignCollectionSchema: import("../../../schema/index.js").ObjectSchema<{
21
+ collectionIds: string[];
22
+ }>;
23
+ export declare function createAssignCollectionPrompt(): PromptBuilder;
24
+ export declare const assignRequestSchema: import("../../../schema/index.js").ObjectSchema<{
25
+ requestId: string | null;
26
+ }>;
27
+ export declare function createAssignRequestPrompt(): PromptBuilder;
@@ -0,0 +1,158 @@
1
+ import { orderedList, promptBuilder, unorderedList } from '../../../ai/prompts/index.js';
2
+ import { fewShotExamples } from '../../../ai/prompts/steering.js';
3
+ import { array, enumeration, nullable, object, string } from '../../../schema/index.js';
4
+ export const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = 'You are a highly precise, analytical Document Management Specialist.';
5
+ // --- Content Extraction ---
6
+ export const contentExtractionSchema = object({ content: string() });
7
+ export function createContentExtractionPrompt() {
8
+ return promptBuilder()
9
+ .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
10
+ .setTask('Transcribe the attached document into Markdown following the instructions.')
11
+ .setOutputSchema(contentExtractionSchema)
12
+ .addInstructions({
13
+ 'Objective': 'Convert the provided document into semantically structured, clean Markdown.',
14
+ 'Critical Constraints': orderedList([
15
+ 'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
16
+ 'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
17
+ ]),
18
+ 'Formatting Rules': orderedList({
19
+ 'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
20
+ 'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
21
+ 'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
22
+ 'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
23
+ 'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
24
+ 'Columns': 'Read multi-column text as a single continuous flow.',
25
+ }),
26
+ 'Complex Elements': {
27
+ 'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
28
+ 'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
29
+ 'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
30
+ 'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
31
+ },
32
+ 'Page Handling': [
33
+ 'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
34
+ 'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
35
+ ],
36
+ 'Error Handling': [
37
+ 'Mark illegible text as `[Illegible]`.',
38
+ 'Mark cut-off text as `[Cut off]`.',
39
+ ],
40
+ });
41
+ }
42
+ // --- Classification ---
43
+ const CLASSIFY_FEW_SHOT = fewShotExamples([
44
+ {
45
+ input: 'Document that contains "Invoice", a table with items, and a "Total Due" amount.',
46
+ output: { documentType: 'Finance -> Invoice' },
47
+ reason: 'Explicit keyword and layout match.',
48
+ },
49
+ {
50
+ input: 'Document that contains "Rental Agreement", multiple paragraphs about obligations, and signatures at the end.',
51
+ output: { documentType: 'Legal -> Contract' },
52
+ reason: 'Structural and entity-based match.',
53
+ },
54
+ ]);
55
+ export function createClassifySchema(validTypes) {
56
+ return object({ documentType: enumeration(validTypes) });
57
+ }
58
+ export function createClassifyPrompt(validTypes) {
59
+ const schema = createClassifySchema(validTypes);
60
+ return promptBuilder()
61
+ .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
62
+ .setRole('Document Taxonomy Specialist')
63
+ .setTask('Determine the single most accurate document type from the provided list based on the document.')
64
+ .setOutputSchema(schema, CLASSIFY_FEW_SHOT)
65
+ .addInstructions({
66
+ 'Analysis Strategy': orderedList([
67
+ 'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
68
+ 'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
69
+ 'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
70
+ ]),
71
+ 'Selection Logic': orderedList([
72
+ 'Exact Match: If the document explicitly states its type, select the corresponding category.',
73
+ 'Content Match: If implicit, match the intent.',
74
+ 'Specificity: Always choose the most specific leaf-node category available.',
75
+ 'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
76
+ ]),
77
+ 'Valid category labels': unorderedList(validTypes),
78
+ });
79
+ }
80
+ // --- Data Extraction ---
81
+ export const dataExtractionFields = {
82
+ Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
83
+ Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
84
+ Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
85
+ Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
86
+ Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
87
+ };
88
+ export function createDataExtractionPrompt(schema) {
89
+ return promptBuilder()
90
+ .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
91
+ .setRole('Structured Data Extraction Analyst')
92
+ .setTask('Analyze the document and extract metadata and specific properties defined in the output schema following the instructions.')
93
+ .setOutputSchema(schema)
94
+ .addInstructions({
95
+ 'Field Specific Instructions': dataExtractionFields,
96
+ 'Property Extraction': orderedList([
97
+ 'You will be given a list of specific dynamic properties to look for.',
98
+ 'Extract values *exactly* as they appear for strings.',
99
+ 'Normalize numbers and dates to standard formats.',
100
+ 'If a property is ambiguous, favor the value most prominent in the document layout.',
101
+ 'If a property is missing, set its value to null.',
102
+ ]),
103
+ });
104
+ }
105
+ // --- Collection Assignment ---
106
+ const ASSIGN_COLLECTION_FEW_SHOT = fewShotExamples([
107
+ {
108
+ input: {
109
+ document: { title: 'Invoice - Project Alpha', summary: 'Invoice for consulting services in Project Alpha.' },
110
+ collections: [{ id: 'col-1', name: 'Project Alpha' }, { id: 'col-2', name: 'General Finance' }],
111
+ },
112
+ output: { collectionIds: ['col-1'] },
113
+ reason: 'Direct match on project name.',
114
+ },
115
+ ]);
116
+ export const assignCollectionSchema = object({ collectionIds: array(string()) });
117
+ export function createAssignCollectionPrompt() {
118
+ return promptBuilder()
119
+ .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
120
+ .setRole('Digital Filing Assistant')
121
+ .setTask('Select the most appropriate collections for this document from the provided list following the instructions.')
122
+ .setOutputSchema(assignCollectionSchema, ASSIGN_COLLECTION_FEW_SHOT)
123
+ .addInstructions({
124
+ 'Matching Logic': orderedList([
125
+ 'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
126
+ 'Semantic Fit: Determine if the document functionally belongs to a group.',
127
+ 'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
128
+ ]),
129
+ 'Output Constraints': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
130
+ });
131
+ }
132
+ // --- Request Assignment ---
133
+ const ASSIGN_REQUEST_FEW_SHOT = fewShotExamples([
134
+ {
135
+ input: {
136
+ document: { title: 'Medical Certificate - John Doe', date: '2023-11-01' },
137
+ requests: [{ id: 'req-1', comment: 'Need medical certificate from November', collections: ['HR'] }],
138
+ },
139
+ output: { requestId: 'req-1' },
140
+ reason: 'Document satisfies the specific request criteria.',
141
+ },
142
+ ]);
143
+ export const assignRequestSchema = object({ requestId: nullable(string()) });
144
+ export function createAssignRequestPrompt() {
145
+ return promptBuilder()
146
+ .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
147
+ .setRole('Workflow Routing Agent')
148
+ .setTask('Evaluate the document against the list of open requests and find the best match following the instructions.')
149
+ .setOutputSchema(assignRequestSchema, ASSIGN_REQUEST_FEW_SHOT)
150
+ .addInstructions({
151
+ 'Matching Rules': orderedList({
152
+ 'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
153
+ 'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
154
+ 'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
155
+ }),
156
+ 'Output Constraints': 'The ID of the matching request, or null if no request matches.',
157
+ });
158
+ }
@@ -1,11 +1,12 @@
1
1
  import { type TstdlGenkitGenerationOptions } from '../../../ai/genkit/index.js';
2
- import { type Instructions } from '../../../ai/prompts/index.js';
2
+ import { type PromptBuilder } from '../../../ai/prompts/index.js';
3
3
  import { type SchemaTestable } from '../../../schema/index.js';
4
4
  import type { ObjectLiteral } from '../../../types/types.js';
5
5
  import type { ModelReference } from 'genkit';
6
6
  import type { AiConfiguration, DocumentPropertyDataType, InstructionOverride } from '../../models/index.js';
7
7
  import { Document, DocumentWorkflowStep } from '../../models/index.js';
8
8
  import { type AiConfigurationResolveDataMap } from './document-management-ai-provider.service.js';
9
+ import { dataExtractionFields } from './document-management-ai.prompts.js';
9
10
  type DocumentDataExtractionPropertyResult = {
10
11
  propertyId: string;
11
12
  dataType: DocumentPropertyDataType;
@@ -19,19 +20,6 @@ export type DocumentDataExtractionResult = {
19
20
  tags: string[];
20
21
  properties: DocumentDataExtractionPropertyResult[];
21
22
  };
22
- declare const dataExtractionInstructionsRaw: {
23
- 'JSON Output': import("../../../ai/prompts/index.js").InstructionsList;
24
- Role: string;
25
- Task: string;
26
- 'Field Specific Instructions': {
27
- Title: string;
28
- Subtitle: string;
29
- Summary: string;
30
- Tags: string;
31
- Date: string;
32
- };
33
- 'Property Extraction': string[];
34
- };
35
23
  export declare class DocumentManagementAiService {
36
24
  #private;
37
25
  extractContent(tenantId: string, documentId: string): Promise<string>;
@@ -42,8 +30,7 @@ export declare class DocumentManagementAiService {
42
30
  protected runAi<T, Step extends DocumentWorkflowStep>(tenantId: string, step: Step, stepData: AiConfigurationResolveDataMap[Step], options: {
43
31
  targetId?: string;
44
32
  defaultModel: ModelReference<any>;
45
- system: Instructions;
46
- user: Instructions;
33
+ promptBuilder: PromptBuilder;
47
34
  data?: ObjectLiteral;
48
35
  schema: SchemaTestable<T>;
49
36
  document?: Document;
@@ -54,7 +41,7 @@ export declare class DocumentManagementAiService {
54
41
  private mergeExtractionConfigs;
55
42
  private resolveContextKeys;
56
43
  }
57
- export declare function mergeFieldInstructions(instructionsKey: keyof typeof dataExtractionInstructionsRaw['Field Specific Instructions'], field: keyof NonNullable<AiConfiguration['extraction']>, aiConfig: AiConfiguration): string;
44
+ export declare function mergeFieldInstructions(instructionsKey: keyof typeof dataExtractionFields, field: keyof NonNullable<AiConfiguration['extraction']>, aiConfig: AiConfiguration): string;
58
45
  export declare function mergeInstructions(base: string, overrides: (InstructionOverride | undefined)[], options?: {
59
46
  formatTemplate?: (format: string) => string;
60
47
  }): string;
@@ -9,12 +9,12 @@ var DocumentManagementAiService_1;
9
9
  import { and, isNull as drizzleIsNull, eq, inArray } from 'drizzle-orm';
10
10
  import { P, match } from 'ts-pattern';
11
11
  import { genkitGenerationOptions, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
12
- import { buildPrompts, formatInstructions, jsonOutputInstructions, orderedList } from '../../../ai/prompts/index.js';
12
+ import { formatInstructions, languagePrompt } from '../../../ai/prompts/index.js';
13
13
  import { inject } from '../../../injector/inject.js';
14
14
  import { Logger } from '../../../logger/logger.js';
15
15
  import { arrayAgg } from '../../../orm/index.js';
16
16
  import { injectRepository } from '../../../orm/server/index.js';
17
- import { array, boolean, enumeration, integer, nullable, number, object, string } from '../../../schema/index.js';
17
+ import { array, boolean, integer, nullable, number, object, string } from '../../../schema/index.js';
18
18
  import { distinct } from '../../../utils/array/index.js';
19
19
  import { numericDateToDateTime, tryDateObjectToNumericDate } from '../../../utils/date-time.js';
20
20
  import { fromEntries, hasOwnProperty, objectEntries, objectKeys } from '../../../utils/object/object.js';
@@ -26,104 +26,10 @@ import { DocumentCategoryTypeService } from './document-category-type.service.js
26
26
  import { DocumentCollectionService } from './document-collection.service.js';
27
27
  import { DocumentFileService } from './document-file.service.js';
28
28
  import { DocumentManagementAiProviderService } from './document-management-ai-provider.service.js';
29
+ import { assignCollectionSchema, assignRequestSchema, contentExtractionSchema, createAssignCollectionPrompt, createAssignRequestPrompt, createClassifyPrompt, createClassifySchema, createContentExtractionPrompt, createDataExtractionPrompt, dataExtractionFields } from './document-management-ai.prompts.js';
29
30
  import { DocumentPropertyService } from './document-property.service.js';
30
31
  import { DocumentTagService } from './document-tag.service.js';
31
32
  import { DocumentManagementSingleton } from './singleton.js';
32
- // --- Instructions ---
33
- const contentExtractionSystemInstructions = {
34
- 'Role': 'You are an expert OCR and Document Digitization engine.',
35
- 'Primary Objective': 'Convert the provided document into semantically structured, clean Markdown.',
36
- 'Critical Constraints': orderedList([
37
- 'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
38
- 'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
39
- ]),
40
- 'Formatting Rules': orderedList({
41
- 'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
42
- 'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
43
- 'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
44
- 'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
45
- 'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
46
- 'Columns': 'Read multi-column text as a single continuous flow.',
47
- }),
48
- 'Complex Elements': {
49
- 'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
50
- 'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
51
- 'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
52
- 'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
53
- },
54
- 'Page Handling': [
55
- 'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
56
- 'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
57
- ],
58
- 'Error Handling': [
59
- 'Mark illegible text as `[Illegible]`.',
60
- 'Mark cut-off text as `[Cut off]`.',
61
- ],
62
- };
63
- const contentExtractionUserInstructions = { Task: 'Transcribe the attached document into Markdown following the system instructions.' };
64
- const classifySystemInstructions = {
65
- 'Role': 'You are a Document Taxonomy Specialist.',
66
- 'Task': `Analyze the visual layout and text content of the document to categorize it into exactly one of the provided hierarchical types.`,
67
- 'Input Context': 'You will be provided with a list of valid category labels (e.g., "Finance -> Invoice").',
68
- 'Analysis Strategy': orderedList([
69
- 'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
70
- 'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
71
- 'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
72
- ]),
73
- 'Selection Logic': orderedList([
74
- 'Exact Match: If the document explicitly states its type, select the corresponding category.',
75
- 'Content Match: If implicit, match the intent.',
76
- 'Specificity: Always choose the most specific leaf-node category available.',
77
- 'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
78
- ]),
79
- ...jsonOutputInstructions,
80
- };
81
- const classifyUserInstructions = { Task: 'Determine the single most accurate document type from the provided list based on the document following the system instructions.' };
82
- const dataExtractionInstructionsRaw = {
83
- 'Role': 'You are a Structured Data Extraction Analyst.',
84
- 'Task': 'Analyze the document and extract metadata into the defined JSON schema.',
85
- 'Field Specific Instructions': {
86
- Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
87
- Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
88
- Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
89
- Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
90
- Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
91
- },
92
- 'Property Extraction': [
93
- 'You will be given a list of specific dynamic properties to look for.',
94
- 'Extract values *exactly* as they appear for strings.',
95
- 'Normalize numbers and dates to standard formats.',
96
- 'If a property is ambiguous, favor the value most prominent in the document layout.',
97
- 'If a property is missing, set its value to null.',
98
- ],
99
- ...jsonOutputInstructions,
100
- };
101
- const dataExtractionSystemInstructions = dataExtractionInstructionsRaw;
102
- const dataExtractionUserInstructions = { Task: 'Analyze the document and extract metadata and specific properties defined in the output schema following the system instructions.' };
103
- const assignCollectionSystemInstructions = {
104
- 'Role': 'You are a Digital Filing Assistant.',
105
- 'Task': `Assign the document to relevant collections based on its metadata and content.`,
106
- 'Input': 'Document Metadata and a list of Available Collections.',
107
- 'Matching Logic': orderedList([
108
- 'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
109
- 'Semantic Fit: Determine if the document functionally belongs to a group.',
110
- 'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
111
- ]),
112
- 'Output': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
113
- };
114
- const assignCollectionUserInstructions = { Task: 'Select the most appropriate collections for this document from the provided list following the system instructions.' };
115
- const assignRequestSystemInstructions = {
116
- 'Role': 'You are a Workflow Routing Agent.',
117
- 'Task': 'Match the provided document to an existing Open Document Request.',
118
- 'Input': 'Document Metadata and a list of Open Requests.',
119
- 'Matching Rules': orderedList({
120
- 'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
121
- 'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
122
- 'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
123
- }),
124
- 'Output': 'The ID of the matching request, or null if no request matches.',
125
- };
126
- const assignRequestUserInstructions = { Task: 'Evaluate the document against the list of open requests and find the best match following the system instructions.' };
127
33
  let DocumentManagementAiService = DocumentManagementAiService_1 = class DocumentManagementAiService {
128
34
  #genkit = injectGenkit();
129
35
  #contentExtractionModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
@@ -147,9 +53,8 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
147
53
  this.#logger.trace(`Extracting content from document ${document.id}`);
148
54
  const result = await this.runAi(tenantId, DocumentWorkflowStep.ContentExtraction, { document }, {
149
55
  defaultModel: this.#contentExtractionModel,
150
- system: contentExtractionSystemInstructions,
151
- user: contentExtractionUserInstructions,
152
- schema: object({ content: string() }),
56
+ promptBuilder: createContentExtractionPrompt(),
57
+ schema: contentExtractionSchema,
153
58
  document,
154
59
  });
155
60
  const markdownBlockStripped = result.content.trim().replaceAll(/^```\w*\s*|```$/gi, '').trim();
@@ -166,19 +71,17 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
166
71
  this.#logger.trace(`Classifying document ${document.id}`);
167
72
  const stepData = { document, categories };
168
73
  const aiConfig = await this.resolveAiConfiguration(tenantId, DocumentWorkflowStep.Classification, stepData);
169
- const systemInstructions = isDefined(aiConfig.classification)
170
- ? {
171
- ...classifySystemInstructions,
74
+ const schema = createClassifySchema(typeLabels);
75
+ const promptBuilder = createClassifyPrompt(typeLabels);
76
+ if (isDefined(aiConfig.classification)) {
77
+ promptBuilder.addSystemInstructions({
172
78
  'Classification Overrides': mergeInstructions('Follow these additional classification rules.', [aiConfig.classification]),
173
- }
174
- : classifySystemInstructions;
79
+ });
80
+ }
175
81
  const result = await this.runAi(tenantId, DocumentWorkflowStep.Classification, stepData, {
176
82
  defaultModel: this.#classifyModel,
177
- system: systemInstructions,
178
- user: classifyUserInstructions,
179
- schema: object({
180
- documentType: enumeration(typeLabels),
181
- }),
83
+ promptBuilder,
84
+ schema,
182
85
  document,
183
86
  config: { maxOutputTokens: 128 },
184
87
  aiConfig,
@@ -232,18 +135,17 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
232
135
  const override = (isNotNull(property.key) ? aiConfig.extraction?.properties?.[property.key] : undefined) ?? aiConfig.extraction?.properties?.[property.label];
233
136
  return isDefined(override) ? mergeInstructions(`Extract value for property "${property.label}".`, [override]) : undefined;
234
137
  }).filter(isDefined);
235
- const systemInstructions = {
236
- ...dataExtractionSystemInstructions,
138
+ const promptBuilder = createDataExtractionPrompt(generationSchema);
139
+ promptBuilder.addInstructions({
237
140
  'Field Specific Instructions': mergedFieldInstructions,
238
- 'Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
239
- ? [...dataExtractionInstructionsRaw['Property Extraction'], ...mergedPropertyInstructions]
240
- : dataExtractionInstructionsRaw['Property Extraction'],
241
- };
141
+ 'Additional Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
142
+ ? mergedPropertyInstructions
143
+ : [],
144
+ });
242
145
  const extraction = await this.runAi(tenantId, DocumentWorkflowStep.DataExtraction, stepData, {
243
146
  targetId: documentTypeEntity.key ?? undefined,
244
147
  defaultModel: this.#dataExtractionModel,
245
- system: systemInstructions,
246
- user: dataExtractionUserInstructions,
148
+ promptBuilder,
247
149
  data: { existingTags: tagLabels },
248
150
  schema: generationSchema,
249
151
  document,
@@ -302,10 +204,9 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
302
204
  const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds }, {
303
205
  targetId: documentTypeEntity.key ?? undefined,
304
206
  defaultModel: this.#assignModel,
305
- system: assignCollectionSystemInstructions,
306
- user: assignCollectionUserInstructions,
207
+ promptBuilder: createAssignCollectionPrompt(),
307
208
  data: { document: documentData, documentProperties: fromEntries(propertyEntries), collections },
308
- schema: object({ collectionIds: array(string()) }),
209
+ schema: assignCollectionSchema,
309
210
  config: { maxOutputTokens: 512 },
310
211
  });
311
212
  return result.collectionIds;
@@ -349,10 +250,9 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
349
250
  const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds: requestsCollectionIds }, {
350
251
  targetId: documentTypeEntity.key ?? undefined,
351
252
  defaultModel: this.#assignModel,
352
- system: assignRequestSystemInstructions,
353
- user: assignRequestUserInstructions,
253
+ promptBuilder: createAssignRequestPrompt(),
354
254
  data: { document: documentData, documentProperties: fromEntries(propertyEntries), requests },
355
- schema: object({ requestId: nullable(string()) }),
255
+ schema: assignRequestSchema,
356
256
  config: { maxOutputTokens: 128 },
357
257
  });
358
258
  return result.requestId;
@@ -360,23 +260,34 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
360
260
  async runAi(tenantId, step, stepData, options) {
361
261
  const config = options.aiConfig ?? await this.resolveAiConfiguration(tenantId, step, stepData);
362
262
  const model = config.model ?? options.defaultModel;
363
- const { systemPrompt, userPrompt } = buildPrompts({
364
- baseSystemInstructions: options.system,
365
- baseUserInstructions: options.user,
366
- additionalSystemInstructions: config.prompt?.systemAddition,
367
- additionalUserInstructions: config.prompt?.userAddition,
368
- systemInstructionsOverride: config.prompt?.systemOverride,
369
- userInstructionsOverride: config.prompt?.userOverride,
370
- data: options.data,
371
- media: isDefined(options.document) ? { mimeType: options.document.mimeType, content: await this.#documentFileService.getContent(options.document) } : undefined,
372
- language: config.language,
373
- });
263
+ const builder = options.promptBuilder;
264
+ if (isDefined(config.language)) {
265
+ builder.addInstructions({ 'Output Language': languagePrompt(config.language) });
266
+ }
267
+ if (isDefined(options.data)) {
268
+ builder.addContext('Data', options.data);
269
+ }
270
+ if (isDefined(config.prompt?.systemAddition)) {
271
+ builder.addSystemInstructions({ 'Additional Instructions': config.prompt.systemAddition });
272
+ }
273
+ if (isDefined(config.prompt?.userAddition)) {
274
+ builder.addInstructions({ 'Additional Instructions': config.prompt.userAddition });
275
+ }
276
+ if (isDefined(config.prompt?.systemOverride)) {
277
+ builder.setSystemInstructionsOverride(config.prompt.systemOverride);
278
+ }
279
+ if (isDefined(config.prompt?.userOverride)) {
280
+ builder.setInstructionsOverride(config.prompt.userOverride);
281
+ }
282
+ if (isDefined(options.document)) {
283
+ builder.addMedia(await this.#documentFileService.getContent(options.document), options.document.mimeType);
284
+ }
374
285
  const result = await this.#genkit.generate(genkitGenerationOptions({
375
286
  model,
376
287
  config: options.config,
377
288
  output: { schema: options.schema },
378
- system: systemPrompt,
379
- prompt: userPrompt,
289
+ system: builder.buildSystemPrompt(),
290
+ prompt: builder.buildUserPrompt(),
380
291
  }));
381
292
  if (isNull(result.output)) {
382
293
  throw new Error(`AI returned null output for ${step} ${options.targetId ?? ''}`);
@@ -479,7 +390,7 @@ function tryAiOutputDateObjectToNumericDate(dateObject) {
479
390
  return date;
480
391
  }
481
392
  export function mergeFieldInstructions(instructionsKey, field, aiConfig) {
482
- return mergeInstructions(dataExtractionInstructionsRaw['Field Specific Instructions'][instructionsKey], [aiConfig.extraction?.[field]], { formatTemplate: getFormatTemplate(field) });
393
+ return mergeInstructions(dataExtractionFields[instructionsKey], [aiConfig.extraction?.[field]], { formatTemplate: getFormatTemplate(field) });
483
394
  }
484
395
  export function mergeInstructions(base, overrides, options = {}) {
485
396
  let result = base;
@@ -1,9 +1,11 @@
1
1
  import { type Instructions } from '../../../ai/prompts/index.js';
2
2
  import type { SchemaTestable } from '../../../schema/schema.js';
3
3
  import type { AiConfiguration } from '../../models/index.js';
4
+ import { DocumentFileService } from '../services/document-file.service.js';
4
5
  import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
5
6
  import { DocumentValidationExecutor, type DocumentValidationExecutorContext, type DocumentValidationExecutorResult } from './validator.js';
6
7
  export declare abstract class AiValidationExecutor<R> extends DocumentValidationExecutor {
8
+ protected readonly documentFileService: DocumentFileService;
7
9
  protected readonly genkit: import("genkit").Genkit;
8
10
  protected readonly baseModel: import("genkit").ModelReference<import("zod").ZodObject<{
9
11
  version: import("zod").ZodOptional<import("zod").ZodString>;
@@ -1,16 +1,12 @@
1
1
  import { convertToGenkitSchema, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
2
- import { buildPrompts, jsonOutputInstructions } from '../../../ai/prompts/index.js';
2
+ import { promptBuilder } from '../../../ai/prompts/index.js';
3
3
  import { inject } from '../../../injector/inject.js';
4
4
  import { isDefined, isNull } from '../../../utils/type-guards.js';
5
+ import { DocumentFileService } from '../services/document-file.service.js';
5
6
  import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
6
7
  import { DocumentValidationExecutor } from './validator.js';
7
- const systemPromptBase = {
8
- Role: 'You are an expert in document validation.',
9
- Task: 'Validate a document based on the provided validation instructions and document content.',
10
- Objective: 'Analyze the document carefully and provide a structured validation result according to the defined schema.',
11
- ...jsonOutputInstructions,
12
- };
13
8
  export class AiValidationExecutor extends DocumentValidationExecutor {
9
+ documentFileService = inject(DocumentFileService);
14
10
  genkit = injectGenkit();
15
11
  baseModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
16
12
  aiProvider = inject(DocumentManagementAiProviderService, undefined, { optional: true });
@@ -24,29 +20,42 @@ export class AiValidationExecutor extends DocumentValidationExecutor {
24
20
  const validationInstructions = await this.getValidationInstructions(context);
25
21
  const model = providerValidationConfig?.model ?? executorConfig.model ?? providerGlobalConfig?.defaults?.model ?? this.baseModel;
26
22
  const language = providerValidationConfig?.language ?? executorConfig.language ?? providerGlobalConfig?.defaults?.language;
27
- const { systemPrompt, userPrompt } = buildPrompts({
28
- baseSystemInstructions: systemPromptBase,
29
- baseUserInstructions: { Task: 'Validate the document based on the provided system and validation instructions and the document content.' },
30
- additionalSystemInstructions: [
31
- providerGlobalConfig?.defaults?.prompt?.systemAddition,
32
- executorConfig.prompt?.systemAddition,
33
- providerValidationConfig?.prompt?.systemAddition,
34
- ].filter(isDefined),
35
- additionalUserInstructions: [
36
- { 'Validation Instructions': validationInstructions },
37
- providerGlobalConfig?.defaults?.prompt?.userAddition,
38
- executorConfig.prompt?.userAddition,
39
- providerValidationConfig?.prompt?.userAddition,
40
- ].filter(isDefined),
41
- systemInstructionsOverride: providerValidationConfig?.prompt?.systemOverride ?? executorConfig.prompt?.systemOverride ?? providerGlobalConfig?.defaults?.prompt?.systemOverride,
42
- userInstructionsOverride: providerValidationConfig?.prompt?.userOverride ?? executorConfig.prompt?.userOverride ?? providerGlobalConfig?.defaults?.prompt?.userOverride,
43
- language,
44
- });
23
+ const documentContent = await this.documentFileService.getContent(context.document);
24
+ const builder = promptBuilder()
25
+ .setSystemRole('You are an expert in document validation.')
26
+ .setSystemTask('Validate a document based on the provided validation instructions and document content.')
27
+ .setTask('Validate the document based on the provided system and validation instructions and the document content.')
28
+ .addInstructions({ 'Validation Instructions': validationInstructions })
29
+ .setOutputSchema(this.schema)
30
+ .addMedia(documentContent, context.document.mimeType);
31
+ if (isDefined(language)) {
32
+ builder.setLanguage(language);
33
+ }
34
+ const systemAdditions = [
35
+ providerGlobalConfig?.defaults?.prompt?.systemAddition,
36
+ executorConfig.prompt?.systemAddition,
37
+ providerValidationConfig?.prompt?.systemAddition,
38
+ ].filter(isDefined);
39
+ if (systemAdditions.length > 0) {
40
+ builder.addSystemInstructions({ 'Additional Instructions': systemAdditions });
41
+ }
42
+ const userAdditions = [
43
+ providerGlobalConfig?.defaults?.prompt?.userAddition,
44
+ executorConfig.prompt?.userAddition,
45
+ providerValidationConfig?.prompt?.userAddition,
46
+ ].filter(isDefined);
47
+ if (userAdditions.length > 0) {
48
+ builder.addInstructions({ 'Additional Instructions': userAdditions });
49
+ }
50
+ const systemOverride = providerValidationConfig?.prompt?.systemOverride ?? executorConfig.prompt?.systemOverride ?? providerGlobalConfig?.defaults?.prompt?.systemOverride;
51
+ builder.setSystemInstructionsOverride(systemOverride);
52
+ const userOverride = providerValidationConfig?.prompt?.userOverride ?? executorConfig.prompt?.userOverride ?? providerGlobalConfig?.defaults?.prompt?.userOverride;
53
+ builder.setInstructionsOverride(userOverride);
45
54
  const generation = await this.genkit.generate({
46
- model: model,
55
+ model,
47
56
  output: { schema: convertToGenkitSchema(this.schema) },
48
- system: systemPrompt,
49
- prompt: userPrompt,
57
+ system: builder.buildSystemPrompt(),
58
+ prompt: builder.buildUserPrompt(),
50
59
  });
51
60
  if (isNull(generation.output)) {
52
61
  throw new Error('AI returned null output');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tstdl/base",
3
- "version": "0.93.168",
3
+ "version": "0.93.170",
4
4
  "author": "Patrick Hein",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -152,8 +152,8 @@
152
152
  "type-fest": "^5.5"
153
153
  },
154
154
  "peerDependencies": {
155
- "@aws-sdk/client-s3": "^3.1012",
156
- "@aws-sdk/s3-request-presigner": "^3.1012",
155
+ "@aws-sdk/client-s3": "^3.1014",
156
+ "@aws-sdk/s3-request-presigner": "^3.1014",
157
157
  "@genkit-ai/google-genai": "^1.30",
158
158
  "@google-cloud/storage": "^7.19",
159
159
  "@toon-format/toon": "^2.1.0",
@@ -190,7 +190,7 @@
190
190
  "@types/mjml": "4.7",
191
191
  "@types/node": "25",
192
192
  "@types/nodemailer": "7.0",
193
- "@types/pg": "8.18",
193
+ "@types/pg": "8.20",
194
194
  "@vitest/coverage-v8": "4.1",
195
195
  "@vitest/ui": "4.1",
196
196
  "concurrently": "9.2",
@@ -205,12 +205,5 @@
205
205
  "typescript-eslint": "8.57",
206
206
  "vite-tsconfig-paths": "6.1",
207
207
  "vitest": "4.1"
208
- },
209
- "overrides": {
210
- "drizzle-kit": {
211
- "@esbuild-kit/esm-loader": "^2.6",
212
- "esbuild": "^0.25",
213
- "esbuild-register": "^3.6"
214
- }
215
208
  }
216
209
  }