npm - @tstdl/base - Versions diffs - 0.93.168 → 0.93.170 - Mend

@tstdl/base 0.93.168 → 0.93.170

This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (11)

package/ai/prompts/prompt-builder.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import type { Part } from 'genkit';
 import type { SchemaTestable } from '../../schema/schema.js';
 import type { ObjectLiteral } from '../../types/index.js';
 import { type Instructions } from './instructions-formatter.js';
+import { type FewShotExample } from './steering.js';
 export type PromptBuilderInstructions = Record<string, Instructions>;
 export type PromptBuilderContext = Record<string, PromptBuilderContextItem>;
 export type PromptBuilderContextItem = ObjectLiteral;
@@ -11,8 +12,11 @@ export declare class PromptBuilder {
     setRole(role: string): this;
     setSystemTask(task: string): this;
     setTask(task: string): this;
-    setSystemOutputSchema(schema: SchemaTestable): this;
-    setOutputSchema(schema: SchemaTestable): this;
+    setSystemOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
+    setOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
+    setSystemInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
+    setInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
+    setLanguage(language: string): this;
     addSystemMedia(content: Uint8Array, mimeType: string): this;
     addMedia(content: Uint8Array, mimeType: string): this;
     addSystemInstructions(instructions: Record<string, Instructions>): this;

package/ai/prompts/prompt-builder.js CHANGED Viewed

@@ -3,7 +3,8 @@ import { encodeBase64 } from '../../utils/base64.js';
 import { fromEntries, objectEntries, objectKeys } from '../../utils/object/index.js';
 import { assertObjectPass, isDefined, isString, isUndefined } from '../../utils/type-guards.js';
 import { formatData } from './format.js';
-import { formatInstructions, sections } from './instructions-formatter.js';
+import { formatInstructions, sections, unorderedList } from './instructions-formatter.js';
+import { fewShotPrompt, languagePrompt } from './steering.js';
 export class PromptBuilder {
     #systemMedia = [];
     #media = [];
@@ -12,11 +13,16 @@ export class PromptBuilder {
     #systemTask;
     #task;
     #systemOutputSchema;
+    #systemOutputExamples;
     #outputSchema;
+    #outputExamples;
     #systemInstructions = {};
     #instructions = {};
     #systemContextParts = {};
     #contextParts = {};
+    #language;
+    #systemInstructionsOverride;
+    #instructionsOverride;
     setSystemRole(role) {
         this.#systemRole = role;
         return this;
@@ -33,12 +39,26 @@ export class PromptBuilder {
         this.#task = task;
         return this;
     }
-    setSystemOutputSchema(schema) {
+    setSystemOutputSchema(schema, examples) {
         this.#systemOutputSchema = schema;
+        this.#systemOutputExamples = examples;
         return this;
     }
-    setOutputSchema(schema) {
+    setOutputSchema(schema, examples) {
         this.#outputSchema = schema;
+        this.#outputExamples = examples;
+        return this;
+    }
+    setSystemInstructionsOverride(override) {
+        this.#systemInstructionsOverride = override;
+        return this;
+    }
+    setInstructionsOverride(override) {
+        this.#instructionsOverride = override;
+        return this;
+    }
+    setLanguage(language) {
+        this.#language = language;
         return this;
     }
     addSystemMedia(content, mimeType) {
@@ -79,8 +99,11 @@ export class PromptBuilder {
             context: this.#systemContextParts,
             instructions: this.#systemInstructions,
             outputSchema: this.#systemOutputSchema,
+            outputExamples: this.#systemOutputExamples,
             task: this.#systemTask,
             media: this.#systemMedia,
+            language: this.#language,
+            instructionsOverride: this.#systemInstructionsOverride,
         });
     }
     buildUserPrompt() {
@@ -89,8 +112,11 @@ export class PromptBuilder {
             context: this.#contextParts,
             instructions: this.#instructions,
             outputSchema: this.#outputSchema,
+            outputExamples: this.#outputExamples,
             task: this.#task,
             media: this.#media,
+            language: this.#language,
+            instructionsOverride: this.#instructionsOverride,
         });
     }
 }
@@ -119,12 +145,25 @@ function buildPrompt(data) {
     }
     if (isDefined(data.outputSchema)) {
         const schema = convertToOpenApiSchema(data.outputSchema);
-        instructions['**Output Schema**'] = `\`\`\`json\n${JSON.stringify(schema, null, 2)}\n\`\`\``;
+        const schemaJson = JSON.stringify(schema, null, 2);
+        instructions['**Output Schema**'] = `\`\`\`json\n${schemaJson}\n\`\`\``;
+        instructions['**Output Schema Instructions**'] = unorderedList({
+            'Schema Compliance': 'Generate valid JSON that strictly matches the provided schema.',
+            'Nullable fields with missing data': 'Must be set to literal `null`, not the string "null".',
+            'Optional fields with missing data': 'Omit the key entirely (sparse JSON).',
+        });
+        if (isDefined(data.outputExamples) && (data.outputExamples.length > 0)) {
+            instructions['**Output Examples**'] = fewShotPrompt(data.outputExamples);
+        }
+    }
+    if (isDefined(data.language)) {
+        instructions['**Output Language**'] = languagePrompt(data.language);
     }
     if (isDefined(data.task)) {
         instructions['**Task**'] = data.task;
     }
-    const formattedInstructions = formatInstructions(instructions);
+    const instructionsWithOverride = data.instructionsOverride?.(instructions) ?? instructions;
+    const formattedInstructions = isString(instructionsWithOverride) ? instructionsWithOverride : formatInstructions(instructionsWithOverride);
     return [
         ...(data.media ?? []),
         { text: formattedInstructions },

package/ai/prompts/steering.d.ts CHANGED Viewed

@@ -7,12 +7,13 @@ export type FewShotExample<Input = ObjectLiteral, Output = ObjectLiteral> = {
     /** Optional reason explaining why this example is positive or negative. */
     reason?: string;
 };
+export declare function fewShotExamples<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): FewShotExample<Input, Output>[];
 /**
  * Creates a prompt addition for few-shot learning.
  * @param examples An array of input/output pairs.
  * @returns A formatted few-shot prompt.
  */
-export declare function fewShotPrompt<Input = ObjectLiteral, Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
+export declare function fewShotPrompt<const Input = ObjectLiteral, const Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): {
     Examples: string;
 } | {
     Examples: import("./instructions-formatter.js").InstructionsList;

package/ai/prompts/steering.js CHANGED Viewed

@@ -1,6 +1,9 @@
 import { fromEntries } from '../../utils/object/object.js';
 import { isDefined, isString } from '../../utils/type-guards.js';
 import { formatInstructions, orderedList, unorderedList } from './instructions-formatter.js';
+export function fewShotExamples(examples) {
+    return examples;
+}
 /**
  * Creates a prompt addition for few-shot learning.
  * @param examples An array of input/output pairs.

package/document-management/server/services/document-management-ai.prompts.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import { type PromptBuilder } from '../../../ai/prompts/index.js';
+import { type SchemaTestable } from '../../../schema/index.js';
+export declare const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = "You are a highly precise, analytical Document Management Specialist.";
+export declare const contentExtractionSchema: import("../../../schema/index.js").ObjectSchema<{
+    content: string;
+}>;
+export declare function createContentExtractionPrompt(): PromptBuilder;
+export declare function createClassifySchema(validTypes: string[]): SchemaTestable<{
+    documentType: string;
+}>;
+export declare function createClassifyPrompt(validTypes: string[]): PromptBuilder;
+export declare const dataExtractionFields: {
+    Title: string;
+    Subtitle: string;
+    Summary: string;
+    Tags: string;
+    Date: string;
+};
+export declare function createDataExtractionPrompt(schema: SchemaTestable): PromptBuilder;
+export declare const assignCollectionSchema: import("../../../schema/index.js").ObjectSchema<{
+    collectionIds: string[];
+}>;
+export declare function createAssignCollectionPrompt(): PromptBuilder;
+export declare const assignRequestSchema: import("../../../schema/index.js").ObjectSchema<{
+    requestId: string | null;
+}>;
+export declare function createAssignRequestPrompt(): PromptBuilder;

package/document-management/server/services/document-management-ai.prompts.js ADDED Viewed

@@ -0,0 +1,158 @@
+import { orderedList, promptBuilder, unorderedList } from '../../../ai/prompts/index.js';
+import { fewShotExamples } from '../../../ai/prompts/steering.js';
+import { array, enumeration, nullable, object, string } from '../../../schema/index.js';
+export const DOCUMENT_MANAGEMENT_SYSTEM_ROLE = 'You are a highly precise, analytical Document Management Specialist.';
+// --- Content Extraction ---
+export const contentExtractionSchema = object({ content: string() });
+export function createContentExtractionPrompt() {
+    return promptBuilder()
+        .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
+        .setTask('Transcribe the attached document into Markdown following the instructions.')
+        .setOutputSchema(contentExtractionSchema)
+        .addInstructions({
+        'Objective': 'Convert the provided document into semantically structured, clean Markdown.',
+        'Critical Constraints': orderedList([
+            'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
+            'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
+        ]),
+        'Formatting Rules': orderedList({
+            'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
+            'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
+            'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
+            'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
+            'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
+            'Columns': 'Read multi-column text as a single continuous flow.',
+        }),
+        'Complex Elements': {
+            'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
+            'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
+            'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
+            'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
+        },
+        'Page Handling': [
+            'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
+            'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
+        ],
+        'Error Handling': [
+            'Mark illegible text as `[Illegible]`.',
+            'Mark cut-off text as `[Cut off]`.',
+        ],
+    });
+}
+// --- Classification ---
+const CLASSIFY_FEW_SHOT = fewShotExamples([
+    {
+        input: 'Document that contains "Invoice", a table with items, and a "Total Due" amount.',
+        output: { documentType: 'Finance -> Invoice' },
+        reason: 'Explicit keyword and layout match.',
+    },
+    {
+        input: 'Document that contains "Rental Agreement", multiple paragraphs about obligations, and signatures at the end.',
+        output: { documentType: 'Legal -> Contract' },
+        reason: 'Structural and entity-based match.',
+    },
+]);
+export function createClassifySchema(validTypes) {
+    return object({ documentType: enumeration(validTypes) });
+}
+export function createClassifyPrompt(validTypes) {
+    const schema = createClassifySchema(validTypes);
+    return promptBuilder()
+        .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
+        .setRole('Document Taxonomy Specialist')
+        .setTask('Determine the single most accurate document type from the provided list based on the document.')
+        .setOutputSchema(schema, CLASSIFY_FEW_SHOT)
+        .addInstructions({
+        'Analysis Strategy': orderedList([
+            'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
+            'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
+            'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
+        ]),
+        'Selection Logic': orderedList([
+            'Exact Match: If the document explicitly states its type, select the corresponding category.',
+            'Content Match: If implicit, match the intent.',
+            'Specificity: Always choose the most specific leaf-node category available.',
+            'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
+        ]),
+        'Valid category labels': unorderedList(validTypes),
+    });
+}
+// --- Data Extraction ---
+export const dataExtractionFields = {
+    Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
+    Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
+    Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
+    Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
+    Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
+};
+export function createDataExtractionPrompt(schema) {
+    return promptBuilder()
+        .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
+        .setRole('Structured Data Extraction Analyst')
+        .setTask('Analyze the document and extract metadata and specific properties defined in the output schema following the instructions.')
+        .setOutputSchema(schema)
+        .addInstructions({
+        'Field Specific Instructions': dataExtractionFields,
+        'Property Extraction': orderedList([
+            'You will be given a list of specific dynamic properties to look for.',
+            'Extract values *exactly* as they appear for strings.',
+            'Normalize numbers and dates to standard formats.',
+            'If a property is ambiguous, favor the value most prominent in the document layout.',
+            'If a property is missing, set its value to null.',
+        ]),
+    });
+}
+// --- Collection Assignment ---
+const ASSIGN_COLLECTION_FEW_SHOT = fewShotExamples([
+    {
+        input: {
+            document: { title: 'Invoice - Project Alpha', summary: 'Invoice for consulting services in Project Alpha.' },
+            collections: [{ id: 'col-1', name: 'Project Alpha' }, { id: 'col-2', name: 'General Finance' }],
+        },
+        output: { collectionIds: ['col-1'] },
+        reason: 'Direct match on project name.',
+    },
+]);
+export const assignCollectionSchema = object({ collectionIds: array(string()) });
+export function createAssignCollectionPrompt() {
+    return promptBuilder()
+        .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
+        .setRole('Digital Filing Assistant')
+        .setTask('Select the most appropriate collections for this document from the provided list following the instructions.')
+        .setOutputSchema(assignCollectionSchema, ASSIGN_COLLECTION_FEW_SHOT)
+        .addInstructions({
+        'Matching Logic': orderedList([
+            'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
+            'Semantic Fit: Determine if the document functionally belongs to a group.',
+            'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
+        ]),
+        'Output Constraints': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
+    });
+}
+// --- Request Assignment ---
+const ASSIGN_REQUEST_FEW_SHOT = fewShotExamples([
+    {
+        input: {
+            document: { title: 'Medical Certificate - John Doe', date: '2023-11-01' },
+            requests: [{ id: 'req-1', comment: 'Need medical certificate from November', collections: ['HR'] }],
+        },
+        output: { requestId: 'req-1' },
+        reason: 'Document satisfies the specific request criteria.',
+    },
+]);
+export const assignRequestSchema = object({ requestId: nullable(string()) });
+export function createAssignRequestPrompt() {
+    return promptBuilder()
+        .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
+        .setRole('Workflow Routing Agent')
+        .setTask('Evaluate the document against the list of open requests and find the best match following the instructions.')
+        .setOutputSchema(assignRequestSchema, ASSIGN_REQUEST_FEW_SHOT)
+        .addInstructions({
+        'Matching Rules': orderedList({
+            'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
+            'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
+            'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
+        }),
+        'Output Constraints': 'The ID of the matching request, or null if no request matches.',
+    });
+}

package/document-management/server/services/document-management-ai.service.d.ts CHANGED Viewed

@@ -1,11 +1,12 @@
 import { type TstdlGenkitGenerationOptions } from '../../../ai/genkit/index.js';
-import { type Instructions } from '../../../ai/prompts/index.js';
+import { type PromptBuilder } from '../../../ai/prompts/index.js';
 import { type SchemaTestable } from '../../../schema/index.js';
 import type { ObjectLiteral } from '../../../types/types.js';
 import type { ModelReference } from 'genkit';
 import type { AiConfiguration, DocumentPropertyDataType, InstructionOverride } from '../../models/index.js';
 import { Document, DocumentWorkflowStep } from '../../models/index.js';
 import { type AiConfigurationResolveDataMap } from './document-management-ai-provider.service.js';
+import { dataExtractionFields } from './document-management-ai.prompts.js';
 type DocumentDataExtractionPropertyResult = {
     propertyId: string;
     dataType: DocumentPropertyDataType;
@@ -19,19 +20,6 @@ export type DocumentDataExtractionResult = {
     tags: string[];
     properties: DocumentDataExtractionPropertyResult[];
 };
-declare const dataExtractionInstructionsRaw: {
-    'JSON Output': import("../../../ai/prompts/index.js").InstructionsList;
-    Role: string;
-    Task: string;
-    'Field Specific Instructions': {
-        Title: string;
-        Subtitle: string;
-        Summary: string;
-        Tags: string;
-        Date: string;
-    };
-    'Property Extraction': string[];
-};
 export declare class DocumentManagementAiService {
     #private;
     extractContent(tenantId: string, documentId: string): Promise<string>;
@@ -42,8 +30,7 @@ export declare class DocumentManagementAiService {
     protected runAi<T, Step extends DocumentWorkflowStep>(tenantId: string, step: Step, stepData: AiConfigurationResolveDataMap[Step], options: {
         targetId?: string;
         defaultModel: ModelReference<any>;
-        system: Instructions;
-        user: Instructions;
+        promptBuilder: PromptBuilder;
         data?: ObjectLiteral;
         schema: SchemaTestable<T>;
         document?: Document;
@@ -54,7 +41,7 @@ export declare class DocumentManagementAiService {
     private mergeExtractionConfigs;
     private resolveContextKeys;
 }
-export declare function mergeFieldInstructions(instructionsKey: keyof typeof dataExtractionInstructionsRaw['Field Specific Instructions'], field: keyof NonNullable<AiConfiguration['extraction']>, aiConfig: AiConfiguration): string;
+export declare function mergeFieldInstructions(instructionsKey: keyof typeof dataExtractionFields, field: keyof NonNullable<AiConfiguration['extraction']>, aiConfig: AiConfiguration): string;
 export declare function mergeInstructions(base: string, overrides: (InstructionOverride | undefined)[], options?: {
     formatTemplate?: (format: string) => string;
 }): string;

package/document-management/server/services/document-management-ai.service.js CHANGED Viewed

@@ -9,12 +9,12 @@ var DocumentManagementAiService_1;
 import { and, isNull as drizzleIsNull, eq, inArray } from 'drizzle-orm';
 import { P, match } from 'ts-pattern';
 import { genkitGenerationOptions, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
-import { buildPrompts, formatInstructions, jsonOutputInstructions, orderedList } from '../../../ai/prompts/index.js';
+import { formatInstructions, languagePrompt } from '../../../ai/prompts/index.js';
 import { inject } from '../../../injector/inject.js';
 import { Logger } from '../../../logger/logger.js';
 import { arrayAgg } from '../../../orm/index.js';
 import { injectRepository } from '../../../orm/server/index.js';
-import { array, boolean, enumeration, integer, nullable, number, object, string } from '../../../schema/index.js';
+import { array, boolean, integer, nullable, number, object, string } from '../../../schema/index.js';
 import { distinct } from '../../../utils/array/index.js';
 import { numericDateToDateTime, tryDateObjectToNumericDate } from '../../../utils/date-time.js';
 import { fromEntries, hasOwnProperty, objectEntries, objectKeys } from '../../../utils/object/object.js';
@@ -26,104 +26,10 @@ import { DocumentCategoryTypeService } from './document-category-type.service.js
 import { DocumentCollectionService } from './document-collection.service.js';
 import { DocumentFileService } from './document-file.service.js';
 import { DocumentManagementAiProviderService } from './document-management-ai-provider.service.js';
+import { assignCollectionSchema, assignRequestSchema, contentExtractionSchema, createAssignCollectionPrompt, createAssignRequestPrompt, createClassifyPrompt, createClassifySchema, createContentExtractionPrompt, createDataExtractionPrompt, dataExtractionFields } from './document-management-ai.prompts.js';
 import { DocumentPropertyService } from './document-property.service.js';
 import { DocumentTagService } from './document-tag.service.js';
 import { DocumentManagementSingleton } from './singleton.js';
-// --- Instructions ---
-const contentExtractionSystemInstructions = {
-    'Role': 'You are an expert OCR and Document Digitization engine.',
-    'Primary Objective': 'Convert the provided document into semantically structured, clean Markdown.',
-    'Critical Constraints': orderedList([
-        'Output ONLY the Markdown content. Do not include introductory text, conversational filler, or code block fences (```).',
-        'Do not describe the visual appearance (e.g., "This looks like an invoice"). Transcribe the content only.',
-    ]),
-    'Formatting Rules': orderedList({
-        'Headings': 'Use # for the main document title (once). Use ##, ### for sections based on logical hierarchy.',
-        'Text Content': 'Transcribe text verbatim. Do not correct spelling or grammar, summarize, or rewrite.',
-        'Tables': 'Strictly use Markdown table syntax. Align columns logically based on the visual grid.',
-        'Lists': 'Detect bullet points and numbered lists and format them as Markdown lists.',
-        'Emphasis': 'Use **bold** and _italics_ only where visually distinct in the source.',
-        'Columns': 'Read multi-column text as a single continuous flow.',
-    }),
-    'Complex Elements': {
-        'Images/Visuals': 'Replace non-textual diagrams with `> [Visual: Brief description of the image/chart]`.',
-        'Signatures': 'Mark distinct signatures as `> [Signature: {Name if legible/Context}]`.',
-        'Forms': 'Represent checkboxes as `[ ]` (unchecked) or `[x]` (checked). Format label/value pairs on separate lines or as a definition list if applicable.',
-        'Math': 'Transcribe equations using LaTeX syntax enclosed in `$...$` for inline or `$$...$$` for block equations.',
-    },
-    'Page Handling': [
-        'Metadata: Start every page with `<!-- Page {n} Start -->` and end with `<!-- Page {n} End -->` on separate lines.',
-        'Artifacts: Exclude running headers, footers, and page numbers unless they contain unique data not found elsewhere.',
-    ],
-    'Error Handling': [
-        'Mark illegible text as `[Illegible]`.',
-        'Mark cut-off text as `[Cut off]`.',
-    ],
-};
-const contentExtractionUserInstructions = { Task: 'Transcribe the attached document into Markdown following the system instructions.' };
-const classifySystemInstructions = {
-    'Role': 'You are a Document Taxonomy Specialist.',
-    'Task': `Analyze the visual layout and text content of the document to categorize it into exactly one of the provided hierarchical types.`,
-    'Input Context': 'You will be provided with a list of valid category labels (e.g., "Finance -> Invoice").',
-    'Analysis Strategy': orderedList([
-        'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
-        'Analyze the layout structure (e.g., columns often imply Invoices/Receipts; dense paragraphs imply Contracts/Letters).',
-        'Identify key entities (e.g., "Total Due" implies financial; "Signed by" implies legal).',
-    ]),
-    'Selection Logic': orderedList([
-        'Exact Match: If the document explicitly states its type, select the corresponding category.',
-        'Content Match: If implicit, match the intent.',
-        'Specificity: Always choose the most specific leaf-node category available.',
-        'Fallback: If ambiguous, choose the category that best describes the *primary* purpose of the document.',
-    ]),
-    ...jsonOutputInstructions,
-};
-const classifyUserInstructions = { Task: 'Determine the single most accurate document type from the provided list based on the document following the system instructions.' };
-const dataExtractionInstructionsRaw = {
-    'Role': 'You are a Structured Data Extraction Analyst.',
-    'Task': 'Analyze the document and extract metadata into the defined JSON schema.',
-    'Field Specific Instructions': {
-        Title: 'Create a concise, searchable filename-style title (e.g., "Invoice - Oct 2023").',
-        Subtitle: 'Extract context usually found below the header (e.g., Project Name, Reference Number).',
-        Summary: 'Write a 2-3 sentence executive summary. Mention the type of information that can be found in the document and its purpose.',
-        Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
-        Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
-    },
-    'Property Extraction': [
-        'You will be given a list of specific dynamic properties to look for.',
-        'Extract values *exactly* as they appear for strings.',
-        'Normalize numbers and dates to standard formats.',
-        'If a property is ambiguous, favor the value most prominent in the document layout.',
-        'If a property is missing, set its value to null.',
-    ],
-    ...jsonOutputInstructions,
-};
-const dataExtractionSystemInstructions = dataExtractionInstructionsRaw;
-const dataExtractionUserInstructions = { Task: 'Analyze the document and extract metadata and specific properties defined in the output schema following the system instructions.' };
-const assignCollectionSystemInstructions = {
-    'Role': 'You are a Digital Filing Assistant.',
-    'Task': `Assign the document to relevant collections based on its metadata and content.`,
-    'Input': 'Document Metadata and a list of Available Collections.',
-    'Matching Logic': orderedList([
-        'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
-        'Semantic Fit: Determine if the document functionally belongs to a group.',
-        'Project Association: If the document references a specific project code or name found in a collection name, assign it there.',
-    ]),
-    'Output': 'Return an array of matching collection IDs. If no collection is a strong fit, return an empty array.',
-};
-const assignCollectionUserInstructions = { Task: 'Select the most appropriate collections for this document from the provided list following the system instructions.' };
-const assignRequestSystemInstructions = {
-    'Role': 'You are a Workflow Routing Agent.',
-    'Task': 'Match the provided document to an existing Open Document Request.',
-    'Input': 'Document Metadata and a list of Open Requests.',
-    'Matching Rules': orderedList({
-        'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
-        'Ambiguity': 'If multiple requests match, select the one with the most specific constraints that are satisfied.',
-        'Negative Match': 'If the document satisfies the metadata but violates a comment constraint, it is unsuitable.',
-    }),
-    'Output': 'The ID of the matching request, or null if no request matches.',
-};
-const assignRequestUserInstructions = { Task: 'Evaluate the document against the list of open requests and find the best match following the system instructions.' };
 let DocumentManagementAiService = DocumentManagementAiService_1 = class DocumentManagementAiService {
     #genkit = injectGenkit();
     #contentExtractionModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
@@ -147,9 +53,8 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
         this.#logger.trace(`Extracting content from document ${document.id}`);
         const result = await this.runAi(tenantId, DocumentWorkflowStep.ContentExtraction, { document }, {
             defaultModel: this.#contentExtractionModel,
-            system: contentExtractionSystemInstructions,
-            user: contentExtractionUserInstructions,
-            schema: object({ content: string() }),
+            promptBuilder: createContentExtractionPrompt(),
+            schema: contentExtractionSchema,
             document,
         });
         const markdownBlockStripped = result.content.trim().replaceAll(/^```\w*\s*|```$/gi, '').trim();
@@ -166,19 +71,17 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
         this.#logger.trace(`Classifying document ${document.id}`);
         const stepData = { document, categories };
         const aiConfig = await this.resolveAiConfiguration(tenantId, DocumentWorkflowStep.Classification, stepData);
-        const systemInstructions = isDefined(aiConfig.classification)
-            ? {
-                ...classifySystemInstructions,
+        const schema = createClassifySchema(typeLabels);
+        const promptBuilder = createClassifyPrompt(typeLabels);
+        if (isDefined(aiConfig.classification)) {
+            promptBuilder.addSystemInstructions({
                 'Classification Overrides': mergeInstructions('Follow these additional classification rules.', [aiConfig.classification]),
-            }
-            : classifySystemInstructions;
+            });
+        }
         const result = await this.runAi(tenantId, DocumentWorkflowStep.Classification, stepData, {
             defaultModel: this.#classifyModel,
-            system: systemInstructions,
-            user: classifyUserInstructions,
-            schema: object({
-                documentType: enumeration(typeLabels),
-            }),
+            promptBuilder,
+            schema,
             document,
             config: { maxOutputTokens: 128 },
             aiConfig,
@@ -232,18 +135,17 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
             const override = (isNotNull(property.key) ? aiConfig.extraction?.properties?.[property.key] : undefined) ?? aiConfig.extraction?.properties?.[property.label];
             return isDefined(override) ? mergeInstructions(`Extract value for property "${property.label}".`, [override]) : undefined;
         }).filter(isDefined);
-        const systemInstructions = {
-            ...dataExtractionSystemInstructions,
+        const promptBuilder = createDataExtractionPrompt(generationSchema);
+        promptBuilder.addInstructions({
             'Field Specific Instructions': mergedFieldInstructions,
-            'Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
-                ? [...dataExtractionInstructionsRaw['Property Extraction'], ...mergedPropertyInstructions]
-                : dataExtractionInstructionsRaw['Property Extraction'],
-        };
+            'Additional Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
+                ? mergedPropertyInstructions
+                : [],
+        });
         const extraction = await this.runAi(tenantId, DocumentWorkflowStep.DataExtraction, stepData, {
             targetId: documentTypeEntity.key ?? undefined,
             defaultModel: this.#dataExtractionModel,
-            system: systemInstructions,
-            user: dataExtractionUserInstructions,
+            promptBuilder,
             data: { existingTags: tagLabels },
             schema: generationSchema,
             document,
@@ -302,10 +204,9 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
         const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds }, {
             targetId: documentTypeEntity.key ?? undefined,
             defaultModel: this.#assignModel,
-            system: assignCollectionSystemInstructions,
-            user: assignCollectionUserInstructions,
+            promptBuilder: createAssignCollectionPrompt(),
             data: { document: documentData, documentProperties: fromEntries(propertyEntries), collections },
-            schema: object({ collectionIds: array(string()) }),
+            schema: assignCollectionSchema,
             config: { maxOutputTokens: 512 },
         });
         return result.collectionIds;
@@ -349,10 +250,9 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
         const result = await this.runAi(document.tenantId, DocumentWorkflowStep.Assignment, { document, properties: documentProperties, collectionIds: requestsCollectionIds }, {
             targetId: documentTypeEntity.key ?? undefined,
             defaultModel: this.#assignModel,
-            system: assignRequestSystemInstructions,
-            user: assignRequestUserInstructions,
+            promptBuilder: createAssignRequestPrompt(),
             data: { document: documentData, documentProperties: fromEntries(propertyEntries), requests },
-            schema: object({ requestId: nullable(string()) }),
+            schema: assignRequestSchema,
             config: { maxOutputTokens: 128 },
         });
         return result.requestId;
@@ -360,23 +260,34 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
     async runAi(tenantId, step, stepData, options) {
         const config = options.aiConfig ?? await this.resolveAiConfiguration(tenantId, step, stepData);
         const model = config.model ?? options.defaultModel;
-        const { systemPrompt, userPrompt } = buildPrompts({
-            baseSystemInstructions: options.system,
-            baseUserInstructions: options.user,
-            additionalSystemInstructions: config.prompt?.systemAddition,
-            additionalUserInstructions: config.prompt?.userAddition,
-            systemInstructionsOverride: config.prompt?.systemOverride,
-            userInstructionsOverride: config.prompt?.userOverride,
-            data: options.data,
-            media: isDefined(options.document) ? { mimeType: options.document.mimeType, content: await this.#documentFileService.getContent(options.document) } : undefined,
-            language: config.language,
-        });
+        const builder = options.promptBuilder;
+        if (isDefined(config.language)) {
+            builder.addInstructions({ 'Output Language': languagePrompt(config.language) });
+        }
+        if (isDefined(options.data)) {
+            builder.addContext('Data', options.data);
+        }
+        if (isDefined(config.prompt?.systemAddition)) {
+            builder.addSystemInstructions({ 'Additional Instructions': config.prompt.systemAddition });
+        }
+        if (isDefined(config.prompt?.userAddition)) {
+            builder.addInstructions({ 'Additional Instructions': config.prompt.userAddition });
+        }
+        if (isDefined(config.prompt?.systemOverride)) {
+            builder.setSystemInstructionsOverride(config.prompt.systemOverride);
+        }
+        if (isDefined(config.prompt?.userOverride)) {
+            builder.setInstructionsOverride(config.prompt.userOverride);
+        }
+        if (isDefined(options.document)) {
+            builder.addMedia(await this.#documentFileService.getContent(options.document), options.document.mimeType);
+        }
         const result = await this.#genkit.generate(genkitGenerationOptions({
             model,
             config: options.config,
             output: { schema: options.schema },
-            system: systemPrompt,
-            prompt: userPrompt,
+            system: builder.buildSystemPrompt(),
+            prompt: builder.buildUserPrompt(),
         }));
         if (isNull(result.output)) {
             throw new Error(`AI returned null output for ${step} ${options.targetId ?? ''}`);
@@ -479,7 +390,7 @@ function tryAiOutputDateObjectToNumericDate(dateObject) {
     return date;
 }
 export function mergeFieldInstructions(instructionsKey, field, aiConfig) {
-    return mergeInstructions(dataExtractionInstructionsRaw['Field Specific Instructions'][instructionsKey], [aiConfig.extraction?.[field]], { formatTemplate: getFormatTemplate(field) });
+    return mergeInstructions(dataExtractionFields[instructionsKey], [aiConfig.extraction?.[field]], { formatTemplate: getFormatTemplate(field) });
 }
 export function mergeInstructions(base, overrides, options = {}) {
     let result = base;

package/document-management/server/validators/ai-validation-executor.d.ts CHANGED Viewed

@@ -1,9 +1,11 @@
 import { type Instructions } from '../../../ai/prompts/index.js';
 import type { SchemaTestable } from '../../../schema/schema.js';
 import type { AiConfiguration } from '../../models/index.js';
+import { DocumentFileService } from '../services/document-file.service.js';
 import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
 import { DocumentValidationExecutor, type DocumentValidationExecutorContext, type DocumentValidationExecutorResult } from './validator.js';
 export declare abstract class AiValidationExecutor<R> extends DocumentValidationExecutor {
+    protected readonly documentFileService: DocumentFileService;
     protected readonly genkit: import("genkit").Genkit;
     protected readonly baseModel: import("genkit").ModelReference<import("zod").ZodObject<{
         version: import("zod").ZodOptional<import("zod").ZodString>;

package/document-management/server/validators/ai-validation-executor.js CHANGED Viewed

@@ -1,16 +1,12 @@
 import { convertToGenkitSchema, injectGenkit, injectModel } from '../../../ai/genkit/index.js';
-import { buildPrompts, jsonOutputInstructions } from '../../../ai/prompts/index.js';
+import { promptBuilder } from '../../../ai/prompts/index.js';
 import { inject } from '../../../injector/inject.js';
 import { isDefined, isNull } from '../../../utils/type-guards.js';
+import { DocumentFileService } from '../services/document-file.service.js';
 import { DocumentManagementAiProviderService } from '../services/document-management-ai-provider.service.js';
 import { DocumentValidationExecutor } from './validator.js';
-const systemPromptBase = {
-    Role: 'You are an expert in document validation.',
-    Task: 'Validate a document based on the provided validation instructions and document content.',
-    Objective: 'Analyze the document carefully and provide a structured validation result according to the defined schema.',
-    ...jsonOutputInstructions,
-};
 export class AiValidationExecutor extends DocumentValidationExecutor {
+    documentFileService = inject(DocumentFileService);
     genkit = injectGenkit();
     baseModel = injectModel('gemini-3.1-flash-lite-preview').withConfig({ thinkingConfig: { thinkingLevel: 'LOW' } });
     aiProvider = inject(DocumentManagementAiProviderService, undefined, { optional: true });
@@ -24,29 +20,42 @@ export class AiValidationExecutor extends DocumentValidationExecutor {
         const validationInstructions = await this.getValidationInstructions(context);
         const model = providerValidationConfig?.model ?? executorConfig.model ?? providerGlobalConfig?.defaults?.model ?? this.baseModel;
         const language = providerValidationConfig?.language ?? executorConfig.language ?? providerGlobalConfig?.defaults?.language;
-        const { systemPrompt, userPrompt } = buildPrompts({
-            baseSystemInstructions: systemPromptBase,
-            baseUserInstructions: { Task: 'Validate the document based on the provided system and validation instructions and the document content.' },
-            additionalSystemInstructions: [
-                providerGlobalConfig?.defaults?.prompt?.systemAddition,
-                executorConfig.prompt?.systemAddition,
-                providerValidationConfig?.prompt?.systemAddition,
-            ].filter(isDefined),
-            additionalUserInstructions: [
-                { 'Validation Instructions': validationInstructions },
-                providerGlobalConfig?.defaults?.prompt?.userAddition,
-                executorConfig.prompt?.userAddition,
-                providerValidationConfig?.prompt?.userAddition,
-            ].filter(isDefined),
-            systemInstructionsOverride: providerValidationConfig?.prompt?.systemOverride ?? executorConfig.prompt?.systemOverride ?? providerGlobalConfig?.defaults?.prompt?.systemOverride,
-            userInstructionsOverride: providerValidationConfig?.prompt?.userOverride ?? executorConfig.prompt?.userOverride ?? providerGlobalConfig?.defaults?.prompt?.userOverride,
-            language,
-        });
+        const documentContent = await this.documentFileService.getContent(context.document);
+        const builder = promptBuilder()
+            .setSystemRole('You are an expert in document validation.')
+            .setSystemTask('Validate a document based on the provided validation instructions and document content.')
+            .setTask('Validate the document based on the provided system and validation instructions and the document content.')
+            .addInstructions({ 'Validation Instructions': validationInstructions })
+            .setOutputSchema(this.schema)
+            .addMedia(documentContent, context.document.mimeType);
+        if (isDefined(language)) {
+            builder.setLanguage(language);
+        }
+        const systemAdditions = [
+            providerGlobalConfig?.defaults?.prompt?.systemAddition,
+            executorConfig.prompt?.systemAddition,
+            providerValidationConfig?.prompt?.systemAddition,
+        ].filter(isDefined);
+        if (systemAdditions.length > 0) {
+            builder.addSystemInstructions({ 'Additional Instructions': systemAdditions });
+        }
+        const userAdditions = [
+            providerGlobalConfig?.defaults?.prompt?.userAddition,
+            executorConfig.prompt?.userAddition,
+            providerValidationConfig?.prompt?.userAddition,
+        ].filter(isDefined);
+        if (userAdditions.length > 0) {
+            builder.addInstructions({ 'Additional Instructions': userAdditions });
+        }
+        const systemOverride = providerValidationConfig?.prompt?.systemOverride ?? executorConfig.prompt?.systemOverride ?? providerGlobalConfig?.defaults?.prompt?.systemOverride;
+        builder.setSystemInstructionsOverride(systemOverride);
+        const userOverride = providerValidationConfig?.prompt?.userOverride ?? executorConfig.prompt?.userOverride ?? providerGlobalConfig?.defaults?.prompt?.userOverride;
+        builder.setInstructionsOverride(userOverride);
         const generation = await this.genkit.generate({
-            model: model,
+            model,
             output: { schema: convertToGenkitSchema(this.schema) },
-            system: systemPrompt,
-            prompt: userPrompt,
+            system: builder.buildSystemPrompt(),
+            prompt: builder.buildUserPrompt(),
         });
         if (isNull(generation.output)) {
             throw new Error('AI returned null output');

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tstdl/base",
-  "version": "0.93.168",
+  "version": "0.93.170",
   "author": "Patrick Hein",
   "publishConfig": {
     "access": "public"
@@ -152,8 +152,8 @@
     "type-fest": "^5.5"
   },
   "peerDependencies": {
-    "@aws-sdk/client-s3": "^3.1012",
-    "@aws-sdk/s3-request-presigner": "^3.1012",
+    "@aws-sdk/client-s3": "^3.1014",
+    "@aws-sdk/s3-request-presigner": "^3.1014",
     "@genkit-ai/google-genai": "^1.30",
     "@google-cloud/storage": "^7.19",
     "@toon-format/toon": "^2.1.0",
@@ -190,7 +190,7 @@
     "@types/mjml": "4.7",
     "@types/node": "25",
     "@types/nodemailer": "7.0",
-    "@types/pg": "8.18",
+    "@types/pg": "8.20",
     "@vitest/coverage-v8": "4.1",
     "@vitest/ui": "4.1",
     "concurrently": "9.2",
@@ -205,12 +205,5 @@
     "typescript-eslint": "8.57",
     "vite-tsconfig-paths": "6.1",
     "vitest": "4.1"
-  },
-  "overrides": {
-    "drizzle-kit": {
-      "@esbuild-kit/esm-loader": "^2.6",
-      "esbuild": "^0.25",
-      "esbuild-register": "^3.6"
-    }
   }
 }