@tstdl/base 0.93.192 → 0.93.194

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  import { memoize } from '../../utils/function/memoize.js';
2
- import { hasOwnProperty, objectEntries } from '../../utils/object/object.js';
2
+ import { objectEntries } from '../../utils/object/object.js';
3
3
  import { assertDefined, isArray, isDefined, isObject, isString } from '../../utils/type-guards.js';
4
4
  const whitespacePattern = /^\s*/;
5
+ const INDENT_SIZE = 4;
5
6
  // --- Factories ---
6
7
  function list(style, instructionOrItems, itemsOrNothing) {
7
8
  const instruction = isString(instructionOrItems) ? instructionOrItems : undefined;
@@ -20,10 +21,9 @@ export function unorderedList(instructionOrItems, itemsOrNothing) {
20
21
  }
21
22
  // --- Type Guards ---
22
23
  function isInstructionsList(obj) {
23
- return isObject(obj) && hasOwnProperty(obj, 'style') && hasOwnProperty(obj, 'items');
24
+ return isObject(obj) && ('style' in obj) && ('items' in obj);
24
25
  }
25
- // --- Formatter Logic ---
26
- const INDENT_SIZE = 4;
26
+ // --- Formatting Helpers ---
27
27
  function getPrefix(style, index, sectionDepth) {
28
28
  switch (style) {
29
29
  case 'ordered':
@@ -36,20 +36,23 @@ function getPrefix(style, index, sectionDepth) {
36
36
  return '';
37
37
  }
38
38
  }
39
+ function getIndent(context) {
40
+ return context.style == 'sections' ? '' : ' '.repeat(context.indentDepth * INDENT_SIZE);
41
+ }
39
42
  function dedent(text) {
40
43
  const lines = text.split('\n');
41
44
  if (lines.length <= 1) {
42
45
  return text.trim();
43
46
  }
44
47
  // Remove leading empty line if any (common in template literals)
45
- if (lines[0].trim().length === 0) {
48
+ if (lines[0].trim().length == 0) {
46
49
  lines.shift();
47
50
  }
48
51
  // Remove trailing empty line if any
49
- if (lines.length > 0 && lines.at(-1).trim().length === 0) {
52
+ if (lines.length > 0 && lines.at(-1).trim().length == 0) {
50
53
  lines.pop();
51
54
  }
52
- if (lines.length === 0) {
55
+ if (lines.length == 0) {
53
56
  return '';
54
57
  }
55
58
  // Find minimum indentation (excluding empty lines)
@@ -84,118 +87,103 @@ function formatWithHangingIndent(text, baseIndent, prefix) {
84
87
  ...lines.slice(1).map((line) => `${hangingSpacer}${line}`),
85
88
  ].join('\n');
86
89
  }
87
- /**
88
- * Main recursive formatter.
89
- */
90
- function processNode(node, context) {
91
- // 0. Handle Strings
92
- if (isString(node)) {
93
- const isSection = context.style == 'sections';
94
- const currentBaseIndent = ' '.repeat(isSection ? 0 : context.indentDepth * INDENT_SIZE);
95
- return formatWithHangingIndent(dedent(node), currentBaseIndent, '');
96
- }
97
- // 1. Unwrap InstructionsList (The Wrapper)
98
- // If the node is a wrapper, we adopt its style and instruction, then process its items.
99
- if (isInstructionsList(node)) {
100
- // Note: We don't print the "instruction" here (e.g., "Use Markdown").
101
- // If this Wrapper is a root node, the instruction is usually printed by the caller or ignored.
102
- // If this Wrapper is a value of a key, the key printer handles the instruction.
103
- // However, if we have a "Root Wrapper" with an instruction (rare in your example), handle it:
104
- const header = node.instruction ? `${dedent(node.instruction)}\n\n` : '';
105
- // Determine next context
106
- const nextContext = {
107
- indentDepth: context.indentDepth, // Wrappers don't indent themselves, their content does
90
+ function createNextContext(context, overrides = {}) {
91
+ return { ...context, isRoot: false, ...overrides };
92
+ }
93
+ // --- Node Processors ---
94
+ function processString(node, context, prefix = '') {
95
+ return formatWithHangingIndent(dedent(node), getIndent(context), prefix);
96
+ }
97
+ function processWrapper(node, context, prefix = '') {
98
+ if (isDefined(node.instruction)) {
99
+ if (context.isRoot == true) {
100
+ const header = `${dedent(node.instruction)}\n\n`;
101
+ return header + processNode(node.items, createNextContext(context, { style: node.style }));
102
+ }
103
+ const header = formatWithHangingIndent(dedent(node.instruction), getIndent(context), prefix);
104
+ const nextContext = createNextContext(context, {
105
+ indentDepth: context.indentDepth + 1,
108
106
  style: node.style,
109
- sectionDepth: node.style == 'sections' ? context.sectionDepth : context.sectionDepth, // Section depth increments internally
110
- };
111
- return header + processNode(node.items, nextContext);
107
+ sectionDepth: node.style == 'sections' ? context.sectionDepth : context.sectionDepth,
108
+ });
109
+ return `${header}\n${processNode(node.items, nextContext)}`;
112
110
  }
113
- // Common Constants for this level
111
+ return processNode(node.items, createNextContext(context, { style: node.style }));
112
+ }
113
+ function processArray(node, context) {
114
+ const separator = context.style == 'sections' ? '\n\n' : '\n';
115
+ return node.map((item, index) => {
116
+ const prefix = getPrefix(context.style, index, context.sectionDepth);
117
+ if (isString(item)) {
118
+ return processString(item, context, prefix);
119
+ }
120
+ if (isInstructionsList(item)) {
121
+ return processWrapper(item, context, prefix);
122
+ }
123
+ return processNode(item, createNextContext(context));
124
+ }).join(separator);
125
+ }
126
+ function processObject(node, context) {
114
127
  const isSection = context.style == 'sections';
115
- const currentBaseIndent = ' '.repeat(isSection ? 0 : context.indentDepth * INDENT_SIZE);
128
+ const baseIndent = getIndent(context);
116
129
  const separator = isSection ? '\n\n' : '\n';
117
- // 2. Handle Arrays
118
- if (isArray(node)) {
119
- return node.map((item, index) => {
120
- const prefix = getPrefix(context.style, index, context.sectionDepth);
121
- if (isString(item)) {
122
- return formatWithHangingIndent(dedent(item), currentBaseIndent, prefix);
123
- }
124
- return processNode(item, context);
125
- }).join(separator);
126
- }
127
- // 3. Handle Objects (Key-Value Maps)
128
130
  return objectEntries(node).map(([key, value], index) => {
129
131
  const prefix = getPrefix(context.style, index, context.sectionDepth);
130
- // Detect if the Value is a Wrapper (e.g. `Key: ordered(...)`)
131
- // This allows us to pull the wrapper's "instruction" up to the Key line.
132
132
  const isValueWrapper = isInstructionsList(value);
133
133
  const effectiveValue = isValueWrapper ? value.items : value;
134
134
  const instruction = (isValueWrapper && isDefined(value.instruction)) ? ` ${dedent(value.instruction)}` : '';
135
- const childStyle = isValueWrapper ? value.style : (isSection ? 'unordered' : 'unordered');
136
- // Formatting the Header Line (The Key)
135
+ const childStyle = isValueWrapper ? value.style : 'unordered';
137
136
  let headerLine = '';
138
137
  if (isSection) {
139
- // Header format: "# Key" or "# Key\n\nInstruction"
140
138
  const instructionPart = (instruction.length > 0) ? `\n\n${instruction.trim()}` : '';
141
- headerLine = `${currentBaseIndent}${prefix}${key}${instructionPart}`;
139
+ headerLine = `${baseIndent}${prefix}${key}${instructionPart}`;
142
140
  }
143
141
  else {
144
- // List format: "- **Key:**" or "- **Key:** Instruction"
145
- const keyPart = `**${key}:**`;
146
- headerLine = `${currentBaseIndent}${prefix}${keyPart}${instruction}`;
142
+ headerLine = `${baseIndent}${prefix}**${key}:**${instruction}`;
147
143
  }
148
- // Determine context for the children
149
- // If we are a Section, children reset indent to 0.
150
- // If we are a List, children indent + 1.
151
- const nextIndentDepth = isSection ? 0 : context.indentDepth + 1;
152
- // If the child acts as a section (Wrapper was `sections(...)`), increment H-level.
153
- const nextSectionDepth = (isValueWrapper && value.style == 'sections')
154
- ? context.sectionDepth + 1
155
- : context.sectionDepth;
156
- // Recurse
157
- // If the value is a simple string, we print it inline if possible, or block if it's long?
158
- // Your requirement: Strings in objects are usually descriptions.
144
+ const nextContext = createNextContext(context, {
145
+ indentDepth: isSection ? 0 : context.indentDepth + 1,
146
+ sectionDepth: (isValueWrapper && value.style == 'sections') ? context.sectionDepth + 1 : context.sectionDepth,
147
+ style: childStyle,
148
+ });
159
149
  if (isString(effectiveValue)) {
160
- // If it's a string, we treat it as content on the SAME line for lists (via hanging indent logic),
161
- // or a new paragraph for Sections.
162
150
  if (isSection) {
163
- // Section: Header \n\n Content
164
151
  return `${headerLine}\n\n${dedent(effectiveValue)}`;
165
152
  }
166
- // List: "- **Key:** Value"
167
- // We need to construct the full string to calculate hanging indent correctly.
168
- // headerLine already contains indentation + prefix + key.
169
- // We strip the indentation to feed it into the formatting helper effectively.
170
153
  const fullLine = `${headerLine} ${dedent(effectiveValue)}`.trim();
171
- return formatWithHangingIndent(fullLine, currentBaseIndent, '');
154
+ return formatWithHangingIndent(fullLine, baseIndent, '');
172
155
  }
173
- // If Value is Object/Array/Wrapper
174
- const body = processNode(effectiveValue, {
175
- indentDepth: nextIndentDepth,
176
- style: childStyle,
177
- sectionDepth: nextSectionDepth,
178
- });
156
+ const body = processNode(effectiveValue, nextContext);
179
157
  const bodySeparator = isSection ? '\n\n' : '\n';
180
- // Edge case: If it's a section, we constructed the header, now append body.
181
- // If it's a list, the header line serves as the parent item.
182
158
  return `${headerLine}${bodySeparator}${body}`;
183
159
  }).join(separator);
184
160
  }
161
+ /**
162
+ * Main recursive formatter.
163
+ */
164
+ function processNode(node, context) {
165
+ if (isString(node)) {
166
+ return processString(node, context);
167
+ }
168
+ if (isInstructionsList(node)) {
169
+ return processWrapper(node, context);
170
+ }
171
+ if (isArray(node)) {
172
+ return processArray(node, context);
173
+ }
174
+ return processObject(node, context);
175
+ }
185
176
  function _formatInstructions(node, options = {}) {
186
- // Heuristic: If passing a raw object, assume it's a Root Section unless specified otherwise.
187
- // If passing a Wrapper, the Wrapper dictates the style.
188
177
  const initialStyle = isInstructionsList(node)
189
178
  ? node.style
190
- : isArray(node)
179
+ : isArray(node) || isString(node)
191
180
  ? 'unordered'
192
- : isString(node)
193
- ? 'unordered'
194
- : 'sections';
181
+ : 'sections';
195
182
  return processNode(node, {
196
183
  indentDepth: options.initialDepth ?? 0,
197
184
  sectionDepth: 1,
198
185
  style: initialStyle,
186
+ isRoot: true,
199
187
  }).trim();
200
188
  }
201
189
  const formatInstructionsMemoized = memoize(_formatInstructions, { weak: true });
@@ -16,6 +16,8 @@ export declare class PromptBuilder {
16
16
  setOutputSchema<Input = ObjectLiteral, Output = ObjectLiteral>(schema: SchemaTestable<Output>, examples?: FewShotExample<Input, Output>[]): this;
17
17
  setSystemOutputExamples<Input = ObjectLiteral, Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): this;
18
18
  setOutputExamples<Input = ObjectLiteral, Output = ObjectLiteral>(examples: FewShotExample<Input, Output>[]): this;
19
+ enableSystemOutputRules(enabled?: boolean): this;
20
+ enableOutputRules(enabled?: boolean): this;
19
21
  setSystemInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
20
22
  setInstructionsOverride(override: ((instructions: Instructions) => Instructions | string) | undefined): this;
21
23
  setLanguage(language: string): this;
@@ -14,8 +14,10 @@ export class PromptBuilder {
14
14
  #task;
15
15
  #systemOutputSchema;
16
16
  #systemOutputExamples;
17
+ #systemOutputRules = false;
17
18
  #outputSchema;
18
19
  #outputExamples;
20
+ #outputRules = false;
19
21
  #systemInstructions = {};
20
22
  #instructions = {};
21
23
  #systemContextParts = {};
@@ -57,6 +59,14 @@ export class PromptBuilder {
57
59
  this.#outputExamples = examples;
58
60
  return this;
59
61
  }
62
+ enableSystemOutputRules(enabled = true) {
63
+ this.#systemOutputRules = enabled;
64
+ return this;
65
+ }
66
+ enableOutputRules(enabled = true) {
67
+ this.#outputRules = enabled;
68
+ return this;
69
+ }
60
70
  setSystemInstructionsOverride(override) {
61
71
  this.#systemInstructionsOverride = override;
62
72
  return this;
@@ -108,6 +118,7 @@ export class PromptBuilder {
108
118
  instructions: this.#systemInstructions,
109
119
  outputSchema: this.#systemOutputSchema,
110
120
  outputExamples: this.#systemOutputExamples,
121
+ outputRules: this.#systemOutputRules,
111
122
  task: this.#systemTask,
112
123
  media: this.#systemMedia,
113
124
  language: this.#language,
@@ -121,6 +132,7 @@ export class PromptBuilder {
121
132
  instructions: this.#instructions,
122
133
  outputSchema: this.#outputSchema,
123
134
  outputExamples: this.#outputExamples,
135
+ outputRules: this.#outputRules,
124
136
  task: this.#task,
125
137
  media: this.#media,
126
138
  language: this.#language,
@@ -151,15 +163,20 @@ function buildPrompt(data) {
151
163
  if (isDefined(data.instructions) && (objectKeys(data.instructions).length > 0)) {
152
164
  instructions['**Instructions**'] = data.instructions;
153
165
  }
154
- if (isDefined(data.outputSchema)) {
155
- const schema = convertToOpenApiSchema(data.outputSchema);
156
- const schemaJson = JSON.stringify(schema, null, 2);
157
- instructions['**Output Schema**'] = `\`\`\`json\n${schemaJson}\n\`\`\``;
158
- instructions['**Output Schema Instructions**'] = unorderedList({
159
- 'Schema Compliance': 'Generate valid JSON that strictly matches the provided schema.',
160
- 'Nullable fields with missing data': 'Must be set to literal `null`, not the string "null".',
161
- 'Optional fields with missing data': 'Omit the key entirely (sparse JSON).',
162
- });
166
+ const hasOutputSchema = isDefined(data.outputSchema);
167
+ if (hasOutputSchema || data.outputRules || isDefined(data.outputExamples)) {
168
+ if (hasOutputSchema) {
169
+ const schema = convertToOpenApiSchema(data.outputSchema);
170
+ const schemaJson = JSON.stringify(schema, null, 2);
171
+ instructions['**Output Schema**'] = `\`\`\`json\n${schemaJson}\n\`\`\``;
172
+ }
173
+ if (data.outputRules) {
174
+ instructions['**Output Schema Instructions**'] = unorderedList({
175
+ 'Schema Compliance': 'Generate valid JSON that strictly matches the provided schema.',
176
+ 'Nullable fields with missing data': 'Must be set to literal `null`, not the string "null".',
177
+ 'Optional fields with missing data': 'Omit the key entirely (sparse JSON).',
178
+ });
179
+ }
163
180
  if (isDefined(data.outputExamples) && (data.outputExamples.length > 0)) {
164
181
  instructions['**Output Examples**'] = fewShotPrompt(data.outputExamples);
165
182
  }
@@ -55,5 +55,5 @@ function formatExampleValue(value) {
55
55
  if (isString(value)) {
56
56
  return value;
57
57
  }
58
- return JSON.stringify(value);
58
+ return JSON.stringify(value, null, 2);
59
59
  }
@@ -16,7 +16,7 @@ export declare const dataExtractionFields: {
16
16
  Tags: string;
17
17
  Date: string;
18
18
  };
19
- export declare function createDataExtractionPrompt(schema: SchemaTestable): PromptBuilder;
19
+ export declare function createDataExtractionPrompt(): PromptBuilder;
20
20
  export declare const assignCollectionSchema: import("../../../schema/index.js").ObjectSchema<{
21
21
  collectionIds: string[];
22
22
  }>;
@@ -8,7 +8,7 @@ export function createContentExtractionPrompt() {
8
8
  return promptBuilder()
9
9
  .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
10
10
  .setTask('Transcribe the attached document into Markdown following the instructions.')
11
- .setOutputSchema(contentExtractionSchema)
11
+ .enableOutputRules()
12
12
  .addInstructions({
13
13
  'Objective': 'Convert the provided document into semantically structured, clean Markdown.',
14
14
  'Critical Constraints': orderedList([
@@ -56,12 +56,11 @@ export function createClassifySchema(validTypes) {
56
56
  return object({ documentType: enumeration(validTypes) });
57
57
  }
58
58
  export function createClassifyPrompt(validTypes) {
59
- const schema = createClassifySchema(validTypes);
60
59
  return promptBuilder()
61
60
  .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
62
61
  .setRole('Document Taxonomy Specialist')
63
62
  .setTask('Determine the single most accurate document type from the provided list based on the document.')
64
- .setOutputSchema(schema, CLASSIFY_FEW_SHOT)
63
+ .setOutputExamples(CLASSIFY_FEW_SHOT)
65
64
  .addInstructions({
66
65
  'Analysis Strategy': orderedList([
67
66
  'Scan the header and title for explicit document type names (e.g., "Invoice", "Contract", "Bill of Lading").',
@@ -85,12 +84,12 @@ export const dataExtractionFields = {
85
84
  Tags: 'Generate 3-5 keywords for categorization. Only use important information missing in title, subtitle and properties. Prioritize reusing of existing tags where possible.',
86
85
  Date: 'Identify the *creation* date of the document. If multiple dates exist, prioritize the primary date (like invoice or letter Date). Return as object with year, month and day.',
87
86
  };
88
- export function createDataExtractionPrompt(schema) {
87
+ export function createDataExtractionPrompt() {
89
88
  return promptBuilder()
90
89
  .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
91
90
  .setRole('Structured Data Extraction Analyst')
92
91
  .setTask('Analyze the document and extract metadata and specific properties defined in the output schema following the instructions.')
93
- .setOutputSchema(schema)
92
+ .enableOutputRules()
94
93
  .addInstructions({
95
94
  'Field Specific Instructions': dataExtractionFields,
96
95
  'Property Extraction': orderedList([
@@ -119,7 +118,7 @@ export function createAssignCollectionPrompt() {
119
118
  .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
120
119
  .setRole('Digital Filing Assistant')
121
120
  .setTask('Select the most appropriate collections for this document from the provided list following the instructions.')
122
- .setOutputSchema(assignCollectionSchema, ASSIGN_COLLECTION_FEW_SHOT)
121
+ .setOutputExamples(ASSIGN_COLLECTION_FEW_SHOT)
123
122
  .addInstructions({
124
123
  'Matching Logic': orderedList([
125
124
  'Direct Key-Match: Look for exact keyword matches between the collection name and the document metadata.',
@@ -146,7 +145,7 @@ export function createAssignRequestPrompt() {
146
145
  .setSystemRole(DOCUMENT_MANAGEMENT_SYSTEM_ROLE)
147
146
  .setRole('Workflow Routing Agent')
148
147
  .setTask('Evaluate the document against the list of open requests and find the best match following the instructions.')
149
- .setOutputSchema(assignRequestSchema, ASSIGN_REQUEST_FEW_SHOT)
148
+ .setOutputExamples(ASSIGN_REQUEST_FEW_SHOT)
150
149
  .addInstructions({
151
150
  'Matching Rules': orderedList({
152
151
  'Hard Constraints': 'If a Request has a "Comment" or specific property requirement, the document MUST fulfill it strictly (e.g., "Need bill from July" must match date).',
@@ -134,7 +134,7 @@ let DocumentManagementAiService = DocumentManagementAiService_1 = class Document
134
134
  const override = (isNotNull(property.key) ? aiConfig.extraction?.properties?.[property.key] : undefined) ?? aiConfig.extraction?.properties?.[property.label];
135
135
  return isDefined(override) ? mergeInstructions(`Extract value for property "${property.label}".`, [override]) : undefined;
136
136
  }).filter(isDefined);
137
- const promptBuilder = createDataExtractionPrompt(generationSchema);
137
+ const promptBuilder = createDataExtractionPrompt();
138
138
  promptBuilder.addInstructions({
139
139
  'Field Specific Instructions': mergedFieldInstructions,
140
140
  'Additional Property Extraction': isDefined(mergedPropertyInstructions) && (mergedPropertyInstructions.length > 0)
@@ -26,7 +26,7 @@ export class AiValidationExecutor extends DocumentValidationExecutor {
26
26
  .setSystemTask('Validate a document based on the provided validation instructions and document content.')
27
27
  .setTask('Validate the document based on the provided system and validation instructions and the document content.')
28
28
  .addInstructions({ 'Validation Instructions': validationInstructions })
29
- .setOutputSchema(this.schema)
29
+ .enableOutputRules()
30
30
  .addMedia(documentContent, context.document.mimeType);
31
31
  if (isDefined(language)) {
32
32
  builder.setLanguage(language);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tstdl/base",
3
- "version": "0.93.192",
3
+ "version": "0.93.194",
4
4
  "author": "Patrick Hein",
5
5
  "publishConfig": {
6
6
  "access": "public"