@ryanfw/prompt-orchestration-pipeline 0.16.1 → 0.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,268 @@
1
+ # POP Pipeline Task Guide
2
+
3
+ > Unified reference for creating valid pipeline tasks. Only exported stage functions with exact names below are called by the pipeline runner.
4
+
5
+ ---
6
+
7
+ ## Critical Rules
8
+
9
+ ### Valid Stage Names (Exhaustive List)
10
+
11
+ The pipeline runner **ONLY** calls these 11 exported functions:
12
+
13
+ | Stage | Required | Purpose |
14
+ |-------|----------|---------|
15
+ | `ingestion` | Yes | Load input from `data.seed` |
16
+ | `preProcessing` | No | Normalize/enrich data |
17
+ | `promptTemplating` | Yes | Build LLM prompts |
18
+ | `inference` | Yes | Call LLM |
19
+ | `parsing` | No | Parse LLM output |
20
+ | `validateStructure` | No | JSON schema validation |
21
+ | `validateQuality` | No | Domain-specific checks |
22
+ | `critique` | No | Analyze failures |
23
+ | `refine` | No | Produce improved output |
24
+ | `finalValidation` | No | Final validation gate |
25
+ | `integration` | No | Persist results |
26
+
27
+ ### Required Contract
28
+
29
+ Every stage function must:
30
+ 1. Be exported: `export const stageName = ...`
31
+ 2. Return: `{ output: any, flags: object }`
32
+
33
+ ### Anti-Patterns (Invalid)
34
+
35
+ ```js
36
+ // ❌ WRONG: Helper functions are NEVER called by the pipeline directly (they only run if a valid stage calls them)
37
+ function formatPrompt(topic) { return `...${topic}...`; }
38
+
39
+ // ❌ WRONG: Non-standard export names are NEVER called
40
+ export const myCustomStage = () => ({ output: {}, flags: {} });
41
+
42
+ // ❌ WRONG: Must return { output, flags } object
43
+ export const ingestion = () => "just a string";
44
+ ```
45
+
46
+ ---
47
+
48
+ ## Minimal Working Example
49
+
50
+ A simple 3-stage task (most tasks only need ingestion → promptTemplating → inference):
51
+
52
+ ```js
53
+ export const ingestion = ({
54
+ data: { seed: { data: { topic } } },
55
+ flags,
56
+ }) => ({
57
+ output: { topic },
58
+ flags,
59
+ });
60
+
61
+ export const promptTemplating = ({
62
+ data: { ingestion: { topic } },
63
+ flags,
64
+ }) => ({
65
+ output: {
66
+ system: "You are a helpful assistant. Respond in JSON.",
67
+ prompt: `Write about: ${topic}\n\nRespond as: { "content": "..." }`,
68
+ },
69
+ flags,
70
+ });
71
+
72
+ export const inference = async ({
73
+ io,
74
+ llm: { deepseek },
75
+ data: { promptTemplating: { system, prompt } },
76
+ flags,
77
+ }) => {
78
+ const response = await deepseek.chat({
79
+ messages: [
80
+ { role: "system", content: system },
81
+ { role: "user", content: prompt },
82
+ ],
83
+ });
84
+
85
+ const parsed = typeof response.content === "string"
86
+ ? JSON.parse(response.content)
87
+ : response.content;
88
+
89
+ await io.writeArtifact("output.json", JSON.stringify(parsed, null, 2));
90
+ return { output: {}, flags };
91
+ };
92
+ ```
93
+
94
+ ---
95
+
96
+ ## Stage Function Signatures
97
+
98
+ ### ingestion
99
+ ```js
100
+ export const ingestion = ({ data: { seed }, flags }) => ({
101
+ output: { /* extracted fields */ },
102
+ flags,
103
+ });
104
+ ```
105
+
106
+ ### promptTemplating
107
+ ```js
108
+ export const promptTemplating = ({ data: { ingestion }, flags }) => ({
109
+ output: { system: "...", prompt: "..." },
110
+ flags,
111
+ });
112
+ ```
113
+
114
+ ### inference
115
+ **Rule**: Read prompts from `data.promptTemplating`, not from other sources.
116
+ ```js
117
+ export const inference = async ({
118
+ io,
119
+ llm: { provider },
120
+ data: { promptTemplating: { system, prompt } },
121
+ flags,
122
+ }) => {
123
+ const response = await provider.chat({ messages: [...] });
124
+ const parsed = typeof response.content === "string" ? JSON.parse(response.content) : response.content;
125
+ await io.writeArtifact("output.json", JSON.stringify(parsed, null, 2));
126
+ return { output: {}, flags };
127
+ };
128
+ ```
129
+
130
+ ### validateStructure
131
+ ```js
132
+ export const validateStructure = async ({
133
+ io,
134
+ flags,
135
+ validators: { validateWithSchema },
136
+ }) => {
137
+ const content = await io.readArtifact("output.json");
138
+ // Provide your JSON schema here, for example the `outputSchema` from the "JSON Schema Export" section.
139
+ const mySchema = /* your JSON schema object */ {};
140
+ const result = validateWithSchema(mySchema, content);
141
+ if (!result.valid) {
142
+ return { output: {}, flags: { ...flags, validationFailed: true } };
143
+ }
144
+ return { output: {}, flags };
145
+ };
146
+ ```
147
+
148
+ ---
149
+
150
+ ## IO API
151
+
152
+ Available on the `io` object passed to each stage.
153
+
154
+ | Function | Parameters | Returns | Description |
155
+ |----------|------------|---------|-------------|
156
+ | `io.writeArtifact` | `name, content, { mode? }` | `Promise<string>` | Persist output files |
157
+ | `io.writeLog` | `name, content, { mode? }` | `Promise<string>` | Debug/progress logs |
158
+ | `io.writeTmp` | `name, content, { mode? }` | `Promise<string>` | Scratch data |
159
+ | `io.readArtifact` | `name` | `Promise<string>` | Load artifact |
160
+ | `io.readLog` | `name` | `Promise<string>` | Read log |
161
+ | `io.readTmp` | `name` | `Promise<string>` | Read temp file |
162
+ | `io.getTaskDir` | — | `string` | Current task directory |
163
+ | `io.getDB` | `options?` | `Database` | SQLite for job (WAL mode) |
164
+ | `io.runBatch` | `{ jobs, processor, ... }` | `Promise<{ completed, failed }>` | Concurrent batch processing |
165
+
166
+ **When to use artifacts vs stage output**: Use `io.writeArtifact` for large outputs, model-native text, values needed by multiple stages, or for auditability. Use stage `output` for small structured values needed immediately by the next stage.
167
+
168
+ ---
169
+
170
+ ## LLM API
171
+
172
+ Available on the `llm` object. Call a provider with a `messages` array:
173
+
174
+ ```js
175
+ const response = await llm.deepseek.chat({
176
+ messages: [
177
+ { role: "system", content: "..." },
178
+ { role: "user", content: "..." },
179
+ ],
180
+ temperature: 0.7, // optional: 0-2
181
+ maxTokens: 1000, // optional
182
+ responseFormat: "json" // optional
183
+ });
184
+ // Returns: { content: any, usage?: object }
185
+ ```
186
+
187
+ ### Available Providers
188
+ - `llm.deepseek.chat()`
189
+ - `llm.anthropic.sonnet45()`
190
+ - `llm.openai.gpt5Mini()`
191
+ - `llm.gemini.flash25()`
192
+
193
+ ---
194
+
195
+ ## Validation API
196
+
197
+ Available via `validators` object in stages that need schema validation.
198
+
199
+ ```js
200
+ validateWithSchema(schema, data) → { valid: boolean, errors?: AjvError[] }
201
+ ```
202
+
203
+ - `data` may be a string or an object (strings are parsed as JSON)
204
+ - Uses Ajv with `{ allErrors: true, strict: false }`
205
+
206
+ ---
207
+
208
+ ## JSON Schema Export
209
+
210
+ Tasks export schemas to validate their output:
211
+
212
+ ```js
213
+ export const outputSchema = {
214
+ $schema: "http://json-schema.org/draft-07/schema#",
215
+ type: "object",
216
+ required: ["content"],
217
+ properties: {
218
+ content: { type: "string", minLength: 1 }
219
+ }
220
+ };
221
+ ```
222
+
223
+ ---
224
+
225
+ ## Seed File Format
226
+
227
+ Pipeline jobs start from a seed file in `pending/`:
228
+
229
+ ```json
230
+ {
231
+ "name": "unique-job-id",
232
+ "pipeline": "pipeline-slug",
233
+ "data": { /* context for tasks */ }
234
+ }
235
+ ```
236
+
237
+ ---
238
+
239
+ ## Context Object Reference
240
+
241
+ Each stage receives:
242
+
243
+ ```js
244
+ {
245
+ io, // File I/O (may be null)
246
+ llm, // LLM client
247
+ validators, // { validateWithSchema }
248
+ flags, // Control flags
249
+ meta: { taskName, workDir, jobId },
250
+ data: {
251
+ seed, // Initial payload
252
+ ingestion, // Output from ingestion
253
+ preProcessing, // Output from preProcessing
254
+ promptTemplating, // Output from promptTemplating
255
+ // ... other stage outputs
256
+ },
257
+ output, // Previous non-validation stage output
258
+ }
259
+ ```
260
+
261
+ ---
262
+
263
+ ## Summary
264
+
265
+ 1. Export only valid stage names: `ingestion`, `preProcessing`, `promptTemplating`, `inference`, `parsing`, `validateStructure`, `validateQuality`, `critique`, `refine`, `finalValidation`, `integration`
266
+ 2. Return `{ output, flags }` from every stage
267
+ 3. Custom helper functions are valid JavaScript, but the pipeline never calls them directly — they only run when invoked from within a valid stage
268
+ 4. Most simple tasks need only: `ingestion` → `promptTemplating` → `inference`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ryanfw/prompt-orchestration-pipeline",
3
- "version": "0.16.1",
3
+ "version": "0.16.3",
4
4
  "description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
5
5
  "type": "module",
6
6
  "main": "src/ui/server.js",
@@ -9,6 +9,7 @@
9
9
  },
10
10
  "files": [
11
11
  "src",
12
+ "docs/pop-task-guide.md",
12
13
  "README.md",
13
14
  "LICENSE"
14
15
  ],
@@ -35,7 +35,7 @@ export async function sleep(ms) {
35
35
 
36
36
  /**
37
37
  * Strip markdown code fences from text unconditionally.
38
- * Handles ```json, ```JSON, and plain ``` with or without newlines.
38
+ * Handles any language identifier (```json, ```javascript, etc.) or plain ```.
39
39
  * @param {string} text - The text to strip fences from
40
40
  * @returns {string} The cleaned text, or original if not a string
41
41
  */
@@ -43,8 +43,8 @@ export function stripMarkdownFences(text) {
43
43
  if (typeof text !== "string") return text;
44
44
  const trimmed = text.trim();
45
45
  if (trimmed.startsWith("```")) {
46
- // Remove opening fence (```json, ```JSON, or just ```)
47
- let cleaned = trimmed.replace(/^```(?:json|JSON)?\s*\n?/, "");
46
+ // Remove opening fence with any language identifier
47
+ let cleaned = trimmed.replace(/^```[a-zA-Z]*\s*\n?/, "");
48
48
  // Remove closing fence
49
49
  cleaned = cleaned.replace(/\n?```\s*$/, "");
50
50
  return cleaned.trim();
@@ -54,6 +54,10 @@ export async function deduceArtifactSchema(taskCode, artifact) {
54
54
  }
55
55
 
56
56
  // Validate the generated example against the generated schema
57
+ // Remove any existing schema with the same $id to avoid "schema already exists" error
58
+ if (schema.$id && ajv.getSchema(schema.$id)) {
59
+ ajv.removeSchema(schema.$id);
60
+ }
57
61
  const validate = ajv.compile(schema);
58
62
  if (!validate(example)) {
59
63
  throw new Error(
@@ -1,49 +1,116 @@
1
1
  import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { fileURLToPath } from "node:url";
2
4
  import { streamSSE } from "../lib/sse.js";
3
5
  import { createHighLevelLLM } from "../../llm/index.js";
6
+ import { parseMentions } from "../lib/mention-parser.js";
7
+ import {
8
+ loadSchemaContext,
9
+ buildSchemaPromptSection,
10
+ } from "../lib/schema-loader.js";
11
+ import { createLogger } from "../../core/logger.js";
4
12
 
5
- export async function handleTaskPlan(req, res) {
6
- console.log("[task-creation-endpoint] Request received");
13
+ const logger = createLogger("TaskCreationEndpoint");
7
14
 
8
- const { messages, pipelineSlug } = req.body;
15
+ // Resolve path relative to this module for NPM distribution
16
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
17
+ const guidelinesPath = path.resolve(__dirname, "../../../docs/pop-task-guide.md");
9
18
 
10
- console.log("[task-creation-endpoint] Request details:", {
11
- hasMessages: !!messages,
12
- messageCount: Array.isArray(messages) ? messages.length : 0,
13
- pipelineSlug,
14
- bodyKeys: Object.keys(req.body),
15
- });
19
+ export async function handleTaskPlan(req, res) {
20
+ const { messages, pipelineSlug } = req.body;
16
21
 
17
22
  // Validate input
18
23
  if (!Array.isArray(messages)) {
19
- console.error(
20
- "[task-creation-endpoint] Validation failed: messages is not an array"
21
- );
22
24
  res.status(400).json({ error: "messages must be an array" });
23
25
  return;
24
26
  }
25
27
 
26
- console.log(
27
- "[task-creation-endpoint] Loading guidelines from docs/pipeline-task-guidelines.md..."
28
- );
29
-
30
28
  // Load guidelines - let it throw if missing
31
- const guidelinesPath = "docs/pipeline-task-guidelines.md";
32
29
  const guidelines = fs.readFileSync(guidelinesPath, "utf-8");
33
30
 
34
- console.log(
35
- "[task-creation-endpoint] Guidelines loaded, length:",
36
- guidelines.length
37
- );
31
+ // Parse @mentions and load schema contexts for enrichment
32
+ const mentionedFiles = parseMentions(messages);
33
+ const schemaContexts = [];
34
+ // Load schema contexts sequentially to avoid unbounded concurrent file I/O
35
+ for (const fileName of mentionedFiles) {
36
+ // eslint-disable-next-line no-await-in-loop
37
+ const context = await loadSchemaContext(pipelineSlug, fileName);
38
+ if (context) {
39
+ schemaContexts.push(context);
40
+ }
41
+ }
42
+ const schemaEnrichment = buildSchemaPromptSection(schemaContexts);
38
43
 
39
44
  // Build LLM messages array
40
- const systemPrompt = `You are a pipeline task assistant. Help users create task definitions following these guidelines:
45
+ const systemPrompt = `You are a pipeline task assistant. You help users understand the POP (Prompt Orchestration Pipeline) system and create task definitions.
46
+
47
+ ## How to Answer Questions
48
+
49
+ When users ask questions, identify which topic area applies and reference the relevant section of knowledge below:
50
+
51
+ - **LLM/Provider questions** → See "Available LLM Providers" section
52
+ - **Stage/Function questions** → See "Valid Stage Names" and "Stage Function Signatures" sections
53
+ - **IO/Database questions** → See "IO API" section
54
+ - **Validation questions** → See "Validation API" and "JSON Schema Export" sections
55
+ - **Task creation requests** → Use all sections to build a complete task
56
+
57
+ Be concise and direct. Use code examples when helpful. Reference specific API signatures.
58
+
59
+ ---
60
+
61
+ # KNOWLEDGE BASE
41
62
 
42
63
  ${guidelines}
64
+ ${schemaEnrichment ? `\n${schemaEnrichment}\n` : ""}
65
+
66
+ ---
67
+
68
+ ## Quick Reference: Common Questions
69
+
70
+ **Q: What LLM models/providers are available?**
71
+ Available providers via the \`llm\` object:
72
+ - \`llm.deepseek.chat()\` - DeepSeek model
73
+ - \`llm.anthropic.sonnet45()\` - Anthropic Claude Sonnet 4.5
74
+ - \`llm.openai.gpt5Mini()\` - OpenAI GPT-5 Mini
75
+ - \`llm.gemini.flash25()\` - Google Gemini Flash 2.5
76
+
77
+ **Q: What functions/stages do I need to define?**
78
+ Minimum required: \`ingestion\`, \`promptTemplating\`, \`inference\`
79
+ Optional: \`preProcessing\`, \`parsing\`, \`validateStructure\`, \`validateQuality\`, \`critique\`, \`refine\`, \`finalValidation\`, \`integration\`
80
+
81
+ **Q: How do I use the database?**
82
+ Use \`io.getDB()\` to get a SQLite database instance (WAL mode):
83
+ \`\`\`js
84
+ const db = io.getDB();
85
+ db.exec('CREATE TABLE IF NOT EXISTS results (id INTEGER PRIMARY KEY, data TEXT)');
86
+ db.prepare('INSERT INTO results (data) VALUES (?)').run(JSON.stringify(myData));
87
+ \`\`\`
88
+
89
+ **Q: How do I read/write files?**
90
+ Use the \`io\` object:
91
+ - \`io.writeArtifact(name, content)\` - Persist output files
92
+ - \`io.readArtifact(name)\` - Load artifact
93
+ - \`io.writeTmp(name, content)\` - Scratch data
94
+ - \`io.writeLog(name, content)\` - Debug/progress logs
95
+
96
+ ---
97
+
98
+ ## Task Proposal Guidelines
43
99
 
44
100
  Provide complete, working code. Use markdown code blocks.
45
101
 
46
- When you have completed a task definition that the user wants to create, wrap it in this format:
102
+ ONLY use the [TASK_PROPOSAL] wrapper when ALL of these conditions are met:
103
+ 1. The user has explicitly requested you create/build/write a task for them
104
+ 2. You have a complete, production-ready task definition (not an example or illustration)
105
+ 3. The user has confirmed their requirements or iterated to a final version
106
+
107
+ DO NOT use [TASK_PROPOSAL] for:
108
+ - Answering questions about capabilities or how tasks work
109
+ - Showing illustrative examples or code snippets
110
+ - Explaining concepts with sample code
111
+ - Incomplete or draft task definitions still being discussed
112
+
113
+ When you DO output a [TASK_PROPOSAL], use this format:
47
114
  [TASK_PROPOSAL]
48
115
  FILENAME: <filename.js>
49
116
  TASKNAME: <task-name>
@@ -55,21 +122,13 @@ CODE:
55
122
 
56
123
  const llmMessages = [{ role: "system", content: systemPrompt }, ...messages];
57
124
 
58
- console.log("[task-creation-endpoint] LLM messages array created:", {
59
- totalMessages: llmMessages.length,
60
- systemPromptLength: systemPrompt.length,
61
- });
62
-
63
125
  // Create SSE stream
64
- console.log("[task-creation-endpoint] Creating SSE stream...");
65
126
  const sse = streamSSE(res);
66
127
 
67
128
  try {
68
- console.log("[task-creation-endpoint] Creating LLM instance...");
69
129
  // Get LLM instance (uses default provider from config)
70
130
  const llm = createHighLevelLLM();
71
131
 
72
- console.log("[task-creation-endpoint] Calling LLM chat with streaming...");
73
132
  // Call LLM with streaming enabled
74
133
  const response = await llm.chat({
75
134
  messages: llmMessages,
@@ -77,38 +136,20 @@ CODE:
77
136
  stream: true,
78
137
  });
79
138
 
80
- console.log("[task-creation-endpoint] LLM response received:", {
81
- isStream: typeof response[Symbol.asyncIterator] !== "undefined",
82
- });
83
-
84
139
  // Stream is an async generator
85
- let chunkCount = 0;
86
140
  for await (const chunk of response) {
87
141
  if (chunk?.content) {
88
142
  sse.send("chunk", { content: chunk.content });
89
- chunkCount++;
90
143
  }
91
144
  }
92
145
 
93
- console.log("[task-creation-endpoint] Sent", chunkCount, "chunks via SSE");
94
-
95
146
  // Send done event
96
- console.log("[task-creation-endpoint] Sending 'done' event...");
97
147
  sse.send("done", {});
98
- console.log("[task-creation-endpoint] Ending SSE stream...");
99
148
  sse.end();
100
- console.log("[task-creation-endpoint] Request completed successfully");
101
149
  } catch (error) {
102
- console.error("[task-creation-endpoint] Error occurred:", {
103
- message: error.message,
104
- stack: error.stack,
105
- name: error.name,
106
- });
150
+ logger.error("LLM streaming failed", error);
107
151
  // Send error event
108
152
  sse.send("error", { message: error.message });
109
- console.log(
110
- "[task-creation-endpoint] Error sent via SSE, ending stream..."
111
- );
112
153
  sse.end();
113
154
  }
114
- }
155
+ }
@@ -2,6 +2,7 @@ import path from "node:path";
2
2
  import { promises as fs } from "node:fs";
3
3
  import { getConfig } from "../../core/config.js";
4
4
  import { sendJson } from "../utils/http-utils.js";
5
+ import { reviewAndCorrectTask } from "../lib/task-reviewer.js";
5
6
 
6
7
  /**
7
8
  * Handle task creation requests
@@ -52,7 +53,24 @@ export async function handleTaskSave(req, res) {
52
53
  if (!taskFilePath.startsWith(tasksDir)) {
53
54
  return sendJson(res, 400, { error: "Invalid filename" });
54
55
  }
55
- await fs.writeFile(taskFilePath, code, "utf8");
56
+
57
+ // Self-correct code before saving
58
+ let finalCode = code;
59
+ try {
60
+ const guidelinesPath = path.join(
61
+ rootDir,
62
+ "docs/pipeline-task-guidelines.md"
63
+ );
64
+ const guidelines = await fs.readFile(guidelinesPath, "utf8");
65
+ finalCode = await reviewAndCorrectTask(code, guidelines);
66
+ } catch (reviewError) {
67
+ console.warn(
68
+ "Task review failed, using original code:",
69
+ reviewError.message
70
+ );
71
+ }
72
+
73
+ await fs.writeFile(taskFilePath, finalCode, "utf8");
56
74
 
57
75
  // Update index.js to export new task
58
76
  const indexPath = taskRegistryPath;
@@ -0,0 +1,24 @@
1
/**
 * Parse @[display](id) mentions from chat messages.
 * Used to extract referenced artifact files for schema enrichment.
 */

// Capture group 1 is the display text, capture group 2 is the id inside (...).
const MENTION_REGEX = /@\[([^\]]+)\]\(([^)]+)\)/g;

/**
 * Extract unique filenames from @mentions in messages.
 *
 * Entries that are null/undefined, or whose `content` is not a string
 * (missing, empty, or a non-text payload), are skipped instead of throwing,
 * so a single malformed message cannot fail the whole parse.
 *
 * @param {Array<{ role: string, content: string }>} messages
 * @returns {string[]} Array of unique filenames, in order of first appearance
 */
export function parseMentions(messages) {
  const filenames = new Set();

  for (const msg of messages) {
    const content = msg?.content;
    // Only string content can be scanned with matchAll.
    if (typeof content !== "string" || content === "") continue;

    for (const match of content.matchAll(MENTION_REGEX)) {
      // match[2] is the `(id)` part of the mention.
      filenames.add(match[2]);
    }
  }

  return [...filenames];
}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Schema loader utility for task creation prompt enrichment.
3
+ * Loads JSON Schema, sample data, and metadata for referenced artifact files.
4
+ */
5
+
6
+ import { promises as fs } from "node:fs";
7
+ import path from "node:path";
8
+ import { getPipelineConfig } from "../../core/config.js";
9
+
10
/**
 * Load schema context for a referenced artifact file.
 *
 * Looks in the `schemas` directory next to the pipeline's JSON file for
 * `<base>.schema.json`, `<base>.sample.json` and `<base>.meta.json`, where
 * `<base>` is `fileName` without its directory and extension. The three files
 * are independent, so they are read concurrently.
 *
 * @param {string} pipelineSlug - Pipeline identifier
 * @param {string} fileName - Artifact filename (e.g., "analysis-output.json")
 * @returns {Promise<{ fileName: string, schema: object, sample: object, meta?: object } | null>}
 *   The loaded context, or null when the pipeline lookup fails or the schema or
 *   sample file is missing or not valid JSON. `meta` is undefined when the meta
 *   file is missing or invalid.
 */
export async function loadSchemaContext(pipelineSlug, fileName) {
  try {
    const { pipelineJsonPath } = getPipelineConfig(pipelineSlug);
    const schemasDir = path.join(path.dirname(pipelineJsonPath), "schemas");
    const baseName = path.parse(fileName).name;

    // Read and parse `<baseName>.<kind>.json`; rejects on a missing file or bad JSON.
    const readJson = async (kind) => {
      const filePath = path.join(schemasDir, `${baseName}.${kind}.json`);
      return JSON.parse(await fs.readFile(filePath, "utf8"));
    };

    const [schema, sample, meta] = await Promise.all([
      // Schema and sample are required: a failure here rejects the whole load.
      readJson("schema"),
      readJson("sample"),
      // Meta is optional: a missing or invalid file just leaves it undefined.
      readJson("meta").catch(() => undefined),
    ]);

    return { fileName, schema, sample, meta };
  } catch {
    // Any error (pipeline not found, file missing, JSON parse error) -> return null
    return null;
  }
}
50
+
51
/**
 * Render loaded schema contexts as a markdown section for the system prompt.
 *
 * Each context becomes a `### @<fileName>` subsection holding the JSON Schema
 * and the sample data, each pretty-printed inside a ```json fence. Only
 * `fileName`, `schema` and `sample` are rendered.
 *
 * @param {Array<{ fileName: string, schema: object, sample: object, meta?: object }>} contexts
 * @returns {string} Markdown formatted section for system prompt, or "" when
 *   there are no contexts
 */
export function buildSchemaPromptSection(contexts) {
  const nothingToRender = !contexts || contexts.length === 0;
  if (nothingToRender) return "";

  // Pretty-print a value inside a ```json fenced block.
  const jsonFence = (value) =>
    `\`\`\`json\n${JSON.stringify(value, null, 2)}\n\`\`\``;

  const body = contexts
    .map(({ fileName, schema, sample }) =>
      [
        `### @${fileName}`,
        `**JSON Schema:**\n\n${jsonFence(schema)}`,
        `**Sample Data:**\n\n${jsonFence(sample)}`,
      ].join("\n\n"),
    )
    .join("\n\n");

  return `## Referenced Files\n\n${body}`;
}
@@ -0,0 +1,51 @@
1
+ import { createHighLevelLLM } from "../../llm/index.js";
2
+ import { stripMarkdownFences } from "../../providers/base.js";
3
+
4
/**
 * Review task code with the LLM and return a corrected version when one is offered.
 *
 * The original `code` is returned unchanged whenever the reply cannot be used
 * as a replacement: the reply is missing or not a string, is blank, contains
 * the NO_CHANGES_NEEDED sentinel, or is empty once markdown fences are
 * stripped. This prevents an unexpected reply from replacing a task with empty
 * content. The corrected code is not otherwise validated.
 *
 * @param {string} code - The task code to review
 * @param {string} guidelines - Pipeline task guidelines
 * @returns {Promise<string>} The original code, or the LLM's corrected code
 *   after markdown fence stripping
 */
export async function reviewAndCorrectTask(code, guidelines) {
  const llm = createHighLevelLLM();

  const prompt = `Review this pipeline task code for:
1. JavaScript syntax errors
2. Logic flaws or bugs
3. Violations of the pipeline task guidelines below
4. Missing error handling for io/llm operations

If the code is correct, respond with exactly: NO_CHANGES_NEEDED

If corrections are needed, respond with only the corrected code (no explanation).

## Guidelines

${guidelines}

## Code to Review

\`\`\`javascript
${code}
\`\`\``;

  const messages = [{ role: "user", content: prompt }];

  const response = await llm.chat({ messages, responseFormat: "text" });
  const content = response?.content;

  // A missing or non-string reply cannot be a code replacement.
  if (typeof content !== "string") {
    return code;
  }

  const trimmedContent = content.trim();

  // If the LLM returned no usable content, keep the original code
  if (!trimmedContent) {
    return code;
  }

  // Substring match, so the sentinel is honored even with extra text around it.
  if (trimmedContent.includes("NO_CHANGES_NEEDED")) {
    return code;
  }

  const corrected = stripMarkdownFences(content);

  // A reply such as an empty fenced block strips down to nothing; keep the
  // original rather than returning empty code.
  if (typeof corrected !== "string" || corrected.trim() === "") {
    return code;
  }

  return corrected;
}
+ }