dirac-lang 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.yml +1 -1
- package/dist/{chunk-3LRJLSZC.js → chunk-GTSDTB7P.js} +1 -1
- package/dist/{chunk-UDA4H3GU.js → chunk-YSEKILQM.js} +153 -65
- package/dist/cli.js +3 -3
- package/dist/index.js +2 -2
- package/dist/{interpreter-BYQIE2MI.js → interpreter-QEWHGYCE.js} +1 -1
- package/dist/test-runner.js +1 -1
- package/examples/llm-feedback-debug.di +30 -0
- package/examples/llm-feedback-demo.di +19 -0
- package/examples/llm-feedback-math.di +22 -0
- package/examples/llm-feedback-simple.di +16 -0
- package/examples/llm-feedback-sub.di +22 -0
- package/examples/llm-no-feedback.di +10 -0
- package/package.json +1 -1
- package/src/tags/defvar.ts +6 -0
- package/src/tags/llm.ts +168 -70
package/config.yml
CHANGED
package/dist/{chunk-UDA4H3GU.js → chunk-YSEKILQM.js}
CHANGED
|
@@ -79,6 +79,7 @@ async function executeDefvar(session, element) {
|
|
|
79
79
|
const valueAttr = element.attributes.value;
|
|
80
80
|
const visibleAttr = element.attributes.visible || "false";
|
|
81
81
|
const literal = "literal" in element.attributes;
|
|
82
|
+
const trim = "trim" in element.attributes;
|
|
82
83
|
if (!name) {
|
|
83
84
|
throw new Error("<defvar> requires name attribute");
|
|
84
85
|
}
|
|
@@ -109,6 +110,9 @@ async function executeDefvar(session, element) {
|
|
|
109
110
|
} else {
|
|
110
111
|
value = "";
|
|
111
112
|
}
|
|
113
|
+
if (trim && typeof value === "string") {
|
|
114
|
+
value = value.trim();
|
|
115
|
+
}
|
|
112
116
|
setVariable(session, name, value, visible);
|
|
113
117
|
}
|
|
114
118
|
|
|
@@ -370,12 +374,12 @@ async function executeIf(session, element) {
|
|
|
370
374
|
const condition = await evaluatePredicate(session, conditionElement);
|
|
371
375
|
if (condition) {
|
|
372
376
|
if (thenElement) {
|
|
373
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
377
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
374
378
|
await integrateChildren2(session, thenElement);
|
|
375
379
|
}
|
|
376
380
|
} else {
|
|
377
381
|
if (elseElement) {
|
|
378
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
382
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
379
383
|
await integrateChildren2(session, elseElement);
|
|
380
384
|
}
|
|
381
385
|
}
|
|
@@ -388,7 +392,7 @@ async function evaluatePredicate(session, predicateElement) {
|
|
|
388
392
|
return await evaluateCondition(session, predicateElement);
|
|
389
393
|
}
|
|
390
394
|
const outputLengthBefore = session.output.length;
|
|
391
|
-
const { integrate: integrate2 } = await import("./interpreter-BYQIE2MI.js");
|
|
395
|
+
const { integrate: integrate2 } = await import("./interpreter-QEWHGYCE.js");
|
|
392
396
|
await integrate2(session, predicateElement);
|
|
393
397
|
const newOutputChunks = session.output.slice(outputLengthBefore);
|
|
394
398
|
const result = newOutputChunks.join("").trim();
|
|
@@ -411,11 +415,11 @@ async function evaluateCondition(session, condElement) {
|
|
|
411
415
|
}
|
|
412
416
|
const outputLengthBefore = session.output.length;
|
|
413
417
|
const args = [];
|
|
414
|
-
const { integrate: integrate2 } = await import("./interpreter-BYQIE2MI.js");
|
|
418
|
+
const { integrate: integrate2 } = await import("./interpreter-QEWHGYCE.js");
|
|
415
419
|
for (const child of condElement.children) {
|
|
416
420
|
if (child.tag.toLowerCase() === "arg") {
|
|
417
421
|
const argOutputStart = session.output.length;
|
|
418
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
422
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
419
423
|
await integrateChildren2(session, child);
|
|
420
424
|
const newChunks = session.output.slice(argOutputStart);
|
|
421
425
|
const argValue = newChunks.join("");
|
|
@@ -675,8 +679,13 @@ then you call it like
|
|
|
675
679
|
example = example.replace(/"/g, '"').replace(/:/g, ":");
|
|
676
680
|
systemPrompt += ">" + example + "</" + sub.name + ">";
|
|
677
681
|
}
|
|
678
|
-
systemPrompt += "\
|
|
679
|
-
systemPrompt += "\
|
|
682
|
+
systemPrompt += "\n\nIMPORTANT INSTRUCTIONS:";
|
|
683
|
+
systemPrompt += "\n1. Output ONLY valid XML tags from the list above";
|
|
684
|
+
systemPrompt += "\n2. Do NOT include any explanations, descriptions, or extra text";
|
|
685
|
+
systemPrompt += "\n3. Do NOT use bullet points or formatting - just pure XML";
|
|
686
|
+
systemPrompt += "\n4. Do NOT invent tags - only use tags from the list above";
|
|
687
|
+
systemPrompt += "\n5. Start your response directly with the XML tag (e.g., <add ...>)";
|
|
688
|
+
systemPrompt += "\n\nDouble-check: Does your response contain ONLY XML tags? If not, remove all non-XML text.";
|
|
680
689
|
prompt = systemPrompt + "\nUser: " + userPrompt + "\nOutput:";
|
|
681
690
|
if (session.debug || process.env.DIRAC_LOG_PROMPT === "1") {
|
|
682
691
|
console.error("[LLM] Full prompt sent to LLM:\n" + prompt + "\n");
|
|
@@ -727,6 +736,9 @@ then you call it like
|
|
|
727
736
|
const validateTags = element.attributes["validate"] === "true";
|
|
728
737
|
const autocorrect = element.attributes["autocorrect"] === "true";
|
|
729
738
|
const maxRetries = parseInt(element.attributes["max-retries"] || "0", 10);
|
|
739
|
+
const feedbackMode = element.attributes["feedback"] === "true";
|
|
740
|
+
const maxIterations = parseInt(element.attributes["max-iterations"] || "3", 10);
|
|
741
|
+
const replaceTick = element.attributes["replace-tick"] === "true";
|
|
730
742
|
if (session.debug) {
|
|
731
743
|
console.error(`[LLM] Executing response as Dirac code:
|
|
732
744
|
${result}
|
|
@@ -734,37 +746,126 @@ ${result}
|
|
|
734
746
|
if (validateTags) {
|
|
735
747
|
console.error(`[LLM] Tag validation enabled (autocorrect: ${autocorrect}, max-retries: ${maxRetries})`);
|
|
736
748
|
}
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
let diracCode = result.trim();
|
|
740
|
-
if (replaceTick && diracCode.startsWith("```")) {
|
|
741
|
-
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
742
|
-
if (match && match[1] === "bash") {
|
|
743
|
-
const endIdx = diracCode.indexOf("```", 3);
|
|
744
|
-
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
745
|
-
diracCode = `<system>${bashContent}</system>`;
|
|
746
|
-
} else {
|
|
747
|
-
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
749
|
+
if (feedbackMode) {
|
|
750
|
+
console.error(`[LLM] Feedback mode enabled (max iterations: ${maxIterations})`);
|
|
748
751
|
}
|
|
749
752
|
}
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
if (
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
753
|
+
let iteration = 0;
|
|
754
|
+
while (iteration < maxIterations && (iteration === 0 || feedbackMode)) {
|
|
755
|
+
iteration++;
|
|
756
|
+
if (session.debug && feedbackMode) {
|
|
757
|
+
console.error(`[LLM] Feedback iteration ${iteration}/${maxIterations}`);
|
|
758
|
+
}
|
|
759
|
+
let diracCode = result.trim();
|
|
760
|
+
if (replaceTick && diracCode.startsWith("```")) {
|
|
761
|
+
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
762
|
+
if (match && match[1] === "bash") {
|
|
763
|
+
const endIdx = diracCode.indexOf("```", 3);
|
|
764
|
+
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
765
|
+
diracCode = `<system>${bashContent}</system>`;
|
|
766
|
+
} else {
|
|
767
|
+
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
const outputBefore = feedbackMode ? session.output.slice() : [];
|
|
771
|
+
try {
|
|
772
|
+
const parser = new DiracParser();
|
|
773
|
+
let dynamicAST = parser.parse(diracCode);
|
|
774
|
+
if (validateTags) {
|
|
775
|
+
const { validateDiracCode, applyCorrectedTags } = await import("./tag-validator-I3GLCBVD.js");
|
|
776
|
+
let validation = await validateDiracCode(session, dynamicAST, { autocorrect });
|
|
777
|
+
let retryCount = 0;
|
|
778
|
+
while (!validation.valid && retryCount < maxRetries) {
|
|
779
|
+
retryCount++;
|
|
780
|
+
if (session.debug) {
|
|
781
|
+
console.error(`[LLM] Validation failed (attempt ${retryCount}/${maxRetries}):`, validation.errorMessages);
|
|
782
|
+
}
|
|
783
|
+
const errorFeedback = validation.errorMessages.join("\n");
|
|
784
|
+
const retryPrompt = `Your previous response had the following errors:
|
|
764
785
|
${errorFeedback}
|
|
765
786
|
|
|
766
787
|
Please fix these errors and generate valid Dirac XML again. Remember to only use the allowed tags.`;
|
|
767
|
-
|
|
788
|
+
dialogHistory.push({ role: "user", content: retryPrompt });
|
|
789
|
+
if (isOpenAI) {
|
|
790
|
+
const response = await session.llmClient.chat.completions.create({
|
|
791
|
+
model,
|
|
792
|
+
max_tokens: maxTokens,
|
|
793
|
+
temperature,
|
|
794
|
+
messages: dialogHistory
|
|
795
|
+
});
|
|
796
|
+
result = response.choices[0]?.message?.content || "";
|
|
797
|
+
} else if (isOllama) {
|
|
798
|
+
const ollamaPrompt = dialogHistory.map((m) => `${m.role.charAt(0).toUpperCase() + m.role.slice(1)}: ${m.content}`).join("\n");
|
|
799
|
+
result = await session.llmClient.complete(ollamaPrompt, {
|
|
800
|
+
model,
|
|
801
|
+
temperature,
|
|
802
|
+
max_tokens: maxTokens
|
|
803
|
+
});
|
|
804
|
+
} else {
|
|
805
|
+
const response = await session.llmClient.messages.create({
|
|
806
|
+
model,
|
|
807
|
+
max_tokens: maxTokens,
|
|
808
|
+
temperature,
|
|
809
|
+
messages: dialogHistory
|
|
810
|
+
});
|
|
811
|
+
const content = response.content[0];
|
|
812
|
+
result = content.type === "text" ? content.text : "";
|
|
813
|
+
}
|
|
814
|
+
dialogHistory.push({ role: "assistant", content: result });
|
|
815
|
+
if (contextVar) {
|
|
816
|
+
setVariable(session, contextVar, dialogHistory, true);
|
|
817
|
+
}
|
|
818
|
+
if (session.debug) {
|
|
819
|
+
console.error(`[LLM] Retry ${retryCount} response:
|
|
820
|
+
${result}
|
|
821
|
+
`);
|
|
822
|
+
}
|
|
823
|
+
diracCode = result.trim();
|
|
824
|
+
if (replaceTick && diracCode.startsWith("```")) {
|
|
825
|
+
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
826
|
+
if (match && match[1] === "bash") {
|
|
827
|
+
const endIdx = diracCode.indexOf("```", 3);
|
|
828
|
+
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
829
|
+
diracCode = `<system>${bashContent}</system>`;
|
|
830
|
+
} else {
|
|
831
|
+
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
832
|
+
}
|
|
833
|
+
}
|
|
834
|
+
dynamicAST = parser.parse(diracCode);
|
|
835
|
+
validation = await validateDiracCode(session, dynamicAST, { autocorrect });
|
|
836
|
+
}
|
|
837
|
+
if (!validation.valid) {
|
|
838
|
+
throw new Error(`Tag validation failed after ${maxRetries} retries:
|
|
839
|
+
${validation.errorMessages.join("\n")}`);
|
|
840
|
+
}
|
|
841
|
+
if (autocorrect) {
|
|
842
|
+
dynamicAST = applyCorrectedTags(dynamicAST, validation.results);
|
|
843
|
+
if (session.debug) {
|
|
844
|
+
console.error("[LLM] Applied auto-corrections to tags");
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
await integrate(session, dynamicAST);
|
|
849
|
+
if (feedbackMode) {
|
|
850
|
+
const outputAfter = session.output.slice();
|
|
851
|
+
const executionOutput = outputAfter.slice(outputBefore.length).join("");
|
|
852
|
+
if (session.debug) {
|
|
853
|
+
console.error(`[LLM] Execution output (${executionOutput.length} chars):
|
|
854
|
+
${executionOutput}
|
|
855
|
+
`);
|
|
856
|
+
}
|
|
857
|
+
const feedbackPrompt = `The code executed successfully. Here is the output:
|
|
858
|
+
\`\`\`
|
|
859
|
+
${executionOutput}
|
|
860
|
+
\`\`\`
|
|
861
|
+
|
|
862
|
+
Please review the output carefully. If the output is correct and complete, respond with ONLY the word "DONE" and nothing else. If the output is incorrect or incomplete, generate corrected Dirac XML code.`;
|
|
863
|
+
if (session.debug) {
|
|
864
|
+
console.error(`[LLM] Feedback prompt:
|
|
865
|
+
${feedbackPrompt}
|
|
866
|
+
`);
|
|
867
|
+
}
|
|
868
|
+
dialogHistory.push({ role: "user", content: feedbackPrompt });
|
|
768
869
|
if (isOpenAI) {
|
|
769
870
|
const response = await session.llmClient.chat.completions.create({
|
|
770
871
|
model,
|
|
@@ -795,41 +896,28 @@ Please fix these errors and generate valid Dirac XML again. Remember to only use
|
|
|
795
896
|
setVariable(session, contextVar, dialogHistory, true);
|
|
796
897
|
}
|
|
797
898
|
if (session.debug) {
|
|
798
|
-
console.error(`[LLM] Retry ${retryCount} response:
|
|
899
|
+
console.error(`[LLM] Feedback response:
|
|
799
900
|
${result}
|
|
800
901
|
`);
|
|
801
902
|
}
|
|
802
|
-
|
|
803
|
-
if (
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
808
|
-
diracCode = `<system>${bashContent}</system>`;
|
|
809
|
-
} else {
|
|
810
|
-
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
903
|
+
const responseStart = result.trim().substring(0, 100).toUpperCase();
|
|
904
|
+
if (responseStart.startsWith("DONE") || result.trim().toLowerCase().includes("looks correct") || result.trim().toLowerCase().includes("looks good")) {
|
|
905
|
+
if (session.debug) {
|
|
906
|
+
console.error(`[LLM] Feedback loop terminating - LLM indicated completion
|
|
907
|
+
`);
|
|
811
908
|
}
|
|
909
|
+
break;
|
|
812
910
|
}
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
}
|
|
816
|
-
if (!validation.valid) {
|
|
817
|
-
throw new Error(`Tag validation failed after ${maxRetries} retries:
|
|
818
|
-
${validation.errorMessages.join("\n")}`);
|
|
911
|
+
} else {
|
|
912
|
+
break;
|
|
819
913
|
}
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
console.error("[LLM] Applied auto-corrections to tags");
|
|
824
|
-
}
|
|
914
|
+
} catch (parseError) {
|
|
915
|
+
if (session.debug) {
|
|
916
|
+
console.error(`[LLM] Failed to parse as Dirac, treating as text: ${parseError}`);
|
|
825
917
|
}
|
|
918
|
+
emit(session, result);
|
|
919
|
+
break;
|
|
826
920
|
}
|
|
827
|
-
await integrate(session, dynamicAST);
|
|
828
|
-
} catch (parseError) {
|
|
829
|
-
if (session.debug) {
|
|
830
|
-
console.error(`[LLM] Failed to parse as Dirac, treating as text: ${parseError}`);
|
|
831
|
-
}
|
|
832
|
-
emit(session, result);
|
|
833
921
|
}
|
|
834
922
|
} else {
|
|
835
923
|
emit(session, result);
|
|
@@ -1242,7 +1330,7 @@ async function executeTagCheck(session, element) {
|
|
|
1242
1330
|
const executeTag = correctedTag || tagName;
|
|
1243
1331
|
console.error(`[tag-check] Executing <${executeTag}/> as all checks passed and execute=true.`);
|
|
1244
1332
|
const elementToExecute = correctedTag ? { ...child, tag: correctedTag } : child;
|
|
1245
|
-
const { integrate: integrate2 } = await import("./interpreter-BYQIE2MI.js");
|
|
1333
|
+
const { integrate: integrate2 } = await import("./interpreter-QEWHGYCE.js");
|
|
1246
1334
|
await integrate2(session, elementToExecute);
|
|
1247
1335
|
}
|
|
1248
1336
|
}
|
|
@@ -1251,7 +1339,7 @@ async function executeTagCheck(session, element) {
|
|
|
1251
1339
|
// src/tags/throw.ts
|
|
1252
1340
|
async function executeThrow(session, element) {
|
|
1253
1341
|
const exceptionName = element.attributes?.name || "exception";
|
|
1254
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
1342
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
1255
1343
|
const exceptionDom = {
|
|
1256
1344
|
tag: "exception-content",
|
|
1257
1345
|
attributes: { name: exceptionName },
|
|
@@ -1264,7 +1352,7 @@ async function executeThrow(session, element) {
|
|
|
1264
1352
|
// src/tags/try.ts
|
|
1265
1353
|
async function executeTry(session, element) {
|
|
1266
1354
|
setExceptionBoundary(session);
|
|
1267
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
1355
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
1268
1356
|
await integrateChildren2(session, element);
|
|
1269
1357
|
unsetExceptionBoundary(session);
|
|
1270
1358
|
}
|
|
@@ -1274,7 +1362,7 @@ async function executeCatch(session, element) {
|
|
|
1274
1362
|
const exceptionName = element.attributes?.name || "exception";
|
|
1275
1363
|
const caughtCount = lookupException(session, exceptionName);
|
|
1276
1364
|
if (caughtCount > 0) {
|
|
1277
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
1365
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
1278
1366
|
await integrateChildren2(session, element);
|
|
1279
1367
|
}
|
|
1280
1368
|
flushCurrentException(session);
|
|
@@ -1283,7 +1371,7 @@ async function executeCatch(session, element) {
|
|
|
1283
1371
|
// src/tags/exception.ts
|
|
1284
1372
|
async function executeException(session, element) {
|
|
1285
1373
|
const exceptions = getCurrentExceptions(session);
|
|
1286
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-BYQIE2MI.js");
|
|
1374
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-QEWHGYCE.js");
|
|
1287
1375
|
for (const exceptionDom of exceptions) {
|
|
1288
1376
|
await integrateChildren2(session, exceptionDom);
|
|
1289
1377
|
}
|
package/dist/cli.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
execute
|
|
4
|
-
} from "./chunk-3LRJLSZC.js";
|
|
5
|
-
import "./chunk-UDA4H3GU.js";
|
|
4
|
+
} from "./chunk-GTSDTB7P.js";
|
|
5
|
+
import "./chunk-YSEKILQM.js";
|
|
6
6
|
import "./chunk-E7IWGUE6.js";
|
|
7
7
|
|
|
8
8
|
// src/cli.ts
|
|
@@ -11,7 +11,7 @@ import "dotenv/config";
|
|
|
11
11
|
// package.json
|
|
12
12
|
var package_default = {
|
|
13
13
|
name: "dirac-lang",
|
|
14
|
-
version: "0.1.10",
|
|
14
|
+
version: "0.1.12",
|
|
15
15
|
description: "LLM-Augmented Declarative Execution",
|
|
16
16
|
type: "module",
|
|
17
17
|
main: "dist/index.js",
|
package/dist/index.js
CHANGED
|
@@ -2,11 +2,11 @@ import {
|
|
|
2
2
|
createLLMAdapter,
|
|
3
3
|
execute,
|
|
4
4
|
executeUserCommand
|
|
5
|
-
} from "./chunk-3LRJLSZC.js";
|
|
5
|
+
} from "./chunk-GTSDTB7P.js";
|
|
6
6
|
import {
|
|
7
7
|
DiracParser,
|
|
8
8
|
integrate
|
|
9
|
-
} from "./chunk-UDA4H3GU.js";
|
|
9
|
+
} from "./chunk-YSEKILQM.js";
|
|
10
10
|
import {
|
|
11
11
|
createSession,
|
|
12
12
|
getAvailableSubroutines,
|
package/dist/test-runner.js
CHANGED
package/examples/llm-feedback-debug.di
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env dirac
|
|
2
|
+
<!-- Test: Feedback with math calculation - DEBUG VERSION -->
|
|
3
|
+
|
|
4
|
+
<dirac>
|
|
5
|
+
<subroutine name="add" description="addition of two numbers" param-a="number:required:first argument::2" param-b="number:required:second argument::3">
|
|
6
|
+
<output>DEBUG: a=<variable name="a" />, b=<variable name="b" />
|
|
7
|
+
</output>
|
|
8
|
+
<expr eval="plus">
|
|
9
|
+
<arg><variable name="a" /></arg>
|
|
10
|
+
<arg><variable name="b" /></arg>
|
|
11
|
+
</expr>
|
|
12
|
+
</subroutine>
|
|
13
|
+
|
|
14
|
+
<llm
|
|
15
|
+
execute="true"
|
|
16
|
+
feedback="true"
|
|
17
|
+
autocorrect="true"
|
|
18
|
+
max-iterations="3"
|
|
19
|
+
replace-tick="true"
|
|
20
|
+
output-var="generated_code">
|
|
21
|
+
Calculate 5 + 3 using the add subroutine and display the result.
|
|
22
|
+
</llm>
|
|
23
|
+
|
|
24
|
+
<output>
|
|
25
|
+
|
|
26
|
+
Generated code was:
|
|
27
|
+
<variable name="generated_code" />
|
|
28
|
+
</output>
|
|
29
|
+
|
|
30
|
+
</dirac>
|
|
package/examples/llm-feedback-demo.di
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#!/usr/bin/env dirac
|
|
2
|
+
<!-- Test: LLM feedback mode
|
|
3
|
+
Description: Test the feedback loop where LLM generates code, sees output, and can iterate
|
|
4
|
+
Expected: Should output "Hello from feedback loop!" and eventually say "DONE"
|
|
5
|
+
-->
|
|
6
|
+
|
|
7
|
+
<dirac>
|
|
8
|
+
<llm
|
|
9
|
+
execute="true"
|
|
10
|
+
feedback="true"
|
|
11
|
+
max-iterations="2"
|
|
12
|
+
validate="true"
|
|
13
|
+
autocorrect="true"
|
|
14
|
+
max-retries="2"
|
|
15
|
+
replace-tick="true"
|
|
16
|
+
no-extra="true">
|
|
17
|
+
Use the output tag to print "Hello from feedback loop!"
|
|
18
|
+
</llm>
|
|
19
|
+
</dirac>
|
|
package/examples/llm-feedback-math.di
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env dirac
|
|
2
|
+
<!-- Test: Feedback with math calculation -->
|
|
3
|
+
|
|
4
|
+
<dirac>
|
|
5
|
+
<subroutine name="add" description="addition of two numbers " param-a="number:required:first argument::1st arg" param-b="number:required:second argument::2nd arg">
|
|
6
|
+
<expr eval="plus">
|
|
7
|
+
<arg><variable name="a" /></arg>
|
|
8
|
+
<arg><variable name="b" /></arg>
|
|
9
|
+
</expr>
|
|
10
|
+
</subroutine>
|
|
11
|
+
|
|
12
|
+
<llm
|
|
13
|
+
execute="true"
|
|
14
|
+
feedback="true"
|
|
15
|
+
validate="true"
|
|
16
|
+
autocorrect="true"
|
|
17
|
+
max-iterations="3"
|
|
18
|
+
replace-tick="true">
|
|
19
|
+
Calculate 5 + 3 using the add subroutine and display the result.
|
|
20
|
+
</llm>
|
|
21
|
+
|
|
22
|
+
</dirac>
|
|
package/examples/llm-feedback-simple.di
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env dirac
|
|
2
|
+
<!-- Test: Simple feedback mode without validation -->
|
|
3
|
+
|
|
4
|
+
<dirac>
|
|
5
|
+
<subroutine name="greet" param-name="string">
|
|
6
|
+
<output>Hello, <variable name="name" />!</output>
|
|
7
|
+
</subroutine>
|
|
8
|
+
|
|
9
|
+
<llm
|
|
10
|
+
execute="true"
|
|
11
|
+
feedback="true"
|
|
12
|
+
max-iterations="2"
|
|
13
|
+
replace-tick="true">
|
|
14
|
+
Call greet with name="Feedback Test"
|
|
15
|
+
</llm>
|
|
16
|
+
</dirac>
|
|
package/examples/llm-feedback-sub.di
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env dirac
|
|
2
|
+
<!-- Test: LLM feedback mode with subroutine
|
|
3
|
+
Description: Test feedback loop with a simple subroutine
|
|
4
|
+
Expected: Should call print-message and see the feedback loop work
|
|
5
|
+
-->
|
|
6
|
+
|
|
7
|
+
<dirac>
|
|
8
|
+
<subroutine name="print-message" param-text="string">
|
|
9
|
+
<output><variable name="text" /></output>
|
|
10
|
+
</subroutine>
|
|
11
|
+
|
|
12
|
+
<llm
|
|
13
|
+
execute="true"
|
|
14
|
+
feedback="true"
|
|
15
|
+
max-iterations="2"
|
|
16
|
+
validate="true"
|
|
17
|
+
autocorrect="true"
|
|
18
|
+
max-retries="2"
|
|
19
|
+
replace-tick="true">
|
|
20
|
+
Call print-message with text="Hello from feedback loop!"
|
|
21
|
+
</llm>
|
|
22
|
+
</dirac>
|
package/package.json
CHANGED
package/src/tags/defvar.ts
CHANGED
|
@@ -14,6 +14,7 @@ export async function executeDefvar(session: DiracSession, element: DiracElement
|
|
|
14
14
|
const valueAttr = element.attributes.value;
|
|
15
15
|
const visibleAttr = element.attributes.visible || 'false';
|
|
16
16
|
const literal = 'literal' in element.attributes;
|
|
17
|
+
const trim = 'trim' in element.attributes; // Support trim attribute to remove leading/trailing whitespace
|
|
17
18
|
|
|
18
19
|
if (!name) {
|
|
19
20
|
throw new Error('<defvar> requires name attribute');
|
|
@@ -52,5 +53,10 @@ export async function executeDefvar(session: DiracSession, element: DiracElement
|
|
|
52
53
|
value = '';
|
|
53
54
|
}
|
|
54
55
|
|
|
56
|
+
// Apply trim if requested
|
|
57
|
+
if (trim && typeof value === 'string') {
|
|
58
|
+
value = value.trim();
|
|
59
|
+
}
|
|
60
|
+
|
|
55
61
|
setVariable(session, name, value, visible);
|
|
56
62
|
}
|
package/src/tags/llm.ts
CHANGED
|
@@ -111,8 +111,13 @@ for (const sub of subroutines) {
|
|
|
111
111
|
example = example.replace(/"/g, '"').replace(/:/g, ':');
|
|
112
112
|
systemPrompt += '>'+example+'</' + sub.name + '>';
|
|
113
113
|
}
|
|
114
|
-
systemPrompt += '\
|
|
115
|
-
systemPrompt += '\
|
|
114
|
+
systemPrompt += '\n\nIMPORTANT INSTRUCTIONS:';
|
|
115
|
+
systemPrompt += '\n1. Output ONLY valid XML tags from the list above';
|
|
116
|
+
systemPrompt += '\n2. Do NOT include any explanations, descriptions, or extra text';
|
|
117
|
+
systemPrompt += '\n3. Do NOT use bullet points or formatting - just pure XML';
|
|
118
|
+
systemPrompt += '\n4. Do NOT invent tags - only use tags from the list above';
|
|
119
|
+
systemPrompt += '\n5. Start your response directly with the XML tag (e.g., <add ...>)';
|
|
120
|
+
systemPrompt += '\n\nDouble-check: Does your response contain ONLY XML tags? If not, remove all non-XML text.';
|
|
116
121
|
|
|
117
122
|
prompt = systemPrompt + '\nUser: ' + userPrompt + '\nOutput:';
|
|
118
123
|
if (session.debug || process.env.DIRAC_LOG_PROMPT === '1') {
|
|
@@ -177,56 +182,164 @@ for (const sub of subroutines) {
|
|
|
177
182
|
const validateTags = element.attributes['validate'] === 'true';
|
|
178
183
|
const autocorrect = element.attributes['autocorrect'] === 'true';
|
|
179
184
|
const maxRetries = parseInt(element.attributes['max-retries'] || '0', 10);
|
|
185
|
+
const feedbackMode = element.attributes['feedback'] === 'true';
|
|
186
|
+
const maxIterations = parseInt(element.attributes['max-iterations'] || '3', 10);
|
|
187
|
+
const replaceTick = element.attributes['replace-tick'] === 'true';
|
|
180
188
|
|
|
181
189
|
if (session.debug) {
|
|
182
190
|
console.error(`[LLM] Executing response as Dirac code:\n${result}\n`);
|
|
183
191
|
if (validateTags) {
|
|
184
192
|
console.error(`[LLM] Tag validation enabled (autocorrect: ${autocorrect}, max-retries: ${maxRetries})`);
|
|
185
193
|
}
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
// Only replace triple backtick code blocks if replace-tick="true" is set
|
|
189
|
-
const replaceTick = element.attributes['replace-tick'] === 'true';
|
|
190
|
-
let diracCode = result.trim();
|
|
191
|
-
if (replaceTick && diracCode.startsWith('```')) {
|
|
192
|
-
// Check for bash, xml, html, dirac, or no language
|
|
193
|
-
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
194
|
-
if (match && match[1] === 'bash') {
|
|
195
|
-
// Find closing triple backticks
|
|
196
|
-
const endIdx = diracCode.indexOf('```', 3);
|
|
197
|
-
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
198
|
-
diracCode = `<system>${bashContent}</system>`;
|
|
199
|
-
} else {
|
|
200
|
-
// Remove opening and closing backticks for xml/html/dirac/none
|
|
201
|
-
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, '').replace(/\n?```$/m, '').trim();
|
|
194
|
+
if (feedbackMode) {
|
|
195
|
+
console.error(`[LLM] Feedback mode enabled (max iterations: ${maxIterations})`);
|
|
202
196
|
}
|
|
203
197
|
}
|
|
198
|
+
|
|
199
|
+
// Feedback loop: execute, capture output, send back to LLM, repeat
|
|
200
|
+
let iteration = 0;
|
|
204
201
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
202
|
+
while (iteration < maxIterations && (iteration === 0 || feedbackMode)) {
|
|
203
|
+
iteration++;
|
|
204
|
+
if (session.debug && feedbackMode) {
|
|
205
|
+
console.error(`[LLM] Feedback iteration ${iteration}/${maxIterations}`);
|
|
206
|
+
}
|
|
209
207
|
|
|
210
|
-
//
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
208
|
+
// Only replace triple backtick code blocks if replace-tick="true" is set
|
|
209
|
+
let diracCode = result.trim();
|
|
210
|
+
if (replaceTick && diracCode.startsWith('```')) {
|
|
211
|
+
// Check for bash, xml, html, dirac, or no language
|
|
212
|
+
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
213
|
+
if (match && match[1] === 'bash') {
|
|
214
|
+
// Find closing triple backticks
|
|
215
|
+
const endIdx = diracCode.indexOf('```', 3);
|
|
216
|
+
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
217
|
+
diracCode = `<system>${bashContent}</system>`;
|
|
218
|
+
} else {
|
|
219
|
+
// Remove opening and closing backticks for xml/html/dirac/none
|
|
220
|
+
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, '').replace(/\n?```$/m, '').trim();
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Capture output before execution (for feedback)
|
|
225
|
+
const outputBefore = feedbackMode ? session.output.slice() : [];
|
|
226
|
+
|
|
227
|
+
try {
|
|
228
|
+
// Parse the LLM's output as Dirac code
|
|
229
|
+
const parser = new DiracParser();
|
|
230
|
+
let dynamicAST = parser.parse(diracCode);
|
|
215
231
|
|
|
216
|
-
|
|
217
|
-
|
|
232
|
+
// Validate tags if requested
|
|
233
|
+
if (validateTags) {
|
|
234
|
+
const { validateDiracCode, applyCorrectedTags } = await import('../utils/tag-validator.js');
|
|
235
|
+
let validation = await validateDiracCode(session, dynamicAST, { autocorrect });
|
|
236
|
+
let retryCount = 0;
|
|
237
|
+
|
|
238
|
+
while (!validation.valid && retryCount < maxRetries) {
|
|
239
|
+
retryCount++;
|
|
240
|
+
if (session.debug) {
|
|
241
|
+
console.error(`[LLM] Validation failed (attempt ${retryCount}/${maxRetries}):`, validation.errorMessages);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// Build error feedback for LLM
|
|
245
|
+
const errorFeedback = validation.errorMessages.join('\n');
|
|
246
|
+
const retryPrompt = `Your previous response had the following errors:\n${errorFeedback}\n\nPlease fix these errors and generate valid Dirac XML again. Remember to only use the allowed tags.`;
|
|
247
|
+
|
|
248
|
+
// Add error feedback to dialog history
|
|
249
|
+
dialogHistory.push({ role: 'user', content: retryPrompt });
|
|
250
|
+
|
|
251
|
+
// Retry LLM call
|
|
252
|
+
if (isOpenAI) {
|
|
253
|
+
const response = await session.llmClient.chat.completions.create({
|
|
254
|
+
model,
|
|
255
|
+
max_tokens: maxTokens,
|
|
256
|
+
temperature,
|
|
257
|
+
messages: dialogHistory,
|
|
258
|
+
});
|
|
259
|
+
result = response.choices[0]?.message?.content || '';
|
|
260
|
+
} else if (isOllama) {
|
|
261
|
+
const ollamaPrompt = dialogHistory.map(m => `${m.role.charAt(0).toUpperCase() + m.role.slice(1)}: ${m.content}`).join('\n');
|
|
262
|
+
result = await session.llmClient.complete(ollamaPrompt, {
|
|
263
|
+
model,
|
|
264
|
+
temperature,
|
|
265
|
+
max_tokens: maxTokens,
|
|
266
|
+
});
|
|
267
|
+
} else {
|
|
268
|
+
const response = await session.llmClient.messages.create({
|
|
269
|
+
model,
|
|
270
|
+
max_tokens: maxTokens,
|
|
271
|
+
temperature,
|
|
272
|
+
messages: dialogHistory,
|
|
273
|
+
});
|
|
274
|
+
const content = response.content[0];
|
|
275
|
+
result = content.type === 'text' ? content.text : '';
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
// Add new response to dialog history
|
|
279
|
+
dialogHistory.push({ role: 'assistant', content: result });
|
|
280
|
+
|
|
281
|
+
// Update context variable if present
|
|
282
|
+
if (contextVar) {
|
|
283
|
+
setVariable(session, contextVar, dialogHistory, true);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
if (session.debug) {
|
|
287
|
+
console.error(`[LLM] Retry ${retryCount} response:\n${result}\n`);
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
// Clean up and parse the new response
|
|
291
|
+
diracCode = result.trim();
|
|
292
|
+
if (replaceTick && diracCode.startsWith('```')) {
|
|
293
|
+
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
294
|
+
if (match && match[1] === 'bash') {
|
|
295
|
+
const endIdx = diracCode.indexOf('```', 3);
|
|
296
|
+
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
297
|
+
diracCode = `<system>${bashContent}</system>`;
|
|
298
|
+
} else {
|
|
299
|
+
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, '').replace(/\n?```$/m, '').trim();
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
dynamicAST = parser.parse(diracCode);
|
|
304
|
+
validation = await validateDiracCode(session, dynamicAST, { autocorrect });
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
if (!validation.valid) {
|
|
308
|
+
throw new Error(`Tag validation failed after ${maxRetries} retries:\n${validation.errorMessages.join('\n')}`);
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
// Apply auto-corrections if enabled
|
|
312
|
+
if (autocorrect) {
|
|
313
|
+
dynamicAST = applyCorrectedTags(dynamicAST, validation.results);
|
|
314
|
+
if (session.debug) {
|
|
315
|
+
console.error('[LLM] Applied auto-corrections to tags');
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// Execute the validated (and possibly corrected) code
|
|
321
|
+
await integrate(session, dynamicAST);
|
|
322
|
+
|
|
323
|
+
// If feedback mode, capture execution output and send back to LLM
|
|
324
|
+
if (feedbackMode) {
|
|
325
|
+
const outputAfter = session.output.slice();
|
|
326
|
+
const executionOutput = outputAfter.slice(outputBefore.length).join('');
|
|
327
|
+
|
|
218
328
|
if (session.debug) {
|
|
219
|
-
console.error(`[LLM]
|
|
329
|
+
console.error(`[LLM] Execution output (${executionOutput.length} chars):\n${executionOutput}\n`);
|
|
220
330
|
}
|
|
221
331
|
|
|
222
|
-
// Build
|
|
223
|
-
const
|
|
224
|
-
const retryPrompt = `Your previous response had the following errors:\n${errorFeedback}\n\nPlease fix these errors and generate valid Dirac XML again. Remember to only use the allowed tags.`;
|
|
332
|
+
// Build feedback prompt
|
|
333
|
+
const feedbackPrompt = `The code executed successfully. Here is the output:\n\`\`\`\n${executionOutput}\n\`\`\`\n\nPlease review the output carefully. If the output is correct and complete, respond with ONLY the word "DONE" and nothing else. If the output is incorrect or incomplete, generate corrected Dirac XML code.`;
|
|
225
334
|
|
|
226
|
-
|
|
227
|
-
|
|
335
|
+
if (session.debug) {
|
|
336
|
+
console.error(`[LLM] Feedback prompt:\n${feedbackPrompt}\n`);
|
|
337
|
+
}
|
|
228
338
|
|
|
229
|
-
//
|
|
339
|
+
// Add feedback to dialog history
|
|
340
|
+
dialogHistory.push({ role: 'user', content: feedbackPrompt });
|
|
341
|
+
|
|
342
|
+
// Get LLM's assessment
|
|
230
343
|
if (isOpenAI) {
|
|
231
344
|
const response = await session.llmClient.chat.completions.create({
|
|
232
345
|
model,
|
|
@@ -253,7 +366,7 @@ for (const sub of subroutines) {
|
|
|
253
366
|
result = content.type === 'text' ? content.text : '';
|
|
254
367
|
}
|
|
255
368
|
|
|
256
|
-
// Add
|
|
369
|
+
// Add response to dialog history
|
|
257
370
|
dialogHistory.push({ role: 'assistant', content: result });
|
|
258
371
|
|
|
259
372
|
// Update context variable if present
|
|
@@ -262,48 +375,33 @@ for (const sub of subroutines) {
|
|
|
262
375
|
}
|
|
263
376
|
|
|
264
377
|
if (session.debug) {
|
|
265
|
-
console.error(`[LLM]
|
|
378
|
+
console.error(`[LLM] Feedback response:\n${result}\n`);
|
|
266
379
|
}
|
|
267
380
|
|
|
268
|
-
//
|
|
269
|
-
|
|
270
|
-
if (
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
const endIdx = diracCode.indexOf('```', 3);
|
|
274
|
-
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
275
|
-
diracCode = `<system>${bashContent}</system>`;
|
|
276
|
-
} else {
|
|
277
|
-
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, '').replace(/\n?```$/m, '').trim();
|
|
381
|
+
// Check if LLM says we're done (check at start of response)
|
|
382
|
+
const responseStart = result.trim().substring(0, 100).toUpperCase();
|
|
383
|
+
if (responseStart.startsWith('DONE') || result.trim().toLowerCase().includes('looks correct') || result.trim().toLowerCase().includes('looks good')) {
|
|
384
|
+
if (session.debug) {
|
|
385
|
+
console.error(`[LLM] Feedback loop terminating - LLM indicated completion\n`);
|
|
278
386
|
}
|
|
387
|
+
break;
|
|
279
388
|
}
|
|
280
389
|
|
|
281
|
-
|
|
282
|
-
|
|
390
|
+
// Otherwise, continue to next iteration with new LLM response
|
|
391
|
+
} else {
|
|
392
|
+
// No feedback mode, exit after first execution
|
|
393
|
+
break;
|
|
283
394
|
}
|
|
284
395
|
|
|
285
|
-
|
|
286
|
-
|
|
396
|
+
} catch (parseError) {
|
|
397
|
+
// If parsing fails, treat as plain text
|
|
398
|
+
if (session.debug) {
|
|
399
|
+
console.error(`[LLM] Failed to parse as Dirac, treating as text: ${parseError}`);
|
|
287
400
|
}
|
|
288
|
-
|
|
289
|
-
//
|
|
290
|
-
if (autocorrect) {
|
|
291
|
-
dynamicAST = applyCorrectedTags(dynamicAST, validation.results);
|
|
292
|
-
if (session.debug) {
|
|
293
|
-
console.error('[LLM] Applied auto-corrections to tags');
|
|
294
|
-
}
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
// Execute the validated (and possibly corrected) code
|
|
299
|
-
await integrate(session, dynamicAST);
|
|
300
|
-
} catch (parseError) {
|
|
301
|
-
// If parsing fails, treat as plain text
|
|
302
|
-
if (session.debug) {
|
|
303
|
-
console.error(`[LLM] Failed to parse as Dirac, treating as text: ${parseError}`);
|
|
401
|
+
emit(session, result);
|
|
402
|
+
break; // Exit feedback loop on parse error
|
|
304
403
|
}
|
|
305
|
-
|
|
306
|
-
}
|
|
404
|
+
} // end while loop
|
|
307
405
|
} else {
|
|
308
406
|
// Otherwise emit to output as text
|
|
309
407
|
emit(session, result);
|