dirac-lang 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LLM-VALIDATION.md +128 -0
- package/TESTING.md +162 -0
- package/config.yml +1 -1
- package/dist/{chunk-X3LQ626H.js → chunk-AMNIKIQX.js} +195 -31
- package/dist/{chunk-VHP3G4BF.js → chunk-KRLCG4G4.js} +1 -1
- package/dist/cli.js +6 -5
- package/dist/index.js +2 -2
- package/dist/{interpreter-46CAOCAZ.js → interpreter-RURSTR65.js} +1 -1
- package/dist/tag-validator-I3GLCBVD.js +152 -0
- package/dist/test-runner.d.ts +42 -0
- package/dist/test-runner.js +171 -0
- package/examples/llm-feedback-debug.di +30 -0
- package/examples/llm-feedback-demo.di +19 -0
- package/examples/llm-feedback-math.di +22 -0
- package/examples/llm-feedback-simple.di +16 -0
- package/examples/llm-feedback-sub.di +22 -0
- package/examples/llm-no-feedback.di +10 -0
- package/examples/llm-validate-test.di +18 -0
- package/package.json +4 -3
- package/src/tags/llm.ts +224 -28
- package/src/test-runner.ts +226 -0
- package/src/utils/tag-validator.ts +227 -0
- package/tests/basic-output.test.di +5 -0
- package/tests/exception-basic.test.di +10 -0
- package/tests/if-conditional.test.di +17 -0
- package/tests/loop-basic.test.di +9 -0
- package/tests/subroutine-basic.test.di +9 -0
- package/tests/test-if-basic.test.di +8 -0
- package/tests/variable-basic.test.di +6 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# LLM Tag Validation
|
|
2
|
+
|
|
3
|
+
The `<llm>` tag now supports automatic validation and correction of generated Dirac code when using `execute="true"`.
|
|
4
|
+
|
|
5
|
+
## Attributes
|
|
6
|
+
|
|
7
|
+
### Execution Mode
|
|
8
|
+
- `execute="true"` - Parse and execute the LLM response as Dirac code (existing feature)
|
|
9
|
+
|
|
10
|
+
### Tag Validation (New)
|
|
11
|
+
- `validate="true"` - Enable tag validation for LLM-generated code
|
|
12
|
+
- `autocorrect="true"` - Automatically correct similar tag names using semantic matching
|
|
13
|
+
- `max-retries="N"` - Maximum number of retry attempts if validation fails (default: 0)
|
|
14
|
+
|
|
15
|
+
## How It Works
|
|
16
|
+
|
|
17
|
+
When `validate="true"` is enabled:
|
|
18
|
+
|
|
19
|
+
1. **Parse**: LLM response is parsed as Dirac XML
|
|
20
|
+
2. **Validate**: Each tag is checked against available subroutines
|
|
21
|
+
- Verifies tag names exist
|
|
22
|
+
- Checks required parameters are present
|
|
23
|
+
- Warns about unknown attributes
|
|
24
|
+
3. **Semantic Matching**: If a tag doesn't exist, finds the closest match using embeddings
|
|
25
|
+
4. **Auto-correct**: If `autocorrect="true"` and similarity >= 0.75, replaces tag with best match
|
|
26
|
+
5. **Retry**: If validation fails and `max-retries > 0`, sends error feedback to LLM and retries
|
|
27
|
+
6. **Execute**: Once validation passes, executes the (possibly corrected) code
|
|
28
|
+
|
|
29
|
+
## Examples
|
|
30
|
+
|
|
31
|
+
### Basic Validation
|
|
32
|
+
|
|
33
|
+
```xml
|
|
34
|
+
<dirac>
|
|
35
|
+
<subroutine name="greet" param-name="string:required">
|
|
36
|
+
<output>Hello, <variable name="name" />!</output>
|
|
37
|
+
</subroutine>
|
|
38
|
+
|
|
39
|
+
<llm execute="true" validate="true">
|
|
40
|
+
Greet Alice
|
|
41
|
+
</llm>
|
|
42
|
+
</dirac>
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
If the LLM generates `<greeting name="Alice" />` instead of `<greet name="Alice" />`, validation will fail with an error.
|
|
46
|
+
|
|
47
|
+
### With Auto-correction
|
|
48
|
+
|
|
49
|
+
```xml
|
|
50
|
+
<dirac>
|
|
51
|
+
<subroutine name="greet" param-name="string:required">
|
|
52
|
+
<output>Hello, <variable name="name" />!</output>
|
|
53
|
+
</subroutine>
|
|
54
|
+
|
|
55
|
+
<llm execute="true" validate="true" autocorrect="true">
|
|
56
|
+
Greet Alice
|
|
57
|
+
</llm>
|
|
58
|
+
</dirac>
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
If the LLM generates `<greeting name="Alice" />`, and `greeting` is semantically similar to `greet` (similarity >= 0.75), it will be auto-corrected to `<greet name="Alice" />`.
|
|
62
|
+
|
|
63
|
+
### With Retry
|
|
64
|
+
|
|
65
|
+
```xml
|
|
66
|
+
<dirac>
|
|
67
|
+
<subroutine name="calculate" param-expression="string:required">
|
|
68
|
+
<eval><variable name="expression" /></eval>
|
|
69
|
+
</subroutine>
|
|
70
|
+
|
|
71
|
+
<llm execute="true" validate="true" max-retries="3">
|
|
72
|
+
Calculate 2 + 2
|
|
73
|
+
</llm>
|
|
74
|
+
</dirac>
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
If validation fails:
|
|
78
|
+
1. LLM receives feedback: "Your previous response had the following errors: <calculate>: Missing required parameter: expression"
|
|
79
|
+
2. LLM generates a new response
|
|
80
|
+
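3. Process repeats up to 3 times or until validation passes; if the response is still invalid after the last retry, execution stops with a "Tag validation failed after 3 retries" error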
|
|
81
|
+
|
|
82
|
+
### Combined Approach
|
|
83
|
+
|
|
84
|
+
```xml
|
|
85
|
+
<llm execute="true" validate="true" autocorrect="true" max-retries="2">
|
|
86
|
+
Generate a greeting for Bob
|
|
87
|
+
</llm>
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
This combines auto-correction with retry:
|
|
91
|
+
- First tries to auto-correct similar tag names
|
|
92
|
+
- If that doesn't fix all errors, retries with LLM feedback
|
|
93
|
+
- Maximum 2 retry attempts
|
|
94
|
+
|
|
95
|
+
## Error Messages
|
|
96
|
+
|
|
97
|
+
Validation can detect:
|
|
98
|
+
|
|
99
|
+
- **Missing tags**: `Tag <xyz> does not exist and no similar tag was found.`
|
|
100
|
+
- **Similar tags**: `Tag <greeting> does not exist. Did you mean <greet>? (similarity: 0.85)`
|
|
101
|
+
- **Missing parameters**: `<greet>: Missing required parameter: name`
|
|
102
|
+
- **Unknown attributes**: `<greet>: Unknown attribute: person`
|
|
103
|
+
|
|
104
|
+
## Requirements
|
|
105
|
+
|
|
106
|
+
- Requires embedding server for semantic matching (Ollama with embeddinggemma model)
|
|
107
|
+
- Configure in `config.yml`:
|
|
108
|
+
```yaml
|
|
109
|
+
embeddingServer:
|
|
110
|
+
  host: localhost
|
|
111
|
+
  port: 11435
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Performance Notes
|
|
115
|
+
|
|
116
|
+
- Validation adds latency due to embedding calls
|
|
117
|
+
- Each tag requires an embedding API call
|
|
118
|
+
- Consider using `validate="true"` only when necessary
|
|
119
|
+
- Auto-correction is faster than retries
|
|
120
|
+
|
|
121
|
+
## Best Practices
|
|
122
|
+
|
|
123
|
+
1. **Start without validation** for simple prompts
|
|
124
|
+
2. **Add validation** when LLM frequently generates incorrect tags
|
|
125
|
+
3. **Use autocorrect** for typos and similar names
|
|
126
|
+
4. **Use retry** for more complex validation errors
|
|
127
|
+
5. **Limit retries** to 2-3 to avoid excessive API calls
|
|
128
|
+
6. **Monitor debug output** with `DIRAC_DEBUG=1` to see validation details
|
package/TESTING.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Dirac Testing System
|
|
2
|
+
|
|
3
|
+
A simple, lightweight testing framework for Dirac language files.
|
|
4
|
+
|
|
5
|
+
## Running Tests
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm test
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
This will:
|
|
12
|
+
1. Build the project
|
|
13
|
+
2. Run all `*.test.di` files in the `tests/` directory
|
|
14
|
+
3. Report results
|
|
15
|
+
|
|
16
|
+
## Writing Tests
|
|
17
|
+
|
|
18
|
+
Tests are standard Dirac `.di` files with special comments that define expectations.
|
|
19
|
+
|
|
20
|
+
### Test File Format
|
|
21
|
+
|
|
22
|
+
```xml
|
|
23
|
+
<!-- TEST: test_name -->
|
|
24
|
+
<!-- EXPECT: expected output -->
|
|
25
|
+
<dirac>
|
|
26
|
+
<!-- your test code here -->
|
|
27
|
+
</dirac>
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Test Metadata Comments
|
|
31
|
+
|
|
32
|
+
- `<!-- TEST: name -->` - Name of the test (optional, defaults to filename)
|
|
33
|
+
- `<!-- EXPECT: output -->` - Expected output (optional)
|
|
34
|
+
- `<!-- EXPECT_ERROR: error message -->` - Expected error message (optional)
|
|
35
|
+
|
|
36
|
+
### Example Tests
|
|
37
|
+
|
|
38
|
+
#### Basic Output Test
|
|
39
|
+
|
|
40
|
+
```xml
|
|
41
|
+
<!-- TEST: hello_world -->
|
|
42
|
+
<!-- EXPECT: Hello, World! -->
|
|
43
|
+
<dirac>
|
|
44
|
+
<output>Hello, World!</output>
|
|
45
|
+
</dirac>
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
#### Variable Test
|
|
49
|
+
|
|
50
|
+
```xml
|
|
51
|
+
<!-- TEST: variables -->
|
|
52
|
+
<!-- EXPECT: Value is: test123 -->
|
|
53
|
+
<dirac>
|
|
54
|
+
<defvar name="myvar" value="test123" />
|
|
55
|
+
<output>Value is: <variable name="myvar" /></output>
|
|
56
|
+
</dirac>
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
#### Error Test
|
|
60
|
+
|
|
61
|
+
```xml
|
|
62
|
+
<!-- TEST: missing_variable -->
|
|
63
|
+
<!-- EXPECT_ERROR: Variable 'missing' not found -->
|
|
64
|
+
<dirac>
|
|
65
|
+
<variable name="missing" />
|
|
66
|
+
</dirac>
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
#### No Expectation (Just Run)
|
|
70
|
+
|
|
71
|
+
```xml
|
|
72
|
+
<!-- TEST: runs_without_error -->
|
|
73
|
+
<dirac>
|
|
74
|
+
<defvar name="x" value="10" />
|
|
75
|
+
<!-- Test passes if it runs without throwing an error -->
|
|
76
|
+
</dirac>
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Test Organization
|
|
80
|
+
|
|
81
|
+
Tests should be placed in the `tests/` directory with the `.test.di` extension:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
tests/
|
|
85
|
+
├── basic-output.test.di
|
|
86
|
+
├── variable-basic.test.di
|
|
87
|
+
├── subroutine-basic.test.di
|
|
88
|
+
├── if-conditional.test.di
|
|
89
|
+
└── ...
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
You can organize tests into subdirectories:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
tests/
|
|
96
|
+
├── core/
|
|
97
|
+
│ ├── output.test.di
|
|
98
|
+
│ └── variables.test.di
|
|
99
|
+
├── control-flow/
|
|
100
|
+
│ ├── if.test.di
|
|
101
|
+
│ └── loop.test.di
|
|
102
|
+
└── llm/
|
|
103
|
+
└── basic.test.di
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Output Matching
|
|
107
|
+
|
|
108
|
+
The test runner normalizes whitespace when comparing output:
|
|
109
|
+
- Multiple spaces/newlines are collapsed to single spaces
|
|
110
|
+
- Leading/trailing whitespace is trimmed
|
|
111
|
+
- This allows tests to ignore XML formatting whitespace
|
|
112
|
+
|
|
113
|
+
For example, these are equivalent:
|
|
114
|
+
- Expected: `Hello World`
|
|
115
|
+
- Actual: ` Hello World ` → matches ✓
|
|
116
|
+
- Actual: `Hello\n World` → matches ✓
|
|
117
|
+
|
|
118
|
+
## Exit Codes
|
|
119
|
+
|
|
120
|
+
- `0` - All tests passed
|
|
121
|
+
- `1` - One or more tests failed
|
|
122
|
+
|
|
123
|
+
## Continuous Integration
|
|
124
|
+
|
|
125
|
+
Add to your CI pipeline:
|
|
126
|
+
|
|
127
|
+
```yaml
|
|
128
|
+
# .github/workflows/test.yml
|
|
129
|
+
- name: Run tests
|
|
130
|
+
run: npm test
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Best Practices
|
|
134
|
+
|
|
135
|
+
1. **One concept per test** - Each test should verify one specific behavior
|
|
136
|
+
2. **Descriptive names** - Use clear test names that describe what's being tested
|
|
137
|
+
3. **Test edge cases** - Include tests for error conditions and boundary cases
|
|
138
|
+
4. **Keep tests fast** - Avoid LLM calls in unit tests (use mocks or separate integration tests)
|
|
139
|
+
5. **Test regressions** - When fixing bugs, add a test to prevent regression
|
|
140
|
+
|
|
141
|
+
## Example Test Suite
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
tests/
|
|
145
|
+
├── basic-output.test.di # Basic output functionality
|
|
146
|
+
├── variable-basic.test.di # Variable definition and output
|
|
147
|
+
├── subroutine-basic.test.di # Subroutine calls
|
|
148
|
+
├── if-conditional.test.di # Conditional execution
|
|
149
|
+
├── loop-basic.test.di # Loop functionality
|
|
150
|
+
├── exception-basic.test.di # Exception handling
|
|
151
|
+
└── test-if-basic.test.di # test-if conditional
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Future Enhancements
|
|
155
|
+
|
|
156
|
+
Potential improvements:
|
|
157
|
+
- Watch mode for TDD workflow
|
|
158
|
+
- Code coverage reporting
|
|
159
|
+
- Performance benchmarks
|
|
160
|
+
- Parallel test execution
|
|
161
|
+
- Test fixtures/setup/teardown
|
|
162
|
+
- Snapshot testing for complex outputs
|
package/config.yml
CHANGED
package/dist/{chunk-X3LQ626H.js → chunk-AMNIKIQX.js}
CHANGED
|
@@ -370,12 +370,12 @@ async function executeIf(session, element) {
|
|
|
370
370
|
const condition = await evaluatePredicate(session, conditionElement);
|
|
371
371
|
if (condition) {
|
|
372
372
|
if (thenElement) {
|
|
373
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
373
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
374
374
|
await integrateChildren2(session, thenElement);
|
|
375
375
|
}
|
|
376
376
|
} else {
|
|
377
377
|
if (elseElement) {
|
|
378
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
378
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
379
379
|
await integrateChildren2(session, elseElement);
|
|
380
380
|
}
|
|
381
381
|
}
|
|
@@ -388,7 +388,7 @@ async function evaluatePredicate(session, predicateElement) {
|
|
|
388
388
|
return await evaluateCondition(session, predicateElement);
|
|
389
389
|
}
|
|
390
390
|
const outputLengthBefore = session.output.length;
|
|
391
|
-
const { integrate: integrate2 } = await import("./interpreter-
|
|
391
|
+
const { integrate: integrate2 } = await import("./interpreter-RURSTR65.js");
|
|
392
392
|
await integrate2(session, predicateElement);
|
|
393
393
|
const newOutputChunks = session.output.slice(outputLengthBefore);
|
|
394
394
|
const result = newOutputChunks.join("").trim();
|
|
@@ -411,11 +411,11 @@ async function evaluateCondition(session, condElement) {
|
|
|
411
411
|
}
|
|
412
412
|
const outputLengthBefore = session.output.length;
|
|
413
413
|
const args = [];
|
|
414
|
-
const { integrate: integrate2 } = await import("./interpreter-
|
|
414
|
+
const { integrate: integrate2 } = await import("./interpreter-RURSTR65.js");
|
|
415
415
|
for (const child of condElement.children) {
|
|
416
416
|
if (child.tag.toLowerCase() === "arg") {
|
|
417
417
|
const argOutputStart = session.output.length;
|
|
418
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
418
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
419
419
|
await integrateChildren2(session, child);
|
|
420
420
|
const newChunks = session.output.slice(argOutputStart);
|
|
421
421
|
const argValue = newChunks.join("");
|
|
@@ -675,8 +675,13 @@ then you call it like
|
|
|
675
675
|
example = example.replace(/"/g, '"').replace(/:/g, ":");
|
|
676
676
|
systemPrompt += ">" + example + "</" + sub.name + ">";
|
|
677
677
|
}
|
|
678
|
-
systemPrompt += "\
|
|
679
|
-
systemPrompt += "\
|
|
678
|
+
systemPrompt += "\n\nIMPORTANT INSTRUCTIONS:";
|
|
679
|
+
systemPrompt += "\n1. Output ONLY valid XML tags from the list above";
|
|
680
|
+
systemPrompt += "\n2. Do NOT include any explanations, descriptions, or extra text";
|
|
681
|
+
systemPrompt += "\n3. Do NOT use bullet points or formatting - just pure XML";
|
|
682
|
+
systemPrompt += "\n4. Do NOT invent tags - only use tags from the list above";
|
|
683
|
+
systemPrompt += "\n5. Start your response directly with the XML tag (e.g., <add ...>)";
|
|
684
|
+
systemPrompt += "\n\nDouble-check: Does your response contain ONLY XML tags? If not, remove all non-XML text.";
|
|
680
685
|
prompt = systemPrompt + "\nUser: " + userPrompt + "\nOutput:";
|
|
681
686
|
if (session.debug || process.env.DIRAC_LOG_PROMPT === "1") {
|
|
682
687
|
console.error("[LLM] Full prompt sent to LLM:\n" + prompt + "\n");
|
|
@@ -724,32 +729,191 @@ then you call it like
|
|
|
724
729
|
if (outputVar) {
|
|
725
730
|
setVariable(session, outputVar, result, false);
|
|
726
731
|
} else if (executeMode) {
|
|
732
|
+
const validateTags = element.attributes["validate"] === "true";
|
|
733
|
+
const autocorrect = element.attributes["autocorrect"] === "true";
|
|
734
|
+
const maxRetries = parseInt(element.attributes["max-retries"] || "0", 10);
|
|
735
|
+
const feedbackMode = element.attributes["feedback"] === "true";
|
|
736
|
+
const maxIterations = parseInt(element.attributes["max-iterations"] || "3", 10);
|
|
737
|
+
const replaceTick = element.attributes["replace-tick"] === "true";
|
|
727
738
|
if (session.debug) {
|
|
728
739
|
console.error(`[LLM] Executing response as Dirac code:
|
|
729
740
|
${result}
|
|
730
741
|
`);
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
if (match && match[1] === "bash") {
|
|
737
|
-
const endIdx = diracCode.indexOf("```", 3);
|
|
738
|
-
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
739
|
-
diracCode = `<system>${bashContent}</system>`;
|
|
740
|
-
} else {
|
|
741
|
-
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
742
|
+
if (validateTags) {
|
|
743
|
+
console.error(`[LLM] Tag validation enabled (autocorrect: ${autocorrect}, max-retries: ${maxRetries})`);
|
|
744
|
+
}
|
|
745
|
+
if (feedbackMode) {
|
|
746
|
+
console.error(`[LLM] Feedback mode enabled (max iterations: ${maxIterations})`);
|
|
742
747
|
}
|
|
743
748
|
}
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
749
|
+
let iteration = 0;
|
|
750
|
+
while (iteration < maxIterations && (iteration === 0 || feedbackMode)) {
|
|
751
|
+
iteration++;
|
|
752
|
+
if (session.debug && feedbackMode) {
|
|
753
|
+
console.error(`[LLM] Feedback iteration ${iteration}/${maxIterations}`);
|
|
754
|
+
}
|
|
755
|
+
let diracCode = result.trim();
|
|
756
|
+
if (replaceTick && diracCode.startsWith("```")) {
|
|
757
|
+
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
758
|
+
if (match && match[1] === "bash") {
|
|
759
|
+
const endIdx = diracCode.indexOf("```", 3);
|
|
760
|
+
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
761
|
+
diracCode = `<system>${bashContent}</system>`;
|
|
762
|
+
} else {
|
|
763
|
+
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
const outputBefore = feedbackMode ? session.output.slice() : [];
|
|
767
|
+
try {
|
|
768
|
+
const parser = new DiracParser();
|
|
769
|
+
let dynamicAST = parser.parse(diracCode);
|
|
770
|
+
if (validateTags) {
|
|
771
|
+
const { validateDiracCode, applyCorrectedTags } = await import("./tag-validator-I3GLCBVD.js");
|
|
772
|
+
let validation = await validateDiracCode(session, dynamicAST, { autocorrect });
|
|
773
|
+
let retryCount = 0;
|
|
774
|
+
while (!validation.valid && retryCount < maxRetries) {
|
|
775
|
+
retryCount++;
|
|
776
|
+
if (session.debug) {
|
|
777
|
+
console.error(`[LLM] Validation failed (attempt ${retryCount}/${maxRetries}):`, validation.errorMessages);
|
|
778
|
+
}
|
|
779
|
+
const errorFeedback = validation.errorMessages.join("\n");
|
|
780
|
+
const retryPrompt = `Your previous response had the following errors:
|
|
781
|
+
${errorFeedback}
|
|
782
|
+
|
|
783
|
+
Please fix these errors and generate valid Dirac XML again. Remember to only use the allowed tags.`;
|
|
784
|
+
dialogHistory.push({ role: "user", content: retryPrompt });
|
|
785
|
+
if (isOpenAI) {
|
|
786
|
+
const response = await session.llmClient.chat.completions.create({
|
|
787
|
+
model,
|
|
788
|
+
max_tokens: maxTokens,
|
|
789
|
+
temperature,
|
|
790
|
+
messages: dialogHistory
|
|
791
|
+
});
|
|
792
|
+
result = response.choices[0]?.message?.content || "";
|
|
793
|
+
} else if (isOllama) {
|
|
794
|
+
const ollamaPrompt = dialogHistory.map((m) => `${m.role.charAt(0).toUpperCase() + m.role.slice(1)}: ${m.content}`).join("\n");
|
|
795
|
+
result = await session.llmClient.complete(ollamaPrompt, {
|
|
796
|
+
model,
|
|
797
|
+
temperature,
|
|
798
|
+
max_tokens: maxTokens
|
|
799
|
+
});
|
|
800
|
+
} else {
|
|
801
|
+
const response = await session.llmClient.messages.create({
|
|
802
|
+
model,
|
|
803
|
+
max_tokens: maxTokens,
|
|
804
|
+
temperature,
|
|
805
|
+
messages: dialogHistory
|
|
806
|
+
});
|
|
807
|
+
const content = response.content[0];
|
|
808
|
+
result = content.type === "text" ? content.text : "";
|
|
809
|
+
}
|
|
810
|
+
dialogHistory.push({ role: "assistant", content: result });
|
|
811
|
+
if (contextVar) {
|
|
812
|
+
setVariable(session, contextVar, dialogHistory, true);
|
|
813
|
+
}
|
|
814
|
+
if (session.debug) {
|
|
815
|
+
console.error(`[LLM] Retry ${retryCount} response:
|
|
816
|
+
${result}
|
|
817
|
+
`);
|
|
818
|
+
}
|
|
819
|
+
diracCode = result.trim();
|
|
820
|
+
if (replaceTick && diracCode.startsWith("```")) {
|
|
821
|
+
const match = diracCode.match(/^```(\w+)?\n?/m);
|
|
822
|
+
if (match && match[1] === "bash") {
|
|
823
|
+
const endIdx = diracCode.indexOf("```", 3);
|
|
824
|
+
let bashContent = diracCode.slice(match[0].length, endIdx).trim();
|
|
825
|
+
diracCode = `<system>${bashContent}</system>`;
|
|
826
|
+
} else {
|
|
827
|
+
diracCode = diracCode.replace(/^```(?:xml|html|dirac)?\n?/m, "").replace(/\n?```$/m, "").trim();
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
dynamicAST = parser.parse(diracCode);
|
|
831
|
+
validation = await validateDiracCode(session, dynamicAST, { autocorrect });
|
|
832
|
+
}
|
|
833
|
+
if (!validation.valid) {
|
|
834
|
+
throw new Error(`Tag validation failed after ${maxRetries} retries:
|
|
835
|
+
${validation.errorMessages.join("\n")}`);
|
|
836
|
+
}
|
|
837
|
+
if (autocorrect) {
|
|
838
|
+
dynamicAST = applyCorrectedTags(dynamicAST, validation.results);
|
|
839
|
+
if (session.debug) {
|
|
840
|
+
console.error("[LLM] Applied auto-corrections to tags");
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
await integrate(session, dynamicAST);
|
|
845
|
+
if (feedbackMode) {
|
|
846
|
+
const outputAfter = session.output.slice();
|
|
847
|
+
const executionOutput = outputAfter.slice(outputBefore.length).join("");
|
|
848
|
+
if (session.debug) {
|
|
849
|
+
console.error(`[LLM] Execution output (${executionOutput.length} chars):
|
|
850
|
+
${executionOutput}
|
|
851
|
+
`);
|
|
852
|
+
}
|
|
853
|
+
const feedbackPrompt = `The code executed successfully. Here is the output:
|
|
854
|
+
\`\`\`
|
|
855
|
+
${executionOutput}
|
|
856
|
+
\`\`\`
|
|
857
|
+
|
|
858
|
+
Please review the output carefully. If the output is correct and complete, respond with ONLY the word "DONE" and nothing else. If the output is incorrect or incomplete, generate corrected Dirac XML code.`;
|
|
859
|
+
if (session.debug) {
|
|
860
|
+
console.error(`[LLM] Feedback prompt:
|
|
861
|
+
${feedbackPrompt}
|
|
862
|
+
`);
|
|
863
|
+
}
|
|
864
|
+
dialogHistory.push({ role: "user", content: feedbackPrompt });
|
|
865
|
+
if (isOpenAI) {
|
|
866
|
+
const response = await session.llmClient.chat.completions.create({
|
|
867
|
+
model,
|
|
868
|
+
max_tokens: maxTokens,
|
|
869
|
+
temperature,
|
|
870
|
+
messages: dialogHistory
|
|
871
|
+
});
|
|
872
|
+
result = response.choices[0]?.message?.content || "";
|
|
873
|
+
} else if (isOllama) {
|
|
874
|
+
const ollamaPrompt = dialogHistory.map((m) => `${m.role.charAt(0).toUpperCase() + m.role.slice(1)}: ${m.content}`).join("\n");
|
|
875
|
+
result = await session.llmClient.complete(ollamaPrompt, {
|
|
876
|
+
model,
|
|
877
|
+
temperature,
|
|
878
|
+
max_tokens: maxTokens
|
|
879
|
+
});
|
|
880
|
+
} else {
|
|
881
|
+
const response = await session.llmClient.messages.create({
|
|
882
|
+
model,
|
|
883
|
+
max_tokens: maxTokens,
|
|
884
|
+
temperature,
|
|
885
|
+
messages: dialogHistory
|
|
886
|
+
});
|
|
887
|
+
const content = response.content[0];
|
|
888
|
+
result = content.type === "text" ? content.text : "";
|
|
889
|
+
}
|
|
890
|
+
dialogHistory.push({ role: "assistant", content: result });
|
|
891
|
+
if (contextVar) {
|
|
892
|
+
setVariable(session, contextVar, dialogHistory, true);
|
|
893
|
+
}
|
|
894
|
+
if (session.debug) {
|
|
895
|
+
console.error(`[LLM] Feedback response:
|
|
896
|
+
${result}
|
|
897
|
+
`);
|
|
898
|
+
}
|
|
899
|
+
const responseStart = result.trim().substring(0, 100).toUpperCase();
|
|
900
|
+
if (responseStart.startsWith("DONE") || result.trim().toLowerCase().includes("looks correct") || result.trim().toLowerCase().includes("looks good")) {
|
|
901
|
+
if (session.debug) {
|
|
902
|
+
console.error(`[LLM] Feedback loop terminating - LLM indicated completion
|
|
903
|
+
`);
|
|
904
|
+
}
|
|
905
|
+
break;
|
|
906
|
+
}
|
|
907
|
+
} else {
|
|
908
|
+
break;
|
|
909
|
+
}
|
|
910
|
+
} catch (parseError) {
|
|
911
|
+
if (session.debug) {
|
|
912
|
+
console.error(`[LLM] Failed to parse as Dirac, treating as text: ${parseError}`);
|
|
913
|
+
}
|
|
914
|
+
emit(session, result);
|
|
915
|
+
break;
|
|
751
916
|
}
|
|
752
|
-
emit(session, result);
|
|
753
917
|
}
|
|
754
918
|
} else {
|
|
755
919
|
emit(session, result);
|
|
@@ -1162,7 +1326,7 @@ async function executeTagCheck(session, element) {
|
|
|
1162
1326
|
const executeTag = correctedTag || tagName;
|
|
1163
1327
|
console.error(`[tag-check] Executing <${executeTag}/> as all checks passed and execute=true.`);
|
|
1164
1328
|
const elementToExecute = correctedTag ? { ...child, tag: correctedTag } : child;
|
|
1165
|
-
const { integrate: integrate2 } = await import("./interpreter-
|
|
1329
|
+
const { integrate: integrate2 } = await import("./interpreter-RURSTR65.js");
|
|
1166
1330
|
await integrate2(session, elementToExecute);
|
|
1167
1331
|
}
|
|
1168
1332
|
}
|
|
@@ -1171,7 +1335,7 @@ async function executeTagCheck(session, element) {
|
|
|
1171
1335
|
// src/tags/throw.ts
|
|
1172
1336
|
async function executeThrow(session, element) {
|
|
1173
1337
|
const exceptionName = element.attributes?.name || "exception";
|
|
1174
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
1338
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
1175
1339
|
const exceptionDom = {
|
|
1176
1340
|
tag: "exception-content",
|
|
1177
1341
|
attributes: { name: exceptionName },
|
|
@@ -1184,7 +1348,7 @@ async function executeThrow(session, element) {
|
|
|
1184
1348
|
// src/tags/try.ts
|
|
1185
1349
|
async function executeTry(session, element) {
|
|
1186
1350
|
setExceptionBoundary(session);
|
|
1187
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
1351
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
1188
1352
|
await integrateChildren2(session, element);
|
|
1189
1353
|
unsetExceptionBoundary(session);
|
|
1190
1354
|
}
|
|
@@ -1194,7 +1358,7 @@ async function executeCatch(session, element) {
|
|
|
1194
1358
|
const exceptionName = element.attributes?.name || "exception";
|
|
1195
1359
|
const caughtCount = lookupException(session, exceptionName);
|
|
1196
1360
|
if (caughtCount > 0) {
|
|
1197
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
1361
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
1198
1362
|
await integrateChildren2(session, element);
|
|
1199
1363
|
}
|
|
1200
1364
|
flushCurrentException(session);
|
|
@@ -1203,7 +1367,7 @@ async function executeCatch(session, element) {
|
|
|
1203
1367
|
// src/tags/exception.ts
|
|
1204
1368
|
async function executeException(session, element) {
|
|
1205
1369
|
const exceptions = getCurrentExceptions(session);
|
|
1206
|
-
const { integrateChildren: integrateChildren2 } = await import("./interpreter-
|
|
1370
|
+
const { integrateChildren: integrateChildren2 } = await import("./interpreter-RURSTR65.js");
|
|
1207
1371
|
for (const exceptionDom of exceptions) {
|
|
1208
1372
|
await integrateChildren2(session, exceptionDom);
|
|
1209
1373
|
}
|
package/dist/cli.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
execute
|
|
4
|
-
} from "./chunk-
|
|
5
|
-
import "./chunk-
|
|
4
|
+
} from "./chunk-KRLCG4G4.js";
|
|
5
|
+
import "./chunk-AMNIKIQX.js";
|
|
6
6
|
import "./chunk-E7IWGUE6.js";
|
|
7
7
|
|
|
8
8
|
// src/cli.ts
|
|
@@ -11,7 +11,7 @@ import "dotenv/config";
|
|
|
11
11
|
// package.json
|
|
12
12
|
var package_default = {
|
|
13
13
|
name: "dirac-lang",
|
|
14
|
-
version: "0.1.
|
|
14
|
+
version: "0.1.11",
|
|
15
15
|
description: "LLM-Augmented Declarative Execution",
|
|
16
16
|
type: "module",
|
|
17
17
|
main: "dist/index.js",
|
|
@@ -21,8 +21,9 @@ var package_default = {
|
|
|
21
21
|
},
|
|
22
22
|
scripts: {
|
|
23
23
|
dev: "tsx src/cli.ts",
|
|
24
|
-
build: "tsup src/index.ts src/cli.ts --format esm --dts --clean",
|
|
25
|
-
test: "
|
|
24
|
+
build: "tsup src/index.ts src/cli.ts src/test-runner.ts --format esm --dts --clean",
|
|
25
|
+
test: "npm run build && node dist/test-runner.js tests",
|
|
26
|
+
"test:watch": "npm run build && node dist/test-runner.js tests --watch",
|
|
26
27
|
typecheck: "tsc --noEmit"
|
|
27
28
|
},
|
|
28
29
|
keywords: [
|
package/dist/index.js
CHANGED
|
@@ -2,11 +2,11 @@ import {
|
|
|
2
2
|
createLLMAdapter,
|
|
3
3
|
execute,
|
|
4
4
|
executeUserCommand
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-KRLCG4G4.js";
|
|
6
6
|
import {
|
|
7
7
|
DiracParser,
|
|
8
8
|
integrate
|
|
9
|
-
} from "./chunk-
|
|
9
|
+
} from "./chunk-AMNIKIQX.js";
|
|
10
10
|
import {
|
|
11
11
|
createSession,
|
|
12
12
|
getAvailableSubroutines,
|