@sanity/ailf 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/LICENSE +21 -0
  2. package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
  3. package/dist/_vendor/ailf-core/examples/index.js +66 -1
  4. package/dist/agent-harness/assertions-runtime.d.ts +49 -0
  5. package/dist/agent-harness/assertions-runtime.js +138 -0
  6. package/dist/agent-harness/provider.d.ts +58 -0
  7. package/dist/agent-harness/provider.js +104 -0
  8. package/dist/cli.js +0 -0
  9. package/dist/commands/init.js +3 -0
  10. package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
  11. package/dist/orchestration/steps/generate-configs-step.js +35 -2
  12. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
  13. package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
  14. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
  15. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
  16. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
  17. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
  18. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
  19. package/package.json +25 -24
  20. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  21. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  22. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  23. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  24. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  25. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  26. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  27. package/dist/_vendor/ailf-tasks/index.js +0 -16
  28. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  29. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  30. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  31. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  32. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  33. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  34. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  35. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  36. package/dist/agent-observer/test-imports.d.ts +0 -7
  37. package/dist/agent-observer/test-imports.js +0 -185
  38. package/dist/commands/update-quality-scores.d.ts +0 -5
  39. package/dist/commands/update-quality-scores.js +0 -20
  40. package/dist/lib/agent-behavior-report.d.ts +0 -8
  41. package/dist/lib/agent-behavior-report.js +0 -185
  42. package/dist/lib/baseline.d.ts +0 -19
  43. package/dist/lib/baseline.js +0 -153
  44. package/dist/lib/calculate-scores.d.ts +0 -23
  45. package/dist/lib/calculate-scores.js +0 -42
  46. package/dist/lib/compare.d.ts +0 -18
  47. package/dist/lib/compare.js +0 -170
  48. package/dist/lib/coverage-audit.d.ts +0 -4
  49. package/dist/lib/coverage-audit.js +0 -42
  50. package/dist/lib/discovery-report.d.ts +0 -13
  51. package/dist/lib/discovery-report.js +0 -57
  52. package/dist/lib/fetch-docs.d.ts +0 -30
  53. package/dist/lib/fetch-docs.js +0 -171
  54. package/dist/lib/generate-configs.d.ts +0 -25
  55. package/dist/lib/generate-configs.js +0 -42
  56. package/dist/lib/grader-api.d.ts +0 -21
  57. package/dist/lib/grader-api.js +0 -34
  58. package/dist/lib/grader-compare.d.ts +0 -19
  59. package/dist/lib/grader-compare.js +0 -91
  60. package/dist/lib/grader-consistency.d.ts +0 -27
  61. package/dist/lib/grader-consistency.js +0 -79
  62. package/dist/lib/grader-sensitivity.d.ts +0 -19
  63. package/dist/lib/grader-sensitivity.js +0 -75
  64. package/dist/lib/grader-validate.d.ts +0 -19
  65. package/dist/lib/grader-validate.js +0 -78
  66. package/dist/lib/measure-retrieval.d.ts +0 -14
  67. package/dist/lib/measure-retrieval.js +0 -71
  68. package/dist/lib/pr-comment.d.ts +0 -16
  69. package/dist/lib/pr-comment.js +0 -28
  70. package/dist/lib/readiness-report.d.ts +0 -13
  71. package/dist/lib/readiness-report.js +0 -108
  72. package/dist/lib/webhook-server.d.ts +0 -11
  73. package/dist/lib/webhook-server.js +0 -24
  74. package/dist/lib/weekly-digest.d.ts +0 -24
  75. package/dist/lib/weekly-digest.js +0 -148
  76. package/dist/orchestration/env-bridge.d.ts +0 -21
  77. package/dist/orchestration/env-bridge.js +0 -66
  78. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  79. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  80. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  81. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  82. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  83. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  86. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  87. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  88. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  89. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  90. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  91. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  92. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  93. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  94. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  95. package/dist/pipeline/compiler/task-bridge.js +0 -92
  96. package/dist/pipeline/expand-tasks.d.ts +0 -232
  97. package/dist/pipeline/expand-tasks.js +0 -467
  98. package/dist/pipeline/generate-configs.d.ts +0 -92
  99. package/dist/pipeline/generate-configs.js +0 -445
  100. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  102. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  103. package/dist/pipeline/steps/compare-step.js +0 -90
  104. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  105. package/dist/pipeline/steps/eval-step.js +0 -347
  106. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  107. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  108. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  109. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  110. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  111. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  112. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  113. package/dist/pipeline/steps/publish-report-step.js +0 -243
  114. package/dist/pipeline/steps/report-step.d.ts +0 -13
  115. package/dist/pipeline/steps/report-step.js +0 -56
  116. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  117. package/dist/pipeline/steps/update-scores-step.js +0 -42
  118. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  119. package/dist/scripts/agent-behavior-report.js +0 -315
  120. package/dist/scripts/baseline.d.ts +0 -43
  121. package/dist/scripts/baseline.js +0 -267
  122. package/dist/scripts/calculate-scores.d.ts +0 -166
  123. package/dist/scripts/calculate-scores.js +0 -1296
  124. package/dist/scripts/compare.d.ts +0 -22
  125. package/dist/scripts/compare.js +0 -334
  126. package/dist/scripts/coverage-audit.d.ts +0 -44
  127. package/dist/scripts/coverage-audit.js +0 -209
  128. package/dist/scripts/debug-eval.d.ts +0 -19
  129. package/dist/scripts/debug-eval.js +0 -73
  130. package/dist/scripts/discovery-report.d.ts +0 -58
  131. package/dist/scripts/discovery-report.js +0 -250
  132. package/dist/scripts/fetch-docs.d.ts +0 -35
  133. package/dist/scripts/fetch-docs.js +0 -472
  134. package/dist/scripts/generate-configs.d.ts +0 -66
  135. package/dist/scripts/generate-configs.js +0 -459
  136. package/dist/scripts/grader-api.d.ts +0 -27
  137. package/dist/scripts/grader-api.js +0 -206
  138. package/dist/scripts/grader-compare.d.ts +0 -22
  139. package/dist/scripts/grader-compare.js +0 -368
  140. package/dist/scripts/grader-consistency.d.ts +0 -20
  141. package/dist/scripts/grader-consistency.js +0 -313
  142. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  143. package/dist/scripts/grader-sensitivity.js +0 -354
  144. package/dist/scripts/grader-validate.d.ts +0 -19
  145. package/dist/scripts/grader-validate.js +0 -267
  146. package/dist/scripts/measure-retrieval.d.ts +0 -10
  147. package/dist/scripts/measure-retrieval.js +0 -145
  148. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  149. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  150. package/dist/scripts/pipeline.d.ts +0 -76
  151. package/dist/scripts/pipeline.js +0 -1031
  152. package/dist/scripts/pr-comment.d.ts +0 -10
  153. package/dist/scripts/pr-comment.js +0 -510
  154. package/dist/scripts/readiness-report.d.ts +0 -88
  155. package/dist/scripts/readiness-report.js +0 -342
  156. package/dist/scripts/update-quality-scores.d.ts +0 -15
  157. package/dist/scripts/update-quality-scores.js +0 -184
  158. package/dist/scripts/validate-task-sources.d.ts +0 -21
  159. package/dist/scripts/validate-task-sources.js +0 -210
  160. package/dist/scripts/validate.d.ts +0 -13
  161. package/dist/scripts/validate.js +0 -79
  162. package/dist/scripts/webhook-server.d.ts +0 -26
  163. package/dist/scripts/webhook-server.js +0 -147
  164. package/dist/scripts/weekly-digest.d.ts +0 -24
  165. package/dist/scripts/weekly-digest.js +0 -144
  166. package/dist/sinks/format-slack.d.ts +0 -64
  167. package/dist/sinks/format-slack.js +0 -306
  168. package/dist/sinks/slack-sink.d.ts +0 -27
  169. package/dist/sinks/slack-sink.js +0 -78
  170. package/dist/sinks/webhook-sink.d.ts +0 -19
  171. package/dist/sinks/webhook-sink.js +0 -50
  172. package/tasks/.expanded.agentic.yaml +0 -280
  173. package/tasks/.expanded.yaml +0 -565
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "2.0.2",
3
+ "version": "2.2.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -31,6 +31,29 @@
31
31
  "canonical",
32
32
  "tasks"
33
33
  ],
34
+ "dependencies": {
35
+ "@google-cloud/bigquery": "^8.1.1",
36
+ "@inquirer/prompts": "^8.3.0",
37
+ "@modelcontextprotocol/sdk": "^1.29.0",
38
+ "@portabletext/markdown": "^1.0.0",
39
+ "@sanity/client": "^7.3.0",
40
+ "commander": "^14.0.3",
41
+ "dotenv": "^16.4.7",
42
+ "dotenv-cli": "^11.0.0",
43
+ "jiti": "^2.6.1",
44
+ "js-yaml": "^4.1.0",
45
+ "promptfoo": "^0.120.24",
46
+ "zod": "^4.3.6"
47
+ },
48
+ "devDependencies": {
49
+ "@anthropic-ai/claude-agent-sdk": "^0.2.105",
50
+ "@types/js-yaml": "^4.0.9",
51
+ "@types/node": "^22.13.1",
52
+ "tsx": "^4.19.2",
53
+ "typescript": "^5.7.3",
54
+ "@sanity/ailf-core": "0.1.0",
55
+ "@sanity/ailf-shared": "0.1.0"
56
+ },
34
57
  "scripts": {
35
58
  "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
36
59
  "generate-configs": "tsx src/cli.ts generate-configs",
@@ -58,27 +81,5 @@
58
81
  "discovery-report": "tsx src/cli.ts discovery-report",
59
82
  "webhook-server": "tsx src/cli.ts webhook-server",
60
83
  "weekly-digest": "tsx src/cli.ts weekly-digest"
61
- },
62
- "dependencies": {
63
- "@google-cloud/bigquery": "^8.1.1",
64
- "@inquirer/prompts": "^8.3.0",
65
- "@modelcontextprotocol/sdk": "^1.29.0",
66
- "@portabletext/markdown": "^1.0.0",
67
- "@sanity/client": "^7.3.0",
68
- "commander": "^14.0.3",
69
- "dotenv": "^16.4.7",
70
- "dotenv-cli": "^11.0.0",
71
- "jiti": "^2.6.1",
72
- "js-yaml": "^4.1.0",
73
- "promptfoo": "^0.120.24",
74
- "zod": "^4.3.6"
75
- },
76
- "devDependencies": {
77
- "@sanity/ailf-core": "workspace:*",
78
- "@sanity/ailf-shared": "workspace:*",
79
- "@types/js-yaml": "^4.0.9",
80
- "@types/node": "^22.13.1",
81
- "tsx": "^4.19.2",
82
- "typescript": "^5.7.3"
83
84
  }
84
- }
85
+ }
@@ -1,10 +0,0 @@
1
- /**
2
- * comparison-formatters.test.ts
3
- *
4
- * Verifies that formatComparisonMarkdown() and formatComparisonTable()
5
- * dynamically derive column headers from the dimension keys present
6
- * in the report data, rather than hardcoding literacy-specific names.
7
- *
8
- * Run: npx tsx --test src/__tests__/comparison-formatters.test.ts
9
- */
10
- export {};
@@ -1,185 +0,0 @@
1
- /**
2
- * comparison-formatters.test.ts
3
- *
4
- * Verifies that formatComparisonMarkdown() and formatComparisonTable()
5
- * dynamically derive column headers from the dimension keys present
6
- * in the report data, rather than hardcoding literacy-specific names.
7
- *
8
- * Run: npx tsx --test src/__tests__/comparison-formatters.test.ts
9
- */
10
- import assert from "node:assert/strict";
11
- import { describe, it } from "node:test";
12
- import { formatComparisonMarkdown, formatComparisonTable, } from "../services/comparison-formatters.js";
13
- // ---------------------------------------------------------------------------
14
- // Helpers
15
- // ---------------------------------------------------------------------------
16
- /** Minimal ScoreSummary stub — only fields the formatters actually read */
17
- function stubSummary(avgScore) {
18
- return {
19
- belowCritical: [],
20
- lowestArea: "area-a",
21
- lowestScore: 40,
22
- overall: {
23
- avgCeilingScore: 80,
24
- avgScore,
25
- avgDocLift: 10,
26
- avgDocQualityGap: 20,
27
- avgFloorScore: 30,
28
- negativeDocLiftCount: 0,
29
- },
30
- scores: [],
31
- timestamp: "2026-04-05T00:00:00.000Z",
32
- };
33
- }
34
- function makeReport(overrides) {
35
- return {
36
- areas: [
37
- {
38
- area: "area-a",
39
- baseline: 60,
40
- experiment: 65,
41
- delta: 5,
42
- change: "improved",
43
- dimensions: overrides.areaDimensions,
44
- ceilingDelta: 0,
45
- docLiftDelta: 2,
46
- floorDelta: 0,
47
- },
48
- ],
49
- baseline: stubSummary(60),
50
- experiment: stubSummary(65),
51
- deltas: {
52
- overall: 5,
53
- perArea: { "area-a": 5 },
54
- perDimension: overrides.perDimension,
55
- docLift: 2,
56
- },
57
- generatedAt: "2026-04-05T00:00:00.000Z",
58
- improved: ["area-a"],
59
- regressed: [],
60
- unchanged: [],
61
- notEvaluated: [],
62
- mismatched: { onlyInBaseline: [], onlyInExperiment: [] },
63
- noiseThreshold: 2,
64
- noiseThresholdEmpirical: false,
65
- };
66
- }
67
- // ---------------------------------------------------------------------------
68
- // Tests — literacy dimensions (backward compatibility)
69
- // ---------------------------------------------------------------------------
70
- describe("formatComparisonMarkdown", () => {
71
- it("renders literacy dimension columns dynamically", () => {
72
- const report = makeReport({
73
- areaDimensions: {
74
- "task-completion": { baseline: 60, experiment: 65, delta: 5 },
75
- "code-correctness": { baseline: 50, experiment: 55, delta: 5 },
76
- "doc-coverage": { baseline: 40, experiment: 42, delta: 2 },
77
- },
78
- perDimension: {
79
- "task-completion": 5,
80
- "code-correctness": 5,
81
- "doc-coverage": 2,
82
- },
83
- });
84
- const md = formatComparisonMarkdown(report);
85
- // Column headers should be title-cased from kebab-case
86
- assert.ok(md.includes("Task Completion"), "should have Task Completion column header");
87
- assert.ok(md.includes("Code Correctness"), "should have Code Correctness column header");
88
- assert.ok(md.includes("Doc Coverage"), "should have Doc Coverage column header");
89
- // Per-dimension averages section should also show dynamic labels
90
- assert.ok(md.includes("| Task Completion |"), "dimension averages should include Task Completion");
91
- assert.ok(md.includes("| Code Correctness |"), "dimension averages should include Code Correctness");
92
- assert.ok(md.includes("| Doc Coverage |"), "dimension averages should include Doc Coverage");
93
- });
94
- it("renders MCP dimension columns dynamically", () => {
95
- const report = makeReport({
96
- areaDimensions: {
97
- "input-validation": { baseline: 50, experiment: 60, delta: 10 },
98
- "output-correctness": { baseline: 70, experiment: 75, delta: 5 },
99
- "error-handling": { baseline: 40, experiment: 45, delta: 5 },
100
- security: { baseline: 80, experiment: 82, delta: 2 },
101
- },
102
- perDimension: {
103
- "input-validation": 10,
104
- "output-correctness": 5,
105
- "error-handling": 5,
106
- security: 2,
107
- },
108
- });
109
- const md = formatComparisonMarkdown(report);
110
- // 4 MCP columns instead of 3 literacy columns
111
- assert.ok(md.includes("Input Validation"), "should have Input Validation column");
112
- assert.ok(md.includes("Output Correctness"), "should have Output Correctness column");
113
- assert.ok(md.includes("Error Handling"), "should have Error Handling column");
114
- assert.ok(md.includes("Security"), "should have Security column");
115
- // Per-dimension averages
116
- assert.ok(md.includes("| Input Validation |"), "dimension averages should include Input Validation");
117
- assert.ok(md.includes("| Security |"), "dimension averages should include Security");
118
- });
119
- });
120
- describe("formatComparisonTable", () => {
121
- it("renders literacy dimension columns dynamically", () => {
122
- const report = makeReport({
123
- areaDimensions: {
124
- "task-completion": { baseline: 60, experiment: 65, delta: 5 },
125
- "code-correctness": { baseline: 50, experiment: 55, delta: 5 },
126
- "doc-coverage": { baseline: 40, experiment: 42, delta: 2 },
127
- },
128
- perDimension: {
129
- "task-completion": 5,
130
- "code-correctness": 5,
131
- "doc-coverage": 2,
132
- },
133
- });
134
- const table = formatComparisonTable(report);
135
- // Dimension averages section
136
- assert.ok(table.includes("Task Completion:"), "should show Task Completion in dimension averages");
137
- assert.ok(table.includes("Code Correctness:"), "should show Code Correctness in dimension averages");
138
- assert.ok(table.includes("Doc Coverage:"), "should show Doc Coverage in dimension averages");
139
- // Per-area table header
140
- assert.ok(table.includes("Task Completion"), "per-area table should have Task Completion header");
141
- assert.ok(table.includes("Code Correctness"), "per-area table should have Code Correctness header");
142
- assert.ok(table.includes("Doc Coverage"), "per-area table should have Doc Coverage header");
143
- });
144
- it("renders MCP dimension columns dynamically", () => {
145
- const report = makeReport({
146
- areaDimensions: {
147
- "input-validation": { baseline: 50, experiment: 60, delta: 10 },
148
- "output-correctness": { baseline: 70, experiment: 75, delta: 5 },
149
- "error-handling": { baseline: 40, experiment: 45, delta: 5 },
150
- security: { baseline: 80, experiment: 82, delta: 2 },
151
- },
152
- perDimension: {
153
- "input-validation": 10,
154
- "output-correctness": 5,
155
- "error-handling": 5,
156
- security: 2,
157
- },
158
- });
159
- const table = formatComparisonTable(report);
160
- // 4 MCP columns in the per-area table
161
- assert.ok(table.includes("Input Validation"), "should have Input Validation");
162
- assert.ok(table.includes("Output Correctness"), "should have Output Correctness");
163
- assert.ok(table.includes("Error Handling"), "should have Error Handling");
164
- assert.ok(table.includes("Security"), "should have Security");
165
- // Should NOT have literacy dimension headers
166
- assert.ok(!table.includes("Task Completion"), "should not contain Task Completion");
167
- assert.ok(!table.includes("Doc Coverage"), "should not contain Doc Coverage");
168
- });
169
- it("includes delta values for each dimension in the per-area rows", () => {
170
- const report = makeReport({
171
- areaDimensions: {
172
- "input-validation": { baseline: 50, experiment: 60, delta: 10 },
173
- "output-correctness": { baseline: 70, experiment: 75, delta: 5 },
174
- },
175
- perDimension: {
176
- "input-validation": 10,
177
- "output-correctness": 5,
178
- },
179
- });
180
- const table = formatComparisonTable(report);
181
- // The per-area row should include the delta values (+10 and +5)
182
- assert.ok(table.includes("+10"), "should show +10 delta for area-a");
183
- assert.ok(table.includes("+5"), "should show +5 delta for area-a");
184
- });
185
- });
@@ -1,6 +0,0 @@
1
- /**
2
- * noop-collector.test.ts — verifies the NoOpArtifactCollector is truly zero-cost.
3
- *
4
- * Run: npx tsx --test src/artifact-capture/__tests__/noop-collector.test.ts
5
- */
6
- export {};
@@ -1,42 +0,0 @@
1
- /**
2
- * noop-collector.test.ts — verifies the NoOpArtifactCollector is truly zero-cost.
3
- *
4
- * Run: npx tsx --test src/artifact-capture/__tests__/noop-collector.test.ts
5
- */
6
- import assert from "node:assert/strict";
7
- import { describe, it } from "node:test";
8
- import { NoOpArtifactCollector } from "../noop-collector.js";
9
- describe("NoOpArtifactCollector", () => {
10
- it("enabled returns false", () => {
11
- const collector = new NoOpArtifactCollector();
12
- assert.equal(collector.enabled, false);
13
- });
14
- it("extrasEnabled returns false", () => {
15
- const collector = new NoOpArtifactCollector();
16
- assert.equal(collector.extrasEnabled, false);
17
- });
18
- it("capture() is callable and returns void", () => {
19
- const collector = new NoOpArtifactCollector();
20
- const result = collector.capture("step", "type", { data: true });
21
- assert.equal(result, undefined);
22
- });
23
- it("captureFile() is callable and returns void", () => {
24
- const collector = new NoOpArtifactCollector();
25
- const result = collector.captureFile("step", "type", "/some/path");
26
- assert.equal(result, undefined);
27
- });
28
- it("flush() returns zero-count result", async () => {
29
- const collector = new NoOpArtifactCollector();
30
- const result = await collector.flush();
31
- assert.equal(result.artifactCount, 0);
32
- assert.equal(result.destination, "");
33
- assert.equal(result.totalBytes, 0);
34
- assert.equal(result.compressed, false);
35
- });
36
- it("flush() returns the same frozen object every time", async () => {
37
- const collector = new NoOpArtifactCollector();
38
- const a = await collector.flush();
39
- const b = await collector.flush();
40
- assert.equal(a, b);
41
- });
42
- });
@@ -1,8 +0,0 @@
1
- /**
2
- * cli.ts — Minimal CLI for standalone task validation.
3
- *
4
- * Usage:
5
- * npx @sanity/ailf-tasks validate .ailf/tasks/
6
- * npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
7
- */
8
- export declare function run(): void;
@@ -1,61 +0,0 @@
1
- /**
2
- * cli.ts — Minimal CLI for standalone task validation.
3
- *
4
- * Usage:
5
- * npx @sanity/ailf-tasks validate .ailf/tasks/
6
- * npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
7
- */
8
- import { loadTaskDir } from "./parser.js";
9
- import { formatValidationResult, validateRepoTasks } from "./validation.js";
10
- export function run() {
11
- const args = process.argv.slice(2);
12
- const command = args[0];
13
- if (command === "validate") {
14
- const dir = args[1] ?? ".ailf/tasks";
15
- validateCommand(dir);
16
- }
17
- else if (command === "--help" ||
18
- command === "-h" ||
19
- command === undefined) {
20
- printUsage();
21
- }
22
- else {
23
- console.error(`Unknown command: ${command}`);
24
- printUsage();
25
- process.exit(1);
26
- }
27
- }
28
- function validateCommand(dir) {
29
- try {
30
- const tasks = loadTaskDir(dir);
31
- // Run semantic validation
32
- const result = validateRepoTasks(tasks);
33
- const formatted = formatValidationResult(result);
34
- console.log(`✅ ${tasks.length} task(s) validated from ${dir}`);
35
- for (const task of tasks) {
36
- console.log(` ${task.id} — ${task.description}`);
37
- }
38
- if (result.warnings.length > 0 || result.errors.length > 0) {
39
- console.log("");
40
- console.log(formatted);
41
- }
42
- if (!result.valid) {
43
- process.exit(1);
44
- }
45
- }
46
- catch (err) {
47
- console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
48
- process.exit(1);
49
- }
50
- }
51
- function printUsage() {
52
- console.log("Usage: ailf-tasks <command> [options]");
53
- console.log("");
54
- console.log("Commands:");
55
- console.log(" validate [dir] Validate task YAML files (default: .ailf/tasks/)");
56
- console.log("");
57
- console.log("Examples:");
58
- console.log(" ailf-tasks validate");
59
- console.log(" ailf-tasks validate .ailf/tasks/");
60
- console.log(" ailf-tasks validate /path/to/tasks/");
61
- }
@@ -1,13 +0,0 @@
1
- /**
2
- * @sanity/ailf-tasks — Task definition schemas and YAML parser.
3
- *
4
- * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
5
- * without depending on the full AILF CLI or its heavyweight dependencies
6
- * (Promptfoo, LLM SDKs, Sanity client).
7
- *
8
- * Usage:
9
- * import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
10
- */
11
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
12
- export { loadTaskDir, parseTaskFile } from "./parser.js";
13
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";
@@ -1,16 +0,0 @@
1
- /**
2
- * @sanity/ailf-tasks — Task definition schemas and YAML parser.
3
- *
4
- * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
5
- * without depending on the full AILF CLI or its heavyweight dependencies
6
- * (Promptfoo, LLM SDKs, Sanity client).
7
- *
8
- * Usage:
9
- * import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
10
- */
11
- // Schemas and types
12
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
13
- // Parsing
14
- export { loadTaskDir, parseTaskFile } from "./parser.js";
15
- // Validation
16
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";
@@ -1,27 +0,0 @@
1
- /**
2
- * parser.ts — Standalone task file and directory parsing.
3
- *
4
- * High-level functions for loading and validating .ailf/tasks/ YAML
5
- * files without any dependency on the eval pipeline.
6
- *
7
- * Usage:
8
- * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
- */
10
- import { type RepoTask } from "./schemas.js";
11
- /**
12
- * Parse a single task YAML string and return validated tasks.
13
- *
14
- * @param content - Raw YAML string content
15
- * @param filename - Source filename (for error messages)
16
- * @returns Validated array of RepoTask objects
17
- * @throws Error if YAML parsing or Zod validation fails
18
- */
19
- export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
20
- /**
21
- * Load and parse all task YAML files from a directory.
22
- *
23
- * @param dirPath - Path to directory containing .yaml/.yml files
24
- * @returns All validated tasks, sorted by filename
25
- * @throws Error if directory not found, no YAML files, or validation fails
26
- */
27
- export declare function loadTaskDir(dirPath: string): RepoTask[];
@@ -1,73 +0,0 @@
1
- /**
2
- * parser.ts — Standalone task file and directory parsing.
3
- *
4
- * High-level functions for loading and validating .ailf/tasks/ YAML
5
- * files without any dependency on the eval pipeline.
6
- *
7
- * Usage:
8
- * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
- */
10
- import { existsSync, readdirSync, readFileSync } from "fs";
11
- import { resolve } from "path";
12
- import { load } from "js-yaml";
13
- import { RepoTaskFileSchema } from "./schemas.js";
14
- // ---------------------------------------------------------------------------
15
- // Public API
16
- // ---------------------------------------------------------------------------
17
- /**
18
- * Parse a single task YAML string and return validated tasks.
19
- *
20
- * @param content - Raw YAML string content
21
- * @param filename - Source filename (for error messages)
22
- * @returns Validated array of RepoTask objects
23
- * @throws Error if YAML parsing or Zod validation fails
24
- */
25
- export function parseTaskFile(content, filename = "<string>") {
26
- const parsed = load(content);
27
- if (!Array.isArray(parsed)) {
28
- throw new Error(`${filename} did not parse to an array of tasks. ` +
29
- "Task files must contain a YAML array of task definitions.");
30
- }
31
- const result = RepoTaskFileSchema.safeParse(parsed);
32
- if (!result.success) {
33
- const messages = result.error.issues
34
- .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
35
- .join("\n");
36
- throw new Error(`Invalid task file "${filename}":\n${messages}`);
37
- }
38
- return result.data;
39
- }
40
- /**
41
- * Load and parse all task YAML files from a directory.
42
- *
43
- * @param dirPath - Path to directory containing .yaml/.yml files
44
- * @returns All validated tasks, sorted by filename
45
- * @throws Error if directory not found, no YAML files, or validation fails
46
- */
47
- export function loadTaskDir(dirPath) {
48
- if (!existsSync(dirPath)) {
49
- throw new Error(`Tasks directory not found: ${dirPath}\n` +
50
- " Expected a directory containing .ailf/tasks/*.yaml files.");
51
- }
52
- const yamlFiles = readdirSync(dirPath)
53
- .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
54
- .sort();
55
- if (yamlFiles.length === 0) {
56
- throw new Error(`No YAML files found in ${dirPath}\n` +
57
- " Expected .ailf/tasks/*.yaml files with task definitions.");
58
- }
59
- const allTasks = [];
60
- for (const file of yamlFiles) {
61
- const filePath = resolve(dirPath, file);
62
- const content = readFileSync(filePath, "utf-8");
63
- try {
64
- const tasks = parseTaskFile(content, file);
65
- allTasks.push(...tasks);
66
- }
67
- catch (err) {
68
- const msg = err instanceof Error ? err.message : String(err);
69
- throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
70
- }
71
- }
72
- return allTasks;
73
- }