@sanity/ailf 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -12
- package/dist/_vendor/ailf-core/examples/index.js +19 -12
- package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +12 -2
- package/dist/adapters/task-sources/repo-schemas.js +28 -2
- package/dist/cli.js +0 -0
- package/dist/commands/init.js +17 -5
- package/dist/commands/pipeline-action.js +44 -6
- package/dist/commands/publish.js +2 -1
- package/dist/commands/validate-tasks.js +4 -1
- package/dist/composition-root.js +9 -5
- package/dist/orchestration/build-app-context.js +2 -0
- package/package.json +1 -1
- package/dist/commands/update-quality-scores.d.ts +0 -5
- package/dist/commands/update-quality-scores.js +0 -20
- package/dist/lib/agent-behavior-report.d.ts +0 -8
- package/dist/lib/agent-behavior-report.js +0 -185
- package/dist/lib/baseline.d.ts +0 -19
- package/dist/lib/baseline.js +0 -153
- package/dist/lib/calculate-scores.d.ts +0 -23
- package/dist/lib/calculate-scores.js +0 -42
- package/dist/lib/compare.d.ts +0 -18
- package/dist/lib/compare.js +0 -170
- package/dist/lib/coverage-audit.d.ts +0 -4
- package/dist/lib/coverage-audit.js +0 -42
- package/dist/lib/discovery-report.d.ts +0 -13
- package/dist/lib/discovery-report.js +0 -57
- package/dist/lib/fetch-docs.d.ts +0 -30
- package/dist/lib/fetch-docs.js +0 -171
- package/dist/lib/generate-configs.d.ts +0 -25
- package/dist/lib/generate-configs.js +0 -42
- package/dist/lib/grader-api.d.ts +0 -21
- package/dist/lib/grader-api.js +0 -34
- package/dist/lib/grader-compare.d.ts +0 -19
- package/dist/lib/grader-compare.js +0 -91
- package/dist/lib/grader-consistency.d.ts +0 -27
- package/dist/lib/grader-consistency.js +0 -79
- package/dist/lib/grader-sensitivity.d.ts +0 -19
- package/dist/lib/grader-sensitivity.js +0 -75
- package/dist/lib/grader-validate.d.ts +0 -19
- package/dist/lib/grader-validate.js +0 -78
- package/dist/lib/measure-retrieval.d.ts +0 -14
- package/dist/lib/measure-retrieval.js +0 -71
- package/dist/lib/pr-comment.d.ts +0 -16
- package/dist/lib/pr-comment.js +0 -28
- package/dist/lib/readiness-report.d.ts +0 -13
- package/dist/lib/readiness-report.js +0 -108
- package/dist/lib/webhook-server.d.ts +0 -11
- package/dist/lib/webhook-server.js +0 -24
- package/dist/lib/weekly-digest.d.ts +0 -24
- package/dist/lib/weekly-digest.js +0 -148
- package/dist/orchestration/env-bridge.d.ts +0 -21
- package/dist/orchestration/env-bridge.js +0 -66
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
- package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
- package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/calculate-scores-step.js +0 -89
- package/dist/pipeline/steps/compare-step.d.ts +0 -18
- package/dist/pipeline/steps/compare-step.js +0 -90
- package/dist/pipeline/steps/eval-step.d.ts +0 -53
- package/dist/pipeline/steps/eval-step.js +0 -347
- package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
- package/dist/pipeline/steps/fetch-docs-step.js +0 -84
- package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
- package/dist/pipeline/steps/generate-configs-step.js +0 -98
- package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
- package/dist/pipeline/steps/grader-consistency-step.js +0 -74
- package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
- package/dist/pipeline/steps/publish-report-step.js +0 -243
- package/dist/pipeline/steps/report-step.d.ts +0 -13
- package/dist/pipeline/steps/report-step.js +0 -56
- package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/update-scores-step.js +0 -42
- package/dist/scripts/agent-behavior-report.d.ts +0 -19
- package/dist/scripts/agent-behavior-report.js +0 -315
- package/dist/scripts/baseline.d.ts +0 -43
- package/dist/scripts/baseline.js +0 -267
- package/dist/scripts/calculate-scores.d.ts +0 -166
- package/dist/scripts/calculate-scores.js +0 -1296
- package/dist/scripts/compare.d.ts +0 -22
- package/dist/scripts/compare.js +0 -334
- package/dist/scripts/coverage-audit.d.ts +0 -44
- package/dist/scripts/coverage-audit.js +0 -209
- package/dist/scripts/debug-eval.d.ts +0 -19
- package/dist/scripts/debug-eval.js +0 -73
- package/dist/scripts/discovery-report.d.ts +0 -58
- package/dist/scripts/discovery-report.js +0 -250
- package/dist/scripts/fetch-docs.d.ts +0 -35
- package/dist/scripts/fetch-docs.js +0 -472
- package/dist/scripts/generate-configs.d.ts +0 -66
- package/dist/scripts/generate-configs.js +0 -459
- package/dist/scripts/grader-api.d.ts +0 -27
- package/dist/scripts/grader-api.js +0 -206
- package/dist/scripts/grader-compare.d.ts +0 -22
- package/dist/scripts/grader-compare.js +0 -368
- package/dist/scripts/grader-consistency.d.ts +0 -20
- package/dist/scripts/grader-consistency.js +0 -313
- package/dist/scripts/grader-sensitivity.d.ts +0 -22
- package/dist/scripts/grader-sensitivity.js +0 -354
- package/dist/scripts/grader-validate.d.ts +0 -19
- package/dist/scripts/grader-validate.js +0 -267
- package/dist/scripts/measure-retrieval.d.ts +0 -10
- package/dist/scripts/measure-retrieval.js +0 -145
- package/dist/scripts/pipeline.d.ts +0 -76
- package/dist/scripts/pipeline.js +0 -1031
- package/dist/scripts/pr-comment.d.ts +0 -10
- package/dist/scripts/pr-comment.js +0 -510
- package/dist/scripts/readiness-report.d.ts +0 -88
- package/dist/scripts/readiness-report.js +0 -342
- package/dist/scripts/update-quality-scores.d.ts +0 -15
- package/dist/scripts/update-quality-scores.js +0 -184
- package/dist/scripts/validate.d.ts +0 -13
- package/dist/scripts/validate.js +0 -79
- package/dist/scripts/webhook-server.d.ts +0 -26
- package/dist/scripts/webhook-server.js +0 -147
- package/dist/scripts/weekly-digest.d.ts +0 -24
- package/dist/scripts/weekly-digest.js +0 -144
- package/dist/sinks/format-slack.d.ts +0 -64
- package/dist/sinks/format-slack.js +0 -306
- package/dist/sinks/slack-sink.d.ts +0 -27
- package/dist/sinks/slack-sink.js +0 -78
- package/dist/sinks/webhook-sink.d.ts +0 -19
- package/dist/sinks/webhook-sink.js +0 -50
- package/tasks/.expanded.agentic.yaml +0 -51
- package/tasks/.expanded.yaml +0 -66
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* sinks/slack-sink.ts
|
|
3
|
-
*
|
|
4
|
-
* Slack notification sink — posts formatted score change messages to
|
|
5
|
-
* configured Slack channels via incoming webhooks.
|
|
6
|
-
*
|
|
7
|
-
* By default, only posts when regressions are detected (avoids notification
|
|
8
|
-
* fatigue). Set `alwaysPost: true` to receive all reports.
|
|
9
|
-
*
|
|
10
|
-
* @see docs/design-docs/report-store/sink-architecture.md
|
|
11
|
-
* @see docs/design-docs/report-store/notifications.md
|
|
12
|
-
*/
|
|
13
|
-
import type { Report, SinkHealthStatus, SinkResult } from "../pipeline/types.js";
|
|
14
|
-
import type { ReportSink } from "./types.js";
|
|
15
|
-
export interface SlackSinkOptions {
|
|
16
|
-
/** Post all reports, not just regressions (default: false — only regressions) */
|
|
17
|
-
alwaysPost?: boolean;
|
|
18
|
-
}
|
|
19
|
-
export declare class SlackSink implements ReportSink {
|
|
20
|
-
private readonly webhookUrl;
|
|
21
|
-
private readonly channel?;
|
|
22
|
-
private readonly options;
|
|
23
|
-
readonly name = "slack";
|
|
24
|
-
constructor(webhookUrl: string, channel?: string | undefined, options?: SlackSinkOptions);
|
|
25
|
-
healthCheck(): Promise<SinkHealthStatus>;
|
|
26
|
-
publish(report: Report): Promise<SinkResult>;
|
|
27
|
-
}
|
package/dist/sinks/slack-sink.js
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* sinks/slack-sink.ts
|
|
3
|
-
*
|
|
4
|
-
* Slack notification sink — posts formatted score change messages to
|
|
5
|
-
* configured Slack channels via incoming webhooks.
|
|
6
|
-
*
|
|
7
|
-
* By default, only posts when regressions are detected (avoids notification
|
|
8
|
-
* fatigue). Set `alwaysPost: true` to receive all reports.
|
|
9
|
-
*
|
|
10
|
-
* @see docs/design-docs/report-store/sink-architecture.md
|
|
11
|
-
* @see docs/design-docs/report-store/notifications.md
|
|
12
|
-
*/
|
|
13
|
-
import { formatRegressionAlert, formatScoreSummary, } from "./format-slack.js";
|
|
14
|
-
export class SlackSink {
|
|
15
|
-
webhookUrl;
|
|
16
|
-
channel;
|
|
17
|
-
options;
|
|
18
|
-
name = "slack";
|
|
19
|
-
constructor(webhookUrl, channel, options = {}) {
|
|
20
|
-
this.webhookUrl = webhookUrl;
|
|
21
|
-
this.channel = channel;
|
|
22
|
-
this.options = options;
|
|
23
|
-
}
|
|
24
|
-
healthCheck() {
|
|
25
|
-
try {
|
|
26
|
-
const url = new URL(this.webhookUrl);
|
|
27
|
-
if (url.protocol !== "https:") {
|
|
28
|
-
return Promise.resolve({
|
|
29
|
-
healthy: false,
|
|
30
|
-
reason: `Webhook URL must use HTTPS, got ${url.protocol}`,
|
|
31
|
-
});
|
|
32
|
-
}
|
|
33
|
-
if (!this.webhookUrl.startsWith("https://hooks.slack.com/")) {
|
|
34
|
-
// Non-standard URL — warn but don't fail (could be a proxy)
|
|
35
|
-
return Promise.resolve({ healthy: true });
|
|
36
|
-
}
|
|
37
|
-
return Promise.resolve({ healthy: true });
|
|
38
|
-
}
|
|
39
|
-
catch {
|
|
40
|
-
return Promise.resolve({
|
|
41
|
-
healthy: false,
|
|
42
|
-
reason: `Invalid webhook URL: ${this.webhookUrl}`,
|
|
43
|
-
});
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
async publish(report) {
|
|
47
|
-
const hasRegressions = report.comparison !== undefined && report.comparison.regressed.length > 0;
|
|
48
|
-
let message;
|
|
49
|
-
if (hasRegressions) {
|
|
50
|
-
message = formatRegressionAlert(report);
|
|
51
|
-
}
|
|
52
|
-
else if (this.options.alwaysPost) {
|
|
53
|
-
message = formatScoreSummary(report);
|
|
54
|
-
}
|
|
55
|
-
else {
|
|
56
|
-
return { reason: "No regressions detected", status: "skipped" };
|
|
57
|
-
}
|
|
58
|
-
const body = { ...message };
|
|
59
|
-
if (this.channel) {
|
|
60
|
-
body.channel = this.channel;
|
|
61
|
-
}
|
|
62
|
-
const response = await fetch(this.webhookUrl, {
|
|
63
|
-
body: JSON.stringify(body),
|
|
64
|
-
headers: { "Content-Type": "application/json" },
|
|
65
|
-
method: "POST",
|
|
66
|
-
});
|
|
67
|
-
if (!response.ok) {
|
|
68
|
-
return {
|
|
69
|
-
error: `Slack webhook returned HTTP ${response.status}`,
|
|
70
|
-
status: "failed",
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
return {
|
|
74
|
-
detail: hasRegressions ? "regression alert" : "score summary",
|
|
75
|
-
status: "success",
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* sinks/webhook-sink.ts
|
|
3
|
-
*
|
|
4
|
-
* Generic HTTP webhook sink — POSTs the full report JSON to any endpoint.
|
|
5
|
-
* This is the universal adapter for integrations that don't have a
|
|
6
|
-
* dedicated sink (Airbyte, Zapier, custom services, etc.).
|
|
7
|
-
*
|
|
8
|
-
* @see docs/design-docs/report-store/sink-architecture.md
|
|
9
|
-
*/
|
|
10
|
-
import type { Report, SinkHealthStatus, SinkResult } from "../pipeline/types.js";
|
|
11
|
-
import type { ReportSink } from "./types.js";
|
|
12
|
-
export declare class WebhookSink implements ReportSink {
|
|
13
|
-
private readonly url;
|
|
14
|
-
private readonly headers;
|
|
15
|
-
readonly name: string;
|
|
16
|
-
constructor(url: string, headers?: Record<string, string>);
|
|
17
|
-
healthCheck(): Promise<SinkHealthStatus>;
|
|
18
|
-
publish(report: Report): Promise<SinkResult>;
|
|
19
|
-
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* sinks/webhook-sink.ts
|
|
3
|
-
*
|
|
4
|
-
* Generic HTTP webhook sink — POSTs the full report JSON to any endpoint.
|
|
5
|
-
* This is the universal adapter for integrations that don't have a
|
|
6
|
-
* dedicated sink (Airbyte, Zapier, custom services, etc.).
|
|
7
|
-
*
|
|
8
|
-
* @see docs/design-docs/report-store/sink-architecture.md
|
|
9
|
-
*/
|
|
10
|
-
export class WebhookSink {
|
|
11
|
-
url;
|
|
12
|
-
headers;
|
|
13
|
-
name;
|
|
14
|
-
constructor(url, headers = {}) {
|
|
15
|
-
this.url = url;
|
|
16
|
-
this.headers = headers;
|
|
17
|
-
try {
|
|
18
|
-
this.name = `webhook:${new URL(url).hostname}`;
|
|
19
|
-
}
|
|
20
|
-
catch {
|
|
21
|
-
this.name = "webhook:invalid-url";
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
healthCheck() {
|
|
25
|
-
try {
|
|
26
|
-
new URL(this.url);
|
|
27
|
-
return Promise.resolve({ healthy: true });
|
|
28
|
-
}
|
|
29
|
-
catch {
|
|
30
|
-
return Promise.resolve({
|
|
31
|
-
healthy: false,
|
|
32
|
-
reason: `Invalid URL: ${this.url}`,
|
|
33
|
-
});
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
async publish(report) {
|
|
37
|
-
const response = await fetch(this.url, {
|
|
38
|
-
body: JSON.stringify(report),
|
|
39
|
-
headers: { "Content-Type": "application/json", ...this.headers },
|
|
40
|
-
method: "POST",
|
|
41
|
-
});
|
|
42
|
-
if (!response.ok) {
|
|
43
|
-
return {
|
|
44
|
-
error: `HTTP ${response.status}: ${response.statusText}`,
|
|
45
|
-
status: "failed",
|
|
46
|
-
};
|
|
47
|
-
}
|
|
48
|
-
return { detail: `HTTP ${response.status}`, status: "success" };
|
|
49
|
-
}
|
|
50
|
-
}
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
# .expanded.agentic.yaml
|
|
2
|
-
#
|
|
3
|
-
# AUTO-GENERATED — do not edit directly.
|
|
4
|
-
# Gold entries only (no baseline) for agentic evaluation mode.
|
|
5
|
-
# Source: tasks/*.yaml (single-definition format)
|
|
6
|
-
# Run: pnpm generate-configs
|
|
7
|
-
|
|
8
|
-
- assert:
|
|
9
|
-
- type: llm-rubric
|
|
10
|
-
value: |-
|
|
11
|
-
Score task completion from 0 to 100:
|
|
12
|
-
- 0: Couldn't attempt — missing critical information
|
|
13
|
-
- 20: Attempted but fundamentally wrong approach
|
|
14
|
-
- 50: Partial implementation — major functional gaps
|
|
15
|
-
- 80: Mostly complete — minor issues or missing edge cases
|
|
16
|
-
- 100: Fully functional code — works as expected
|
|
17
|
-
|
|
18
|
-
Must demonstrate:
|
|
19
|
-
- Configures a GROQ-powered webhook
|
|
20
|
-
- Webhook triggers on content changes
|
|
21
|
-
- Includes agent integration concepts
|
|
22
|
-
|
|
23
|
-
Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
|
|
24
|
-
metadata:
|
|
25
|
-
dimension: task-completion
|
|
26
|
-
maxScore: 100
|
|
27
|
-
- type: contains-any
|
|
28
|
-
value:
|
|
29
|
-
- webhook
|
|
30
|
-
- GROQ
|
|
31
|
-
weight: 1
|
|
32
|
-
- type: llm-rubric
|
|
33
|
-
value: |-
|
|
34
|
-
Score documentation coverage from 0 to 100:
|
|
35
|
-
- 0: Had to hallucinate/guess most implementation details
|
|
36
|
-
- 30: Significant gaps — filled with assumptions
|
|
37
|
-
- 50: Some gaps — inferred from partial information
|
|
38
|
-
- 80: Minor gaps — almost everything was documented
|
|
39
|
-
- 100: Complete coverage — all necessary info was in docs
|
|
40
|
-
|
|
41
|
-
Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
|
|
42
|
-
metadata:
|
|
43
|
-
dimension: doc-coverage
|
|
44
|
-
maxScore: 100
|
|
45
|
-
description: Test - Perspective ref expansion (gold)
|
|
46
|
-
vars:
|
|
47
|
-
docs: file://contexts/canonical/perspective-ref-test.md
|
|
48
|
-
task: |
|
|
49
|
-
Build a webhook handler that integrates with an AI agent pipeline.
|
|
50
|
-
Configure a GROQ-powered webhook that triggers when blog posts are
|
|
51
|
-
published and sends a payload to an agent endpoint.
|
package/tasks/.expanded.yaml
DELETED
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
# .expanded.yaml
|
|
2
|
-
#
|
|
3
|
-
# AUTO-GENERATED — do not edit directly.
|
|
4
|
-
# Source: tasks/*.yaml (single-definition format)
|
|
5
|
-
# Run: pnpm generate-configs
|
|
6
|
-
|
|
7
|
-
- assert:
|
|
8
|
-
- type: llm-rubric
|
|
9
|
-
value: |-
|
|
10
|
-
Score task completion from 0 to 100:
|
|
11
|
-
- 0: Couldn't attempt — missing critical information
|
|
12
|
-
- 20: Attempted but fundamentally wrong approach
|
|
13
|
-
- 50: Partial implementation — major functional gaps
|
|
14
|
-
- 80: Mostly complete — minor issues or missing edge cases
|
|
15
|
-
- 100: Fully functional code — works as expected
|
|
16
|
-
|
|
17
|
-
Must demonstrate:
|
|
18
|
-
- Configures a GROQ-powered webhook
|
|
19
|
-
- Webhook triggers on content changes
|
|
20
|
-
- Includes agent integration concepts
|
|
21
|
-
|
|
22
|
-
Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
|
|
23
|
-
metadata:
|
|
24
|
-
dimension: task-completion
|
|
25
|
-
maxScore: 100
|
|
26
|
-
- type: contains-any
|
|
27
|
-
value:
|
|
28
|
-
- webhook
|
|
29
|
-
- GROQ
|
|
30
|
-
weight: 1
|
|
31
|
-
- type: llm-rubric
|
|
32
|
-
value: |-
|
|
33
|
-
Score documentation coverage from 0 to 100:
|
|
34
|
-
- 0: Had to hallucinate/guess most implementation details
|
|
35
|
-
- 30: Significant gaps — filled with assumptions
|
|
36
|
-
- 50: Some gaps — inferred from partial information
|
|
37
|
-
- 80: Minor gaps — almost everything was documented
|
|
38
|
-
- 100: Complete coverage — all necessary info was in docs
|
|
39
|
-
|
|
40
|
-
Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
|
|
41
|
-
metadata:
|
|
42
|
-
dimension: doc-coverage
|
|
43
|
-
maxScore: 100
|
|
44
|
-
description: Test - Perspective ref expansion (gold)
|
|
45
|
-
prompts:
|
|
46
|
-
- with-docs
|
|
47
|
-
vars:
|
|
48
|
-
docs: file://contexts/canonical/perspective-ref-test.md
|
|
49
|
-
task: |
|
|
50
|
-
Build a webhook handler that integrates with an AI agent pipeline.
|
|
51
|
-
Configure a GROQ-powered webhook that triggers when blog posts are
|
|
52
|
-
published and sends a payload to an agent endpoint.
|
|
53
|
-
- description: Test - Perspective ref expansion (baseline)
|
|
54
|
-
prompts:
|
|
55
|
-
- without-docs
|
|
56
|
-
vars:
|
|
57
|
-
docs: ''
|
|
58
|
-
task: |
|
|
59
|
-
Build a webhook handler that integrates with an AI agent pipeline.
|
|
60
|
-
Configure a GROQ-powered webhook that triggers when blog posts are
|
|
61
|
-
published and sends a payload to an agent endpoint.
|
|
62
|
-
assert:
|
|
63
|
-
- type: llm-rubric
|
|
64
|
-
value: |-
|
|
65
|
-
Score task completion from 0 to 100 (same criteria as above).
|
|
66
|
-
Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
|