@sanity/ailf 3.4.1 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ailf.js +16 -1
- package/config/airbyte/ai_literacy_framework.connector.yaml +114 -0
- package/config/bigquery/README.md +44 -8
- package/config/bigquery/views/official_area_scores.sql +20 -0
- package/config/bigquery/views/official_runs.sql +31 -0
- package/config/bigquery/views/reports.sql +19 -0
- package/config/bigquery/views/team_runs_template.sql +17 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +25 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +23 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +59 -1
- package/dist/_vendor/ailf-shared/index.d.ts +2 -0
- package/dist/_vendor/ailf-shared/index.js +2 -0
- package/dist/_vendor/ailf-shared/owner-teams.d.ts +26 -0
- package/dist/_vendor/ailf-shared/owner-teams.js +52 -0
- package/dist/_vendor/ailf-shared/run-classification.d.ts +100 -0
- package/dist/_vendor/ailf-shared/run-classification.js +28 -0
- package/dist/_vendor/ailf-shared/run-context.d.ts +23 -0
- package/dist/adapters/api-client/build-request.d.ts +42 -0
- package/dist/adapters/api-client/build-request.js +188 -10
- package/dist/adapters/api-client/index.d.ts +1 -1
- package/dist/adapters/api-client/index.js +1 -1
- package/dist/commands/explain-handler.js +5 -0
- package/dist/commands/pipeline-action.d.ts +6 -0
- package/dist/commands/pipeline-action.js +13 -1
- package/dist/commands/pipeline.d.ts +5 -0
- package/dist/commands/pipeline.js +16 -2
- package/dist/commands/remote-pipeline.js +13 -1
- package/dist/orchestration/steps/finalize-run-step.js +1 -0
- package/dist/orchestration/steps/publish-report-step.js +1 -0
- package/dist/pipeline/map-request-to-config.js +18 -0
- package/dist/pipeline/run-context.d.ts +63 -0
- package/dist/pipeline/run-context.js +166 -0
- package/package.json +1 -1
package/bin/ailf.js
CHANGED
|
@@ -33,9 +33,24 @@ const callerCwd = process.cwd()
|
|
|
33
33
|
// ---------------------------------------------------------------------------
|
|
34
34
|
if (existsSync(tsSrc)) {
|
|
35
35
|
try {
|
|
36
|
+
// Enable the `ailf-source` export condition so @sanity/ailf-shared and
|
|
37
|
+
// @sanity/ailf-core resolve to their `src/index.ts` entrypoints rather
|
|
38
|
+
// than whatever happens to be in their `dist/` directories. Without
|
|
39
|
+
// this, running `ailf …` against a freshly pulled monorepo (or any
|
|
40
|
+
// workspace with a stale dist) fails at import time whenever the
|
|
41
|
+
// source introduces a new export that the dist hasn't caught up with.
|
|
42
|
+
const existingNodeOptions = process.env.NODE_OPTIONS ?? ""
|
|
43
|
+
const conditionFlag = "--conditions=ailf-source"
|
|
44
|
+
const nodeOptions = existingNodeOptions.includes(conditionFlag)
|
|
45
|
+
? existingNodeOptions
|
|
46
|
+
: `${existingNodeOptions} ${conditionFlag}`.trim()
|
|
36
47
|
execFileSync("npx", ["tsx", tsSrc, ...args], {
|
|
37
48
|
cwd: ROOT,
|
|
38
|
-
env: {
|
|
49
|
+
env: {
|
|
50
|
+
...process.env,
|
|
51
|
+
AILF_CALLER_CWD: callerCwd,
|
|
52
|
+
NODE_OPTIONS: nodeOptions,
|
|
53
|
+
},
|
|
39
54
|
stdio: "inherit",
|
|
40
55
|
})
|
|
41
56
|
process.exit(0)
|
|
@@ -87,6 +87,23 @@ definitions:
|
|
|
87
87
|
summary.overall.avgInfrastructureEfficiency,
|
|
88
88
|
"promptfoo_url": provenance.promptfooUrl,
|
|
89
89
|
"promptfoo_urls": provenance.promptfooUrls[] { mode, url },
|
|
90
|
+
"classification": provenance.classification,
|
|
91
|
+
"owner_team": provenance.owner.team,
|
|
92
|
+
"owner_individual": provenance.owner.individual,
|
|
93
|
+
"executor_type": provenance.executor.type,
|
|
94
|
+
"executor_name": provenance.executor.name,
|
|
95
|
+
"executor_surface": provenance.executor.surface,
|
|
96
|
+
"executor_github_actor": provenance.executor.githubActor,
|
|
97
|
+
"purpose": provenance.purpose,
|
|
98
|
+
"labels": provenance.labels,
|
|
99
|
+
"lineage_rerun_of": provenance.lineage.rerunOf,
|
|
100
|
+
"lineage_compared_against": provenance.lineage.comparedAgainst,
|
|
101
|
+
"lineage_parent_job_id": provenance.lineage.parentJobId,
|
|
102
|
+
"tool_ailf_version": provenance.tool.ailfVersion,
|
|
103
|
+
"tool_node_version": provenance.tool.nodeVersion,
|
|
104
|
+
"host_platform": provenance.host.platform,
|
|
105
|
+
"host_arch": provenance.host.arch,
|
|
106
|
+
"host_ci": provenance.host.ci,
|
|
90
107
|
_createdAt
|
|
91
108
|
}
|
|
92
109
|
record_selector:
|
|
@@ -464,6 +481,103 @@ schemas:
|
|
|
464
481
|
url:
|
|
465
482
|
type: string
|
|
466
483
|
description: Promptfoo share URL for this mode
|
|
484
|
+
# ----------------------------------------------------------------
|
|
485
|
+
# D0037 — run classification, ownership, executor, reproducibility
|
|
486
|
+
# ----------------------------------------------------------------
|
|
487
|
+
classification:
|
|
488
|
+
type:
|
|
489
|
+
- string
|
|
490
|
+
- "null"
|
|
491
|
+
description:
|
|
492
|
+
"Run classification (D0037): official | ad-hoc | experimental | test |
|
|
493
|
+
external. Orthogonal to trigger_type."
|
|
494
|
+
owner_team:
|
|
495
|
+
type:
|
|
496
|
+
- string
|
|
497
|
+
- "null"
|
|
498
|
+
description: Team slug this run is attributable to (free-form).
|
|
499
|
+
owner_individual:
|
|
500
|
+
type:
|
|
501
|
+
- string
|
|
502
|
+
- "null"
|
|
503
|
+
description: Individual (e.g., GH actor) this run is attributable to.
|
|
504
|
+
executor_type:
|
|
505
|
+
type:
|
|
506
|
+
- string
|
|
507
|
+
- "null"
|
|
508
|
+
description: '"user" | "system" — who/what actually invoked the run.'
|
|
509
|
+
executor_name:
|
|
510
|
+
type:
|
|
511
|
+
- string
|
|
512
|
+
- "null"
|
|
513
|
+
description:
|
|
514
|
+
For system executors the system name (e.g., "github-actions"); for
|
|
515
|
+
user executors the resolved user name.
|
|
516
|
+
executor_surface:
|
|
517
|
+
type:
|
|
518
|
+
- string
|
|
519
|
+
- "null"
|
|
520
|
+
description:
|
|
521
|
+
For user executors — origin surface ("cli" | "studio" | "api").
|
|
522
|
+
executor_github_actor:
|
|
523
|
+
type:
|
|
524
|
+
- string
|
|
525
|
+
- "null"
|
|
526
|
+
description: GitHub actor when the user invoked via a GH surface.
|
|
527
|
+
purpose:
|
|
528
|
+
type:
|
|
529
|
+
- string
|
|
530
|
+
- "null"
|
|
531
|
+
description: Human-authored "why I ran this" (AILF_PURPOSE / --purpose).
|
|
532
|
+
labels:
|
|
533
|
+
type:
|
|
534
|
+
- array
|
|
535
|
+
- "null"
|
|
536
|
+
items:
|
|
537
|
+
type: string
|
|
538
|
+
description:
|
|
539
|
+
Free-form searchable tags (release IDs, regression hunts,
|
|
540
|
+
experiments).
|
|
541
|
+
lineage_rerun_of:
|
|
542
|
+
type:
|
|
543
|
+
- string
|
|
544
|
+
- "null"
|
|
545
|
+
description: Prior RunId this run re-executes.
|
|
546
|
+
lineage_compared_against:
|
|
547
|
+
type:
|
|
548
|
+
- string
|
|
549
|
+
- "null"
|
|
550
|
+
description: Sibling RunId this run is intentionally compared against.
|
|
551
|
+
lineage_parent_job_id:
|
|
552
|
+
type:
|
|
553
|
+
- string
|
|
554
|
+
- "null"
|
|
555
|
+
description: API-gateway job ID that dispatched this run.
|
|
556
|
+
tool_ailf_version:
|
|
557
|
+
type:
|
|
558
|
+
- string
|
|
559
|
+
- "null"
|
|
560
|
+
description: "@sanity/ailf package version that produced this run."
|
|
561
|
+
tool_node_version:
|
|
562
|
+
type:
|
|
563
|
+
- string
|
|
564
|
+
- "null"
|
|
565
|
+
description: Node runtime version.
|
|
566
|
+
host_platform:
|
|
567
|
+
type:
|
|
568
|
+
- string
|
|
569
|
+
- "null"
|
|
570
|
+
description: os.platform() — darwin | linux | win32.
|
|
571
|
+
host_arch:
|
|
572
|
+
type:
|
|
573
|
+
- string
|
|
574
|
+
- "null"
|
|
575
|
+
description: os.arch() — x64 | arm64.
|
|
576
|
+
host_ci:
|
|
577
|
+
type:
|
|
578
|
+
- string
|
|
579
|
+
- "null"
|
|
580
|
+
description: CI provider when running under one (e.g., github-actions).
|
|
467
581
|
_createdAt:
|
|
468
582
|
type:
|
|
469
583
|
- string
|
|
@@ -22,10 +22,13 @@ BigQuery views (this directory)
|
|
|
22
22
|
|
|
23
23
|
## Files
|
|
24
24
|
|
|
25
|
-
| File
|
|
26
|
-
|
|
|
27
|
-
| `views/area_scores.sql`
|
|
28
|
-
| `views/reports.sql`
|
|
25
|
+
| File | Purpose |
|
|
26
|
+
| -------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
|
|
27
|
+
| `views/area_scores.sql` | Flattens nested `model_scores` array into one row per area per model per report |
|
|
28
|
+
| `views/reports.sql` | Clean passthrough view with correct types and column ordering |
|
|
29
|
+
| `views/official_runs.sql` | Canonical trend series (D0037): `classification='official' AND trigger_type='scheduled' AND owner_team='core-docs'` |
|
|
30
|
+
| `views/official_area_scores.sql` | `area_scores` joined to `official_runs` — inherits the official-run predicate for area-level dashboards |
|
|
31
|
+
| `views/team_runs_template.sql` | Recipe/template for instantiating per-team filtered views |
|
|
29
32
|
|
|
30
33
|
## Setup
|
|
31
34
|
|
|
@@ -36,22 +39,55 @@ from `docs/design-docs/report-store/bigquery.md`.
|
|
|
36
39
|
### 1. Create the raw dataset (Airbyte writes here)
|
|
37
40
|
|
|
38
41
|
```bash
|
|
39
|
-
bq
|
|
42
|
+
bq --project_id=data-platform-302218 --location=EU mk --dataset ailf_raw
|
|
40
43
|
```
|
|
41
44
|
|
|
42
45
|
### 2. Create the analytics dataset (views live here)
|
|
43
46
|
|
|
44
47
|
```bash
|
|
45
|
-
bq
|
|
48
|
+
bq --project_id=data-platform-302218 --location=EU mk --dataset ailf
|
|
46
49
|
```
|
|
47
50
|
|
|
48
51
|
### 3. Create the views
|
|
49
52
|
|
|
53
|
+
**Important ordering (learned 2026-04-23):** Airbyte must be redeployed with the
|
|
54
|
+
current manifest **before** you run the view SQL files below. Each view binds to specific
|
|
55
|
+
columns on `ailf_raw.reports`; if the raw table is missing columns the Airbyte
|
|
56
|
+
projection expects, the `CREATE VIEW` statement fails with
|
|
57
|
+
`Unrecognized name: <column>`.
|
|
58
|
+
|
|
59
|
+
If your Airbyte destination has **schema evolution enabled** ("Propagate column
|
|
60
|
+
changes" in the UI), new columns appear automatically on the next incremental
|
|
61
|
+
sync. If not, flip it on, trigger a resync, and confirm the expected columns
|
|
62
|
+
exist before creating views:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false \
|
|
66
|
+
"SELECT column_name FROM ailf_raw.INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'reports' ORDER BY column_name"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
If propagation is disabled and you can't flip it quickly, manually run
|
|
70
|
+
`ALTER TABLE ailf_raw.reports ADD COLUMN IF NOT EXISTS …` for each missing
|
|
71
|
+
column as a stop-gap. Values will be `NULL` until Airbyte writes to them on the
|
|
72
|
+
next sync.
|
|
73
|
+
|
|
74
|
+
Once the raw table has the expected columns:
|
|
75
|
+
|
|
50
76
|
```bash
|
|
51
|
-
|
|
52
|
-
bq query --use_legacy_sql=false < views/
|
|
77
|
+
cd packages/eval/config/bigquery
|
|
78
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/reports.sql
|
|
79
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/area_scores.sql
|
|
80
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
|
|
81
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
|
|
82
|
+
# per-team views are optional — copy views/team_runs_template.sql,
|
|
83
|
+
# fill in the slug, and run.
|
|
53
84
|
```
|
|
54
85
|
|
|
86
|
+
> `--project_id` / `--location=EU` are required because `bq` needs an explicit
|
|
87
|
+
> billing project and the `ailf*` datasets live in the EU multi-region. If you
|
|
88
|
+
> run `bq query` from this repo regularly, consider setting the default with
|
|
89
|
+
> `gcloud config set project data-platform-302218`.
|
|
90
|
+
|
|
55
91
|
## Naming conventions
|
|
56
92
|
|
|
57
93
|
- **`ailf_raw.*`** — raw Airbyte-loaded tables (nested JSON, Airbyte metadata
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
-- ailf.official_area_scores — per-area scores restricted to official runs
|
|
2
|
+
--
|
|
3
|
+
-- Joins ailf.area_scores (UNNESTed per-model per-area scores) to
|
|
4
|
+
-- ailf.official_runs on report_id so area-level trend dashboards
|
|
5
|
+
-- inherit the D0037 official-run predicate without re-declaring it.
|
|
6
|
+
--
|
|
7
|
+
-- Source: ailf.area_scores, ailf.official_runs
|
|
8
|
+
-- Target: ailf.official_area_scores (this view)
|
|
9
|
+
--
|
|
10
|
+
-- Usage:
|
|
11
|
+
-- bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
|
|
12
|
+
--
|
|
13
|
+
-- @see views/official_runs.sql
|
|
14
|
+
-- @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
15
|
+
|
|
16
|
+
CREATE OR REPLACE VIEW `data-platform-302218.ailf.official_area_scores` AS
|
|
17
|
+
SELECT a.*
|
|
18
|
+
FROM `data-platform-302218.ailf.area_scores` AS a
|
|
19
|
+
INNER JOIN `data-platform-302218.ailf.official_runs` AS r
|
|
20
|
+
USING (report_id);
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
-- ailf.official_runs — canonical trend series
|
|
2
|
+
--
|
|
3
|
+
-- Filters ailf.reports down to the runs that form the canonical
|
|
4
|
+
-- core-docs scheduled evaluation series (daily-baseline, weekly-full).
|
|
5
|
+
-- Dashboards that must stay stable across ad-hoc / test / external-team
|
|
6
|
+
-- activity should point at this view, not at `ailf.reports` directly.
|
|
7
|
+
--
|
|
8
|
+
-- Predicate (D0037):
|
|
9
|
+
-- classification = 'official' — intent: tracked trend series
|
|
10
|
+
-- trigger_type = 'scheduled' — mechanism: cron, not PR/manual/webhook
|
|
11
|
+
-- owner_team = 'core-docs' — attributable to the docs team
|
|
12
|
+
--
|
|
13
|
+
-- Historical rows predating D0037 have classification = NULL and are
|
|
14
|
+
-- excluded. Backfill is a separate one-shot; see
|
|
15
|
+
-- scripts/backfill-run-classification.ts.
|
|
16
|
+
--
|
|
17
|
+
-- Source: ailf.reports (view over ailf_raw.reports)
|
|
18
|
+
-- Target: ailf.official_runs (this view)
|
|
19
|
+
--
|
|
20
|
+
-- Usage:
|
|
21
|
+
-- bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
|
|
22
|
+
--
|
|
23
|
+
-- @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
24
|
+
-- @see docs/design-docs/run-classification-and-ownership.md
|
|
25
|
+
|
|
26
|
+
CREATE OR REPLACE VIEW `data-platform-302218.ailf.official_runs` AS
|
|
27
|
+
SELECT *
|
|
28
|
+
FROM `data-platform-302218.ailf.reports`
|
|
29
|
+
WHERE classification = 'official'
|
|
30
|
+
AND trigger_type = 'scheduled'
|
|
31
|
+
AND owner_team = 'core-docs';
|
|
@@ -45,6 +45,25 @@ SELECT
|
|
|
45
45
|
models,
|
|
46
46
|
promptfoo_url,
|
|
47
47
|
promptfoo_urls,
|
|
48
|
+
-- D0037 — run classification, ownership, executor, reproducibility.
|
|
49
|
+
-- Nullable for historical rows predating the taxonomy.
|
|
50
|
+
classification,
|
|
51
|
+
owner_team,
|
|
52
|
+
owner_individual,
|
|
53
|
+
executor_type,
|
|
54
|
+
executor_name,
|
|
55
|
+
executor_surface,
|
|
56
|
+
executor_github_actor,
|
|
57
|
+
purpose,
|
|
58
|
+
labels,
|
|
59
|
+
lineage_rerun_of,
|
|
60
|
+
lineage_compared_against,
|
|
61
|
+
lineage_parent_job_id,
|
|
62
|
+
tool_ailf_version,
|
|
63
|
+
tool_node_version,
|
|
64
|
+
host_platform,
|
|
65
|
+
host_arch,
|
|
66
|
+
host_ci,
|
|
48
67
|
TIMESTAMP(_createdAt) AS synced_at
|
|
49
68
|
FROM
|
|
50
69
|
`data-platform-302218.ailf_raw.reports`;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
-- ailf.team_runs_<team> — per-team filtered view factory
|
|
2
|
+
--
|
|
3
|
+
-- D0037 team ownership is a free-form string column (owner_team). Rather
|
|
4
|
+
-- than pre-creating one view per team, use this template as a recipe:
|
|
5
|
+
-- copy this file to `team_runs_<team>.sql`, fill in the slug, and run.
|
|
6
|
+
--
|
|
7
|
+
-- Example for the studio team:
|
|
8
|
+
--
|
|
9
|
+
-- CREATE OR REPLACE VIEW `data-platform-302218.ailf.team_runs_studio` AS
|
|
10
|
+
-- SELECT *
|
|
11
|
+
-- FROM `data-platform-302218.ailf.reports`
|
|
12
|
+
-- WHERE owner_team = 'studio';
|
|
13
|
+
--
|
|
14
|
+
-- Teams with ad-hoc filtering needs can also query ailf.reports directly
|
|
15
|
+
-- with `WHERE owner_team = 'X'` rather than maintaining a view.
|
|
16
|
+
--
|
|
17
|
+
-- @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
@@ -433,6 +433,6 @@ export interface ExampleRecord {
|
|
|
433
433
|
}
|
|
434
434
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
435
435
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
436
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
436
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. 
External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
437
437
|
/** TypeScript project configuration template (ailf.config.ts) */
|
|
438
438
|
export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
|
|
@@ -630,7 +630,7 @@ export const EXAMPLES = {
|
|
|
630
630
|
// Raw file exports (non-data files, exported as raw strings)
|
|
631
631
|
// ---------------------------------------------------------------------------
|
|
632
632
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
633
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
633
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # ────────────────────────────────────────────────────────────\n # D0037 run provenance envelope — REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. 
External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # ────────────────────────────────────────────────────────────\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
634
634
|
// ---------------------------------------------------------------------------
|
|
635
635
|
// TypeScript template exports (for ailf init --output-format ts)
|
|
636
636
|
// ---------------------------------------------------------------------------
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* Fields marked optional are transitional — they will become required
|
|
12
12
|
* as downstream consumers are converted to use them.
|
|
13
13
|
*/
|
|
14
|
+
import type { RunClassification, RunExecutorSurface } from "../../ailf-shared/index.d.ts";
|
|
14
15
|
import type { RunId } from "../types/branded-ids.js";
|
|
15
16
|
import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
|
|
16
17
|
import type { ArtifactWriter } from "./artifact-writer.js";
|
|
@@ -138,6 +139,30 @@ export interface ResolvedConfig {
|
|
|
138
139
|
repo: string;
|
|
139
140
|
sha?: string;
|
|
140
141
|
};
|
|
142
|
+
/**
|
|
143
|
+
* Caller-provided D0037 provenance envelope (classification, owner,
|
|
144
|
+
* executor, purpose, labels). Set on --remote submissions so the
|
|
145
|
+
* caller's `AILF_*` env vars / CLI flags survive the API boundary.
|
|
146
|
+
* When set, buildRunContext prefers these over server-env detection —
|
|
147
|
+
* same pattern as callerGit.
|
|
148
|
+
*
|
|
149
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
150
|
+
*/
|
|
151
|
+
callerEnvelope?: {
|
|
152
|
+
classification?: RunClassification;
|
|
153
|
+
owner?: {
|
|
154
|
+
team: string;
|
|
155
|
+
individual?: string;
|
|
156
|
+
};
|
|
157
|
+
executor?: {
|
|
158
|
+
type: "user";
|
|
159
|
+
surface: RunExecutorSurface;
|
|
160
|
+
name?: string;
|
|
161
|
+
githubActor?: string;
|
|
162
|
+
};
|
|
163
|
+
purpose?: string;
|
|
164
|
+
labels?: string[];
|
|
165
|
+
};
|
|
141
166
|
/** Callback URL configuration for API-triggered evaluations */
|
|
142
167
|
callback?: {
|
|
143
168
|
url: string;
|
|
@@ -87,6 +87,29 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
87
87
|
full: "full";
|
|
88
88
|
}>>;
|
|
89
89
|
presets: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
90
|
+
classification: z.ZodOptional<z.ZodEnum<{
|
|
91
|
+
external: "external";
|
|
92
|
+
official: "official";
|
|
93
|
+
"ad-hoc": "ad-hoc";
|
|
94
|
+
experimental: "experimental";
|
|
95
|
+
test: "test";
|
|
96
|
+
}>>;
|
|
97
|
+
owner: z.ZodOptional<z.ZodObject<{
|
|
98
|
+
team: z.ZodString;
|
|
99
|
+
individual: z.ZodOptional<z.ZodString>;
|
|
100
|
+
}, z.core.$strip>>;
|
|
101
|
+
executor: z.ZodOptional<z.ZodObject<{
|
|
102
|
+
type: z.ZodLiteral<"user">;
|
|
103
|
+
surface: z.ZodEnum<{
|
|
104
|
+
cli: "cli";
|
|
105
|
+
studio: "studio";
|
|
106
|
+
api: "api";
|
|
107
|
+
}>;
|
|
108
|
+
name: z.ZodOptional<z.ZodString>;
|
|
109
|
+
githubActor: z.ZodOptional<z.ZodString>;
|
|
110
|
+
}, z.core.$strict>>;
|
|
111
|
+
purpose: z.ZodOptional<z.ZodString>;
|
|
112
|
+
labels: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
90
113
|
}, z.core.$strip>;
|
|
91
114
|
/** Inferred TypeScript type for a pipeline request payload. */
|
|
92
115
|
export type PipelineRequest = z.infer<typeof PipelineRequestSchema>;
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
* @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
|
|
14
14
|
*/
|
|
15
15
|
import { z } from "zod";
|
|
16
|
-
import { LITERACY_VARIANTS, RAW_EVAL_MODES } from "../../ailf-shared/index.js";
|
|
16
|
+
import { LITERACY_VARIANTS, RAW_EVAL_MODES, RUN_CLASSIFICATIONS, RUN_EXECUTOR_SURFACES, } from "../../ailf-shared/index.js";
|
|
17
17
|
// ---------------------------------------------------------------------------
|
|
18
18
|
// Debug options — boolean shorthand or structured object
|
|
19
19
|
// ---------------------------------------------------------------------------
|
|
@@ -49,6 +49,47 @@ const CallerGitSchema = z.object({
|
|
|
49
49
|
sha: z.string().optional(),
|
|
50
50
|
});
|
|
51
51
|
// ---------------------------------------------------------------------------
|
|
52
|
+
// Caller envelope (D0037) — for --remote evaluations from external repos
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
/**
|
|
55
|
+
* Caller-provided D0037 provenance envelope.
|
|
56
|
+
*
|
|
57
|
+
* When the CLI submits a PipelineRequest via `--remote`, the pipeline
|
|
58
|
+
* runs server-side (Cloud Run / dispatched GH Actions) where the
|
|
59
|
+
* caller's local env vars don't exist. Carrying the envelope on the
|
|
60
|
+
* request lets the caller's `AILF_CLASSIFICATION` / `AILF_OWNER_TEAM`
|
|
61
|
+
* / explicit CLI flags survive the API boundary so provenance
|
|
62
|
+
* attributes to the caller, not the server runtime.
|
|
63
|
+
*
|
|
64
|
+
* Only caller-identity fields cross the wire. Server-environment
|
|
65
|
+
* facts (`executor.email`, `tool.ailfVersion`, `tool.nodeVersion`,
|
|
66
|
+
* `host.*`) stay server-inferred on the receiving side.
|
|
67
|
+
*
|
|
68
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
69
|
+
*/
|
|
70
|
+
const CallerOwnerSchema = z.object({
|
|
71
|
+
team: z.string().min(1),
|
|
72
|
+
individual: z.string().optional(),
|
|
73
|
+
});
|
|
74
|
+
/**
|
|
75
|
+
* Executor payload — user variant only. System executors (github-actions,
|
|
76
|
+
* cloud-run) are always server-detected and must not be spoofable by a
|
|
77
|
+
* caller, so the wire format excludes them.
|
|
78
|
+
*
|
|
79
|
+
* `.strict()` makes unknown keys fail the parse rather than be silently
|
|
80
|
+
* stripped. Critical for PII: a caller that accidentally sends `email`
|
|
81
|
+
* gets an immediate 400 instead of a silent drop that looks like success.
|
|
82
|
+
* Server-inferred fields (`email`, `tool`, `host`) are NOT accepted here.
|
|
83
|
+
*/
|
|
84
|
+
const CallerExecutorSchema = z
|
|
85
|
+
.object({
|
|
86
|
+
type: z.literal("user"),
|
|
87
|
+
surface: z.enum(RUN_EXECUTOR_SURFACES),
|
|
88
|
+
name: z.string().optional(),
|
|
89
|
+
githubActor: z.string().optional(),
|
|
90
|
+
})
|
|
91
|
+
.strict();
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
52
93
|
// Pipeline Request — the universal invocation contract
|
|
53
94
|
// ---------------------------------------------------------------------------
|
|
54
95
|
export const PipelineRequestSchema = z.object({
|
|
@@ -103,4 +144,21 @@ export const PipelineRequestSchema = z.object({
|
|
|
103
144
|
variant: z.enum(LITERACY_VARIANTS).optional(),
|
|
104
145
|
/** External preset file paths or npm package names to load */
|
|
105
146
|
presets: z.array(z.string()).optional(),
|
|
147
|
+
// -------------------------------------------------------------------------
|
|
148
|
+
// D0037 caller envelope — classification, attribution, and intent
|
|
149
|
+
// -------------------------------------------------------------------------
|
|
150
|
+
/**
|
|
151
|
+
* How this run should be treated for reporting and trend tracking.
|
|
152
|
+
* Orthogonal to `trigger.type` (captured server-side). When omitted,
|
|
153
|
+
* the server defaults to `"ad-hoc"`.
|
|
154
|
+
*/
|
|
155
|
+
classification: z.enum(RUN_CLASSIFICATIONS).optional(),
|
|
156
|
+
/** Team and (optionally) individual this run is attributable to. */
|
|
157
|
+
owner: CallerOwnerSchema.optional(),
|
|
158
|
+
/** Caller executor identity (user variant only). */
|
|
159
|
+
executor: CallerExecutorSchema.optional(),
|
|
160
|
+
/** Human-authored "why I ran this". */
|
|
161
|
+
purpose: z.string().optional(),
|
|
162
|
+
/** Free-form searchable tags (release IDs, regression hunts, experiments). */
|
|
163
|
+
labels: z.array(z.string().min(1)).optional(),
|
|
106
164
|
});
|
|
@@ -14,5 +14,7 @@ export * from "./feature-flags.js";
|
|
|
14
14
|
export * from "./score-grades.js";
|
|
15
15
|
export * from "./noise-threshold.js";
|
|
16
16
|
export * from "./eval-modes.js";
|
|
17
|
+
export * from "./owner-teams.js";
|
|
18
|
+
export * from "./run-classification.js";
|
|
17
19
|
export * from "./run-trigger.js";
|
|
18
20
|
export * from "./run-context.js";
|
|
@@ -14,5 +14,7 @@ export * from "./feature-flags.js";
|
|
|
14
14
|
export * from "./score-grades.js";
|
|
15
15
|
export * from "./noise-threshold.js";
|
|
16
16
|
export * from "./eval-modes.js";
|
|
17
|
+
export * from "./owner-teams.js";
|
|
18
|
+
export * from "./run-classification.js";
|
|
17
19
|
export * from "./run-trigger.js";
|
|
18
20
|
export * from "./run-context.js";
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Known owner-team slugs and soft-normalization helper.
|
|
3
|
+
*
|
|
4
|
+
* `RunOwner.team` is free-form by design (external teams name themselves
|
|
5
|
+
* and internal names drift). This module provides two things to keep UX
|
|
6
|
+
* polished without blocking new entrants:
|
|
7
|
+
*
|
|
8
|
+
* - `KNOWN_OWNER_TEAMS` — a seed list of canonical slugs that populates
|
|
9
|
+
* Studio filter comboboxes as suggestions. Not a closed enum.
|
|
10
|
+
* - `normalizeOwnerTeam()` — maps a handful of common aliases to
|
|
11
|
+
* canonical slugs. Warn-only: never rejects a value, even when no alias
|
|
12
|
+
* mapping applies. Adding an alias here is a one-liner.
|
|
13
|
+
*
|
|
14
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Seed list of canonical owner-team slugs, offered as suggestions (for
 * example in Studio filter comboboxes). This is not a closed enum:
 * `RunOwner.team` is free-form by design, so values outside this list are
 * still valid.
 */
export declare const KNOWN_OWNER_TEAMS: readonly string[];
|
|
17
|
+
/**
|
|
18
|
+
* Normalize a free-form team slug to its canonical form.
|
|
19
|
+
*
|
|
20
|
+
* - Trims and lowercases.
|
|
21
|
+
* - Maps known aliases to canonical slugs.
|
|
22
|
+
* - Passes values with no alias mapping through without remapping them (warn-only at the UI layer).
|
|
23
|
+
* - Returns `"unknown"` for empty input.
|
|
24
|
+
*/
|
|
25
|
+
export declare function normalizeOwnerTeam(value: string | undefined | null): string;
|
|
26
|
+
/**
 * Presumably reports whether `value` is one of the slugs in
 * `KNOWN_OWNER_TEAMS`.
 *
 * NOTE(review): only the signature is visible in this declaration file.
 * Whether `value` is normalized (trimmed, lowercased, alias-mapped) before
 * the check is not shown — confirm against the implementation in
 * owner-teams.js.
 */
export declare function isKnownOwnerTeam(value: string): boolean;
|