@sanity/ailf 3.4.1 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/bin/ailf.js +16 -1
  2. package/config/airbyte/ai_literacy_framework.connector.yaml +114 -0
  3. package/config/bigquery/README.md +44 -8
  4. package/config/bigquery/views/official_area_scores.sql +20 -0
  5. package/config/bigquery/views/official_runs.sql +31 -0
  6. package/config/bigquery/views/reports.sql +19 -0
  7. package/config/bigquery/views/team_runs_template.sql +17 -0
  8. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  9. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  10. package/dist/_vendor/ailf-core/ports/context.d.ts +25 -0
  11. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +23 -0
  12. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +59 -1
  13. package/dist/_vendor/ailf-shared/index.d.ts +2 -0
  14. package/dist/_vendor/ailf-shared/index.js +2 -0
  15. package/dist/_vendor/ailf-shared/owner-teams.d.ts +26 -0
  16. package/dist/_vendor/ailf-shared/owner-teams.js +52 -0
  17. package/dist/_vendor/ailf-shared/run-classification.d.ts +100 -0
  18. package/dist/_vendor/ailf-shared/run-classification.js +28 -0
  19. package/dist/_vendor/ailf-shared/run-context.d.ts +23 -0
  20. package/dist/adapters/api-client/build-request.d.ts +42 -0
  21. package/dist/adapters/api-client/build-request.js +188 -10
  22. package/dist/adapters/api-client/index.d.ts +1 -1
  23. package/dist/adapters/api-client/index.js +1 -1
  24. package/dist/commands/explain-handler.js +5 -0
  25. package/dist/commands/pipeline-action.d.ts +6 -0
  26. package/dist/commands/pipeline-action.js +13 -1
  27. package/dist/commands/pipeline.d.ts +5 -0
  28. package/dist/commands/pipeline.js +16 -2
  29. package/dist/commands/remote-pipeline.js +13 -1
  30. package/dist/orchestration/steps/finalize-run-step.js +1 -0
  31. package/dist/orchestration/steps/publish-report-step.js +1 -0
  32. package/dist/pipeline/map-request-to-config.js +18 -0
  33. package/dist/pipeline/run-context.d.ts +63 -0
  34. package/dist/pipeline/run-context.js +166 -0
  35. package/package.json +1 -1
package/bin/ailf.js CHANGED
@@ -33,9 +33,24 @@ const callerCwd = process.cwd()
33
33
  // ---------------------------------------------------------------------------
34
34
  if (existsSync(tsSrc)) {
35
35
  try {
36
+ // Enable the `ailf-source` export condition so @sanity/ailf-shared and
37
+ // @sanity/ailf-core resolve to their `src/index.ts` entrypoints rather
38
+ // than whatever happens to be in their `dist/` directories. Without
39
+ // this, running `ailf …` against a freshly pulled monorepo (or any
40
+ // workspace with a stale dist) fails at import time whenever the
41
+ // source introduces a new export that the dist hasn't caught up with.
42
+ const existingNodeOptions = process.env.NODE_OPTIONS ?? ""
43
+ const conditionFlag = "--conditions=ailf-source"
44
+ const nodeOptions = existingNodeOptions.includes(conditionFlag)
45
+ ? existingNodeOptions
46
+ : `${existingNodeOptions} ${conditionFlag}`.trim()
36
47
  execFileSync("npx", ["tsx", tsSrc, ...args], {
37
48
  cwd: ROOT,
38
- env: { ...process.env, AILF_CALLER_CWD: callerCwd },
49
+ env: {
50
+ ...process.env,
51
+ AILF_CALLER_CWD: callerCwd,
52
+ NODE_OPTIONS: nodeOptions,
53
+ },
39
54
  stdio: "inherit",
40
55
  })
41
56
  process.exit(0)
package/config/airbyte/ai_literacy_framework.connector.yaml CHANGED
@@ -87,6 +87,23 @@ definitions:
87
87
  summary.overall.avgInfrastructureEfficiency,
88
88
  "promptfoo_url": provenance.promptfooUrl,
89
89
  "promptfoo_urls": provenance.promptfooUrls[] { mode, url },
90
+ "classification": provenance.classification,
91
+ "owner_team": provenance.owner.team,
92
+ "owner_individual": provenance.owner.individual,
93
+ "executor_type": provenance.executor.type,
94
+ "executor_name": provenance.executor.name,
95
+ "executor_surface": provenance.executor.surface,
96
+ "executor_github_actor": provenance.executor.githubActor,
97
+ "purpose": provenance.purpose,
98
+ "labels": provenance.labels,
99
+ "lineage_rerun_of": provenance.lineage.rerunOf,
100
+ "lineage_compared_against": provenance.lineage.comparedAgainst,
101
+ "lineage_parent_job_id": provenance.lineage.parentJobId,
102
+ "tool_ailf_version": provenance.tool.ailfVersion,
103
+ "tool_node_version": provenance.tool.nodeVersion,
104
+ "host_platform": provenance.host.platform,
105
+ "host_arch": provenance.host.arch,
106
+ "host_ci": provenance.host.ci,
90
107
  _createdAt
91
108
  }
92
109
  record_selector:
@@ -464,6 +481,103 @@ schemas:
464
481
  url:
465
482
  type: string
466
483
  description: Promptfoo share URL for this mode
484
+ # ----------------------------------------------------------------
485
+ # D0037 — run classification, ownership, executor, reproducibility
486
+ # ----------------------------------------------------------------
487
+ classification:
488
+ type:
489
+ - string
490
+ - "null"
491
+ description:
492
+ "Run classification (D0037): official | ad-hoc | experimental | test |
493
+ external. Orthogonal to trigger_type."
494
+ owner_team:
495
+ type:
496
+ - string
497
+ - "null"
498
+ description: Team slug this run is attributable to (free-form).
499
+ owner_individual:
500
+ type:
501
+ - string
502
+ - "null"
503
+ description: Individual (e.g., GH actor) this run is attributable to.
504
+ executor_type:
505
+ type:
506
+ - string
507
+ - "null"
508
+ description: '"user" | "system" — who/what actually invoked the run.'
509
+ executor_name:
510
+ type:
511
+ - string
512
+ - "null"
513
+ description:
514
+ For system executors the system name (e.g., "github-actions"); for
515
+ user executors the resolved user name.
516
+ executor_surface:
517
+ type:
518
+ - string
519
+ - "null"
520
+ description:
521
+ For user executors — origin surface ("cli" | "studio" | "api").
522
+ executor_github_actor:
523
+ type:
524
+ - string
525
+ - "null"
526
+ description: GitHub actor when the user invoked via a GH surface.
527
+ purpose:
528
+ type:
529
+ - string
530
+ - "null"
531
+ description: Human-authored "why I ran this" (AILF_PURPOSE / --purpose).
532
+ labels:
533
+ type:
534
+ - array
535
+ - "null"
536
+ items:
537
+ type: string
538
+ description:
539
+ Free-form searchable tags (release IDs, regression hunts,
540
+ experiments).
541
+ lineage_rerun_of:
542
+ type:
543
+ - string
544
+ - "null"
545
+ description: Prior RunId this run re-executes.
546
+ lineage_compared_against:
547
+ type:
548
+ - string
549
+ - "null"
550
+ description: Sibling RunId this run is intentionally compared against.
551
+ lineage_parent_job_id:
552
+ type:
553
+ - string
554
+ - "null"
555
+ description: API-gateway job ID that dispatched this run.
556
+ tool_ailf_version:
557
+ type:
558
+ - string
559
+ - "null"
560
+ description: "@sanity/ailf package version that produced this run."
561
+ tool_node_version:
562
+ type:
563
+ - string
564
+ - "null"
565
+ description: Node runtime version.
566
+ host_platform:
567
+ type:
568
+ - string
569
+ - "null"
570
+ description: os.platform() — darwin | linux | win32.
571
+ host_arch:
572
+ type:
573
+ - string
574
+ - "null"
575
+ description: os.arch() — x64 | arm64.
576
+ host_ci:
577
+ type:
578
+ - string
579
+ - "null"
580
+ description: CI provider when running under one (e.g., github-actions).
467
581
  _createdAt:
468
582
  type:
469
583
  - string
package/config/bigquery/README.md CHANGED
@@ -22,10 +22,13 @@ BigQuery views (this directory)
22
22
 
23
23
  ## Files
24
24
 
25
- | File | Purpose |
26
- | ----------------------- | ------------------------------------------------------------------------------- |
27
- | `views/area_scores.sql` | Flattens nested `model_scores` array into one row per area per model per report |
28
- | `views/reports.sql` | Clean passthrough view with correct types and column ordering |
25
+ | File | Purpose |
26
+ | -------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
27
+ | `views/area_scores.sql` | Flattens nested `model_scores` array into one row per area per model per report |
28
+ | `views/reports.sql` | Clean passthrough view with correct types and column ordering |
29
+ | `views/official_runs.sql` | Canonical trend series (D0037): `classification='official' AND trigger_type='scheduled' AND owner_team='core-docs'` |
30
+ | `views/official_area_scores.sql` | `area_scores` joined to `official_runs` — inherits the official-run predicate for area-level dashboards |
31
+ | `views/team_runs_template.sql` | Recipe/template for instantiating per-team filtered views |
29
32
 
30
33
  ## Setup
31
34
 
@@ -36,22 +39,55 @@ from `docs/design-docs/report-store/bigquery.md`.
36
39
  ### 1. Create the raw dataset (Airbyte writes here)
37
40
 
38
41
  ```bash
39
- bq mk --dataset data-platform-302218:ailf_raw
42
+ bq --project_id=data-platform-302218 --location=EU mk --dataset ailf_raw
40
43
  ```
41
44
 
42
45
  ### 2. Create the analytics dataset (views live here)
43
46
 
44
47
  ```bash
45
- bq mk --dataset data-platform-302218:ailf
48
+ bq --project_id=data-platform-302218 --location=EU mk --dataset ailf
46
49
  ```
47
50
 
48
51
  ### 3. Create the views
49
52
 
53
+ **Important ordering (learned 2026-04-23):** Airbyte must be redeployed with the
54
+ current manifest **before** you run these view SQLs. Each view binds to specific
55
+ columns on `ailf_raw.reports`; if the raw table is missing columns the Airbyte
56
+ projection expects, the `CREATE VIEW` statement fails with
57
+ `Unrecognized name: <column>`.
58
+
59
+ If your Airbyte destination has **schema evolution enabled** ("Propagate column
60
+ changes" in the UI), new columns appear automatically on the next incremental
61
+ sync. If not, flip it on, trigger a resync, and confirm the expected columns
62
+ exist before creating views:
63
+
64
+ ```bash
65
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false \
66
+ "SELECT column_name FROM ailf_raw.INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'reports' ORDER BY column_name"
67
+ ```
68
+
69
+ If propagation is disabled and you can't flip it quickly, manually
70
+ `ALTER TABLE ailf_raw.reports ADD COLUMN IF NOT EXISTS …` for each missing
71
+ column as a stop-gap. Values will be `NULL` until Airbyte writes to them on the
72
+ next sync.
73
+
74
+ Once the raw table has the expected columns:
75
+
50
76
  ```bash
51
- bq query --use_legacy_sql=false < views/reports.sql
52
- bq query --use_legacy_sql=false < views/area_scores.sql
77
+ cd packages/eval/config/bigquery
78
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/reports.sql
79
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/area_scores.sql
80
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
81
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
82
+ # per-team views are optional — copy views/team_runs_template.sql,
83
+ # fill in the slug, and run.
53
84
  ```
54
85
 
86
+ > `--project_id` / `--location=EU` are required because `bq` needs an explicit
87
+ > billing project and the `ailf*` datasets live in the EU multi-region. If you
88
+ > run `bq query` from this repo regularly, consider setting the default project
89
+ > with `gcloud config set project data-platform-302218` (this sets the project only; keep passing `--location=EU`).
90
+
55
91
  ## Naming conventions
56
92
 
57
93
  - **`ailf_raw.*`** — raw Airbyte-loaded tables (nested JSON, Airbyte metadata
package/config/bigquery/views/official_area_scores.sql ADDED
@@ -0,0 +1,20 @@
1
+ -- ailf.official_area_scores — per-area scores restricted to official runs
2
+ --
3
+ -- Joins ailf.area_scores (UNNESTed per-model per-area scores) to
4
+ -- ailf.official_runs on report_id so area-level trend dashboards
5
+ -- inherit the D0037 official-run predicate without re-declaring it.
6
+ --
7
+ -- Source: ailf.area_scores, ailf.official_runs
8
+ -- Target: ailf.official_area_scores (this view)
9
+ --
10
+ -- Usage:
11
+ -- bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
12
+ --
13
+ -- @see views/official_runs.sql
14
+ -- @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
15
+
16
+ CREATE OR REPLACE VIEW `data-platform-302218.ailf.official_area_scores` AS
17
+ SELECT a.*
18
+ FROM `data-platform-302218.ailf.area_scores` AS a
19
+ INNER JOIN `data-platform-302218.ailf.official_runs` AS r
20
+ USING (report_id);
package/config/bigquery/views/official_runs.sql ADDED
@@ -0,0 +1,31 @@
1
+ -- ailf.official_runs — canonical trend series
2
+ --
3
+ -- Filters ailf.reports down to the runs that form the canonical
4
+ -- core-docs scheduled evaluation series (daily-baseline, weekly-full).
5
+ -- Dashboards that must stay stable across ad-hoc / test / external-team
6
+ -- activity should point at this view, not at `ailf.reports` directly.
7
+ --
8
+ -- Predicate (D0037):
9
+ -- classification = 'official' — intent: tracked trend series
10
+ -- trigger_type = 'scheduled' — mechanism: cron, not PR/manual/webhook
11
+ -- owner_team = 'core-docs' — attributable to the docs team
12
+ --
13
+ -- Historical rows predating D0037 have classification = NULL and are
14
+ -- excluded. Backfill is a separate one-shot; see
15
+ -- scripts/backfill-run-classification.ts.
16
+ --
17
+ -- Source: ailf.reports (view over ailf_raw.reports)
18
+ -- Target: ailf.official_runs (this view)
19
+ --
20
+ -- Usage:
21
+ -- bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
22
+ --
23
+ -- @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
24
+ -- @see docs/design-docs/run-classification-and-ownership.md
25
+
26
+ CREATE OR REPLACE VIEW `data-platform-302218.ailf.official_runs` AS
27
+ SELECT *
28
+ FROM `data-platform-302218.ailf.reports`
29
+ WHERE classification = 'official'
30
+ AND trigger_type = 'scheduled'
31
+ AND owner_team = 'core-docs';
package/config/bigquery/views/reports.sql CHANGED
@@ -45,6 +45,25 @@ SELECT
45
45
  models,
46
46
  promptfoo_url,
47
47
  promptfoo_urls,
48
+ -- D0037 — run classification, ownership, executor, reproducibility.
49
+ -- Nullable for historical rows predating the taxonomy.
50
+ classification,
51
+ owner_team,
52
+ owner_individual,
53
+ executor_type,
54
+ executor_name,
55
+ executor_surface,
56
+ executor_github_actor,
57
+ purpose,
58
+ labels,
59
+ lineage_rerun_of,
60
+ lineage_compared_against,
61
+ lineage_parent_job_id,
62
+ tool_ailf_version,
63
+ tool_node_version,
64
+ host_platform,
65
+ host_arch,
66
+ host_ci,
48
67
  TIMESTAMP(_createdAt) AS synced_at
49
68
  FROM
50
69
  `data-platform-302218.ailf_raw.reports`;
package/config/bigquery/views/team_runs_template.sql ADDED
@@ -0,0 +1,17 @@
1
+ -- ailf.team_runs_<team> — per-team filtered view factory
2
+ --
3
+ -- D0037 team ownership is a free-form string column (owner_team). Rather
4
+ -- than pre-creating one view per team, use this template as a recipe:
5
+ -- copy this file to `team_runs_<team>.sql`, fill in the slug, and run.
6
+ --
7
+ -- Example for the studio team:
8
+ --
9
+ -- CREATE OR REPLACE VIEW `data-platform-302218.ailf.team_runs_studio` AS
10
+ -- SELECT *
11
+ -- FROM `data-platform-302218.ailf.reports`
12
+ -- WHERE owner_team = 'studio';
13
+ --
14
+ -- Teams with ad-hoc filtering needs can also query ailf.reports directly
15
+ -- with `WHERE owner_team = 'X'` rather than maintaining a view.
16
+ --
17
+ -- @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
package/dist/_vendor/ailf-core/examples/index.d.ts CHANGED
@@ -433,6 +433,6 @@ export interface ExampleRecord {
433
433
  }
434
434
  export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
435
435
  /** GitHub Actions workflow template for AI Literacy evaluation */
436
- export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
436
+ export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. 
External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
437
437
  /** TypeScript project configuration template (ailf.config.ts) */
438
438
  export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
@@ -630,7 +630,7 @@ export const EXAMPLES = {
630
630
  // Raw file exports (non-data files, exported as raw strings)
631
631
  // ---------------------------------------------------------------------------
632
632
  /** GitHub Actions workflow template for AI Literacy evaluation */
633
- export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
633
+ export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # ────────────────────────────────────────────────────────────\n # D0037 run provenance envelope — REPLACE THE OWNER TEAM SLUG\n # below. 
Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # ────────────────────────────────────────────────────────────\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
634
634
  // ---------------------------------------------------------------------------
635
635
  // TypeScript template exports (for ailf init --output-format ts)
636
636
  // ---------------------------------------------------------------------------
@@ -11,6 +11,7 @@
11
11
  * Fields marked optional are transitional — they will become required
12
12
  * as downstream consumers are converted to use them.
13
13
  */
14
+ import type { RunClassification, RunExecutorSurface } from "../../ailf-shared/index.d.ts";
14
15
  import type { RunId } from "../types/branded-ids.js";
15
16
  import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
16
17
  import type { ArtifactWriter } from "./artifact-writer.js";
@@ -138,6 +139,30 @@ export interface ResolvedConfig {
138
139
  repo: string;
139
140
  sha?: string;
140
141
  };
142
+ /**
143
+ * Caller-provided D0037 provenance envelope (classification, owner,
144
+ * executor, purpose, labels). Set on --remote submissions so the
145
+ * caller's `AILF_*` env vars / CLI flags survive the API boundary.
146
+ * When set, buildRunContext prefers these over server-env detection —
147
+ * same pattern as callerGit.
148
+ *
149
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
150
+ */
151
+ callerEnvelope?: {
152
+ classification?: RunClassification;
153
+ owner?: {
154
+ team: string;
155
+ individual?: string;
156
+ };
157
+ executor?: {
158
+ type: "user";
159
+ surface: RunExecutorSurface;
160
+ name?: string;
161
+ githubActor?: string;
162
+ };
163
+ purpose?: string;
164
+ labels?: string[];
165
+ };
141
166
  /** Callback URL configuration for API-triggered evaluations */
142
167
  callback?: {
143
168
  url: string;
@@ -87,6 +87,29 @@ export declare const PipelineRequestSchema: z.ZodObject<{
87
87
  full: "full";
88
88
  }>>;
89
89
  presets: z.ZodOptional<z.ZodArray<z.ZodString>>;
90
+ classification: z.ZodOptional<z.ZodEnum<{
91
+ external: "external";
92
+ official: "official";
93
+ "ad-hoc": "ad-hoc";
94
+ experimental: "experimental";
95
+ test: "test";
96
+ }>>;
97
+ owner: z.ZodOptional<z.ZodObject<{
98
+ team: z.ZodString;
99
+ individual: z.ZodOptional<z.ZodString>;
100
+ }, z.core.$strip>>;
101
+ executor: z.ZodOptional<z.ZodObject<{
102
+ type: z.ZodLiteral<"user">;
103
+ surface: z.ZodEnum<{
104
+ cli: "cli";
105
+ studio: "studio";
106
+ api: "api";
107
+ }>;
108
+ name: z.ZodOptional<z.ZodString>;
109
+ githubActor: z.ZodOptional<z.ZodString>;
110
+ }, z.core.$strict>>;
111
+ purpose: z.ZodOptional<z.ZodString>;
112
+ labels: z.ZodOptional<z.ZodArray<z.ZodString>>;
90
113
  }, z.core.$strip>;
91
114
  /** Inferred TypeScript type for a pipeline request payload. */
92
115
  export type PipelineRequest = z.infer<typeof PipelineRequestSchema>;
@@ -13,7 +13,7 @@
13
13
  * @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
14
14
  */
15
15
  import { z } from "zod";
16
- import { LITERACY_VARIANTS, RAW_EVAL_MODES } from "../../ailf-shared/index.js";
16
+ import { LITERACY_VARIANTS, RAW_EVAL_MODES, RUN_CLASSIFICATIONS, RUN_EXECUTOR_SURFACES, } from "../../ailf-shared/index.js";
17
17
  // ---------------------------------------------------------------------------
18
18
  // Debug options — boolean shorthand or structured object
19
19
  // ---------------------------------------------------------------------------
@@ -49,6 +49,47 @@ const CallerGitSchema = z.object({
49
49
  sha: z.string().optional(),
50
50
  });
51
51
  // ---------------------------------------------------------------------------
52
+ // Caller envelope (D0037) — for --remote evaluations from external repos
53
+ // ---------------------------------------------------------------------------
54
+ /**
55
+ * Caller-provided D0037 provenance envelope.
56
+ *
57
+ * When the CLI submits a PipelineRequest via `--remote`, the pipeline
58
+ * runs server-side (Cloud Run / dispatched GH Actions) where the
59
+ * caller's local env vars don't exist. Carrying the envelope on the
60
+ * request lets the caller's `AILF_CLASSIFICATION` / `AILF_OWNER_TEAM`
61
+ * / explicit CLI flags survive the API boundary so provenance
62
+ * attributes to the caller, not the server runtime.
63
+ *
64
+ * Only caller-identity fields cross the wire. Server-environment
65
+ * facts (`executor.email`, `tool.ailfVersion`, `tool.nodeVersion`,
66
+ * `host.*`) stay server-inferred on the receiving side.
67
+ *
68
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
69
+ */
70
+ const CallerOwnerSchema = z.object({
71
+ team: z.string().min(1),
72
+ individual: z.string().optional(),
73
+ });
74
+ /**
75
+ * Executor payload — user variant only. System executors (github-actions,
76
+ * cloud-run) are always server-detected and must not be spoofable by a
77
+ * caller, so the wire format excludes them.
78
+ *
79
+ * `.strict()` makes unknown keys fail the parse rather than be silently
80
+ * stripped. Critical for PII: a caller that accidentally sends `email`
81
+ * gets an immediate 400 instead of a silent drop that looks like success.
82
+ * Server-inferred fields (`email`, `tool`, `host`) are NOT accepted here.
83
+ */
84
+ const CallerExecutorSchema = z
85
+ .object({
86
+ type: z.literal("user"),
87
+ surface: z.enum(RUN_EXECUTOR_SURFACES),
88
+ name: z.string().optional(),
89
+ githubActor: z.string().optional(),
90
+ })
91
+ .strict();
92
+ // ---------------------------------------------------------------------------
52
93
  // Pipeline Request — the universal invocation contract
53
94
  // ---------------------------------------------------------------------------
54
95
  export const PipelineRequestSchema = z.object({
@@ -103,4 +144,21 @@ export const PipelineRequestSchema = z.object({
103
144
  variant: z.enum(LITERACY_VARIANTS).optional(),
104
145
  /** External preset file paths or npm package names to load */
105
146
  presets: z.array(z.string()).optional(),
147
+ // -------------------------------------------------------------------------
148
+ // D0037 caller envelope — classification, attribution, and intent
149
+ // -------------------------------------------------------------------------
150
+ /**
151
+ * How this run should be treated for reporting and trend tracking.
152
+ * Orthogonal to `trigger.type` (captured server-side). When omitted,
153
+ * the server defaults to `"ad-hoc"`.
154
+ */
155
+ classification: z.enum(RUN_CLASSIFICATIONS).optional(),
156
+ /** Team and (optionally) individual this run is attributable to. */
157
+ owner: CallerOwnerSchema.optional(),
158
+ /** Caller executor identity (user variant only). */
159
+ executor: CallerExecutorSchema.optional(),
160
+ /** Human-authored "why I ran this". */
161
+ purpose: z.string().optional(),
162
+ /** Free-form searchable tags (release IDs, regression hunts, experiments). */
163
+ labels: z.array(z.string().min(1)).optional(),
106
164
  });
@@ -14,5 +14,7 @@ export * from "./feature-flags.js";
14
14
  export * from "./score-grades.js";
15
15
  export * from "./noise-threshold.js";
16
16
  export * from "./eval-modes.js";
17
+ export * from "./owner-teams.js";
18
+ export * from "./run-classification.js";
17
19
  export * from "./run-trigger.js";
18
20
  export * from "./run-context.js";
@@ -14,5 +14,7 @@ export * from "./feature-flags.js";
14
14
  export * from "./score-grades.js";
15
15
  export * from "./noise-threshold.js";
16
16
  export * from "./eval-modes.js";
17
+ export * from "./owner-teams.js";
18
+ export * from "./run-classification.js";
17
19
  export * from "./run-trigger.js";
18
20
  export * from "./run-context.js";
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Known owner-team slugs and soft-normalization helper.
3
+ *
4
+ * `RunOwner.team` is free-form by design (external teams name themselves
5
+ * and internal names drift). This module provides two things to keep UX
6
+ * polished without blocking new entrants:
7
+ *
8
+ * - `KNOWN_OWNER_TEAMS` — a seed list of canonical slugs that populates
9
+ * Studio filter comboboxes as suggestions. Not a closed enum.
10
+ * - `normalizeOwnerTeam()` — maps a handful of common aliases to
11
+ * canonical slugs. Warn-only: never rejects input — a value with no alias
12
+ * mapping is passed through, and empty input becomes `"unknown"`. Adding an alias here is a one-liner.
13
+ *
14
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
15
+ */
16
+ export declare const KNOWN_OWNER_TEAMS: readonly string[];
17
+ /**
18
+ * Normalize a free-form team slug to its canonical form.
19
+ *
20
+ * - Trims and lowercases.
21
+ * - Maps known aliases to canonical slugs.
22
+ * - Passes unrecognized values (no alias mapping) through rather than rejecting them (warn-only at the UI layer).
23
+ * - Returns `"unknown"` for empty input.
24
+ */
25
+ export declare function normalizeOwnerTeam(value: string | undefined | null): string;
26
+ export declare function isKnownOwnerTeam(value: string): boolean;