@runcontext/cli 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Command as Command15 } from "commander";
4
+ import { Command as Command16 } from "commander";
5
5
 
6
6
  // src/commands/lint.ts
7
7
  import { Command } from "commander";
@@ -1201,15 +1201,14 @@ var siteCommand = new Command10("site").description("Build a static documentatio
1201
1201
  // src/commands/serve.ts
1202
1202
  import { Command as Command11 } from "commander";
1203
1203
  import chalk12 from "chalk";
1204
- var serveCommand = new Command11("serve").description("Start the MCP server (stdio transport)").option("--context-dir <path>", "Path to context directory").action(async (opts) => {
1204
+ var serveCommand = new Command11("serve").description("Start the MCP server (stdio or HTTP transport)").option("--context-dir <path>", "Path to context directory").option("--http", "Serve over HTTP instead of stdio").option("--port <number>", "HTTP port (default: 3000)", "3000").option("--host <address>", "HTTP host (default: 0.0.0.0)", "0.0.0.0").action(async (opts) => {
1205
1205
  try {
1206
- let startServer;
1206
+ let mcpModule;
1207
1207
  try {
1208
- const mcpModule = await import("@runcontext/mcp");
1209
- startServer = mcpModule.startServer;
1208
+ mcpModule = await import("@runcontext/mcp");
1210
1209
  } catch {
1211
1210
  }
1212
- if (!startServer) {
1211
+ if (!mcpModule) {
1213
1212
  console.log(
1214
1213
  chalk12.yellow(
1215
1214
  "MCP server is not available. Install @runcontext/mcp to enable this command."
@@ -1217,11 +1216,24 @@ var serveCommand = new Command11("serve").description("Start the MCP server (std
1217
1216
  );
1218
1217
  process.exit(1);
1219
1218
  }
1220
- console.log(chalk12.blue("Starting MCP server (stdio transport)..."));
1221
- await startServer({
1222
- contextDir: opts.contextDir,
1223
- rootDir: process.cwd()
1224
- });
1219
+ if (opts.http) {
1220
+ const startServerHttp = mcpModule.startServerHttp;
1221
+ const port = parseInt(opts.port, 10);
1222
+ console.log(chalk12.blue(`Starting MCP server (HTTP on port ${port})...`));
1223
+ await startServerHttp({
1224
+ contextDir: opts.contextDir,
1225
+ rootDir: process.cwd(),
1226
+ port,
1227
+ host: opts.host
1228
+ });
1229
+ } else {
1230
+ const startServer = mcpModule.startServer;
1231
+ console.log(chalk12.blue("Starting MCP server (stdio transport)..."));
1232
+ await startServer({
1233
+ contextDir: opts.contextDir,
1234
+ rootDir: process.cwd()
1235
+ });
1236
+ }
1225
1237
  } catch (err) {
1226
1238
  console.error(formatError(err.message));
1227
1239
  process.exit(1);
@@ -1562,8 +1574,1114 @@ var rulesCommand = new Command14("rules").description("List all lint rules with
1562
1574
  }
1563
1575
  });
1564
1576
 
1577
+ // src/commands/setup.ts
1578
+ import { Command as Command15 } from "commander";
1579
+ import * as p9 from "@clack/prompts";
1580
+ import chalk16 from "chalk";
1581
+
1582
+ // src/setup/steps/connect.ts
1583
+ import * as p from "@clack/prompts";
1584
+ import path13 from "path";
1585
+ import { existsSync as existsSync3, readFileSync as readFileSync4, writeFileSync as writeFileSync5 } from "fs";
1586
+ import * as yaml2 from "yaml";
1587
+ import { loadConfig as loadConfig10, createAdapter as createAdapter5 } from "@runcontext/core";
1588
+ function autoDetectDb(cwd) {
1589
+ try {
1590
+ const config = loadConfig10(cwd);
1591
+ if (config.data_sources && Object.keys(config.data_sources).length > 0) {
1592
+ const name = Object.keys(config.data_sources)[0];
1593
+ const ds = config.data_sources[name];
1594
+ const loc = ds.path ?? ds.connection ?? name;
1595
+ return { dsConfig: ds, label: `${ds.adapter} \u2014 ${loc} (from contextkit.config.yaml)` };
1596
+ }
1597
+ } catch {
1598
+ }
1599
+ if (process.env.DATABASE_URL) {
1600
+ try {
1601
+ const ds = parseDbUrl(process.env.DATABASE_URL);
1602
+ return { dsConfig: ds, label: `${ds.adapter} \u2014 $DATABASE_URL` };
1603
+ } catch {
1604
+ }
1605
+ }
1606
+ if (process.env.DUCKDB_PATH && existsSync3(process.env.DUCKDB_PATH)) {
1607
+ return {
1608
+ dsConfig: { adapter: "duckdb", path: process.env.DUCKDB_PATH },
1609
+ label: `duckdb \u2014 $DUCKDB_PATH`
1610
+ };
1611
+ }
1612
+ const mcpPath = path13.join(cwd, ".claude", "mcp.json");
1613
+ if (existsSync3(mcpPath)) {
1614
+ try {
1615
+ const mcpConfig = JSON.parse(readFileSync4(mcpPath, "utf-8"));
1616
+ const duckdbServer = mcpConfig.mcpServers?.duckdb;
1617
+ if (duckdbServer?.args) {
1618
+ const args = duckdbServer.args;
1619
+ const idx = args.indexOf("--db-path");
1620
+ if (idx >= 0 && args[idx + 1]) {
1621
+ const dbPath = args[idx + 1];
1622
+ if (existsSync3(dbPath)) {
1623
+ return {
1624
+ dsConfig: { adapter: "duckdb", path: dbPath },
1625
+ label: `duckdb \u2014 ${path13.basename(dbPath)} (from .claude/mcp.json)`
1626
+ };
1627
+ }
1628
+ }
1629
+ }
1630
+ } catch {
1631
+ }
1632
+ }
1633
+ return void 0;
1634
+ }
1635
+ async function promptForConnection() {
1636
+ const connector = await p.select({
1637
+ message: "Select your database",
1638
+ options: [
1639
+ { value: "duckdb", label: "DuckDB", hint: "Local .duckdb file" },
1640
+ { value: "postgres", label: "PostgreSQL", hint: "Connection string" }
1641
+ ]
1642
+ });
1643
+ if (p.isCancel(connector)) return void 0;
1644
+ if (connector === "duckdb") {
1645
+ const method = await p.select({
1646
+ message: "How do you connect?",
1647
+ options: [
1648
+ { value: "env", label: "Environment variable", hint: "e.g. DUCKDB_PATH" },
1649
+ { value: "path", label: "File path", hint: "e.g. ./warehouse.duckdb" }
1650
+ ]
1651
+ });
1652
+ if (p.isCancel(method)) return void 0;
1653
+ if (method === "env") {
1654
+ const envName = await p.text({
1655
+ message: "Environment variable name",
1656
+ initialValue: "DUCKDB_PATH",
1657
+ validate(value) {
1658
+ if (!value) return "Required";
1659
+ const resolved = process.env[value];
1660
+ if (!resolved) return `$${value} is not set`;
1661
+ if (!existsSync3(resolved)) return `$${value} points to "${resolved}" which does not exist`;
1662
+ }
1663
+ });
1664
+ if (p.isCancel(envName)) return void 0;
1665
+ return { adapter: "duckdb", path: process.env[envName] };
1666
+ } else {
1667
+ const filePath = await p.text({
1668
+ message: "Path to .duckdb file",
1669
+ placeholder: "./warehouse.duckdb",
1670
+ validate(value) {
1671
+ if (!value) return "Required";
1672
+ if (!existsSync3(value)) return `File not found: ${value}`;
1673
+ }
1674
+ });
1675
+ if (p.isCancel(filePath)) return void 0;
1676
+ return { adapter: "duckdb", path: path13.resolve(filePath) };
1677
+ }
1678
+ } else {
1679
+ const method = await p.select({
1680
+ message: "How do you connect?",
1681
+ options: [
1682
+ { value: "env", label: "Environment variable", hint: "e.g. DATABASE_URL" },
1683
+ { value: "url", label: "Connection string", hint: "postgres://..." }
1684
+ ]
1685
+ });
1686
+ if (p.isCancel(method)) return void 0;
1687
+ if (method === "env") {
1688
+ const envName = await p.text({
1689
+ message: "Environment variable name",
1690
+ initialValue: "DATABASE_URL",
1691
+ validate(value) {
1692
+ if (!value) return "Required";
1693
+ const resolved = process.env[value];
1694
+ if (!resolved) return `$${value} is not set`;
1695
+ }
1696
+ });
1697
+ if (p.isCancel(envName)) return void 0;
1698
+ return { adapter: "postgres", connection: process.env[envName] };
1699
+ } else {
1700
+ const url = await p.text({
1701
+ message: "Connection string",
1702
+ placeholder: "postgres://user:pass@host:5432/dbname",
1703
+ validate(value) {
1704
+ if (!value) return "Required";
1705
+ if (!value.startsWith("postgres://") && !value.startsWith("postgresql://")) {
1706
+ return "Must start with postgres:// or postgresql://";
1707
+ }
1708
+ }
1709
+ });
1710
+ if (p.isCancel(url)) return void 0;
1711
+ return { adapter: "postgres", connection: url };
1712
+ }
1713
+ }
1714
+ }
1715
+ async function runConnectStep() {
1716
+ const cwd = process.cwd();
1717
+ let dsConfig;
1718
+ const detected = autoDetectDb(cwd);
1719
+ if (detected) {
1720
+ p.log.info(`Detected: ${detected.label}`);
1721
+ const useDetected = await p.confirm({ message: "Use this database?" });
1722
+ if (p.isCancel(useDetected)) {
1723
+ p.cancel("Setup cancelled.");
1724
+ return void 0;
1725
+ }
1726
+ if (useDetected) {
1727
+ dsConfig = detected.dsConfig;
1728
+ } else {
1729
+ const manual = await promptForConnection();
1730
+ if (!manual) {
1731
+ p.cancel("Setup cancelled.");
1732
+ return void 0;
1733
+ }
1734
+ dsConfig = manual;
1735
+ }
1736
+ } else {
1737
+ const manual = await promptForConnection();
1738
+ if (!manual) {
1739
+ p.cancel("Setup cancelled.");
1740
+ return void 0;
1741
+ }
1742
+ dsConfig = manual;
1743
+ }
1744
+ const spin = p.spinner();
1745
+ spin.start("Connecting to database...");
1746
+ let adapter;
1747
+ try {
1748
+ adapter = await createAdapter5(dsConfig);
1749
+ await adapter.connect();
1750
+ } catch (err) {
1751
+ spin.stop("Connection failed");
1752
+ p.log.error(err.message);
1753
+ p.cancel("Could not connect to database.");
1754
+ return void 0;
1755
+ }
1756
+ const tables = await adapter.listTables();
1757
+ const columns = {};
1758
+ for (const table of tables) {
1759
+ columns[table.name] = await adapter.listColumns(table.name);
1760
+ }
1761
+ const totalCols = Object.values(columns).reduce((sum, c) => sum + c.length, 0);
1762
+ spin.stop(`Found ${tables.length} tables, ${totalCols} columns`);
1763
+ const tableLines = tables.map((t) => ` ${t.name.padEnd(30)} ${t.row_count.toLocaleString()} rows`).join("\n");
1764
+ p.note(tableLines, "Discovered Tables");
1765
+ const defaultModel = path13.basename(cwd).replace(/[^a-z0-9-]/gi, "-").toLowerCase();
1766
+ const modelInput = await p.text({
1767
+ message: "Model name",
1768
+ initialValue: defaultModel,
1769
+ validate(value) {
1770
+ if (!value) return "Required";
1771
+ if (!/^[a-z0-9-]+$/.test(value)) return "Use lowercase letters, numbers, and hyphens only";
1772
+ }
1773
+ });
1774
+ if (p.isCancel(modelInput)) {
1775
+ p.cancel("Setup cancelled.");
1776
+ await adapter.disconnect();
1777
+ return void 0;
1778
+ }
1779
+ const tierInput = await p.select({
1780
+ message: "Target metadata tier",
1781
+ options: [
1782
+ { value: "bronze", label: "Bronze", hint: "Schema + ownership + grain" },
1783
+ { value: "silver", label: "Silver", hint: "+ trust, lineage, glossary, refresh, sample values" },
1784
+ { value: "gold", label: "Gold", hint: "+ semantic roles, rules, golden queries (needs curation)" }
1785
+ ]
1786
+ });
1787
+ if (p.isCancel(tierInput)) {
1788
+ p.cancel("Setup cancelled.");
1789
+ await adapter.disconnect();
1790
+ return void 0;
1791
+ }
1792
+ const configPath = path13.join(cwd, "contextkit.config.yaml");
1793
+ let config;
1794
+ try {
1795
+ config = loadConfig10(cwd);
1796
+ } catch {
1797
+ config = { context_dir: "./context" };
1798
+ }
1799
+ if (!config.data_sources || Object.keys(config.data_sources).length === 0) {
1800
+ const newConfig = {
1801
+ context_dir: config.context_dir ?? "./context",
1802
+ data_sources: { default: dsConfig }
1803
+ };
1804
+ writeFileSync5(configPath, yaml2.stringify(newConfig, { lineWidth: 120 }), "utf-8");
1805
+ config = loadConfig10(cwd);
1806
+ }
1807
+ const contextDir = path13.resolve(cwd, config.context_dir ?? "./context");
1808
+ return {
1809
+ cwd,
1810
+ contextDir,
1811
+ dsConfig,
1812
+ adapter,
1813
+ tables,
1814
+ columns,
1815
+ modelName: modelInput,
1816
+ targetTier: tierInput
1817
+ };
1818
+ }
1819
+
1820
+ // src/setup/steps/scaffold.ts
1821
+ import * as p3 from "@clack/prompts";
1822
+ import path14 from "path";
1823
+ import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync6, existsSync as existsSync4 } from "fs";
1824
+ import { scaffoldFromSchema as scaffoldFromSchema2, compile as compile10, computeTier as computeTier3, loadConfig as loadConfig11 } from "@runcontext/core";
1825
+
1826
+ // src/setup/display.ts
1827
+ import * as p2 from "@clack/prompts";
1828
+ function displayTierScore(score) {
1829
+ p2.note(formatTierScore(score), "Tier Scorecard");
1830
+ }
1831
+
1832
+ // src/setup/steps/scaffold.ts
1833
+ async function runScaffoldStep(ctx) {
1834
+ const shouldRun = await p3.confirm({
1835
+ message: "Scaffold Bronze metadata from database schema?"
1836
+ });
1837
+ if (p3.isCancel(shouldRun) || !shouldRun) {
1838
+ return { skipped: true, summary: "Skipped" };
1839
+ }
1840
+ const spin = p3.spinner();
1841
+ spin.start("Scaffolding Bronze metadata...");
1842
+ const result = scaffoldFromSchema2({
1843
+ modelName: ctx.modelName,
1844
+ dataSourceName: "default",
1845
+ tables: ctx.tables,
1846
+ columns: ctx.columns
1847
+ });
1848
+ for (const dir of ["models", "governance", "owners"]) {
1849
+ const dirPath = path14.join(ctx.contextDir, dir);
1850
+ if (!existsSync4(dirPath)) mkdirSync3(dirPath, { recursive: true });
1851
+ }
1852
+ const created = [];
1853
+ const files = [
1854
+ { rel: path14.join("models", result.files.osi), content: result.osiYaml },
1855
+ { rel: path14.join("governance", result.files.governance), content: result.governanceYaml },
1856
+ { rel: path14.join("owners", result.files.owner), content: result.ownerYaml }
1857
+ ];
1858
+ for (const f of files) {
1859
+ const fullPath = path14.join(ctx.contextDir, f.rel);
1860
+ writeFileSync6(fullPath, f.content, "utf-8");
1861
+ created.push(f.rel);
1862
+ }
1863
+ const config = loadConfig11(ctx.cwd);
1864
+ const { graph } = await compile10({ contextDir: ctx.contextDir, config, rootDir: ctx.cwd });
1865
+ ctx.graph = graph;
1866
+ ctx.tierScore = computeTier3(ctx.modelName, graph);
1867
+ spin.stop(`Created ${created.length} files`);
1868
+ const fileList = created.map((f) => ` ${f}`).join("\n");
1869
+ p3.note(fileList, "Files Created");
1870
+ displayTierScore(ctx.tierScore);
1871
+ return { skipped: false, summary: `${created.length} files \u2192 ${ctx.tierScore.tier.toUpperCase()}` };
1872
+ }
1873
+
1874
+ // src/setup/steps/enrich-silver.ts
1875
+ import * as p4 from "@clack/prompts";
1876
+ import path15 from "path";
1877
+ import { readFileSync as readFileSync5, writeFileSync as writeFileSync7, mkdirSync as mkdirSync4, existsSync as existsSync5, readdirSync as readdirSync2 } from "fs";
1878
+ import * as yaml3 from "yaml";
1879
+ import {
1880
+ compile as compile11,
1881
+ computeTier as computeTier4,
1882
+ suggestEnrichments as suggestEnrichments2,
1883
+ loadConfig as loadConfig12
1884
+ } from "@runcontext/core";
1885
+ function findFileRecursive2(dir, suffix) {
1886
+ if (!existsSync5(dir)) return void 0;
1887
+ const entries = readdirSync2(dir, { withFileTypes: true });
1888
+ for (const entry of entries) {
1889
+ const fullPath = path15.join(dir, entry.name);
1890
+ if (entry.isDirectory()) {
1891
+ const found = findFileRecursive2(fullPath, suffix);
1892
+ if (found) return found;
1893
+ } else if (entry.name.endsWith(suffix)) {
1894
+ return fullPath;
1895
+ }
1896
+ }
1897
+ return void 0;
1898
+ }
1899
+ async function runEnrichSilverStep(ctx) {
1900
+ const config = loadConfig12(ctx.cwd);
1901
+ const { graph } = await compile11({ contextDir: ctx.contextDir, config, rootDir: ctx.cwd });
1902
+ ctx.graph = graph;
1903
+ const tierScore = computeTier4(ctx.modelName, graph);
1904
+ if (tierScore.silver.passed) {
1905
+ p4.log.success("Already at Silver or above \u2014 skipping.");
1906
+ ctx.tierScore = tierScore;
1907
+ return { skipped: true, summary: "Already Silver" };
1908
+ }
1909
+ const model = graph.models.get(ctx.modelName);
1910
+ if (!model) {
1911
+ p4.log.error(`Model "${ctx.modelName}" not found in graph.`);
1912
+ return { skipped: true, summary: "Model not found" };
1913
+ }
1914
+ const datasetNames = model.datasets.map((d) => d.name);
1915
+ const suggestions = suggestEnrichments2("silver", tierScore, datasetNames);
1916
+ const preview = [];
1917
+ if (suggestions.governance?.trust) preview.push(`+ trust: ${suggestions.governance.trust}`);
1918
+ if (suggestions.governance?.tags) preview.push(`+ tags: [${suggestions.governance.tags.join(", ")}]`);
1919
+ if (suggestions.governance?.refreshAll) preview.push(`+ refresh: ${suggestions.governance.refreshAll} (all datasets)`);
1920
+ if (suggestions.lineage) preview.push(`+ ${suggestions.lineage.upstream?.length ?? 0} lineage upstream source(s)`);
1921
+ if (suggestions.glossaryTerms) preview.push(`+ ${suggestions.glossaryTerms.length} glossary term(s)`);
1922
+ if (suggestions.needsSampleValues) preview.push("+ sample_values from live data");
1923
+ if (preview.length > 0) {
1924
+ p4.note(preview.join("\n"), "Silver Enrichments");
1925
+ }
1926
+ const shouldRun = await p4.confirm({
1927
+ message: "Apply Silver enrichments?"
1928
+ });
1929
+ if (p4.isCancel(shouldRun) || !shouldRun) {
1930
+ return { skipped: true, summary: "Skipped" };
1931
+ }
1932
+ const spin = p4.spinner();
1933
+ spin.start("Enriching to Silver...");
1934
+ const govFilePath = findFileRecursive2(ctx.contextDir, `${ctx.modelName}.governance.yaml`);
1935
+ if (govFilePath) {
1936
+ const govContent = readFileSync5(govFilePath, "utf-8");
1937
+ const govDoc = yaml3.parse(govContent) ?? {};
1938
+ if (suggestions.governance?.trust) govDoc.trust = suggestions.governance.trust;
1939
+ if (suggestions.governance?.tags) govDoc.tags = suggestions.governance.tags;
1940
+ if (suggestions.governance?.refreshAll) {
1941
+ for (const dsName of Object.keys(govDoc.datasets ?? {})) {
1942
+ govDoc.datasets[dsName].refresh = suggestions.governance.refreshAll;
1943
+ }
1944
+ }
1945
+ if (suggestions.needsSampleValues) {
1946
+ govDoc.fields = govDoc.fields ?? {};
1947
+ try {
1948
+ let count = 0;
1949
+ for (const ds of model.datasets) {
1950
+ if (count >= 2) break;
1951
+ const tableName = ds.source?.split(".").pop() ?? ds.name;
1952
+ for (const field of ds.fields ?? []) {
1953
+ if (count >= 2) break;
1954
+ const fieldKey = `${ds.name}.${field.name}`;
1955
+ if (govDoc.fields[fieldKey]?.sample_values?.length > 0) continue;
1956
+ try {
1957
+ const result = await ctx.adapter.query(
1958
+ `SELECT DISTINCT CAST("${field.name}" AS VARCHAR) AS val FROM "${tableName}" WHERE "${field.name}" IS NOT NULL LIMIT 5`
1959
+ );
1960
+ if (result.rows.length > 0) {
1961
+ govDoc.fields[fieldKey] = govDoc.fields[fieldKey] ?? {};
1962
+ govDoc.fields[fieldKey].sample_values = result.rows.map((r) => String(r.val));
1963
+ count++;
1964
+ }
1965
+ } catch {
1966
+ }
1967
+ }
1968
+ }
1969
+ } catch {
1970
+ }
1971
+ }
1972
+ writeFileSync7(govFilePath, yaml3.stringify(govDoc, { lineWidth: 120 }), "utf-8");
1973
+ }
1974
+ if (suggestions.lineage) {
1975
+ const lineageDir = path15.join(ctx.contextDir, "lineage");
1976
+ if (!existsSync5(lineageDir)) mkdirSync4(lineageDir, { recursive: true });
1977
+ const lineagePath = path15.join(lineageDir, `${ctx.modelName}.lineage.yaml`);
1978
+ if (!existsSync5(lineagePath)) {
1979
+ const lineageDoc = { model: ctx.modelName, upstream: suggestions.lineage.upstream };
1980
+ writeFileSync7(lineagePath, yaml3.stringify(lineageDoc, { lineWidth: 120 }), "utf-8");
1981
+ }
1982
+ }
1983
+ if (suggestions.glossaryTerms) {
1984
+ const glossaryDir = path15.join(ctx.contextDir, "glossary");
1985
+ if (!existsSync5(glossaryDir)) mkdirSync4(glossaryDir, { recursive: true });
1986
+ for (const term of suggestions.glossaryTerms) {
1987
+ const termPath = path15.join(glossaryDir, `${term.id}.term.yaml`);
1988
+ if (!existsSync5(termPath)) {
1989
+ writeFileSync7(termPath, yaml3.stringify(term, { lineWidth: 120 }), "utf-8");
1990
+ }
1991
+ }
1992
+ }
1993
+ const { graph: newGraph } = await compile11({ contextDir: ctx.contextDir, config, rootDir: ctx.cwd });
1994
+ ctx.graph = newGraph;
1995
+ ctx.tierScore = computeTier4(ctx.modelName, newGraph);
1996
+ spin.stop("Applied Silver enrichments");
1997
+ displayTierScore(ctx.tierScore);
1998
+ return { skipped: false, summary: ctx.tierScore.tier.toUpperCase() };
1999
+ }
2000
+
2001
+ // src/setup/steps/enrich-gold.ts
2002
+ import * as p5 from "@clack/prompts";
2003
+ import path16 from "path";
2004
+ import { readFileSync as readFileSync6, writeFileSync as writeFileSync8, mkdirSync as mkdirSync5, existsSync as existsSync6, readdirSync as readdirSync3 } from "fs";
2005
+ import * as yaml4 from "yaml";
2006
+ import {
2007
+ compile as compile12,
2008
+ computeTier as computeTier5,
2009
+ suggestEnrichments as suggestEnrichments3,
2010
+ inferSemanticRole as inferSemanticRole2,
2011
+ inferAggregation as inferAggregation2,
2012
+ loadConfig as loadConfig13
2013
+ } from "@runcontext/core";
2014
+ function findFileRecursive3(dir, suffix) {
2015
+ if (!existsSync6(dir)) return void 0;
2016
+ const entries = readdirSync3(dir, { withFileTypes: true });
2017
+ for (const entry of entries) {
2018
+ const fullPath = path16.join(dir, entry.name);
2019
+ if (entry.isDirectory()) {
2020
+ const found = findFileRecursive3(fullPath, suffix);
2021
+ if (found) return found;
2022
+ } else if (entry.name.endsWith(suffix)) {
2023
+ return fullPath;
2024
+ }
2025
+ }
2026
+ return void 0;
2027
+ }
2028
+ async function runEnrichGoldStep(ctx) {
2029
+ const config = loadConfig13(ctx.cwd);
2030
+ const { graph } = await compile12({ contextDir: ctx.contextDir, config, rootDir: ctx.cwd });
2031
+ ctx.graph = graph;
2032
+ const tierScore = computeTier5(ctx.modelName, graph);
2033
+ if (tierScore.gold.passed) {
2034
+ p5.log.success("Already at Gold \u2014 skipping.");
2035
+ ctx.tierScore = tierScore;
2036
+ return { skipped: true, summary: "Already Gold" };
2037
+ }
2038
+ const model = graph.models.get(ctx.modelName);
2039
+ if (!model) {
2040
+ p5.log.error(`Model "${ctx.modelName}" not found.`);
2041
+ return { skipped: true, summary: "Model not found" };
2042
+ }
2043
+ const datasetNames = model.datasets.map((d) => d.name);
2044
+ const suggestions = suggestEnrichments3("gold", tierScore, datasetNames);
2045
+ const preview = [];
2046
+ if (suggestions.needsSemanticRoles) preview.push("+ Infer semantic_role for all fields");
2047
+ if (suggestions.needsRulesFile) preview.push("+ Generate rules file (golden queries, guardrails, hierarchies)");
2048
+ if (suggestions.governance?.trust) preview.push(`+ trust: ${suggestions.governance.trust}`);
2049
+ preview.push("+ Add version, business_context stubs to governance");
2050
+ preview.push("+ Add ai_context placeholder to model");
2051
+ preview.push("+ Infer relationships from column name patterns");
2052
+ if (preview.length > 0) {
2053
+ p5.note(preview.join("\n"), "Gold Enrichments");
2054
+ }
2055
+ p5.log.warning("Gold enrichments create TODO placeholders that need manual curation.");
2056
+ const shouldRun = await p5.confirm({
2057
+ message: "Apply Gold enrichments?"
2058
+ });
2059
+ if (p5.isCancel(shouldRun) || !shouldRun) {
2060
+ return { skipped: true, summary: "Skipped" };
2061
+ }
2062
+ const spin = p5.spinner();
2063
+ spin.start("Enriching to Gold...");
2064
+ const govFilePath = findFileRecursive3(ctx.contextDir, `${ctx.modelName}.governance.yaml`);
2065
+ if (govFilePath) {
2066
+ const govContent = readFileSync6(govFilePath, "utf-8");
2067
+ const govDoc = yaml4.parse(govContent) ?? {};
2068
+ if (suggestions.governance?.trust) govDoc.trust = suggestions.governance.trust;
2069
+ if (suggestions.needsSemanticRoles) {
2070
+ govDoc.fields = govDoc.fields ?? {};
2071
+ for (const ds of model.datasets) {
2072
+ const tableName = ds.source?.split(".").pop() ?? ds.name;
2073
+ let dbColumns = [];
2074
+ try {
2075
+ dbColumns = await ctx.adapter.listColumns(tableName);
2076
+ } catch {
2077
+ }
2078
+ for (const field of ds.fields ?? []) {
2079
+ const fieldKey = `${ds.name}.${field.name}`;
2080
+ if (govDoc.fields[fieldKey]?.semantic_role) continue;
2081
+ const col = dbColumns.find((c) => c.name === field.name);
2082
+ const isPK = col?.is_primary_key ?? field.name.endsWith("_id");
2083
+ const dataType = col?.data_type ?? "VARCHAR";
2084
+ govDoc.fields[fieldKey] = govDoc.fields[fieldKey] ?? {};
2085
+ const role = inferSemanticRole2(field.name, dataType, isPK);
2086
+ govDoc.fields[fieldKey].semantic_role = role;
2087
+ if (role === "metric") {
2088
+ govDoc.fields[fieldKey].default_aggregation = inferAggregation2(field.name);
2089
+ govDoc.fields[fieldKey].additive = govDoc.fields[fieldKey].default_aggregation === "SUM";
2090
+ }
2091
+ }
2092
+ }
2093
+ }
2094
+ if (!govDoc.version) {
2095
+ govDoc.version = "0.1.0";
2096
+ }
2097
+ if (!govDoc.business_context || govDoc.business_context.length === 0) {
2098
+ govDoc.business_context = [
2099
+ { name: "TODO: Use Case Name", description: "TODO: Describe the analytical use case and business value." }
2100
+ ];
2101
+ }
2102
+ writeFileSync8(govFilePath, yaml4.stringify(govDoc, { lineWidth: 120 }), "utf-8");
2103
+ }
2104
+ const modelFilePath = findFileRecursive3(ctx.contextDir, `${ctx.modelName}.osi.yaml`);
2105
+ if (modelFilePath) {
2106
+ const modelContent = readFileSync6(modelFilePath, "utf-8");
2107
+ const modelDoc = yaml4.parse(modelContent) ?? {};
2108
+ const semModels = modelDoc.semantic_model ?? [];
2109
+ let changed = false;
2110
+ for (const sm of semModels) {
2111
+ if (sm.name !== ctx.modelName) continue;
2112
+ if (!sm.ai_context) {
2113
+ sm.ai_context = "TODO: Describe how an AI agent should use this model, common pitfalls, and important filters.";
2114
+ changed = true;
2115
+ }
2116
+ if (!sm.relationships || sm.relationships.length === 0) {
2117
+ const datasets = sm.datasets ?? [];
2118
+ const dsNames = new Set(datasets.map((d) => d.name));
2119
+ const inferred = [];
2120
+ for (const ds of datasets) {
2121
+ for (const field of ds.fields ?? []) {
2122
+ const fname = field.name;
2123
+ const idMatch = fname.match(/^(.+)_id$/);
2124
+ if (idMatch && idMatch[1]) {
2125
+ const targetBase = idMatch[1];
2126
+ for (const targetDs of datasets) {
2127
+ if (targetDs.name === ds.name) continue;
2128
+ const targetName = targetDs.name;
2129
+ if (targetName.includes(targetBase) || targetBase.includes(targetName)) {
2130
+ const targetHasField = (targetDs.fields ?? []).some((f) => f.name === fname);
2131
+ if (targetHasField) {
2132
+ const relName = `${ds.name}-to-${targetName}`;
2133
+ if (!inferred.some((r) => r.name === relName)) {
2134
+ inferred.push({
2135
+ name: relName,
2136
+ from: ds.name,
2137
+ to: targetName,
2138
+ from_columns: [fname],
2139
+ to_columns: [fname]
2140
+ });
2141
+ }
2142
+ }
2143
+ }
2144
+ }
2145
+ }
2146
+ }
2147
+ }
2148
+ if (inferred.length > 0) {
2149
+ sm.relationships = inferred;
2150
+ changed = true;
2151
+ }
2152
+ }
2153
+ }
2154
+ if (changed) {
2155
+ writeFileSync8(modelFilePath, yaml4.stringify(modelDoc, { lineWidth: 120 }), "utf-8");
2156
+ }
2157
+ }
2158
+ if (suggestions.needsRulesFile) {
2159
+ const rulesDir = path16.join(ctx.contextDir, "rules");
2160
+ if (!existsSync6(rulesDir)) mkdirSync5(rulesDir, { recursive: true });
2161
+ const rulesPath = path16.join(rulesDir, `${ctx.modelName}.rules.yaml`);
2162
+ if (!existsSync6(rulesPath)) {
2163
+ const rulesDoc = {
2164
+ model: ctx.modelName,
2165
+ golden_queries: [
2166
+ { question: "TODO: What is the total count?", sql: "SELECT COUNT(*) FROM table_name" },
2167
+ { question: "TODO: What are the top records?", sql: "SELECT * FROM table_name LIMIT 10" },
2168
+ { question: "TODO: What is the distribution?", sql: "SELECT column, COUNT(*) FROM table_name GROUP BY column" }
2169
+ ],
2170
+ business_rules: [
2171
+ { name: "TODO: rule-name", definition: "TODO: describe the business rule" }
2172
+ ],
2173
+ guardrail_filters: [
2174
+ { name: "TODO: filter-name", filter: "column IS NOT NULL", reason: "TODO: explain why" }
2175
+ ],
2176
+ hierarchies: [
2177
+ { name: "TODO: hierarchy-name", levels: ["level1", "level2"], dataset: datasetNames[0] ?? "dataset" }
2178
+ ]
2179
+ };
2180
+ writeFileSync8(rulesPath, yaml4.stringify(rulesDoc, { lineWidth: 120 }), "utf-8");
2181
+ }
2182
+ }
2183
+ const { graph: newGraph } = await compile12({ contextDir: ctx.contextDir, config, rootDir: ctx.cwd });
2184
+ ctx.graph = newGraph;
2185
+ ctx.tierScore = computeTier5(ctx.modelName, newGraph);
2186
+ spin.stop("Applied Gold enrichments");
2187
+ const todos = suggestions.needsRulesFile ? "\nThe rules file contains TODO placeholders \u2014 edit context/rules/ to complete Gold." : "";
2188
+ if (todos) p5.log.warning(todos);
2189
+ displayTierScore(ctx.tierScore);
2190
+ return { skipped: false, summary: `${ctx.tierScore.tier.toUpperCase()} (may need curation)` };
2191
+ }
2192
+
2193
+ // src/setup/steps/verify.ts
2194
+ import * as p6 from "@clack/prompts";
2195
+ import {
2196
+ compile as compile13,
2197
+ LintEngine as LintEngine5,
2198
+ ALL_RULES as ALL_RULES6,
2199
+ computeTier as computeTier6,
2200
+ loadConfig as loadConfig14
2201
+ } from "@runcontext/core";
2202
+ async function runVerifyStep(ctx) {
2203
+ const shouldRun = await p6.confirm({
2204
+ message: "Verify metadata against live data?"
2205
+ });
2206
+ if (p6.isCancel(shouldRun) || !shouldRun) {
2207
+ return { skipped: true, summary: "Skipped" };
2208
+ }
2209
+ const spin = p6.spinner();
2210
+ spin.start("Verifying against database...");
2211
+ const config = loadConfig14(ctx.cwd);
2212
+ const { graph } = await compile13({ contextDir: ctx.contextDir, config, rootDir: ctx.cwd });
2213
+ graph.dataValidation = await collectDataValidation(ctx.adapter, graph);
2214
+ const engine = new LintEngine5();
2215
+ for (const rule of ALL_RULES6) {
2216
+ if (rule.id.startsWith("data/")) engine.register(rule);
2217
+ }
2218
+ const dataDiags = engine.run(graph);
2219
+ ctx.graph = graph;
2220
+ ctx.tierScore = computeTier6(ctx.modelName, graph);
2221
+ const errors = dataDiags.filter((d) => d.severity === "error").length;
2222
+ const warnings = dataDiags.filter((d) => d.severity === "warning").length;
2223
+ if (dataDiags.length === 0) {
2224
+ spin.stop("All data validation checks passed");
2225
+ } else {
2226
+ spin.stop(`${errors} error(s), ${warnings} warning(s)`);
2227
+ const details = dataDiags.map((d) => ` ${d.severity === "error" ? "x" : "!"} ${d.message}`).join("\n");
2228
+ p6.note(details, "Data Validation Issues");
2229
+ }
2230
+ return {
2231
+ skipped: false,
2232
+ summary: dataDiags.length === 0 ? "Clean" : `${errors} errors, ${warnings} warnings`
2233
+ };
2234
+ }
2235
+
2236
+ // src/setup/steps/autofix.ts
2237
+ import * as p7 from "@clack/prompts";
2238
+ import fs4 from "fs";
2239
+ import {
2240
+ compile as compile14,
2241
+ LintEngine as LintEngine6,
2242
+ ALL_RULES as ALL_RULES7,
2243
+ applyFixes as applyFixes4,
2244
+ computeTier as computeTier7,
2245
+ loadConfig as loadConfig15
2246
+ } from "@runcontext/core";
2247
/**
 * Setup-wizard step: find lint diagnostics that carry automatic fixes,
 * confirm with the user, apply the fixes to disk, then recompile so the
 * context graph and tier score reflect the patched files.
 *
 * @param ctx shared wizard context (cwd, contextDir, adapter, modelName;
 *            ctx.graph and ctx.tierScore are updated as a side effect)
 * @returns {Promise<{skipped: boolean, summary: string}>} step outcome for the wizard summary
 */
async function runAutofixStep(ctx) {
  const cfg = loadConfig15(ctx.cwd);
  const { graph } = await compile14({ contextDir: ctx.contextDir, config: cfg, rootDir: ctx.cwd });
  // Live-data validation results feed some lint rules, so collect them first.
  graph.dataValidation = await collectDataValidation(ctx.adapter, graph);
  const linter = new LintEngine6();
  ALL_RULES7.forEach((rule) => linter.register(rule));
  const fixableDiags = linter.run(graph).filter((d) => d.fixable);
  if (fixableDiags.length === 0) {
    p7.log.success("No fixable issues found.");
    ctx.graph = graph;
    ctx.tierScore = computeTier7(ctx.modelName, graph);
    return { skipped: true, summary: "Nothing to fix" };
  }
  const confirmed = await p7.confirm({
    message: `Auto-fix ${fixableDiags.length} issue(s)?`
  });
  if (p7.isCancel(confirmed) || !confirmed) {
    return { skipped: true, summary: "Skipped" };
  }
  const progress = p7.spinner();
  progress.start("Fixing...");
  // applyFixes reads file contents on demand and returns a Map of path -> fixed text.
  const patched = applyFixes4(fixableDiags, (filePath) => fs4.readFileSync(filePath, "utf-8"));
  for (const [file, text] of patched) {
    fs4.writeFileSync(file, text, "utf-8");
  }
  // Recompile so downstream steps see the post-fix graph and tier.
  const recompiled = await compile14({ contextDir: ctx.contextDir, config: cfg, rootDir: ctx.cwd });
  ctx.graph = recompiled.graph;
  ctx.tierScore = computeTier7(ctx.modelName, recompiled.graph);
  progress.stop(`Fixed ${fixableDiags.length} issue(s) in ${patched.size} file(s)`);
  displayTierScore(ctx.tierScore);
  return { skipped: false, summary: `${fixableDiags.length} issues fixed` };
}
2281
+
2282
+ // src/setup/steps/claude-md.ts
2283
+ import * as p8 from "@clack/prompts";
2284
+ import path17 from "path";
2285
+ import { existsSync as existsSync7, writeFileSync as writeFileSync9 } from "fs";
2286
/**
 * Render the contents of context/AGENT_INSTRUCTIONS.md for the current setup
 * context: curation ground rules, the model's datasets, its failing tier
 * checks, and reference material for MCP tools / YAML formats / CLI commands.
 *
 * Fixes vs previous version: the emitted text read "A honest TODO" (now
 * "An honest TODO"), and the three copy-pasted loops over
 * bronze/silver/gold checks are consolidated into one loop.
 *
 * @param ctx wizard context; only ctx.modelName, ctx.tierScore and ctx.graph
 *            are read, all optional except modelName
 * @returns {string} complete markdown document
 */
function buildClaudeMd(ctx) {
  const modelName = ctx.modelName;
  // Tier may be absent when scoring never ran; fall back to a neutral label.
  const tier = ctx.tierScore?.tier?.toUpperCase() ?? "UNKNOWN";
  const model = ctx.graph?.models.get(modelName);
  const datasets = model?.datasets ?? [];
  const datasetList = datasets.map((ds) => `- \`${ds.name}\` \u2014 ${ds.fields?.length ?? 0} fields`).join("\n");
  const failingChecks = [];
  if (ctx.tierScore) {
    // Collect failures from every tier level in ascending order.
    for (const level of [ctx.tierScore.bronze, ctx.tierScore.silver, ctx.tierScore.gold]) {
      for (const check of level.checks) {
        if (!check.passed) failingChecks.push(`- ${check.id}: ${check.detail ?? check.label}`);
      }
    }
  }
  const failingSection = failingChecks.length > 0 ? `### Failing Checks

${failingChecks.join("\n")}` : "All checks passing.";
  return `# ContextKit Agent Instructions

You have two MCP servers: **duckdb** (query data) and **contextkit** (query metadata).

Model: **${modelName}** | Current Tier: **${tier}**

## The Cardinal Rule: Never Fabricate Metadata

**Every piece of metadata you write must be grounded in evidence from the actual data.**

- NEVER invent owner names, emails, team names, or contact info
- NEVER write a field description that is just the column name repeated
- NEVER assign a semantic_role without first querying the column's actual values
- NEVER mark a field as additive without understanding what summing it means
- NEVER write lineage entries without knowing the actual data sources
- NEVER write a business_context narrative you can't justify from the data
- NEVER create a glossary definition that is just "Definition for X"

If you don't know something, say so. Leave it as a TODO with a note about what you'd need to determine the answer. An honest TODO is infinitely better than fabricated metadata that looks plausible but is wrong.

## On Session Start

1. Run \`context_tier\` to check the current metadata tier (Bronze/Silver/Gold)
2. Report the current tier and list failing checks
3. Ask the user what they'd like to work on \u2014 don't start changing files unprompted

## When Asked to Reach Gold

Work through ALL failing Gold checks iteratively until \`context tier\` reports Gold:

1. Run \`context_tier\` and collect every failing check
2. For each failing check, query the database to gather evidence, then fix the metadata
3. Run \`context_tier\` again
4. If checks still fail, go back to step 2
5. **Do NOT stop until every Gold check passes** or you hit something that genuinely requires human input (like real owner contact info)
6. For checks you cannot fix (e.g., owner email), leave a clear TODO explaining what a human needs to provide

You must iterate \u2014 a single pass is never enough. Each \`context tier\` run may reveal new failures after earlier ones are fixed.

## How to Curate Metadata (the right way)

### Before writing ANY metadata, query the database first

For every field you're about to describe or classify:

\`\`\`sql
-- What type of values does this column contain?
SELECT DISTINCT column_name FROM table LIMIT 20;

-- For numeric columns: is this a metric or dimension?
SELECT MIN(col), MAX(col), AVG(col), COUNT(DISTINCT col) FROM table;

-- For potential metrics: does SUM make sense?
-- If SUM produces a meaningful business number \u2192 additive: true
-- If SUM is meaningless (e.g., summing percentages, scores, ratings) \u2192 additive: false
\`\`\`

### Semantic Role Decision Tree

Query the column first, then apply this logic:

1. **Is it a primary key or foreign key?** \u2192 \`identifier\`
2. **Is it a date or timestamp?** \u2192 \`date\`
3. **Is it numeric AND does aggregation make business sense?**
   - Does SUM make sense? (counts, amounts, quantities) \u2192 \`metric\`, \`additive: true\`
   - Does only AVG/MIN/MAX make sense? (rates, percentages, scores, ratings) \u2192 \`metric\`, \`additive: false\`
4. **Everything else** \u2192 \`dimension\`

Common mistakes to avoid:
- \`stars\` (ratings) \u2192 metric with AVG, NOT additive (summing star ratings is meaningless)
- \`_per_10k_people\` (rates) \u2192 metric with AVG, NOT additive
- \`_score\` (composite scores) \u2192 metric with AVG, NOT additive
- \`useful/funny/cool\` (vote counts) \u2192 metric with SUM, additive
- \`_count\` fields \u2192 metric with SUM, additive (usually)

### Field Descriptions

Write descriptions that help someone who has never seen this database understand what the column contains. Include:
- What the value represents
- Units or scale (if applicable)
- Where the data comes from (if known)
- Any known quirks or caveats

Bad: \`description: total_population\`
Good: \`description: Total resident population of the census tract from American Community Survey 5-year estimates\`

Bad: \`description: stars\`
Good: \`description: Average Yelp star rating (1.0-5.0 scale) based on all reviews for this business\`

### Lineage

Upstream sources are the EXTERNAL systems that feed data into this warehouse. They are NOT the tables in the warehouse itself.

Ask yourself: "Where did this data originally come from before it was loaded here?"

Bad lineage:
\`\`\`yaml
upstream:
  - source: yelp_business # This is a table IN the warehouse, not an upstream source
    type: pipeline
\`\`\`

Good lineage:
\`\`\`yaml
upstream:
  - source: yelp-academic-dataset
    type: file
    notes: Yelp Open Dataset (academic use), loaded via CSV import
\`\`\`

### Owner Files

Do NOT create fake owner identities. If the real owner is unknown:
- Keep the existing owner file as-is
- Note in the file that contact info needs to be filled in by a real person
- NEVER invent email addresses like \`analytics@example.com\`

### Business Context

Write business_context entries that describe real analytical use cases you can verify from the data. Query the data to understand what questions it can answer before writing narratives.

### Golden Queries

Every golden query MUST be tested against the actual database before you write it. Run the SQL, verify it returns sensible results, then document it.

### Data Quality

When you discover data quality issues (null values, broken joins, missing data), FLAG THEM \u2014 don't hide them. Add notes in governance or report them to the user.

## This Project

### Datasets

${datasetList || "(none detected)"}

${failingSection}

## MCP Tools

| Tool | Parameters | What it does |
|------|-----------|-------------|
| \`context_search\` | \`query\` | Find models, datasets, fields, terms by keyword |
| \`context_explain\` | \`model\` | Full model details \u2014 governance, rules, lineage, tier |
| \`context_validate\` | \u2014 | Run linter, get errors and warnings |
| \`context_tier\` | \`model\` | Tier scorecard with all check results |
| \`context_golden_query\` | \`question\` | Find pre-validated SQL for a question |
| \`context_guardrails\` | \`tables[]\` | Get required WHERE clauses for tables |

## Tier Checks Quick Reference

**Bronze (7):** descriptions, owner, security, grain, table_type
**Silver (+6):** trust, 2+ tags, glossary linked, lineage, refresh, 2+ sample_values
**Gold (+24):** semantic_role on ALL fields, metric aggregation/additive, 1+ guardrail, 3+ golden queries, 1+ business rule, 1+ hierarchy, 1+ default_filter, trust=endorsed, contactable owner, 1+ relationship, description \u226550 chars, ai_context (no TODO), 1+ business_context, version, field descriptions not lazy, glossary definitions substantive, lineage references real sources, grain statements specific, ai_context filled in, 3+ relationships (models with 3+ datasets), 1+ computed metric, 3+ glossary terms (models with 5+ datasets)

## How to Reach Gold: Curation Recipes

### Metrics (gold/metrics-defined)

Inspect computed views in the database. Any calculated column is a candidate metric.

\`\`\`sql
-- Find computed columns in views
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_name LIKE 'vw_%' AND data_type IN ('DOUBLE', 'FLOAT', 'INTEGER', 'BIGINT', 'DECIMAL');
\`\`\`

For each computed column (e.g., \`opportunity_score\`, \`shops_per_10k\`, \`demand_signal_pct\`):
1. Query it to understand what it measures
2. Add it to the model's \`metrics[]\` array in the OSI YAML
3. Include the SQL expression, aggregation type (SUM/AVG), and a human description
4. Mark whether it's additive (can be summed across dimensions)

Example:
\`\`\`yaml
metrics:
  - name: opportunity_score
    expression:
      dialects:
        - dialect: DuckDB
          expression: "(population/10000)*2 + (income/50000)*2 + (10-shops_per_10k)*3 + transit*1.5 + demand*0.5"
    description: Composite score ranking census tracts for coffee shop viability
    aggregation: AVG
    additive: false
\`\`\`

### Glossary Terms (gold/glossary-coverage)

For each key business concept your model measures, create a glossary term file.

Think about the terms a new analyst would need defined:
- What is "supply saturation"? (> 5.0 shops per 10k people)
- What is a "demand signal"? (review mentioning wait/line/crowded/busy)
- What is "opportunity score"? (composite ranking formula)

For each term, create \`context/glossary/<term-name>.term.yaml\`:
\`\`\`yaml
term: supply-saturation
definition: >
  A measure of coffee shop density per census tract. Calculated as
  shops per 10,000 residents. Tracts with > 5.0 are considered saturated.
owner: analytics-team
tags: [coffee-analytics]
\`\`\`

Models with 5+ datasets need at least 3 glossary terms linked by shared tags or owner.

### Relationships (gold/relationships-coverage)

For each join in the SQL views, define a relationship in the OSI model.

\`\`\`sql
-- Find joins by examining view definitions
-- Look for patterns: ON table_a.col = table_b.col
-- Or spatial joins: ABS(a.lat - b.lat) < threshold
\`\`\`

For each join:
\`\`\`yaml
relationships:
  - name: business-to-tract
    left_dataset: yelp_business
    right_dataset: census_tract
    join_type: spatial
    cardinality: many-to-one
    description: Businesses assigned to nearest census tract within 0.02 degrees (~1 mile)
\`\`\`

Models with 3+ datasets need at least 3 relationships.

### Golden Queries

Write 3-5 SQL queries answering common business questions. **Test each query first!**

\`\`\`sql
-- Run the query, verify it returns sensible results, then document:
SELECT geoid, tract_name, opportunity_score
FROM vw_candidate_zones ORDER BY opportunity_score DESC LIMIT 10;
\`\`\`

## YAML Formats

**Governance** (\`context/governance/*.governance.yaml\`):
\`\`\`yaml
model: my-model
owner: team-name
version: "1.0.0"
trust: endorsed
security: internal
tags: [domain-tag-1, domain-tag-2]
business_context:
  - name: Use Case Name
    description: What analytical question this data answers and for whom.
datasets:
  my_table:
    grain: "One row per [entity] identified by [key]"
    table_type: fact # fact | dimension | event | view
    refresh: daily
fields:
  dataset.field:
    semantic_role: metric # metric | dimension | identifier | date
    default_aggregation: SUM # SUM | AVG | COUNT | COUNT_DISTINCT | MIN | MAX
    additive: true # can this metric be summed across dimensions?
    default_filter: "is_open = 1"
    sample_values: ["val1", "val2"]
\`\`\`

**Rules** (\`context/rules/*.rules.yaml\`):
\`\`\`yaml
model: my-model
golden_queries:
  - question: What are the top items by count?
    sql: SELECT name, count FROM my_table ORDER BY count DESC LIMIT 10
    intent: Identify top performers by volume
    caveats: Filters to active records only
business_rules:
  - name: valid-ratings
    definition: All ratings must be between 1 and 5
guardrail_filters:
  - name: active-only
    filter: "status = 'active'"
    reason: Exclude inactive records from analytics
    tables: [my_table]
hierarchies:
  - name: geography
    levels: [state, city, postal_code]
    dataset: my_table
\`\`\`

## CLI Commands

\`\`\`bash
context tier # Check scorecard
context verify --db <path> # Validate against live data
context fix --db <path> # Auto-fix data warnings
context setup # Interactive setup wizard
context dev # Watch mode for live editing
\`\`\`
`;
}
2607
/**
 * Setup-wizard step: write context/AGENT_INSTRUCTIONS.md from the current
 * wizard context, prompting before overwriting an existing file.
 *
 * @param ctx shared wizard context (contextDir, plus graph/tierScore consumed by buildClaudeMd)
 * @returns {Promise<{skipped: boolean, summary: string}>} step outcome for the wizard summary
 */
async function runClaudeMdStep(ctx) {
  const outputPath = path17.join(ctx.contextDir, "AGENT_INSTRUCTIONS.md");
  if (existsSync7(outputPath)) {
    // Never clobber an existing file without explicit consent.
    const overwrite = await p8.confirm({
      message: "context/AGENT_INSTRUCTIONS.md already exists. Overwrite with updated instructions?"
    });
    if (p8.isCancel(overwrite) || !overwrite) {
      return { skipped: true, summary: "context/AGENT_INSTRUCTIONS.md already exists, kept existing" };
    }
  }
  writeFileSync9(outputPath, buildClaudeMd(ctx), "utf-8");
  p8.log.success("Generated context/AGENT_INSTRUCTIONS.md with agent curation instructions");
  return { skipped: false, summary: "Generated context/AGENT_INSTRUCTIONS.md" };
}
2622
+
2623
// src/commands/setup.ts
/**
 * `context setup` — interactive wizard that connects to a database, scaffolds
 * metadata up to the user's chosen tier, verifies it against live data,
 * auto-fixes what it can, and emits agent instructions. The database adapter
 * is always disconnected on exit, even when a step throws.
 */
var setupCommand = new Command15("setup").description("Interactive wizard to scaffold and enrich metadata from a database").action(async () => {
  p9.intro(chalk16.bgCyan(chalk16.black(" ContextKit Setup ")));
  const ctx = await runConnectStep();
  if (!ctx) return;
  try {
    // Assemble the step pipeline based on the tier the user asked for.
    const pipeline = [{ name: "Scaffold Bronze", fn: runScaffoldStep }];
    if (ctx.targetTier === "silver" || ctx.targetTier === "gold") {
      pipeline.push({ name: "Enrich to Silver", fn: runEnrichSilverStep });
    }
    if (ctx.targetTier === "gold") {
      pipeline.push({ name: "Enrich to Gold", fn: runEnrichGoldStep });
    }
    pipeline.push(
      { name: "Verify data", fn: runVerifyStep },
      { name: "Auto-fix", fn: runAutofixStep },
      { name: "Generate agent instructions", fn: runClaudeMdStep }
    );
    const completed = [];
    for (const [index, step] of pipeline.entries()) {
      p9.log.step(`${chalk16.dim(`[${index + 1}/${pipeline.length}]`)} ${step.name}`);
      const outcome = await step.fn(ctx);
      completed.push({ name: step.name, summary: outcome.summary });
    }
    const summaryText = completed.map((r) => ` ${chalk16.green("+")} ${r.name}: ${r.summary}`).join("\n");
    p9.note(summaryText, "Summary");
    if (ctx.tierScore) {
      displayTierScore(ctx.tierScore);
    }
    const achievedTier = ctx.tierScore?.tier ?? "none";
    // Ordinal comparison is equivalent to the tier-membership checks:
    // bronze is met by bronze/silver/gold, silver by silver/gold, gold only by gold.
    const rank = { none: 0, bronze: 1, silver: 2, gold: 3 };
    const targetMet = rank[achievedTier] >= rank[ctx.targetTier];
    if (targetMet) {
      p9.outro(`Done! You're at ${chalk16.bold(achievedTier.toUpperCase())}. Run ${chalk16.cyan("context tier")} anytime to check.`);
    } else if (ctx.targetTier === "gold" && achievedTier !== "gold") {
      const nextSteps = [
        `Your metadata is at ${chalk16.bold(achievedTier.toUpperCase())} \u2014 Gold needs curation.`,
        "",
        `${chalk16.bold("To reach Gold, tell your AI assistant:")}`,
        "",
        ` "Read ${chalk16.cyan("context/AGENT_INSTRUCTIONS.md")} for curation guidelines.`,
        ` Run ${chalk16.cyan("context tier")} and fix every failing Gold check.`,
        ` Query the database before writing any metadata.`,
        ` Keep iterating until ${chalk16.cyan("context tier")} reports Gold."`
      ];
      p9.note(nextSteps.join("\n"), "Next Steps");
      p9.outro(`Run ${chalk16.cyan("context dev")} to watch for changes as you edit.`);
    } else {
      p9.outro(`Run ${chalk16.cyan("context tier")} to check your scorecard.`);
    }
  } finally {
    // Best-effort disconnect; a failure here must not mask the wizard's result.
    try {
      await ctx.adapter.disconnect();
    } catch {
    }
  }
});
2682
+
1565
2683
  // src/index.ts
1566
- var program = new Command15();
2684
+ var program = new Command16();
1567
2685
  program.name("context").description("ContextKit \u2014 AI-ready metadata governance over OSI").version("0.3.1");
1568
2686
  program.addCommand(lintCommand);
1569
2687
  program.addCommand(buildCommand);
@@ -1579,5 +2697,6 @@ program.addCommand(introspectCommand);
1579
2697
  program.addCommand(verifyCommand);
1580
2698
  program.addCommand(enrichCommand);
1581
2699
  program.addCommand(rulesCommand);
2700
+ program.addCommand(setupCommand);
1582
2701
  program.parse();
1583
2702
  //# sourceMappingURL=index.js.map