@duckcodeailabs/dql-cli 1.6.3 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, wat
4
4
  import { homedir } from 'node:os';
5
5
  import { dirname, extname, join, normalize, relative, resolve } from 'node:path';
6
6
  import { buildExecutionPlan, createWelcomeNotebook, deserializeNotebook, getConnectorFormSchemas, hasSemanticRefs, resolveSemanticRefs, } from '@duckcodeailabs/dql-notebook';
7
- import { loadSemanticLayerFromDir, resolveSemanticLayerAsync, Parser, buildLineageGraph, buildManifest, findAppDocuments, findDashboardsForApp, isBlockIdRef, loadAppDocument, loadDashboardDocument, analyzeImpact, buildTrustChain, detectDomainFlows, getDomainTrustOverview, queryLineage, queryCompleteLineagePaths, LineageGraph, canonicalize, canonicalizeNotebook, diffDQL, diffNotebook, } from '@duckcodeailabs/dql-core';
7
+ import { loadSemanticLayerFromDir, resolveSemanticLayerAsync, getDialect, Parser, buildLineageGraph, buildManifest, findAppDocuments, findDashboardsForApp, isBlockIdRef, loadAppDocument, loadDashboardDocument, analyzeImpact, buildTrustChain, detectDomainFlows, getDomainTrustOverview, queryLineage, queryCompleteLineagePaths, LineageGraph, canonicalize, canonicalizeNotebook, diffDQL, diffNotebook, } from '@duckcodeailabs/dql-core';
8
8
  import { load as loadYaml } from 'js-yaml';
9
9
  import { listBlockTemplates } from './block-templates.js';
10
10
  import { getRunner as getLLMRunner } from './llm/index.js';
@@ -239,12 +239,67 @@ export async function startLocalServer(opts) {
239
239
  FROM information_schema.columns
240
240
  WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
241
241
  ORDER BY table_schema, table_name, ordinal_position`, [], runtimeVariables({}), connection);
242
- return buildAgentSchemaContext(question, result.rows);
242
+ const schemaContext = buildAgentSchemaContext(question, result.rows);
243
+ return enrichAgentSchemaContextWithValueMatches(question, schemaContext, executor, connection);
243
244
  }
244
245
  catch {
245
246
  return [];
246
247
  }
247
248
  };
249
/**
 * Run the default LLM provider against an app-research question and return
 * the fields of the `governed_answer` tool result it produced.
 *
 * @param {object} input - Research request; this function reads `question`,
 *   `intent`, `appId`, `dashboardId`, `sourceTileId`, `sourceBlockId`,
 *   `title`, and `context`.
 * @returns {Promise<object>} `sql`, `answer`, `result`, `analysisPlan`,
 *   `evidence`, `citations`, `suggestedViz`, `executionError`, `providerUsed`.
 * @throws {Error} When no provider/runner can be resolved, or when the run
 *   ends without a governed answer (the last reported provider error message
 *   is used when one was seen).
 */
const generateInvestigationSqlForApp = async (input) => {
    const provider = resolveDefaultLLMProvider(projectRoot);
    const llmRunner = provider ? getLLMRunner(provider) : null;
    if (!provider || !llmRunner) {
        throw new Error('No AI provider is configured. Configure OpenAI, Gemini, Ollama, or a custom OpenAI-compatible endpoint in Settings.');
    }

    // The request context travels to the runner as pretty-printed JSON in the
    // `upstream.sql` slot, so key order here is part of the payload.
    const envelope = {
        mode: 'app_research',
        intent: input.intent,
        appId: input.appId,
        dashboardId: input.dashboardId,
        sourceTileId: input.sourceTileId,
        sourceBlockId: input.sourceBlockId,
        title: input.title,
        instruction: 'Generate review-required read-only SQL when certified blocks do not exactly answer the requested research grain. Execute only through the bounded generated SQL preview path.',
        context: input.context,
    };

    // Filled in by the turn callback while the runner streams its turns.
    const outcome = { answer: undefined, failure: undefined };
    const recordTurn = (turn) => {
        if (turn.kind === 'tool_result' && turn.id === 'governed_answer') {
            outcome.answer = turn.output;
        }
        if (turn.kind === 'error') {
            outcome.failure = turn.message;
        }
    };

    const abortController = new AbortController();
    const request = {
        provider,
        messages: [{ role: 'user', content: input.question }],
        upstream: {
            cellId: `app-research:${input.appId}:${input.dashboardId ?? 'app'}`,
            sql: JSON.stringify(envelope, null, 2),
        },
        projectRoot,
        executeCertifiedBlock: executeCertifiedBlockForAgent,
        executeGeneratedSql: executeGeneratedSqlForAgent,
        getSchemaContext: getSchemaContextForAgent,
    };
    await llmRunner.run(request, recordTurn, abortController.signal);

    const governed = outcome.answer;
    if (!governed) {
        throw new Error(outcome.failure ?? 'The AI provider did not return a governed answer.');
    }
    return {
        sql: governed.proposedSql ?? governed.sql,
        answer: governed.answer ?? governed.text,
        result: governed.result,
        analysisPlan: governed.analysisPlan,
        evidence: governed.evidence,
        citations: governed.citations,
        suggestedViz: governed.suggestedViz,
        executionError: governed.executionError,
        providerUsed: governed.providerUsed,
    };
};
248
303
  // SSE clients for /api/watch hot-reload
249
304
  const sseClients = new Set();
250
305
  // Watch notebooks/, workbooks/, semantic-layer/, and data/ dirs for changes
@@ -303,6 +358,17 @@ export async function startLocalServer(opts) {
303
358
  ...candidate,
304
359
  validation: validateBlockStudioSource(candidate.dqlSource, semanticLayer),
305
360
  });
361
/**
 * Re-validate an import candidate and collect the reasons it must not be saved.
 *
 * @param {object} candidate - Import candidate to check.
 * @returns {{candidate: object, errors: string[]}} The re-validated candidate
 *   plus blocking messages; an empty `errors` array means it may be saved.
 */
const validateImportCandidateForSave = (candidate) => {
    const revalidated = validateImportCandidate(candidate);
    const errors = [];
    // A rejection is always reported ahead of any diagnostic messages.
    if (revalidated.reviewStatus === 'rejected') {
        errors.push('Candidate was rejected.');
    }
    for (const diagnostic of revalidated.validation?.diagnostics ?? []) {
        if (diagnostic.severity === 'error') {
            errors.push(diagnostic.message || 'Candidate validation failed.');
        }
    }
    return { candidate: revalidated, errors };
};
306
372
  const runBlockStudioPreviewSource = async (source, targetConnection = connection) => {
307
373
  let tableMapping;
308
374
  if (semanticLayer) {
@@ -777,6 +843,7 @@ export async function startLocalServer(opts) {
777
843
  path,
778
844
  projectRoot,
779
845
  executeSql: executeLocalSqlForStoredResult,
846
+ generateInvestigationSql: generateInvestigationSqlForApp,
780
847
  runNotebook: (appId, notebookPath) => runNotebookForApp(appId, notebookPath),
781
848
  });
782
849
  if (handled)
@@ -1514,25 +1581,32 @@ export async function startLocalServer(opts) {
1514
1581
  const nextCandidates = [...session.candidates];
1515
1582
  for (let i = 0; i < nextCandidates.length; i += 1) {
1516
1583
  const candidate = nextCandidates[i];
1517
- if (candidate.reviewStatus === 'saved' || candidate.reviewStatus === 'rejected' || candidate.validation?.valid === false)
1584
+ if (candidate.reviewStatus === 'saved' || candidate.reviewStatus === 'rejected')
1585
+ continue;
1586
+ const readiness = validateImportCandidateForSave(candidate);
1587
+ nextCandidates[i] = readiness.candidate;
1588
+ writeBlockStudioImportCandidate(projectRoot, importId, readiness.candidate);
1589
+ if (readiness.errors.length > 0) {
1590
+ errors.push({ candidateId: candidate.id, error: readiness.errors.join(' ') });
1518
1591
  continue;
1592
+ }
1519
1593
  try {
1520
1594
  const savedPath = saveBlockStudioArtifacts(projectRoot, {
1521
- source: candidate.dqlSource,
1522
- name: candidate.name,
1523
- domain: candidate.domain,
1524
- description: candidate.description,
1525
- owner: candidate.owner,
1526
- tags: candidate.tags,
1527
- lineage: candidate.lineage.sourceTables,
1595
+ source: readiness.candidate.dqlSource,
1596
+ name: readiness.candidate.name,
1597
+ domain: readiness.candidate.domain,
1598
+ description: readiness.candidate.description,
1599
+ owner: readiness.candidate.owner,
1600
+ tags: readiness.candidate.tags,
1601
+ lineage: readiness.candidate.lineage.sourceTables,
1528
1602
  importMeta: {
1529
1603
  importId,
1530
- candidateId: candidate.id,
1531
- sourceKind: candidate.sourceKind,
1532
- sourcePath: candidate.sourcePath,
1604
+ candidateId: readiness.candidate.id,
1605
+ sourceKind: readiness.candidate.sourceKind,
1606
+ sourcePath: readiness.candidate.sourcePath,
1533
1607
  },
1534
1608
  });
1535
- nextCandidates[i] = { ...candidate, reviewStatus: 'saved', savedPath };
1609
+ nextCandidates[i] = { ...readiness.candidate, reviewStatus: 'saved', savedPath };
1536
1610
  writeBlockStudioImportCandidate(projectRoot, importId, nextCandidates[i]);
1537
1611
  saved.push({ candidateId: candidate.id, path: savedPath });
1538
1612
  }
@@ -1625,22 +1699,39 @@ export async function startLocalServer(opts) {
1625
1699
  }
1626
1700
  if (req.method === 'POST' && candidateId && action === 'save') {
1627
1701
  const candidate = readBlockStudioImportCandidate(projectRoot, importId, candidateId);
1702
+ if (candidate.reviewStatus === 'saved' && candidate.savedPath) {
1703
+ const payload = openBlockStudioDocument(projectRoot, candidate.savedPath, semanticLayer);
1704
+ res.writeHead(200, { 'Content-Type': 'application/json; charset=utf-8' });
1705
+ res.end(serializeJSON({ candidate, block: payload }));
1706
+ return;
1707
+ }
1708
+ const readiness = validateImportCandidateForSave(candidate);
1709
+ if (readiness.errors.length > 0) {
1710
+ writeBlockStudioImportCandidate(projectRoot, importId, readiness.candidate);
1711
+ res.writeHead(422, { 'Content-Type': 'application/json; charset=utf-8' });
1712
+ res.end(serializeJSON({
1713
+ error: readiness.errors.join(' '),
1714
+ candidate: readiness.candidate,
1715
+ diagnostics: readiness.candidate.validation?.diagnostics ?? [],
1716
+ }));
1717
+ return;
1718
+ }
1628
1719
  const savedPath = saveBlockStudioArtifacts(projectRoot, {
1629
- source: candidate.dqlSource,
1630
- name: candidate.name,
1631
- domain: candidate.domain,
1632
- description: candidate.description,
1633
- owner: candidate.owner,
1634
- tags: candidate.tags,
1635
- lineage: candidate.lineage.sourceTables,
1720
+ source: readiness.candidate.dqlSource,
1721
+ name: readiness.candidate.name,
1722
+ domain: readiness.candidate.domain,
1723
+ description: readiness.candidate.description,
1724
+ owner: readiness.candidate.owner,
1725
+ tags: readiness.candidate.tags,
1726
+ lineage: readiness.candidate.lineage.sourceTables,
1636
1727
  importMeta: {
1637
1728
  importId,
1638
1729
  candidateId,
1639
- sourceKind: candidate.sourceKind,
1640
- sourcePath: candidate.sourcePath,
1730
+ sourceKind: readiness.candidate.sourceKind,
1731
+ sourcePath: readiness.candidate.sourcePath,
1641
1732
  },
1642
1733
  });
1643
- const next = { ...candidate, reviewStatus: 'saved', savedPath };
1734
+ const next = { ...readiness.candidate, reviewStatus: 'saved', savedPath };
1644
1735
  writeBlockStudioImportCandidate(projectRoot, importId, next);
1645
1736
  const payload = openBlockStudioDocument(projectRoot, savedPath, semanticLayer);
1646
1737
  res.writeHead(200, { 'Content-Type': 'application/json; charset=utf-8' });
@@ -1664,11 +1755,8 @@ export async function startLocalServer(opts) {
1664
1755
  if (req.method === 'GET' && path === '/api/block-studio/catalog') {
1665
1756
  try {
1666
1757
  const cfg = loadProjectConfig(projectRoot);
1667
- const connections = cfg.connections ?? {};
1668
- if (Object.keys(connections).length === 0 && cfg.defaultConnection) {
1669
- connections.default = cfg.defaultConnection;
1670
- }
1671
- const defaultKey = cfg.defaultConnection ? 'default' : Object.keys(connections)[0] ?? 'default';
1758
+ const connections = getProjectConnectionsForApi(cfg);
1759
+ const defaultKey = resolveDefaultConnectionKey(cfg, connections) ?? Object.keys(connections)[0] ?? 'default';
1672
1760
  const userPrefs = readUserPrefs(userPrefsPath);
1673
1761
  res.writeHead(200, { 'Content-Type': 'application/json; charset=utf-8' });
1674
1762
  res.end(serializeJSON({
@@ -1800,15 +1888,10 @@ export async function startLocalServer(opts) {
1800
1888
  }
1801
1889
  if (req.method === 'GET' && path === '/api/connections') {
1802
1890
  const cfg = loadProjectConfig(projectRoot);
1803
- const raw = cfg;
1804
- const connections = raw.connections ?? {};
1805
- // If no explicit connections map, surface the defaultConnection as "default"
1806
- if (Object.keys(connections).length === 0 && cfg.defaultConnection) {
1807
- connections['default'] = cfg.defaultConnection;
1808
- }
1809
- const defaultKey = raw.defaultConnection
1810
- ? 'default'
1811
- : Object.keys(connections)[0] ?? 'default';
1891
+ const connections = getProjectConnectionsForApi(cfg);
1892
+ const defaultKey = resolveDefaultConnectionKey(cfg, connections)
1893
+ ?? Object.keys(connections)[0]
1894
+ ?? 'default';
1812
1895
  const dbtProfiles = discoverDbtProfileConnections(projectRoot, cfg);
1813
1896
  res.writeHead(200, { 'Content-Type': 'application/json; charset=utf-8' });
1814
1897
  res.end(serializeJSON({ default: defaultKey, connections, dbtProfiles }));
@@ -1826,6 +1909,22 @@ export async function startLocalServer(opts) {
1826
1909
  if (body.connections && typeof body.connections === 'object') {
1827
1910
  raw.connections = body.connections;
1828
1911
  }
1912
+ const connections = getStoredConnections(raw);
1913
+ if (body.connections && typeof body.connections === 'object') {
1914
+ const requestedDefault = typeof body.defaultConnectionName === 'string'
1915
+ ? body.defaultConnectionName
1916
+ : typeof body.default === 'string'
1917
+ ? body.default
1918
+ : undefined;
1919
+ const defaultConnectionName = resolveDefaultConnectionKey(requestedDefault ? { ...raw, defaultConnectionName: requestedDefault } : raw, connections);
1920
+ delete raw.defaultConnection;
1921
+ if (defaultConnectionName) {
1922
+ raw.defaultConnectionName = defaultConnectionName;
1923
+ }
1924
+ else {
1925
+ delete raw.defaultConnectionName;
1926
+ }
1927
+ }
1829
1928
  writeFileSync(configPath, JSON.stringify(raw, null, 2) + '\n', 'utf-8');
1830
1929
  // Hot-swap: re-read the config and re-initialize the active connection
1831
1930
  projectConfig = loadProjectConfig(projectRoot);
@@ -3361,22 +3460,95 @@ export function loadProjectConfig(projectRoot) {
3361
3460
  }
3362
3461
  const raw = JSON.parse(readFileSync(configPath, 'utf-8'));
3363
3462
  const config = raw;
3364
- // Normalize modern `connections.default` format to `defaultConnection`
3365
- if (!config.defaultConnection && raw.connections) {
3366
- const connections = raw.connections;
3367
- const defaultConn = connections.default;
3368
- if (defaultConn?.driver) {
3369
- // Support both `filepath` (correct) and `path` (legacy/init compat)
3370
- const filepath = (defaultConn.filepath ?? defaultConn.path);
3371
- config.defaultConnection = {
3372
- ...defaultConn,
3373
- driver: defaultConn.driver,
3374
- ...(filepath ? { filepath } : {}),
3375
- };
3463
+ const connections = getStoredConnections(raw);
3464
+ const defaultConnectionName = resolveDefaultConnectionKey(raw, connections);
3465
+ if (defaultConnectionName) {
3466
+ const selected = normalizeStoredConnection(connections[defaultConnectionName]);
3467
+ if (selected) {
3468
+ config.defaultConnection = selected;
3469
+ config.defaultConnectionName = defaultConnectionName;
3470
+ }
3471
+ }
3472
+ else if (config.defaultConnection) {
3473
+ const normalized = normalizeStoredConnection(config.defaultConnection);
3474
+ if (normalized) {
3475
+ config.defaultConnection = normalized;
3376
3476
  }
3377
3477
  }
3378
3478
  return config;
3379
3479
  }
3480
/**
 * Build the connections map exposed by the API endpoints.
 *
 * @param {object} config - Loaded project config.
 * @returns {object} A copy of the stored `connections` map, or
 *   `{ default: config.defaultConnection }` when the map is empty and the
 *   legacy `defaultConnection` entry looks like a connection.
 */
function getProjectConnectionsForApi(config) {
    const stored = getStoredConnections(config);
    const hasStored = Object.keys(stored).length > 0;
    if (hasStored || !isConnectionLike(config.defaultConnection)) {
        return stored;
    }
    return { default: config.defaultConnection };
}
3487
/**
 * Return a shallow copy of `raw.connections`.
 *
 * @param {object} raw - Parsed project config.
 * @returns {object} Copy of the connections map, or `{}` when the field is
 *   missing, not an object, or an array.
 */
function getStoredConnections(raw) {
    const { connections } = raw;
    const isConnectionMap = connections !== null
        && typeof connections === 'object'
        && !Array.isArray(connections);
    // Copy so callers can add or remove keys without touching the config object.
    return isConnectionMap ? { ...connections } : {};
}
3494
/**
 * Choose which key of `connections` acts as the project's default connection.
 *
 * Precedence:
 *  1. the name configured in `raw`, when it refers to a usable connection;
 *  2. the `default` key, unless it is only a placeholder local connection;
 *  3. the single non-placeholder connection, when `default` is a placeholder;
 *  4. `default` if present, otherwise the first usable key.
 *
 * @param {object} raw - Parsed project config (read for the configured name).
 * @param {object} connections - Map of connection name to stored connection.
 * @returns {string|undefined} Chosen key, or `undefined` when no entry looks
 *   like a connection.
 */
function resolveDefaultConnectionKey(raw, connections) {
    const usableKeys = Object.keys(connections).filter((key) => isConnectionLike(connections[key]));
    if (usableKeys.length === 0) {
        return undefined;
    }

    const configuredName = readConfiguredDefaultConnectionName(raw);
    if (configuredName && usableKeys.includes(configuredName)) {
        return configuredName;
    }

    if (!usableKeys.includes('default')) {
        return usableKeys[0];
    }
    if (!isPlaceholderLocalConnection(connections.default)) {
        return 'default';
    }

    // `default` is just a placeholder: prefer the one real connection if it is unambiguous.
    const realKeys = usableKeys.filter((key) => !isPlaceholderLocalConnection(connections[key]));
    return realKeys.length === 1 ? realKeys[0] : 'default';
}
3514
/**
 * Read the explicitly configured default connection name from a raw config.
 *
 * Fields are checked in order: `defaultConnectionName`, `defaultConnectionKey`,
 * `currentConnection`, `default`. The first non-blank string wins.
 *
 * @param {object} raw - Parsed project config.
 * @returns {string|undefined} Trimmed name, or `undefined` when none is set.
 */
function readConfiguredDefaultConnectionName(raw) {
    const candidateFields = ['defaultConnectionName', 'defaultConnectionKey', 'currentConnection', 'default'];
    for (const field of candidateFields) {
        const candidate = raw[field];
        if (typeof candidate !== 'string') {
            continue;
        }
        const trimmed = candidate.trim();
        if (trimmed) {
            return trimmed;
        }
    }
    return undefined;
}
3522
/**
 * Normalize a stored connection entry to the `driver` / `filepath` shape.
 *
 * Legacy `type` is accepted as a fallback for `driver`, and legacy `path` as
 * a fallback for `filepath`; both legacy keys are removed from the result.
 *
 * @param {*} value - Stored connection entry.
 * @returns {object|null} Normalized copy, or `null` when `value` is not a
 *   plain object or has no non-blank string driver.
 */
function normalizeStoredConnection(value) {
    const isObjectEntry = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    if (!isObjectEntry) {
        return null;
    }
    const driverName = value.driver ?? value.type;
    if (typeof driverName !== 'string' || driverName.trim() === '') {
        return null;
    }

    let resolvedPath;
    if (typeof value.filepath === 'string') {
        resolvedPath = value.filepath;
    }
    else if (typeof value.path === 'string') {
        resolvedPath = value.path;
    }

    // Work on a copy; the legacy aliases never appear in the normalized form.
    const normalized = { ...value };
    delete normalized.path;
    delete normalized.type;
    normalized.driver = driverName.trim();
    if (resolvedPath) {
        normalized.filepath = resolvedPath;
    }
    return normalized;
}
3541
/**
 * Tell whether a value can be normalized into a connection.
 *
 * @param {*} value - Candidate connection entry.
 * @returns {boolean} `true` when `normalizeStoredConnection` accepts it.
 */
function isConnectionLike(value) {
    const normalized = normalizeStoredConnection(value);
    return normalized !== null;
}
3544
/**
 * Detect a placeholder local connection: a `duckdb` or `file` driver with no
 * file path, or with the `:memory:` path.
 *
 * @param {*} value - Stored connection entry.
 * @returns {boolean} `true` only for such placeholder entries; `false` for
 *   anything that is not a connection at all.
 */
function isPlaceholderLocalConnection(value) {
    const normalized = normalizeStoredConnection(value);
    if (normalized === null) {
        return false;
    }
    const isLocalDriver = normalized.driver === 'duckdb' || normalized.driver === 'file';
    const hasNoRealFile = !normalized.filepath || normalized.filepath === ':memory:';
    return isLocalDriver && hasNoRealFile;
}
3380
3552
  export function prepareLocalExecution(sql, connection, projectRoot, projectConfig) {
3381
3553
  const normalizedConnection = normalizeProjectConnection(connection, projectRoot);
3382
3554
  return {
@@ -3416,19 +3588,25 @@ export function buildAgentPreviewSql(sql) {
3416
3588
  if (!trimmed)
3417
3589
  throw new Error('Generated SQL preview is empty.');
3418
3590
  const withoutTrailingSemicolon = trimmed.replace(/;\s*$/, '').trim();
3419
- const scanSql = stripSqlStringsAndComments(withoutTrailingSemicolon).trim();
3591
+ const readOnlyError = readOnlySqlValidationError(withoutTrailingSemicolon, 'Generated SQL preview');
3592
+ if (readOnlyError)
3593
+ throw new Error(readOnlyError);
3594
+ return `SELECT * FROM (\n${withoutTrailingSemicolon}\n) AS dql_agent_preview LIMIT 200`;
3595
+ }
3596
+ function readOnlySqlValidationError(sql, subject) {
3597
+ const scanSql = stripSqlStringsAndComments(sql).trim();
3420
3598
  if (!/^(select|with)\b/i.test(scanSql)) {
3421
- throw new Error('Generated SQL preview only supports read-only SELECT or WITH queries.');
3599
+ return `${subject} only supports read-only SELECT or WITH queries.`;
3422
3600
  }
3423
3601
  if (scanSql.includes(';')) {
3424
- throw new Error('Generated SQL preview only supports one statement.');
3602
+ return `${subject} only supports one statement.`;
3425
3603
  }
3426
3604
  const forbiddenPattern = new RegExp(`\\b(${AGENT_PREVIEW_FORBIDDEN_SQL.join('|')})\\b`, 'i');
3427
3605
  const forbidden = scanSql.match(forbiddenPattern)?.[1];
3428
3606
  if (forbidden) {
3429
- throw new Error(`Generated SQL preview rejected unsupported statement keyword: ${forbidden.toUpperCase()}.`);
3607
+ return `${subject} rejected unsupported statement keyword: ${forbidden.toUpperCase()}.`;
3430
3608
  }
3431
- return `SELECT * FROM (\n${withoutTrailingSemicolon}\n) AS dql_agent_preview LIMIT 200`;
3609
+ return null;
3432
3610
  }
3433
3611
  function stripSqlStringsAndComments(sql) {
3434
3612
  let output = '';
@@ -4211,6 +4389,16 @@ export function validateBlockStudioSource(source, semanticLayer) {
4211
4389
  diagnostics.push(...resolvedCustomSql.diagnostics);
4212
4390
  executableSql = resolvedCustomSql.sql;
4213
4391
  }
4392
+ if (executableSql && semanticConfig.blockType !== 'semantic') {
4393
+ const readOnlyError = readOnlySqlValidationError(executableSql.trim().replace(/;\s*$/, '').trim(), 'Block SQL');
4394
+ if (readOnlyError) {
4395
+ diagnostics.push({
4396
+ severity: 'error',
4397
+ code: 'sql_read_only',
4398
+ message: readOnlyError,
4399
+ });
4400
+ }
4401
+ }
4214
4402
  const chartConfig = extractBlockStudioChartConfig(source);
4215
4403
  if (!chartConfig) {
4216
4404
  diagnostics.push({
@@ -4240,7 +4428,7 @@ export function validateBlockStudioSource(source, semanticLayer) {
4240
4428
  executableSql,
4241
4429
  };
4242
4430
  }
4243
- function saveBlockStudioArtifacts(projectRoot, options) {
4431
+ export function saveBlockStudioArtifacts(projectRoot, options) {
4244
4432
  const slug = options.name.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '') || 'block';
4245
4433
  const safeDomain = (options.domain ?? '')
4246
4434
  .trim()
@@ -6113,7 +6301,7 @@ function isAiPinRefreshDue(lastRefreshedAt) {
6113
6301
  return true;
6114
6302
  return Date.now() - last >= 24 * 60 * 60 * 1000;
6115
6303
  }
6116
- function buildAgentSchemaContext(question, rows) {
6304
+ export function buildAgentSchemaContext(question, rows) {
6117
6305
  const byRelation = new Map();
6118
6306
  for (const row of rows) {
6119
6307
  if (!row || typeof row !== 'object')
@@ -6141,13 +6329,54 @@ function buildAgentSchemaContext(question, rows) {
6141
6329
  byRelation.set(relation, current);
6142
6330
  }
6143
6331
  const tokens = agentSchemaTokens(question);
6332
+ const shouldProbeValues = extractAgentValueSearchTerms(question).length > 0;
6144
6333
  return Array.from(byRelation.values())
6145
- .map((table) => ({ table, score: scoreAgentSchemaTable(table, tokens) }))
6334
+ .map((table) => ({
6335
+ table,
6336
+ score: scoreAgentSchemaTable(table, tokens) + (shouldProbeValues ? scoreAgentValueProbeTable(table) : 0),
6337
+ }))
6146
6338
  .filter((entry) => entry.score > 0)
6147
6339
  .sort((a, b) => b.score - a.score || a.table.relation.localeCompare(b.table.relation))
6148
6340
  .slice(0, 12)
6149
6341
  .map((entry) => entry.table);
6150
6342
  }
6343
/**
 * Attach sample values to schema-context columns whose data matches literal
 * terms mentioned in the question.
 *
 * At most 12 ranked columns are probed, one query at a time. Each hit
 * contributes up to five distinct values, merged into that column's
 * `sampleValues` (capped at five).
 *
 * @param {string} question - User question to extract search terms from.
 * @param {object[]} schemaContext - Tables with `relation` and `columns`.
 * @param {object} executor - Object exposing `executeQuery(sql, params, variables, connection)`.
 * @param {object} connection - Connection the probes run against.
 * @returns {Promise<object[]>} The original array when nothing matched,
 *   otherwise a new array with enriched copies of the matched tables.
 */
async function enrichAgentSchemaContextWithValueMatches(question, schemaContext, executor, connection) {
    const terms = extractAgentValueSearchTerms(question);
    if (schemaContext.length === 0 || terms.length === 0) {
        return schemaContext;
    }

    // relation -> (column name -> matched sample values)
    const hitsByRelation = new Map();
    const probeTargets = rankAgentValueProbeColumns(schemaContext).slice(0, 12);
    for (const { table, column } of probeTargets) {
        try {
            const probeSql = buildAgentValueProbeSql(table, column.name, terms, connection);
            const probe = await executor.executeQuery(probeSql, [], runtimeVariables({}), connection);
            const samples = uniqueStrings(probe.rows.flatMap(valueProbeRowValues)).slice(0, 5);
            if (samples.length > 0) {
                if (!hitsByRelation.has(table.relation)) {
                    hitsByRelation.set(table.relation, new Map());
                }
                hitsByRelation.get(table.relation).set(column.name, samples);
            }
        }
        catch {
            // Value probes are advisory. Unsupported casts, privileges, and large-table
            // failures should not block the metadata-backed answer path.
        }
    }

    if (hitsByRelation.size === 0) {
        return schemaContext;
    }
    return schemaContext.map((table) => {
        const columnHits = hitsByRelation.get(table.relation);
        if (!columnHits) {
            return table;
        }
        const columns = table.columns.map((column) => {
            const found = columnHits.get(column.name);
            if (!found?.length) {
                return column;
            }
            const merged = uniqueStrings([...(column.sampleValues ?? []), ...found]);
            return { ...column, sampleValues: merged.slice(0, 5) };
        });
        return { ...table, columns };
    });
}
6151
6380
  function scoreAgentSchemaTable(table, tokens) {
6152
6381
  let score = 0;
6153
6382
  const relationTokens = agentSchemaTokens(`${table.schema ?? ''} ${table.name} ${table.relation}`);
@@ -6166,6 +6395,146 @@ function scoreAgentSchemaTable(table, tokens) {
6166
6395
  score += 1;
6167
6396
  return score;
6168
6397
  }
6398
/**
 * Score how promising a table is for value probing.
 *
 * Adds 5 when the table name carries an entity-like token, 2 for every
 * probeable column, and 2 more when that column's name carries an
 * identity-like token. The total is capped at 18.
 *
 * @param {object} table - Table with `name` and `columns`.
 * @returns {number} Score between 0 and 18.
 */
function scoreAgentValueProbeTable(table) {
    const tableBonus = hasAgentSchemaToken(table.name, ['account', 'customer', 'member', 'order', 'product', 'sku', 'subscriber', 'user'])
        ? 5
        : 0;
    const columnPoints = table.columns
        .filter((column) => isAgentValueProbeColumn(column))
        .reduce((sum, column) => {
            const isIdentityLike = hasAgentSchemaToken(column.name, ['account', 'customer', 'email', 'full', 'member', 'name', 'product', 'sku', 'user']);
            return sum + (isIdentityLike ? 4 : 2);
        }, 0);
    return Math.min(tableBonus + columnPoints, 18);
}
6411
/**
 * List every probeable column in the schema context, best candidates first.
 *
 * @param {object[]} schemaContext - Tables with `relation` and `columns`.
 * @returns {Array<{table: object, column: object, score: number}>} Entries
 *   sorted by descending score, then by relation, then by column name.
 */
function rankAgentValueProbeColumns(schemaContext) {
    const candidates = schemaContext.flatMap((table) => table.columns
        .filter((column) => isAgentValueProbeColumn(column))
        .map((column) => ({
            table,
            column,
            score: scoreAgentValueProbeColumn(table, column),
        })));
    candidates.sort((a, b) => b.score - a.score
        || a.table.relation.localeCompare(b.table.relation)
        || a.column.name.localeCompare(b.column.name));
    return candidates;
}
6426
/**
 * Score one column as a value-probe candidate.
 *
 * @param {object} table - Owning table (its `name` is inspected).
 * @param {object} column - Column (its `name` is inspected).
 * @returns {number} Sum of the points of every rule that matched: 4 for an
 *   entity-like table name, 8 for an identity-like column name, 3 for a
 *   key/attribute-like column name.
 */
function scoreAgentValueProbeColumn(table, column) {
    const weightedRules = [
        {
            subject: table.name,
            words: ['account', 'customer', 'member', 'product', 'sku', 'subscriber', 'user'],
            points: 4,
        },
        {
            subject: column.name,
            words: ['full', 'name', 'email', 'account', 'customer', 'member', 'product', 'sku', 'subscriber', 'user'],
            points: 8,
        },
        {
            subject: column.name,
            words: ['id', 'key', 'code', 'number', 'status', 'segment', 'region', 'category', 'type'],
            points: 3,
        },
    ];
    return weightedRules.reduce((total, rule) => (hasAgentSchemaToken(rule.subject, rule.words) ? total + rule.points : total), 0);
}
6436
/**
 * Decide whether a column may be sampled by an agent value probe.
 *
 * A column qualifies when (1) its name carries no secret-like word, (2) its
 * name carries one of the allowed entity/attribute tokens, and (3) its type is
 * unknown or textual.
 *
 * @param {{name: string, type?: string}} column - Column metadata.
 * @returns {boolean} `true` when the column is safe and useful to probe.
 */
function isAgentValueProbeColumn(column) {
    const name = column.name.toLowerCase();
    // `\b` treats `_` as a word character, so it never fires inside snake_case
    // names such as `password_hash` or `user_token`. Match on explicit
    // non-alphanumeric separators instead, split camelCase first
    // (`accountSecret`), and accept simple plurals (`api_tokens`).
    const separatedName = column.name.replace(/([a-z0-9])([A-Z])/g, '$1_$2').toLowerCase();
    if (/(?:^|[^a-z0-9])(?:password|secret|token|credential|hash|salt)s?(?:$|[^a-z0-9])/.test(separatedName))
        return false;
    if (!hasAgentSchemaToken(name, [
        'account',
        'category',
        'channel',
        'city',
        'code',
        'country',
        'customer',
        'email',
        'full',
        'id',
        'key',
        'member',
        'name',
        'number',
        'product',
        'region',
        'segment',
        'sku',
        'state',
        'status',
        'subscriber',
        'type',
        'user',
    ])) {
        return false;
    }
    const type = column.type?.toLowerCase() ?? '';
    // No type metadata: allow the probe and let the query itself decide.
    if (!type)
        return true;
    // Substring match so prefixed/suffixed text types (`nvarchar`, `nchar`,
    // `longtext`, `varchar2`, `bpchar`) count as textual too.
    return /(char|clob|email|string|text|uuid)/.test(type);
}
6472
/**
 * Build the SQL for one value probe: up to five distinct values of `column`
 * whose text form contains any of the first five search terms,
 * case-insensitively.
 *
 * @param {{relation: string}} table - Table to probe.
 * @param {string} column - Unquoted column name.
 * @param {string[]} searchTerms - Literal terms to look for; callers are
 *   expected to pass at least one.
 * @param {{driver: string}} connection - Target connection; selects quoting,
 *   the text cast type, and the row-limit syntax.
 * @returns {string} A single read-only SELECT statement.
 */
function buildAgentValueProbeSql(table, column, searchTerms, connection) {
    const relation = quoteAgentRelation(table.relation, connection);
    const identifier = quoteAgentIdentifier(column, connection);
    const textValue = `CAST(${identifier} AS ${agentTextCastType(connection.driver)})`;
    const predicates = searchTerms
        .slice(0, 5)
        .map((term) => {
            // `!` is the LIKE escape character because it needs no escaping inside
            // a string literal in any dialect. A backslash escape renders as
            // `ESCAPE '\\'`, which is two characters (and therefore an error)
            // wherever backslashes are not string escapes, e.g. DuckDB,
            // PostgreSQL, SQLite and SQL Server.
            // Backslashes in the term become the single-character wildcard `_`
            // so the literal reads the same whether or not the dialect treats
            // `\` as a string escape.
            const pattern = term
                .toLowerCase()
                .replace(/[!%_]/g, '!$&')
                .replace(/\\/g, '_');
            return `LOWER(${textValue}) LIKE ${sqlStringLiteral(`%${pattern}%`)} ESCAPE '!'`;
        })
        .join(' OR ');
    // T-SQL has no LIMIT clause; it caps rows with TOP in the select list.
    const usesTop = connection.driver === 'mssql' || connection.driver === 'fabric';
    const lines = [
        `SELECT DISTINCT ${usesTop ? 'TOP 5 ' : ''}${textValue} AS value`,
        `FROM ${relation}`,
        `WHERE ${identifier} IS NOT NULL AND (${predicates})`,
    ];
    if (!usesTop) {
        lines.push('LIMIT 5');
    }
    return lines.join('\n');
}
6487
/**
 * Name of the text type to use in `CAST(... AS <type>)` for a driver.
 *
 * @param {string} driver - Connection driver name.
 * @returns {string} Driver-specific text type; `VARCHAR` for any other driver.
 */
function agentTextCastType(driver) {
    const castTypeByDriver = new Map([
        ['bigquery', 'STRING'],
        ['clickhouse', 'String'],
        ['fabric', 'NVARCHAR(MAX)'],
        ['mssql', 'NVARCHAR(MAX)'],
        ['mysql', 'CHAR'],
        ['sqlite', 'TEXT'],
    ]);
    return castTypeByDriver.get(driver) ?? 'VARCHAR';
}
6504
/**
 * Quote a dotted relation name part by part.
 *
 * @param {string} relation - Relation such as `schema.table`; it is split on
 *   every `.` character.
 * @param {object} connection - Connection whose driver selects the quoting.
 * @returns {string} The quoted parts re-joined with `.`.
 */
function quoteAgentRelation(relation, connection) {
    const quotedParts = [];
    for (const segment of relation.split('.')) {
        quotedParts.push(quoteAgentIdentifier(segment, connection));
    }
    return quotedParts.join('.');
}
6507
/**
 * Quote one identifier using the dialect registered for the connection's driver.
 *
 * @param {string} identifier - Unquoted identifier.
 * @param {{driver: string}} connection - Connection providing the driver name.
 * @returns {string} Whatever the dialect's `quoteIdentifier` returns.
 */
function quoteAgentIdentifier(identifier, connection) {
    const dialect = getDialect(connection.driver);
    return dialect.quoteIdentifier(identifier);
}
6510
/**
 * Render a string as a single-quoted SQL literal, doubling embedded quotes.
 *
 * @param {string} value - Raw text.
 * @returns {string} The quoted literal.
 */
function sqlStringLiteral(value) {
    const escaped = value.split("'").join("''");
    return `'${escaped}'`;
}
6513
/**
 * Backslash-escape the characters that are special in a LIKE pattern
 * (`\`, `%`, `_`).
 *
 * @param {string} value - Literal text to embed in a LIKE pattern.
 * @returns {string} Text with each special character prefixed by a backslash.
 */
function escapeSqlLike(value) {
    // `$&` re-inserts the matched character after the added backslash.
    return value.replace(/[\\%_]/g, '\\$&');
}
6516
/**
 * Collect the scalar cell values of a probe result row as trimmed strings.
 *
 * @param {*} row - Result row; anything that is not a non-null object yields `[]`.
 * @returns {string[]} String, number and boolean cells, stringified and
 *   trimmed, with blank results dropped.
 */
function valueProbeRowValues(row) {
    if (row === null || typeof row !== 'object') {
        return [];
    }
    const scalarKinds = new Set(['string', 'number', 'boolean']);
    const collected = [];
    for (const cell of Object.values(row)) {
        if (!scalarKinds.has(typeof cell)) {
            continue;
        }
        const text = String(cell).trim();
        if (text) {
            collected.push(text);
        }
    }
    return collected;
}
6526
/**
 * Remove case-insensitive duplicates, keeping the first spelling of each value.
 *
 * @param {Iterable<string>} values - Strings in priority order.
 * @returns {string[]} New array with later case-variants dropped.
 */
function uniqueStrings(values) {
    const seenLowercase = new Set();
    return [...values].filter((value) => {
        const key = value.toLowerCase();
        if (seenLowercase.has(key)) {
            return false;
        }
        seenLowercase.add(key);
        return true;
    });
}
6169
6538
  function agentSchemaTokens(value) {
6170
6539
  const tokens = new Set();
6171
6540
  for (const raw of value.toLowerCase().match(/[a-z0-9_]+/g) ?? []) {
@@ -6178,10 +6547,60 @@ function agentSchemaTokens(value) {
6178
6547
  }
6179
6548
  return tokens;
6180
6549
  }
6550
/**
 * Check whether the tokens derived from `value` include any expected token.
 *
 * @param {string} value - Text to tokenize with `agentSchemaTokens`.
 * @param {string[]} expected - Tokens to look for.
 * @returns {boolean} `true` on the first expected token that is present.
 */
function hasAgentSchemaToken(value, expected) {
    const presentTokens = agentSchemaTokens(value);
    for (const wanted of expected) {
        if (presentTokens.has(wanted)) {
            return true;
        }
    }
    return false;
}
6554
/**
 * Pull literal-looking search terms out of a natural-language question.
 *
 * Four heuristics run in order: quoted text, email addresses, runs of two to
 * four capitalized words, and up to four words following a cue word such as
 * "for", "named" or "customer". Terms are cleaned, filtered (at least three
 * characters and not a stop phrase), de-duplicated case-insensitively, and
 * capped at six.
 *
 * @param {string} question - User question.
 * @returns {string[]} Up to six candidate search terms.
 */
export function extractAgentValueSearchTerms(question) {
    // `group` is the match index that holds the term for each pattern.
    const extractors = [
        { pattern: /["']([^"']{3,120})["']/g, group: 1 },
        { pattern: /\b[\w.%+-]+@[\w.-]+\.[A-Za-z]{2,}\b/g, group: 0 },
        { pattern: /\b[A-Z][a-z0-9]+(?:\s+[A-Z][a-z0-9]+){1,3}\b/g, group: 0 },
        { pattern: /\b(?:for|named|called|only|where|customer|user|account|product)\s+([A-Za-z0-9@._-]+(?:\s+[A-Za-z0-9@._-]+){0,3})/gi, group: 1 },
    ];
    const rawTerms = extractors.flatMap(({ pattern, group }) => Array.from(question.matchAll(pattern), (match) => match[group]));
    const usableTerms = rawTerms
        .map((term) => cleanAgentValueSearchTerm(term))
        .filter((term) => term.length >= 3 && !AGENT_VALUE_SEARCH_STOP_PHRASES.has(term.toLowerCase()));
    return uniqueStrings(usableTerms).slice(0, 6);
}
6572
/**
 * Tidy a raw search term extracted from a question.
 *
 * Steps, in order: drop trailing punctuation, collapse whitespace, trim, drop
 * a leading role word ("customer Acme" -> "Acme"), then cut everything from a
 * trailing time qualifier ("last ...", "next ...", "this ...", "weekly ...").
 *
 * @param {string} term - Raw captured term.
 * @returns {string} Cleaned term (possibly empty).
 */
function cleanAgentValueSearchTerm(term) {
    let cleaned = term.replace(/[?.,;:]+$/g, '');
    cleaned = cleaned.replace(/\s+/g, ' ').trim();
    const cutPatterns = [
        /^(?:account|customer|member|named|called|product|sku|subscriber|user)\s+/i,
        /\s+\b(?:last|next|this)\b.*$/i,
        /\s+\b(?:last|this)\s+(?:day|week|month|quarter|year)\b.*$/i,
        /\s+\b(?:daily|weekly|monthly|quarterly|yearly)\b.*$/i,
    ];
    for (const pattern of cutPatterns) {
        cleaned = cleaned.replace(pattern, '');
    }
    return cleaned.trim();
}
6181
6583
  const AGENT_SCHEMA_STOPWORDS = new Set([
6182
6584
  'all', 'and', 'are', 'can', 'data', 'for', 'from', 'have', 'how', 'many', 'me',
6183
6585
  'show', 'the', 'this', 'who', 'with', 'value',
6184
6586
  ]);
6587
/**
 * Lower-cased phrases that are never useful as value-search terms: bare role
 * words and relative time periods.
 *
 * The `next ...` periods are listed alongside `last ...` / `this ...` because
 * term cleaning treats all three qualifiers alike but can only strip one that
 * follows other text. A term that is nothing but "next week" must be rejected
 * here.
 */
const AGENT_VALUE_SEARCH_STOP_PHRASES = new Set([
    'account',
    'customer',
    'last week',
    'this week',
    'next week',
    'last month',
    'this month',
    'next month',
    'last quarter',
    'this quarter',
    'next quarter',
    'last year',
    'this year',
    'next year',
    'member',
    'product',
    'sku',
    'subscriber',
    'user',
]);
6185
6604
  function normalizeAgentSchemaToken(token) {
6186
6605
  if (token === 'orders')
6187
6606
  return 'order';