@hypequery/datasets 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +498 -0
  2. package/dist/api.type-test.d.ts +2 -0
  3. package/dist/api.type-test.d.ts.map +1 -0
  4. package/dist/api.type-test.js +103 -0
  5. package/dist/catalog.d.ts +68 -0
  6. package/dist/catalog.d.ts.map +1 -0
  7. package/dist/catalog.js +105 -0
  8. package/dist/constants.d.ts +13 -0
  9. package/dist/constants.d.ts.map +1 -1
  10. package/dist/constants.js +26 -0
  11. package/dist/dataset-query.d.ts +16 -0
  12. package/dist/dataset-query.d.ts.map +1 -0
  13. package/dist/dataset-query.js +56 -0
  14. package/dist/dataset.d.ts +1 -1
  15. package/dist/dataset.d.ts.map +1 -1
  16. package/dist/dataset.js +22 -157
  17. package/dist/executor.d.ts +42 -14
  18. package/dist/executor.d.ts.map +1 -1
  19. package/dist/executor.js +188 -36
  20. package/dist/formulas.d.ts +1 -1
  21. package/dist/formulas.d.ts.map +1 -1
  22. package/dist/formulas.js +27 -12
  23. package/dist/in-memory-backend.d.ts +5 -0
  24. package/dist/in-memory-backend.d.ts.map +1 -0
  25. package/dist/in-memory-backend.js +221 -0
  26. package/dist/index.d.ts +11 -5
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +8 -5
  29. package/dist/internal.d.ts +23 -0
  30. package/dist/internal.d.ts.map +1 -0
  31. package/dist/internal.js +19 -0
  32. package/dist/measure.d.ts.map +1 -1
  33. package/dist/measure.js +1 -0
  34. package/dist/query-builder-protocol.d.ts +2 -2
  35. package/dist/query-builder-protocol.d.ts.map +1 -1
  36. package/dist/query-builder-protocol.js +1 -1
  37. package/dist/query-helpers.d.ts +12 -12
  38. package/dist/query-helpers.d.ts.map +1 -1
  39. package/dist/query-planner.d.ts +9 -7
  40. package/dist/query-planner.d.ts.map +1 -1
  41. package/dist/query-planner.js +26 -9
  42. package/dist/registry.d.ts +1 -1
  43. package/dist/registry.js +1 -1
  44. package/dist/relationships.d.ts +1 -1
  45. package/dist/relationships.js +1 -1
  46. package/dist/semantic-plan.d.ts +82 -0
  47. package/dist/semantic-plan.d.ts.map +1 -0
  48. package/dist/semantic-plan.js +1 -0
  49. package/dist/semantic-planner.d.ts +5 -0
  50. package/dist/semantic-planner.d.ts.map +1 -0
  51. package/dist/semantic-planner.js +155 -0
  52. package/dist/sql-utils.d.ts +1 -1
  53. package/dist/sql-utils.js +4 -4
  54. package/dist/tools.d.ts +53 -0
  55. package/dist/tools.d.ts.map +1 -0
  56. package/dist/tools.js +322 -0
  57. package/dist/types.d.ts +130 -52
  58. package/dist/types.d.ts.map +1 -1
  59. package/dist/utils/dataset-contract.d.ts +3 -0
  60. package/dist/utils/dataset-contract.d.ts.map +1 -0
  61. package/dist/utils/dataset-contract.js +30 -0
  62. package/dist/utils/dataset-metric-ref.d.ts +9 -0
  63. package/dist/utils/dataset-metric-ref.d.ts.map +1 -0
  64. package/dist/utils/dataset-metric-ref.js +39 -0
  65. package/dist/utils/dataset-normalization.d.ts +10 -0
  66. package/dist/utils/dataset-normalization.d.ts.map +1 -0
  67. package/dist/utils/dataset-normalization.js +35 -0
  68. package/dist/utils/dataset-query-validation.d.ts +4 -0
  69. package/dist/utils/dataset-query-validation.d.ts.map +1 -0
  70. package/dist/utils/dataset-query-validation.js +96 -0
  71. package/dist/utils/dataset-validation.d.ts +6 -0
  72. package/dist/utils/dataset-validation.d.ts.map +1 -0
  73. package/dist/utils/dataset-validation.js +42 -0
  74. package/dist/utils/derived-cte-validation.d.ts +3 -0
  75. package/dist/utils/derived-cte-validation.d.ts.map +1 -0
  76. package/dist/utils/derived-cte-validation.js +32 -0
  77. package/dist/utils/filtered-aggregation-sql.d.ts +5 -0
  78. package/dist/utils/filtered-aggregation-sql.d.ts.map +1 -0
  79. package/dist/utils/filtered-aggregation-sql.js +73 -0
  80. package/dist/utils/metric-handle.d.ts +11 -0
  81. package/dist/utils/metric-handle.d.ts.map +1 -0
  82. package/dist/utils/metric-handle.js +36 -0
  83. package/dist/utils/pagination.d.ts +17 -0
  84. package/dist/utils/pagination.d.ts.map +1 -0
  85. package/dist/utils/pagination.js +23 -0
  86. package/dist/utils/tenant-runtime.d.ts +14 -0
  87. package/dist/utils/tenant-runtime.d.ts.map +1 -0
  88. package/dist/utils/tenant-runtime.js +36 -0
  89. package/package.json +14 -2
@@ -0,0 +1,82 @@
1
+ import type { AggregationType, FieldType, MetricFilter, MetricOrderBy, TimeGrain } from './types.js';
2
/** Arithmetic operators allowed in a `binary` semantic expression node. */
export type SemanticBinaryOperator =
    | 'add'
    | 'subtract'
    | 'multiply'
    | 'divide';

/** Function names allowed in a `function` semantic expression node. */
export type SemanticFunctionName =
    | 'nullIfZero'
    | 'coalesce'
    | 'round'
    | 'floor'
    | 'ceil';
4
/**
 * Backend-neutral expression tree, discriminated by `kind`.
 *
 * - `ref`: a reference to a named value.
 * - `literal`: a constant string, number, boolean or null.
 * - `binary`: an arithmetic operation over two sub-expressions.
 * - `function`: a named function applied to a list of sub-expressions.
 */
export type SemanticExpression =
    | {
        kind: 'ref';
        name: string;
    }
    | {
        kind: 'literal';
        value: string | number | boolean | null;
    }
    | {
        kind: 'binary';
        operator: SemanticBinaryOperator;
        left: SemanticExpression;
        right: SemanticExpression;
    }
    | {
        kind: 'function';
        name: SemanticFunctionName;
        args: SemanticExpression[];
    };
20
/** A dimension selected for grouping in an aggregate plan. */
export interface SemanticDimensionPlan {
    /** Dimension name as requested in the query. */
    name: string;
    /** Resolved field the dimension reads from. */
    field: string;
    /** Optional declared type of the field. */
    fieldType?: FieldType;
}
25
/** A single named aggregation computed by an aggregate plan. */
export interface SemanticAggregationPlan {
    /** Output name of the aggregated value. */
    name: string;
    /** Aggregation to apply. */
    aggregation: AggregationType;
    /** Resolved field being aggregated. */
    field: string;
    /** Optional filters that apply to this aggregation only. */
    filters?: MetricFilter[];
}
31
/** Time bucketing applied to an aggregate plan. */
export interface SemanticGrainPlan {
    /** Field holding the timestamp to bucket. */
    field: string;
    /** Bucket size. */
    unit: TimeGrain;
    /** Name of the output column carrying the bucket; always `'period'`. */
    output: 'period';
    /** Optional timezone identifier. NOTE(review): expected format is not visible here. */
    timezone?: string;
    /** Optional first day of the week, 0-6. NOTE(review): which weekday 0 denotes is not visible here. */
    weekStart?: 0 | 1 | 2 | 3 | 4 | 5 | 6;
}
38
/**
 * A node of a semantic query plan, discriminated by `kind`.
 *
 * - `aggregate`: groups a source by dimensions (and optionally a time grain),
 *   computes aggregations, and applies filters, ordering, pagination and an
 *   optional tenant predicate (`eq` with one value or `in` with a list).
 * - `derive`: wraps another plan node and computes named metrics from
 *   expressions, with its own ordering and pagination.
 */
export type PlanNode =
    | {
        kind: 'aggregate';
        source: string;
        dimensions: SemanticDimensionPlan[];
        aggregations: SemanticAggregationPlan[];
        filters: MetricFilter[];
        grain?: SemanticGrainPlan;
        orderBy?: MetricOrderBy[];
        limit?: number;
        offset?: number;
        tenant?:
            | {
                field: string;
                operator: 'eq';
                value: string;
            }
            | {
                field: string;
                operator: 'in';
                value: string[];
            };
    }
    | {
        kind: 'derive';
        input: PlanNode;
        metrics: Array<{
            name: string;
            expression: SemanticExpression;
        }>;
        orderBy?: MetricOrderBy[];
        limit?: number;
        offset?: number;
    };
68
/**
 * Result returned by a semantic backend.
 *
 * @typeParam T - Shape of a single result row.
 */
export interface SemanticBackendResult<T = Record<string, unknown>> {
    /** Result rows. */
    data: T[];
    /** Optional execution metadata. */
    meta?: {
        /** Execution time in milliseconds. */
        timingMs?: number;
        /** SQL text associated with the execution, when the backend reports it. */
        sql?: string;
        /** Tenant associated with the execution, when the backend reports it. */
        tenant?: string;
    };
}
76
/** Contract implemented by a backend that can run semantic plans. */
export interface SemanticBackend {
    /**
     * Runs a plan and resolves with its rows and optional metadata.
     *
     * @typeParam T - Shape of a single result row.
     * @param plan - The plan to execute.
     */
    execute<T = Record<string, unknown>>(plan: PlanNode): Promise<SemanticBackendResult<T>>;

    /**
     * Optional: describes how a plan would run, without the result rows.
     *
     * @param plan - The plan to explain.
     * @returns An object that may carry the SQL for the plan.
     */
    explain?(plan: PlanNode): Promise<{
        sql?: string;
    }>;
}
82
+ //# sourceMappingURL=semantic-plan.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-plan.d.ts","sourceRoot":"","sources":["../src/semantic-plan.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,SAAS,EACT,YAAY,EACZ,aAAa,EACb,SAAS,EACV,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,sBAAsB,GAAG,KAAK,GAAG,UAAU,GAAG,UAAU,GAAG,QAAQ,CAAC;AAChF,MAAM,MAAM,oBAAoB,GAAG,YAAY,GAAG,UAAU,GAAG,OAAO,GAAG,OAAO,GAAG,MAAM,CAAC;AAE1F,MAAM,MAAM,kBAAkB,GAC1B;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC7B;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAA;CAAE,GAC5D;IACA,IAAI,EAAE,QAAQ,CAAC;IACf,QAAQ,EAAE,sBAAsB,CAAC;IACjC,IAAI,EAAE,kBAAkB,CAAC;IACzB,KAAK,EAAE,kBAAkB,CAAC;CAC3B,GACC;IACA,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,EAAE,oBAAoB,CAAC;IAC3B,IAAI,EAAE,kBAAkB,EAAE,CAAC;CAC5B,CAAC;AAEJ,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,eAAe,CAAC;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,SAAS,CAAC;IAChB,MAAM,EAAE,QAAQ,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;CACvC;AAED,MAAM,MAAM,QAAQ,GAChB;IACA,IAAI,EAAE,WAAW,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,qBAAqB,EAAE,CAAC;IACpC,YAAY,EAAE,uBAAuB,EAAE,CAAC;IACxC,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,KAAK,CAAC,EAAE,iBAAiB,CAAC;IAC1B,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,IAAI,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,IAAI,CAAC;QAAC,KAAK,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;CAChH,GACC;IACA,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,QAAQ,CAAC;IAChB,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,kBAAkB,CAAA;KAAE,CAAC,CAAC;IACjE,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAC;AAEJ,MAAM,WAAW,
qBAAqB,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IAChE,IAAI,EAAE,CAAC,EAAE,CAAC;IACV,IAAI,CAAC,EAAE;QACL,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC,CAAC;IACxF,OAAO,CAAC,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC;QAAE,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACrD"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,5 @@
1
+ import type { AnyDatasetInstance, DatasetQuery, ExecutionContext, MetricQuery, MetricRef, GrainedMetricRef } from './types.js';
2
+ import type { PlanNode } from './semantic-plan.js';
3
/**
 * Builds a semantic plan for a dataset query.
 *
 * @param ds - Dataset to plan against.
 * @param query - Dataset query; optional.
 * @param context - Optional execution context.
 * @returns The plan node for the query.
 */
export declare function buildDatasetPlan(
    ds: AnyDatasetInstance,
    query?: DatasetQuery,
    context?: ExecutionContext,
): PlanNode;
4
/**
 * Builds a semantic plan for a single metric.
 *
 * @param metric - Metric reference, optionally carrying a time grain.
 * @param query - Metric query; optional.
 * @param context - Optional execution context.
 * @returns The plan node for the metric.
 */
export declare function buildMetricPlan(
    metric: MetricRef | GrainedMetricRef,
    query?: MetricQuery,
    context?: ExecutionContext,
): PlanNode;
5
+ //# sourceMappingURL=semantic-planner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-planner.d.ts","sourceRoot":"","sources":["../src/semantic-planner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAClB,YAAY,EACZ,gBAAgB,EAEhB,WAAW,EACX,SAAS,EACT,gBAAgB,EACjB,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EACV,QAAQ,EAGT,MAAM,oBAAoB,CAAC;AAgH5B,wBAAgB,gBAAgB,CAC9B,EAAE,EAAE,kBAAkB,EACtB,KAAK,GAAE,YAAiB,EACxB,OAAO,CAAC,EAAE,gBAAgB,GACzB,QAAQ,CAaV;AAuFD,wBAAgB,eAAe,CAC7B,MAAM,EAAE,SAAS,GAAG,gBAAgB,EACpC,KAAK,GAAE,WAAgB,EACvB,OAAO,CAAC,EAAE,gBAAgB,GACzB,QAAQ,CAUV"}
@@ -0,0 +1,155 @@
1
+ import { getMetricGrain, getMetricRef, } from './utils/metric-handle.js';
2
+ import { validateDatasetQuery } from './dataset-query.js';
3
+ import { isSupportedTimeGrain } from './constants.js';
4
+ import { getRuntimeTenantPredicate } from './utils/tenant-runtime.js';
5
/**
 * Maps a dimension name to the field it reads from.
 *
 * @param ds - Dataset whose `dimensions` map is consulted.
 * @param field - Dimension name, or an already-resolved field name.
 * @returns The dimension's `column` when defined, otherwise `field` unchanged.
 * @throws {Error} When the dimension is defined through a raw `sql` expression.
 */
function resolveField(ds, field) {
    const definition = ds.dimensions[field];
    if (definition == null) {
        // Not a declared dimension: treat the name as the field itself.
        return field;
    }
    if (definition.sql) {
        throw new Error(`Semantic backend plans do not support SQL-backed dimension "${field}". ` +
            'Use a backend-specific query builder for raw SQL expressions.');
    }
    return definition.column ?? field;
}
13
/**
 * Converts requested dimension names into dimension plan entries.
 *
 * @param ds - Dataset that declares the dimensions.
 * @param dimensions - Requested dimension names; defaults to none.
 * @returns One `{ name, field, fieldType }` entry per requested dimension.
 * @throws {Error} When a requested dimension is SQL-backed (via `resolveField`).
 */
function dimensionsForQuery(ds, dimensions = []) {
    const planned = [];
    for (const name of dimensions) {
        const field = resolveField(ds, name);
        const fieldType = ds.dimensions[name]?.fieldType;
        planned.push({ name, field, fieldType });
    }
    return planned;
}
20
/**
 * Rewrites filter field names to the fields they resolve to.
 *
 * A filter name is first looked up in `ds.filters` to find its target field,
 * and that target is then resolved through `resolveField`.
 *
 * @param ds - Dataset that declares filters and dimensions.
 * @param filters - Filters to normalize; defaults to none.
 * @returns Copies of the filters with `field` replaced by the resolved field.
 */
function normalizeFilters(ds, filters = []) {
    return filters.map((filter) => {
        const declared = ds.filters[filter.field];
        const target = declared?.field ?? filter.field;
        const field = resolveField(ds, target);
        return { ...filter, field };
    });
}
29
/**
 * Builds the aggregation plan entry for a named measure.
 *
 * @param ds - Dataset that declares the measure.
 * @param name - Measure name.
 * @returns `{ name, aggregation, field, filters }` for the measure.
 * @throws {Error} When the measure is unknown or is defined through raw SQL.
 */
function aggregationForMeasure(ds, name) {
    const definition = ds.measures[name];
    if (!definition) {
        throw new Error(`Unknown measure "${name}" on dataset "${ds.name}".`);
    }
    if (definition.sql) {
        throw new Error(`Semantic backend plans do not support SQL-backed measure "${name}". ` +
            'Use a backend-specific query builder for raw SQL expressions.');
    }
    const { aggregation } = definition;
    const field = resolveField(ds, definition.field);
    const filters = normalizeFilters(ds, definition.filters);
    return { name, aggregation, field, filters };
}
45
/**
 * Derives the tenant predicate for a plan from the execution context.
 *
 * @param ds - Dataset; its `tenantKey` names the tenant field.
 * @param context - Execution context passed to `getRuntimeTenantPredicate`.
 * @returns The runtime predicate with `field` set to the tenant key, or
 *   `undefined` when there is no predicate or the dataset has no tenant key.
 */
function tenantForContext(ds, context) {
    const predicate = getRuntimeTenantPredicate(context);
    const applies = Boolean(predicate) && Boolean(ds.tenantKey);
    return applies ? { field: ds.tenantKey, ...predicate } : undefined;
}
52
/**
 * Builds the time-grain part of a plan.
 *
 * @param ds - Dataset; its `timeKey` names the timestamp field.
 * @param unit - Requested time grain, if any.
 * @returns `undefined` when no grain is requested, otherwise a grain plan
 *   bucketing `ds.timeKey` by `unit` into the `period` output.
 * @throws {Error} When the dataset has no `timeKey` or the grain is unsupported.
 */
function grainForQuery(ds, unit) {
    if (unit) {
        if (!ds.timeKey) {
            throw new Error(`Cannot use grain "${unit}" because dataset "${ds.name}" has no timeKey.`);
        }
        if (!isSupportedTimeGrain(unit)) {
            throw new Error(`Unsupported time grain "${unit}".`);
        }
        return { field: ds.timeKey, unit, output: 'period' };
    }
    return undefined;
}
68
/**
 * Assembles an `aggregate` plan node.
 *
 * @param ds - Dataset being queried.
 * @param query - Query providing dimensions, filters, grain (`by`), ordering and pagination.
 * @param aggregations - Already-built aggregation plan entries.
 * @param context - Execution context used to derive the tenant predicate.
 * @returns The aggregate plan node.
 */
function aggregatePlan(ds, query, aggregations, context) {
    const source = ds.source;
    // Resolution order is kept as dimensions → filters → grain → tenant so the
    // first error raised for an invalid query stays the same.
    const dimensions = dimensionsForQuery(ds, query.dimensions);
    const filters = normalizeFilters(ds, query.filters);
    const grain = grainForQuery(ds, query.by);
    const { orderBy, limit, offset } = query;
    const tenant = tenantForContext(ds, context);
    return {
        kind: 'aggregate',
        source,
        dimensions,
        aggregations,
        filters,
        grain,
        orderBy,
        limit,
        offset,
        tenant,
    };
}
82
/**
 * Builds an aggregate plan for a dataset query.
 *
 * When the query names no measures, every measure declared on the dataset is
 * aggregated.
 *
 * @param ds - Dataset to plan against.
 * @param query - Dataset query; defaults to an empty query.
 * @param context - Optional execution context.
 * @returns The aggregate plan node.
 * @throws {Error} When validation fails, or a selected measure or dimension is SQL-backed.
 */
export function buildDatasetPlan(ds, query = {}, context) {
    const { valid, errors } = validateDatasetQuery(ds, query, context);
    if (!valid) {
        throw new Error(`Invalid dataset query: ${errors.join('; ')}`);
    }
    const measureNames = query.measures ?? Object.keys(ds.measures);
    const aggregations = measureNames.map((name) => aggregationForMeasure(ds, name));
    return aggregatePlan(ds, query, aggregations, context);
}
90
/**
 * Builds an aggregate plan for a base (non-derived) metric.
 *
 * @param metric - Metric reference; `metric.spec` must be an aggregation spec.
 * @param query - Metric query, with the grain already resolved into `by`.
 * @param context - Optional execution context.
 * @returns An aggregate plan whose only aggregation is the metric itself.
 * @throws {Error} When the metric is not a base metric or is SQL-backed.
 */
function buildBaseMetricPlan(metric, query, context) {
    const { spec } = metric;
    if (spec.__type !== 'aggregation_spec') {
        throw new Error(`Metric "${metric.name}" is not a base metric.`);
    }
    if (spec.sql) {
        throw new Error(`Semantic backend plans do not support SQL-backed metric "${metric.name}". ` +
            'Use a backend-specific query builder for raw SQL expressions.');
    }
    const dataset = metric.dataset;
    const baseQuery = {
        ...query,
        by: query.by,
        measures: [],
    };
    const metricAggregation = {
        name: metric.name,
        aggregation: spec.aggregation,
        field: resolveField(dataset, spec.field),
        filters: normalizeFilters(dataset, spec.filters),
    };
    return aggregatePlan(dataset, baseQuery, [metricAggregation], context);
}
110
/**
 * Builds a `derive` plan for a derived metric.
 *
 * Each base metric listed in `spec.uses` becomes an aggregation (named by its
 * alias) on an inner aggregate plan. The derived value is then computed from
 * the expression returned by `spec.formula`. Ordering and pagination are
 * applied on the outer `derive` node, not on the inner aggregate.
 *
 * @param metric - Metric reference; `metric.spec` must be a derived metric spec.
 * @param query - Metric query, with the grain already resolved into `by`.
 * @param context - Optional execution context.
 * @returns The derive plan node.
 * @throws {Error} When the metric is not derived, references a non-base
 *   metric, or references a SQL-backed metric.
 */
function buildDerivedMetricPlan(metric, query, context) {
    const spec = metric.spec;
    if (spec.__type !== 'derived_metric_spec') {
        throw new Error(`Metric "${metric.name}" is not a derived metric.`);
    }
    const inputAggregations = Object.entries(spec.uses).map(([alias, baseMetric]) => {
        const baseSpec = baseMetric.spec;
        if (baseSpec.__type !== 'aggregation_spec') {
            throw new Error(`Derived metric "${metric.name}" references non-base metric "${alias}".`);
        }
        if (baseSpec.sql) {
            throw new Error(`Semantic backend plans do not support SQL-backed metric "${alias}" used by derived metric "${metric.name}". ` +
                'Use a backend-specific query builder for raw SQL expressions.');
        }
        return {
            name: alias,
            aggregation: baseSpec.aggregation,
            field: resolveField(metric.dataset, baseSpec.field),
            // Resolve filter aliases to fields, as the base-metric and
            // measure paths do, so backends never see unresolved filter names.
            filters: normalizeFilters(metric.dataset, baseSpec.filters),
        };
    });
    // The formula receives each alias mapped to itself.
    const inputs = Object.fromEntries(Object.keys(spec.uses).map((alias) => [alias, alias]));
    const expression = spec.formula(inputs).expression;
    return {
        kind: 'derive',
        input: aggregatePlan(metric.dataset, {
            ...query,
            // Ordering and pagination must see the derived values, so they are
            // stripped from the inner plan and applied on the derive node.
            orderBy: undefined,
            limit: undefined,
            offset: undefined,
        }, inputAggregations, context),
        metrics: [{ name: metric.name, expression }],
        orderBy: query.orderBy,
        limit: query.limit,
        offset: query.offset,
    };
}
147
/**
 * Builds a semantic plan for a metric, dispatching on whether it is derived.
 *
 * The grain is resolved with `getMetricGrain` and written into the query's
 * `by` field before planning.
 *
 * @param metric - Metric handle or grained metric handle.
 * @param query - Metric query; defaults to an empty query.
 * @param context - Optional execution context.
 * @returns A `derive` plan for derived metrics, otherwise an `aggregate` plan.
 */
export function buildMetricPlan(metric, query = {}, context) {
    const ref = getMetricRef(metric);
    const by = getMetricGrain(metric, query);
    const plannedQuery = { ...query, by };
    const isDerived = ref.spec.__type === 'derived_metric_spec';
    const build = isDerived ? buildDerivedMetricPlan : buildBaseMetricPlan;
    return build(ref, plannedQuery, context);
}
@@ -19,7 +19,7 @@ export declare function isSafeSQLIdentifier(identifier: string): boolean;
19
19
  export declare function validateSQLIdentifier(identifier: string, context: string): void;
20
20
  /**
21
21
  * Quotes a SQL identifier for safe use in queries.
22
- * Uses double quotes which is standard SQL.
22
+ * Uses backticks, which is how ClickHouse quotes identifiers.
23
23
  *
24
24
  * @param identifier - The identifier to quote
25
25
  * @returns Quoted identifier
package/dist/sql-utils.js CHANGED
@@ -26,13 +26,13 @@ export function validateSQLIdentifier(identifier, context) {
26
26
  }
27
27
  /**
28
28
  * Quotes a SQL identifier for safe use in queries.
29
- * Uses double quotes which is standard SQL.
29
+ * Uses backticks, which is how ClickHouse quotes identifiers.
30
30
  *
31
31
  * @param identifier - The identifier to quote
32
32
  * @returns Quoted identifier
33
33
  */
34
34
  export function quoteSQLIdentifier(identifier) {
35
- // Escape any existing double quotes by doubling them
36
- const escaped = identifier.replace(/"/g, '""');
37
- return `"${escaped}"`;
35
+ // Escape any existing backticks by doubling them
36
+ const escaped = identifier.replace(/`/g, '``');
37
+ return `\`${escaped}\``;
38
38
  }
@@ -0,0 +1,53 @@
1
+ import type { AnyDatasetInstance, DatasetQuery, ExecutionContext, MetricHandle, MetricQuery } from './types.js';
2
+ import { getDatasetCatalog, type DatasetCatalogSource } from './catalog.js';
3
/** The subset of JSON Schema keywords used to describe tool parameters. */
export type JsonSchema = {
    /** Schema type name. */
    type?: string;
    /** Human-readable description. */
    description?: string;
    /** Schemas of named properties. */
    properties?: Record<string, JsonSchema>;
    /** Schema of array items. */
    items?: JsonSchema;
    /** Names of required properties. */
    required?: string[];
    /** Allowed string values. */
    enum?: string[];
    /** Numeric lower bound. */
    minimum?: number;
    /** Numeric upper bound. */
    maximum?: number;
    /** Whether properties not listed in `properties` are allowed. */
    additionalProperties?: boolean;
};
14
/**
 * A framework-neutral tool definition: a name, a description, a parameter
 * schema and an executor.
 *
 * @typeParam TInput - Shape of the tool input.
 * @typeParam TResult - Type the tool resolves with.
 */
export interface SemanticToolDefinition<TInput = Record<string, unknown>, TResult = unknown> {
    /** Tool name. */
    name: string;
    /** Human-readable description of what the tool does. */
    description: string;
    /** JSON Schema describing the tool input. */
    parameters: JsonSchema;
    /**
     * Runs the tool.
     *
     * @param input - Tool input.
     * @param context - Optional execution context.
     */
    execute(input: TInput, context?: ExecutionContext): Promise<TResult>;
}
20
/**
 * How tools are generated: one tool for all datasets (`catalog`), one tool
 * per dataset (`per-dataset`), or one tool per metric (`per-metric`).
 */
export type DatasetToolMode =
    | 'catalog'
    | 'per-dataset'
    | 'per-metric';
21
/** The executor that generated tools delegate to. */
export interface DatasetToolAnalytics {
    /**
     * Executes a query against a dataset or a metric.
     *
     * @param target - Dataset instance or metric handle to query.
     * @param query - Optional dataset or metric query.
     * @param context - Optional execution context.
     */
    execute(
        target: AnyDatasetInstance | MetricHandle,
        query?: DatasetQuery | MetricQuery,
        context?: ExecutionContext,
    ): Promise<unknown>;
}
24
/** Options accepted by `generateDatasetTools`. */
export interface GenerateDatasetToolsOptions {
    /** Datasets to expose, keyed by dataset name. */
    datasets: Record<string, DatasetCatalogSource>;
    /** Executor the generated tools delegate to. */
    analytics: DatasetToolAnalytics;
    /** Tool generation mode. */
    mode?: DatasetToolMode;
    /** Whether tool results may include generated SQL. */
    includeSql?: boolean;
}
30
/** Tool definition in the shape returned by `toOpenAITools`. */
export interface OpenAIToolDefinition {
    /** Always `'function'`. */
    type: 'function';
    /** The function's name, description and parameter schema. */
    function: {
        name: string;
        description: string;
        parameters: JsonSchema;
    };
}
38
/** Tool definition in the shape returned by `toAISDKTools` (keyed by tool name there). */
export interface AISDKToolDefinition {
    /** Human-readable description of the tool. */
    description: string;
    /** JSON Schema describing the tool input. */
    parameters: JsonSchema;
    /**
     * Runs the tool.
     *
     * @param input - Tool input.
     */
    execute(input: Record<string, unknown>): Promise<unknown>;
}
43
/** Tool definition in the shape returned by `toMcpTools`. */
export interface McpToolDefinition {
    /** Tool name. */
    name: string;
    /** Human-readable description of the tool. */
    description: string;
    /** JSON Schema describing the tool input. */
    inputSchema: JsonSchema;
}
48
/**
 * Generates tool definitions for the given datasets.
 *
 * @param options - Datasets, executor, generation mode and SQL visibility.
 * @returns The generated tool definitions.
 */
export declare function generateDatasetTools(
    options: GenerateDatasetToolsOptions,
): SemanticToolDefinition[];

/**
 * Converts tool definitions to OpenAI function-tool definitions.
 *
 * @param tools - Tool definitions to convert.
 */
export declare function toOpenAITools(
    tools: SemanticToolDefinition[],
): OpenAIToolDefinition[];

/**
 * Converts tool definitions to AI SDK tool definitions.
 *
 * @param tools - Tool definitions to convert.
 * @returns The converted tools, keyed by name.
 */
export declare function toAISDKTools(
    tools: SemanticToolDefinition[],
): Record<string, AISDKToolDefinition>;

/**
 * Converts tool definitions to MCP tool definitions.
 *
 * @param tools - Tool definitions to convert.
 */
export declare function toMcpTools(
    tools: SemanticToolDefinition[],
): McpToolDefinition[];
52
+ export { getDatasetCatalog };
53
+ //# sourceMappingURL=tools.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../src/tools.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAClB,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACZ,WAAW,EAGZ,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,iBAAiB,EAIjB,KAAK,oBAAoB,EAC1B,MAAM,cAAc,CAAC;AAGtB,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACxC,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,WAAW,sBAAsB,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO;IACzF,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CACtE;AAED,MAAM,MAAM,eAAe,GAAG,SAAS,GAAG,aAAa,GAAG,YAAY,CAAC;AAEvE,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,MAAM,EAAE,kBAAkB,GAAG,YAAY,EACzC,KAAK,CAAC,EAAE,YAAY,GAAG,WAAW,EAClC,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,OAAO,CAAC,CAAC;CACrB;AAED,MAAM,WAAW,2BAA2B;IAC1C,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,oBAAoB,CAAC,CAAC;IAC/C,SAAS,EAAE,oBAAoB,CAAC;IAChC,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,UAAU,CAAC;KACxB,CAAC;CACH;AAED,MAAM,WAAW,mBAAmB;IAClC,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CAC3D;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,UAAU,CAAC;CACzB;AA8VD,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,2BAA2B,GAAG,sBAAsB,EAAE,CAenG;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,sBAAsB,EAAE,GAAG,oBAAoB,EAAE,CASrF;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,sBAAsB,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAWjG;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,sBAAsB,EAAE,GAAG,iBAAiB,EAAE,CAM/E;AAED,OAAO,EAAE,iBAAiB,EAAE,
CAAC"}
package/dist/tools.js ADDED
@@ -0,0 +1,322 @@
1
+ import { getDatasetCatalog, getDatasetCatalogs, } from './catalog.js';
2
+ import { SEMANTIC_FILTER_OPERATORS } from './constants.js';
3
/**
 * Builds a string schema restricted to the given values.
 *
 * @param values - Allowed values; duplicates are removed, first occurrence wins.
 * @param description - Optional description; omitted from the schema when falsy.
 * @returns A `{ type: 'string' }` schema, with `enum` only when at least one
 *   value is given.
 */
function enumSchema(values, description) {
    const distinct = [...new Set(values)];
    const enumPart = distinct.length === 0 ? {} : { enum: distinct };
    const descriptionPart = description ? { description } : {};
    return { type: 'string', ...enumPart, ...descriptionPart };
}
11
/**
 * Turns an arbitrary name into a fragment usable inside a tool name.
 *
 * Every character other than ASCII letters, digits, `_` and `-` is replaced
 * by `_`. If the result does not start with a letter or `_`, a leading `_` is
 * added.
 *
 * @param name - Dataset or metric name.
 * @returns The sanitized fragment.
 */
function toolNamePart(name) {
    const sanitized = name.replace(/[^A-Za-z0-9_-]/g, '_');
    if (/^[A-Za-z_]/.test(sanitized)) {
        return sanitized;
    }
    return `_${sanitized}`;
}
15
/**
 * Builds an array schema whose items are restricted to the given values.
 *
 * @param values - Allowed item values.
 * @param description - Optional description of the array; omitted when falsy.
 * @returns An array schema with enum-restricted string items.
 */
function arrayEnumSchema(values, description) {
    const items = enumSchema(values);
    const descriptionPart = description ? { description } : {};
    return { type: 'array', items, ...descriptionPart };
}
22
/**
 * Builds the schema for the `limit` parameter.
 *
 * The schema is a non-negative integer. When at least one catalog declares a
 * finite `maxLimit`, the largest of them becomes the schema `maximum`.
 * Non-finite values (`Infinity`, `NaN`) and non-numbers are ignored, because
 * they do not survive JSON serialisation and would produce `maximum: null`.
 *
 * @param catalogs - Catalogs whose `maxLimit` values are considered.
 * @returns The integer schema for `limit`.
 */
function limitSchema(catalogs) {
    let ceiling;
    for (const { maxLimit } of catalogs) {
        // Number.isFinite is false for non-numbers as well as Infinity/NaN.
        if (Number.isFinite(maxLimit)) {
            ceiling = ceiling === undefined ? maxLimit : Math.max(ceiling, maxLimit);
        }
    }
    return {
        type: 'integer',
        minimum: 0,
        ...(ceiling === undefined ? {} : { maximum: ceiling }),
    };
}
32
/**
 * Builds the JSON Schema for a dataset query tool.
 *
 * Allowed names for dimensions, measures, filters, grains and order fields
 * are the union across all given catalogs.
 *
 * @param catalogs - Catalogs the tool may query.
 * @param includeDataset - When true, adds a required `dataset` property
 *   restricted to the catalog names.
 * @returns An object schema that rejects additional properties.
 */
function querySchema(catalogs, includeDataset) {
    // Union of the values `pick` returns for each catalog, in first-seen order.
    const distinct = (pick) => [...new Set(catalogs.flatMap(pick))];
    const datasetNames = catalogs.map((catalog) => catalog.name);
    const dimensionNames = distinct((catalog) => Object.keys(catalog.dimensions));
    const measureNames = distinct((catalog) => Object.keys(catalog.measures));
    const filterNames = distinct((catalog) => Object.keys(catalog.filters));
    const grainNames = distinct((catalog) => catalog.supportedGrains);
    const orderFields = distinct((catalog) => catalog.orderableFields);

    const filterItem = {
        type: 'object',
        properties: {
            field: enumSchema(filterNames),
            operator: enumSchema([...SEMANTIC_FILTER_OPERATORS]),
            // Any value is accepted here.
            value: {},
        },
        required: ['field', 'operator', 'value'],
        additionalProperties: false,
    };
    const orderItem = {
        type: 'object',
        properties: {
            field: enumSchema(orderFields),
            direction: enumSchema(['asc', 'desc']),
        },
        required: ['field', 'direction'],
        additionalProperties: false,
    };
    const properties = {
        dimensions: arrayEnumSchema(dimensionNames, 'Dimensions to group by.'),
        measures: arrayEnumSchema(measureNames, 'Measures to aggregate.'),
        filters: { type: 'array', items: filterItem },
        orderBy: { type: 'array', items: orderItem },
        by: enumSchema(grainNames, 'Optional time grain.'),
        limit: limitSchema(catalogs),
        offset: { type: 'integer', minimum: 0 },
        ...(includeDataset ? { dataset: enumSchema(datasetNames) } : {}),
    };
    return {
        type: 'object',
        properties,
        required: includeDataset ? ['dataset'] : [],
        additionalProperties: false,
    };
}
84
/**
 * Validates that an optional input is an array of strings.
 *
 * @param input - Value to check; `null`/`undefined` count as "not provided".
 * @param name - Parameter name used in the error message.
 * @returns An empty array when not provided, otherwise `input` itself.
 * @throws {Error} When `input` is not an array or contains a non-string.
 */
function assertStringArray(input, name) {
    if (input === null || input === undefined) {
        return [];
    }
    const allStrings = Array.isArray(input) && input.every((value) => typeof value === 'string');
    if (!allStrings) {
        throw new Error(`Invalid ${name}: expected an array of strings.`);
    }
    return input;
}
92
/**
 * Validates the `filters` tool input and reduces each entry to its known keys.
 *
 * @param input - Value to check; `null`/`undefined` count as "not provided".
 * @returns An empty array when not provided, otherwise a new array of
 *   `{ field, operator, value }` objects.
 * @throws {Error} When `input` is not an array, an entry is not an object,
 *   `field` is not a string, or `operator` is not a supported operator.
 */
function assertFilters(input) {
    if (input === null || input === undefined) {
        return [];
    }
    if (!Array.isArray(input)) {
        throw new Error('Invalid filters: expected an array.');
    }
    return input.map((entry, index) => {
        if (typeof entry !== 'object' || !entry) {
            throw new Error(`Invalid filters[${index}]: expected an object.`);
        }
        const { field, operator, value } = entry;
        if (typeof field !== 'string') {
            throw new Error(`Invalid filters[${index}].field: expected a string.`);
        }
        if (!SEMANTIC_FILTER_OPERATORS.includes(operator)) {
            throw new Error(`Invalid filters[${index}].operator: expected a supported operator.`);
        }
        // Only the recognised keys are kept.
        return { field, operator, value };
    });
}
116
/**
 * Validates the `orderBy` tool input and reduces each entry to its known keys.
 *
 * @param input - Value to check; `null`/`undefined` count as "not provided".
 * @returns An empty array when not provided, otherwise a new array of
 *   `{ field, direction }` objects.
 * @throws {Error} When `input` is not an array, an entry is not an object,
 *   `field` is not a string, or `direction` is neither `"asc"` nor `"desc"`.
 */
function assertOrderBy(input) {
    if (input === null || input === undefined) {
        return [];
    }
    if (!Array.isArray(input)) {
        throw new Error('Invalid orderBy: expected an array.');
    }
    return input.map((entry, index) => {
        if (typeof entry !== 'object' || !entry) {
            throw new Error(`Invalid orderBy[${index}]: expected an object.`);
        }
        const { field, direction } = entry;
        if (typeof field !== 'string') {
            throw new Error(`Invalid orderBy[${index}].field: expected a string.`);
        }
        if (!['asc', 'desc'].includes(direction)) {
            throw new Error(`Invalid orderBy[${index}].direction: expected "asc" or "desc".`);
        }
        // Only the recognised keys are kept.
        return { field, direction };
    });
}
139
/**
 * Validates that an optional input is a non-negative integer.
 *
 * @param input - Value to check; `null`/`undefined` count as "not provided".
 * @param name - Parameter name used in the error message.
 * @returns `undefined` when not provided, otherwise `input` itself.
 * @throws {Error} When `input` is not an integer or is negative.
 */
function assertNonNegativeInteger(input, name) {
    if (input === null || input === undefined) {
        return undefined;
    }
    const acceptable = Number.isInteger(input) && input >= 0;
    if (!acceptable) {
        throw new Error(`Invalid ${name}: expected a non-negative integer.`);
    }
    return input;
}
147
/**
 * Ensures every value is in the allowed list.
 *
 * @param values - Values to check.
 * @param allowed - Allowed values.
 * @param label - Description of the values, used in the error message.
 * @throws {Error} Listing every value that is not allowed, followed by the
 *   allowed values (or `(none)` when that list is empty).
 */
function assertAllowedValues(values, allowed, label) {
    const rejected = [];
    for (const value of values) {
        if (!allowed.includes(value)) {
            rejected.push(value);
        }
    }
    if (rejected.length === 0) {
        return;
    }
    throw new Error(`Invalid ${label}: ${rejected.join(', ')}. Available: ${allowed.join(', ') || '(none)'}.`);
}
153
/**
 * Validates raw tool input against a dataset catalog and builds a query.
 *
 * Shape checks run first (arrays, filter/order entries, integers). Then
 * names, filter operators, the time grain and the limit are checked against
 * the catalog. Parameters that are empty or not provided are left out of the
 * returned query.
 *
 * @param input - Raw tool input.
 * @param catalog - Catalog of the dataset being queried.
 * @param options - Optional overrides; `orderableFields` replaces the
 *   catalog's orderable fields.
 * @returns A query containing only the provided, validated parameters.
 * @throws {Error} On the first invalid parameter.
 */
function normalizeDatasetQuery(input, catalog, options = {}) {
    // Shape validation.
    const dimensions = assertStringArray(input.dimensions, 'dimensions');
    const measures = assertStringArray(input.measures, 'measures');
    const filters = assertFilters(input.filters);
    const orderBy = assertOrderBy(input.orderBy);
    const limit = assertNonNegativeInteger(input.limit, 'limit');
    const offset = assertNonNegativeInteger(input.offset, 'offset');

    // Catalog validation.
    const sortable = options.orderableFields ?? catalog.orderableFields;
    assertAllowedValues(dimensions, Object.keys(catalog.dimensions), 'dimensions');
    assertAllowedValues(measures, Object.keys(catalog.measures), 'measures');
    assertAllowedValues(filters.map((filter) => filter.field), Object.keys(catalog.filters), 'filter fields');
    assertAllowedValues(orderBy.map((order) => order.field), sortable, 'orderBy fields');
    filters.forEach((filter) => {
        const permitted = catalog.filters[filter.field]?.operators ?? [];
        if (!permitted.includes(filter.operator)) {
            throw new Error(`Invalid filter operator for "${filter.field}": ${filter.operator}. Allowed: ${permitted.join(', ')}.`);
        }
    });

    let by;
    if (typeof input.by === 'string') {
        assertAllowedValues([input.by], catalog.supportedGrains, 'time grain');
        by = input.by;
    }
    else if (input.by != null) {
        throw new Error('Invalid by: expected a time grain string.');
    }

    const overLimit = limit != null && catalog.maxLimit != null && limit > catalog.maxLimit;
    if (overLimit) {
        throw new Error(`Invalid limit: ${limit}. Max: ${catalog.maxLimit}.`);
    }

    // Keep only the parameters that were provided, in a stable key order.
    const candidates = [
        ['dimensions', dimensions.length > 0 ? dimensions : undefined],
        ['measures', measures.length > 0 ? measures : undefined],
        ['filters', filters.length > 0 ? filters : undefined],
        ['orderBy', orderBy.length > 0 ? orderBy : undefined],
        ['by', by ? by : undefined],
        ['limit', limit != null ? limit : undefined],
        ['offset', offset != null ? offset : undefined],
    ];
    return Object.fromEntries(candidates.filter(([, value]) => value !== undefined));
}
191
/**
 * Removes `meta.sql` from a result unless SQL is allowed.
 *
 * @param result - Execution result; any value is accepted.
 * @param includeSql - When truthy, the result is returned untouched.
 * @returns `result` itself when nothing needs removing, otherwise a shallow
 *   copy whose `meta` lacks the `sql` key. The input is never mutated.
 */
function redactSql(result, includeSql) {
    const isObject = Boolean(result) && typeof result === 'object';
    if (includeSql || !isObject) {
        return result;
    }
    const original = result.meta;
    const hasSql = Boolean(original) && typeof original === 'object' && 'sql' in original;
    if (!hasSql) {
        return result;
    }
    // Copy before deleting so the caller's object is left intact.
    const meta = { ...original };
    delete meta.sql;
    return { ...result, meta };
}
205
/**
 * Builds the single `query_dataset` tool that can query any listed dataset.
 *
 * @param datasets - Datasets keyed by name.
 * @param catalogs - Catalogs keyed by the same names.
 * @param analytics - Executor the tool delegates to.
 * @param includeSql - Whether results may keep `meta.sql`.
 * @returns The tool definition. Its `execute` rejects with `Invalid dataset`
 *   when `input.dataset` does not name one of the datasets.
 */
function buildCatalogTool(datasets, catalogs, analytics, includeSql) {
    const catalogList = Object.values(catalogs);
    // Tool input is untrusted. An own-property check stops inherited keys such
    // as "constructor" or "__proto__" from being taken for dataset names.
    const isKnownDataset = (name) => typeof name === 'string'
        && Object.prototype.hasOwnProperty.call(datasets, name)
        && Boolean(datasets[name]);
    return {
        name: 'query_dataset',
        description: 'Query governed analytics datasets by selecting dimensions, measures, filters, and time grains.',
        parameters: querySchema(catalogList, true),
        async execute(input, context) {
            // A missing input is reported as an invalid dataset, not a TypeError.
            const requested = input?.dataset;
            if (!isKnownDataset(requested)) {
                throw new Error(`Invalid dataset: ${String(requested)}. Available: ${Object.keys(datasets).join(', ')}.`);
            }
            const catalog = catalogs[requested];
            const query = normalizeDatasetQuery(input, catalog);
            const result = await analytics.execute(datasets[requested], query, context);
            return redactSql(result, includeSql);
        },
    };
}
222
/**
 * Builds one `query_<dataset>` tool per dataset.
 *
 * @param datasets - Datasets keyed by name.
 * @param catalogs - Catalogs keyed by the same names.
 * @param analytics - Executor the tools delegate to.
 * @param includeSql - Whether results may keep `meta.sql`.
 * @returns One tool definition per dataset, in `datasets` key order.
 */
function buildDatasetTools(datasets, catalogs, analytics, includeSql) {
    const tools = [];
    for (const datasetName of Object.keys(datasets)) {
        const dataset = datasets[datasetName];
        const catalog = catalogs[datasetName];
        tools.push({
            name: `query_${toolNamePart(datasetName)}`,
            description: `Query the ${datasetName} analytics dataset.`,
            parameters: querySchema([catalog], false),
            async execute(input, context) {
                const query = normalizeDatasetQuery(input, catalog);
                const raw = await analytics.execute(dataset, query, context);
                return redactSql(raw, includeSql);
            },
        });
    }
    return tools;
}
237
/**
 * Builds the JSON Schema for a single-metric query tool.
 *
 * Starts from the dataset query schema, drops `measures`, and replaces
 * `orderBy` so that results can be ordered by the dataset's dimensions, the
 * metric itself and, when the catalog has a time key, `period`.
 *
 * @param catalog - Catalog of the dataset that owns the metric.
 * @param metricName - Name of the metric.
 * @returns The object schema for the metric tool input.
 */
function metricQuerySchema(catalog, metricName) {
    const base = querySchema([catalog], false);
    const { measures: _dropped, ...remaining } = base.properties ?? {};
    const sortableFields = [
        ...Object.keys(catalog.dimensions),
        metricName,
        ...(catalog.timeKey ? ['period'] : []),
    ];
    const orderBy = {
        type: 'array',
        items: {
            type: 'object',
            properties: {
                field: enumSchema(sortableFields),
                direction: enumSchema(['asc', 'desc']),
            },
            required: ['field', 'direction'],
            additionalProperties: false,
        },
    };
    // Overwriting `orderBy` keeps the key in its original position.
    return {
        ...base,
        properties: { ...remaining, orderBy },
    };
}
258
/**
 * Builds one `query_<metric>` tool per metric of every dataset.
 *
 * @param datasets - Datasets keyed by name; each may declare `metrics`.
 * @param catalogs - Catalogs keyed by the same names.
 * @param analytics - Executor the tools delegate to.
 * @param includeSql - Whether results may keep `meta.sql`.
 * @returns Tool definitions in dataset order, then metric order.
 */
function buildMetricTools(datasets, catalogs, analytics, includeSql) {
    return Object.entries(datasets).flatMap(([datasetName, dataset]) => {
        const catalog = catalogs[datasetName];
        const metrics = Object.entries(dataset.metrics ?? {});
        return metrics.map(([metricName, metric]) => ({
            name: `query_${toolNamePart(metricName)}`,
            description: `Query the ${metricName} metric from the ${datasetName} dataset.`,
            parameters: metricQuerySchema(catalog, metricName),
            async execute(input, context) {
                // A metric query may be ordered by dimensions, the metric
                // itself and, when the catalog has a time key, `period`.
                const orderableFields = [
                    ...Object.keys(catalog.dimensions),
                    metricName,
                    ...(catalog.timeKey ? ['period'] : []),
                ];
                // Any caller-supplied measures are discarded.
                const metricInput = { ...input, measures: [] };
                const query = normalizeDatasetQuery(metricInput, catalog, { orderableFields });
                const raw = await analytics.execute(metric, query, context);
                return redactSql(raw, includeSql);
            },
        }));
    });
}
282
/**
 * Generates tool definitions for the given datasets.
 *
 * @param options - `datasets`, `analytics`, optional `mode` (defaults to
 *   `'catalog'`) and optional `includeSql` (defaults to `false`).
 * @returns A single catalog tool in `'catalog'` mode, one tool per dataset in
 *   `'per-dataset'` mode, and one tool per metric for any other mode.
 */
export function generateDatasetTools(options) {
    const { datasets, analytics } = options;
    const mode = options.mode ?? 'catalog';
    const includeSql = options.includeSql ?? false;
    const catalogs = getDatasetCatalogs(datasets);
    switch (mode) {
        case 'catalog':
            return [buildCatalogTool(datasets, catalogs, analytics, includeSql)];
        case 'per-dataset':
            return buildDatasetTools(datasets, catalogs, analytics, includeSql);
        default:
            return buildMetricTools(datasets, catalogs, analytics, includeSql);
    }
}
295
/**
 * Converts tool definitions to OpenAI function-tool definitions.
 *
 * @param tools - Tool definitions to convert.
 * @returns One `{ type: 'function', function: { name, description, parameters } }`
 *   entry per tool; the executor is not included.
 */
export function toOpenAITools(tools) {
    const converted = [];
    for (const { name, description, parameters } of tools) {
        converted.push({
            type: 'function',
            function: { name, description, parameters },
        });
    }
    return converted;
}
305
/**
 * Converts tool definitions to AI SDK tool definitions.
 *
 * @param tools - Tool definitions to convert.
 * @returns The tools keyed by name. Each `execute` passes only the input to
 *   the underlying tool; no execution context is supplied.
 */
export function toAISDKTools(tools) {
    const entries = [];
    for (const tool of tools) {
        const adapted = {
            description: tool.description,
            parameters: tool.parameters,
            execute: (input) => tool.execute(input),
        };
        entries.push([tool.name, adapted]);
    }
    return Object.fromEntries(entries);
}
315
/**
 * Converts tool definitions to MCP tool definitions.
 *
 * @param tools - Tool definitions to convert.
 * @returns One `{ name, description, inputSchema }` entry per tool, with
 *   `inputSchema` taken from the tool's `parameters`; the executor is not
 *   included.
 */
export function toMcpTools(tools) {
    return tools.map(({ name, description, parameters }) => ({
        name,
        description,
        inputSchema: parameters,
    }));
}
322
+ export { getDatasetCatalog };