trellis 1.0.8 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +533 -82
  3. package/bin/trellis.mjs +2 -0
  4. package/dist/cli/index.js +4718 -0
  5. package/dist/core/index.js +12 -0
  6. package/dist/decisions/index.js +19 -0
  7. package/dist/embeddings/index.js +43 -0
  8. package/dist/index-1j1anhmr.js +4038 -0
  9. package/dist/index-3s0eak0p.js +1556 -0
  10. package/dist/index-8pce39mh.js +272 -0
  11. package/dist/index-a76rekgs.js +67 -0
  12. package/dist/index-cy9k1g6v.js +684 -0
  13. package/dist/index-fd4e26s4.js +69 -0
  14. package/dist/{store/eav-store.js → index-gkvhzm9f.js} +4 -6
  15. package/dist/index-gnw8d7d6.js +51 -0
  16. package/dist/index-vkpkfwhq.js +817 -0
  17. package/dist/index.js +118 -2876
  18. package/dist/links/index.js +55 -0
  19. package/dist/transformers-m9je15kg.js +32491 -0
  20. package/dist/vcs/index.js +110 -0
  21. package/logo.png +0 -0
  22. package/logo.svg +9 -0
  23. package/package.json +79 -76
  24. package/src/cli/index.ts +2340 -0
  25. package/src/core/index.ts +35 -0
  26. package/src/core/kernel/middleware.ts +44 -0
  27. package/src/core/persist/backend.ts +64 -0
  28. package/src/core/store/eav-store.ts +467 -0
  29. package/src/decisions/auto-capture.ts +136 -0
  30. package/src/decisions/hooks.ts +163 -0
  31. package/src/decisions/index.ts +261 -0
  32. package/src/decisions/types.ts +103 -0
  33. package/src/embeddings/chunker.ts +327 -0
  34. package/src/embeddings/index.ts +41 -0
  35. package/src/embeddings/model.ts +95 -0
  36. package/src/embeddings/search.ts +305 -0
  37. package/src/embeddings/store.ts +313 -0
  38. package/src/embeddings/types.ts +85 -0
  39. package/src/engine.ts +1083 -0
  40. package/src/garden/cluster.ts +330 -0
  41. package/src/garden/garden.ts +306 -0
  42. package/src/garden/index.ts +29 -0
  43. package/src/git/git-exporter.ts +286 -0
  44. package/src/git/git-importer.ts +329 -0
  45. package/src/git/git-reader.ts +189 -0
  46. package/src/git/index.ts +22 -0
  47. package/src/identity/governance.ts +211 -0
  48. package/src/identity/identity.ts +224 -0
  49. package/src/identity/index.ts +30 -0
  50. package/src/identity/signing-middleware.ts +97 -0
  51. package/src/index.ts +20 -0
  52. package/src/links/index.ts +49 -0
  53. package/src/links/lifecycle.ts +400 -0
  54. package/src/links/parser.ts +484 -0
  55. package/src/links/ref-index.ts +186 -0
  56. package/src/links/resolver.ts +314 -0
  57. package/src/links/types.ts +108 -0
  58. package/src/mcp/index.ts +22 -0
  59. package/src/mcp/server.ts +1278 -0
  60. package/src/semantic/csharp-parser.ts +493 -0
  61. package/src/semantic/go-parser.ts +585 -0
  62. package/src/semantic/index.ts +34 -0
  63. package/src/semantic/java-parser.ts +456 -0
  64. package/src/semantic/python-parser.ts +659 -0
  65. package/src/semantic/ruby-parser.ts +446 -0
  66. package/src/semantic/rust-parser.ts +784 -0
  67. package/src/semantic/semantic-merge.ts +210 -0
  68. package/src/semantic/ts-parser.ts +681 -0
  69. package/src/semantic/types.ts +175 -0
  70. package/src/sync/index.ts +32 -0
  71. package/src/sync/memory-transport.ts +66 -0
  72. package/src/sync/reconciler.ts +237 -0
  73. package/src/sync/sync-engine.ts +258 -0
  74. package/src/sync/types.ts +104 -0
  75. package/src/vcs/blob-store.ts +124 -0
  76. package/src/vcs/branch.ts +150 -0
  77. package/src/vcs/checkpoint.ts +64 -0
  78. package/src/vcs/decompose.ts +469 -0
  79. package/src/vcs/diff.ts +409 -0
  80. package/src/vcs/engine-context.ts +26 -0
  81. package/src/vcs/index.ts +23 -0
  82. package/src/vcs/issue.ts +800 -0
  83. package/src/vcs/merge.ts +425 -0
  84. package/src/vcs/milestone.ts +124 -0
  85. package/src/vcs/ops.ts +59 -0
  86. package/src/vcs/types.ts +213 -0
  87. package/src/vcs/vcs-middleware.ts +81 -0
  88. package/src/watcher/fs-watcher.ts +217 -0
  89. package/src/watcher/index.ts +9 -0
  90. package/src/watcher/ingestion.ts +116 -0
  91. package/dist/ai/index.js +0 -688
  92. package/dist/cli/server.js +0 -3321
  93. package/dist/cli/tql.js +0 -5282
  94. package/dist/client/tql-client.js +0 -108
  95. package/dist/graph/index.js +0 -2248
  96. package/dist/kernel/logic-middleware.js +0 -179
  97. package/dist/kernel/middleware.js +0 -0
  98. package/dist/kernel/operations.js +0 -32
  99. package/dist/kernel/schema-middleware.js +0 -34
  100. package/dist/kernel/security-middleware.js +0 -53
  101. package/dist/kernel/trellis-kernel.js +0 -2239
  102. package/dist/kernel/workspace.js +0 -91
  103. package/dist/persist/backend.js +0 -0
  104. package/dist/persist/sqlite-backend.js +0 -123
  105. package/dist/query/index.js +0 -1643
  106. package/dist/server/index.js +0 -3309
  107. package/dist/workflows/index.js +0 -3160
@@ -1,3160 +0,0 @@
1
- // src/workflows/types.ts
2
- class WorkflowValidationError extends Error {
3
- stepId;
4
- constructor(message, stepId) {
5
- super(message);
6
- this.stepId = stepId;
7
- this.name = "WorkflowValidationError";
8
- }
9
- }
10
-
11
- class WorkflowRuntimeError extends Error {
12
- stepId;
13
- constructor(message, stepId) {
14
- super(message);
15
- this.stepId = stepId;
16
- this.name = "WorkflowRuntimeError";
17
- }
18
- }
19
- // src/workflows/schema.ts
20
- var WORKFLOW_SCHEMA = {
21
- $schema: "http://json-schema.org/draft-07/schema#",
22
- type: "object",
23
- properties: {
24
- version: {
25
- type: "number",
26
- const: 1,
27
- description: "Workflow specification version. Must be 1."
28
- },
29
- name: {
30
- type: "string",
31
- minLength: 1,
32
- description: "Human-readable workflow name"
33
- },
34
- env: {
35
- type: "object",
36
- additionalProperties: {
37
- type: "string"
38
- },
39
- description: "Environment variables merged with process.env and --var"
40
- },
41
- steps: {
42
- type: "array",
43
- minItems: 1,
44
- items: {
45
- $ref: "#/definitions/step"
46
- },
47
- description: "Workflow execution steps"
48
- }
49
- },
50
- required: ["version", "name", "steps"],
51
- additionalProperties: false,
52
- definitions: {
53
- step: {
54
- type: "object",
55
- properties: {
56
- id: {
57
- type: "string",
58
- pattern: "^[a-zA-Z][a-zA-Z0-9_]*$",
59
- description: "Unique step identifier"
60
- },
61
- type: {
62
- enum: ["source", "query", "output"],
63
- description: "Step type"
64
- },
65
- needs: {
66
- type: "array",
67
- items: {
68
- type: "string"
69
- },
70
- description: "Dependencies on other steps"
71
- },
72
- out: {
73
- type: "string",
74
- description: "Output dataset name"
75
- },
76
- source: {
77
- oneOf: [
78
- { $ref: "#/definitions/httpSource" },
79
- { $ref: "#/definitions/fileSource" }
80
- ]
81
- },
82
- from: { type: "string" },
83
- eqls: { type: "string" },
84
- output: { $ref: "#/definitions/output" }
85
- },
86
- required: ["id", "type"],
87
- additionalProperties: false,
88
- allOf: [
89
- {
90
- if: { properties: { type: { const: "source" } } },
91
- then: {
92
- required: ["source", "out"]
93
- }
94
- },
95
- {
96
- if: { properties: { type: { const: "query" } } },
97
- then: {
98
- required: ["needs", "eqls", "out"]
99
- }
100
- },
101
- {
102
- if: { properties: { type: { const: "output" } } },
103
- then: {
104
- required: ["needs", "output"]
105
- }
106
- }
107
- ]
108
- },
109
- fileSource: {
110
- type: "object",
111
- properties: {
112
- kind: {
113
- const: "file",
114
- description: "Source kind"
115
- },
116
- path: {
117
- type: "string",
118
- description: "Path to local file, relative to the workflow file"
119
- },
120
- format: {
121
- enum: ["json", "csv"],
122
- description: "File format (defaults to json)"
123
- }
124
- },
125
- required: ["kind", "path"],
126
- additionalProperties: false
127
- },
128
- httpSource: {
129
- type: "object",
130
- properties: {
131
- kind: {
132
- const: "http",
133
- description: "Source kind"
134
- },
135
- url: {
136
- type: "string",
137
- description: "HTTP URL with optional template variables"
138
- },
139
- headers: {
140
- type: "object",
141
- additionalProperties: {
142
- type: "string"
143
- },
144
- description: "HTTP headers"
145
- },
146
- mode: {
147
- enum: ["batch", "map"],
148
- description: "Execution mode: batch (single request) or map (per-row requests)"
149
- },
150
- mapFrom: {
151
- type: "string",
152
- description: "Dataset to iterate over in map mode"
153
- }
154
- },
155
- required: ["kind", "url", "mode"],
156
- allOf: [
157
- {
158
- if: { properties: { mode: { const: "map" } } },
159
- then: {
160
- required: ["mapFrom"]
161
- }
162
- }
163
- ],
164
- additionalProperties: false
165
- },
166
- output: {
167
- oneOf: [
168
- {
169
- type: "object",
170
- properties: {
171
- kind: { const: "file" },
172
- format: { enum: ["json", "csv"] },
173
- path: { type: "string" }
174
- },
175
- required: ["kind", "format", "path"],
176
- additionalProperties: false
177
- },
178
- {
179
- type: "object",
180
- properties: {
181
- kind: { const: "stdout" },
182
- format: { enum: ["json", "csv"] }
183
- },
184
- required: ["kind", "format"],
185
- additionalProperties: false
186
- }
187
- ]
188
- }
189
- }
190
- };
191
- // src/workflows/parser.ts
192
- import { parse } from "yaml";
193
- import Ajv from "ajv";
194
- var ajv = new Ajv({ allErrors: true });
195
- var validateWorkflow = ajv.compile(WORKFLOW_SCHEMA);
196
- function parseWorkflow(yamlContent) {
197
- try {
198
- const spec = parse(yamlContent, {
199
- prettyErrors: true,
200
- strict: true
201
- });
202
- if (!validateWorkflow(spec)) {
203
- const errors = validateWorkflow.errors?.map((err) => {
204
- const path = err.instancePath || err.schemaPath;
205
- const message = err.message;
206
- return `${path}: ${message}`;
207
- }).join("; ") || "Unknown validation error";
208
- throw new WorkflowValidationError(`Workflow validation failed: ${errors}`);
209
- }
210
- return spec;
211
- } catch (error) {
212
- if (error instanceof Error) {
213
- if (error.message.includes("line") && error.message.includes("column")) {
214
- throw new WorkflowValidationError(`YAML parsing failed: ${error.message}`);
215
- }
216
- throw new WorkflowValidationError(`Failed to parse workflow: ${error.message}`);
217
- }
218
- throw new WorkflowValidationError("Failed to parse workflow: Unknown error");
219
- }
220
- }
221
- function interpolateTemplate(template, context, options = {}) {
222
- return template.replace(/(\$)?\{\{\s*([^}]+)\s*\}\}/g, (match, dollar, expression) => {
223
- const trimmed = expression.trim();
224
- if (trimmed.includes("__proto__") || trimmed.includes("constructor") || trimmed.includes("prototype")) {
225
- throw new Error(`Invalid template expression (security): ${trimmed}`);
226
- }
227
- let value;
228
- if (trimmed.startsWith("env.") || trimmed.startsWith("secrets.")) {
229
- const key = trimmed.split(".").slice(1).join(".");
230
- const envValue = context.env[key];
231
- if (envValue === undefined) {
232
- throw new Error(`Environment variable not found: ${key}`);
233
- }
234
- value = envValue;
235
- } else if (trimmed.startsWith("var.")) {
236
- const key = trimmed.substring(4);
237
- const varValue = context.vars[key];
238
- if (varValue === undefined) {
239
- throw new Error(`Variable not found: ${key}`);
240
- }
241
- value = varValue;
242
- } else if (trimmed.startsWith("row.")) {
243
- if (!context.row) {
244
- throw new Error("Row variables are only available in map mode");
245
- }
246
- const path = trimmed.substring(4);
247
- const rowValue = getNestedValue(context.row, path);
248
- if (rowValue === undefined) {
249
- throw new Error(`Row attribute not found: ${path}`);
250
- }
251
- value = String(rowValue);
252
- } else {
253
- throw new Error(`Invalid template expression: ${trimmed}`);
254
- }
255
- return options.urlEncode ? encodeURIComponent(value) : value;
256
- });
257
- }
258
- function interpolateObject(obj, context) {
259
- if (typeof obj === "string") {
260
- return interpolateTemplate(obj, context);
261
- }
262
- if (Array.isArray(obj)) {
263
- return obj.map((item) => interpolateObject(item, context));
264
- }
265
- if (obj && typeof obj === "object") {
266
- const result = {};
267
- for (const [key, value] of Object.entries(obj)) {
268
- result[key] = interpolateObject(value, context);
269
- }
270
- return result;
271
- }
272
- return obj;
273
- }
274
- function getNestedValue(obj, path) {
275
- const parts = path.split(".");
276
- for (const part of parts) {
277
- if (part === "__proto__" || part === "constructor" || part === "prototype") {
278
- throw new Error(`Invalid property access (security): ${part}`);
279
- }
280
- }
281
- return parts.reduce((current, key) => {
282
- return current && typeof current === "object" ? current[key] : undefined;
283
- }, obj);
284
- }
285
- function validateWorkflowSemantics(spec) {
286
- const stepIds = new Set;
287
- const outputs = new Set;
288
- const outputNames = new Set;
289
- for (const step of spec.steps) {
290
- if (stepIds.has(step.id)) {
291
- throw new WorkflowValidationError(`Duplicate step ID: ${step.id}`);
292
- }
293
- stepIds.add(step.id);
294
- if (step.out) {
295
- if (outputNames.has(step.out)) {
296
- throw new WorkflowValidationError(`duplicate output name: ${step.out}`, step.id);
297
- }
298
- if (stepIds.has(step.out)) {
299
- throw new WorkflowValidationError(`Output name "${step.out}" conflicts with step ID. Use different names to avoid ambiguity.`, step.id);
300
- }
301
- outputNames.add(step.out);
302
- outputs.add(step.out);
303
- }
304
- }
305
- const graph = new Map;
306
- for (const step of spec.steps) {
307
- graph.set(step.id, step.needs || []);
308
- if (step.type === "source" && step.source.mode === "map") {
309
- if (!step.source.mapFrom || !outputs.has(step.source.mapFrom)) {
310
- throw new WorkflowValidationError(`Step ${step.id} map mode requires valid mapFrom dataset`, step.id);
311
- }
312
- }
313
- if (step.type === "query" && step.from) {
314
- if (!outputs.has(step.from)) {
315
- throw new WorkflowValidationError(`Step ${step.id} references unknown dataset: ${step.from}`, step.id);
316
- }
317
- }
318
- }
319
- const visited = new Set;
320
- const recursionStack = new Set;
321
- function hasCycle(nodeId) {
322
- if (recursionStack.has(nodeId)) {
323
- return true;
324
- }
325
- if (visited.has(nodeId)) {
326
- return false;
327
- }
328
- visited.add(nodeId);
329
- recursionStack.add(nodeId);
330
- const dependencies = graph.get(nodeId) || [];
331
- for (const dep of dependencies) {
332
- if (hasCycle(dep)) {
333
- return true;
334
- }
335
- }
336
- recursionStack.delete(nodeId);
337
- return false;
338
- }
339
- for (const stepId of stepIds) {
340
- if (hasCycle(stepId)) {
341
- throw new WorkflowValidationError("Circular dependency detected in workflow");
342
- }
343
- }
344
- }
345
- // src/workflows/planner.ts
346
- function createExecutionPlan(spec) {
347
- const steps = spec.steps;
348
- const stepMap = new Map;
349
- for (const step of steps) {
350
- stepMap.set(step.id, step);
351
- }
352
- validateNeedsReferences(steps);
353
- const order = topologicalSort(steps);
354
- return {
355
- steps,
356
- order
357
- };
358
- }
359
- function validateNeedsReferences(steps) {
360
- const stepIds = new Set(steps.map((s) => s.id));
361
- const outputs = new Map(steps.map((s) => [s.out, s.id]).filter(([out, id]) => out && id));
362
- for (const step of steps) {
363
- for (const need of step.needs ?? []) {
364
- if (!stepIds.has(need)) {
365
- let hint = "";
366
- if (outputs.has(need)) {
367
- hint = ` Did you mean step id "${outputs.get(need)}"?`;
368
- } else {
369
- const stepList = Array.from(stepIds).join(", ");
370
- const datasetList = Array.from(outputs.keys()).join(", ");
371
- hint = ` Steps: ${stepList}. Datasets: ${datasetList}.`;
372
- }
373
- throw new WorkflowValidationError(`Step ${step.id} depends on unknown step: ${need}.${hint}`, step.id);
374
- }
375
- }
376
- }
377
- }
378
- function topologicalSort(steps) {
379
- const graph = new Map;
380
- const inDegree = new Map;
381
- const result = [];
382
- for (const step of steps) {
383
- graph.set(step.id, []);
384
- inDegree.set(step.id, 0);
385
- }
386
- for (const step of steps) {
387
- const dependencies = step.needs || [];
388
- for (const dep of dependencies) {
389
- if (!graph.has(dep)) {
390
- throw new WorkflowValidationError(`Step ${step.id} depends on unknown step: ${dep}`, step.id);
391
- }
392
- graph.get(dep).push(step.id);
393
- inDegree.set(step.id, (inDegree.get(step.id) || 0) + 1);
394
- }
395
- }
396
- const queue = [];
397
- for (const [stepId, degree] of inDegree) {
398
- if (degree === 0) {
399
- queue.push(stepId);
400
- }
401
- }
402
- while (queue.length > 0) {
403
- queue.sort((a, b) => {
404
- const stepA = steps.find((s) => s.id === a);
405
- const stepB = steps.find((s) => s.id === b);
406
- if (stepA.type === "source" && stepB.type !== "source")
407
- return -1;
408
- if (stepB.type === "source" && stepA.type !== "source")
409
- return 1;
410
- return a.localeCompare(b);
411
- });
412
- const current = queue.shift();
413
- result.push(current);
414
- const dependents = graph.get(current) || [];
415
- for (const dependent of dependents) {
416
- const newDegree = (inDegree.get(dependent) || 0) - 1;
417
- inDegree.set(dependent, newDegree);
418
- if (newDegree === 0) {
419
- queue.push(dependent);
420
- }
421
- }
422
- }
423
- if (result.length !== steps.length) {
424
- const remaining = steps.filter((step) => !result.includes(step.id)).map((step) => step.id);
425
- const cycle = findMinimalCycle(remaining, graph);
426
- const cycleStr = cycle ? cycle.join(" → ") : remaining.join(", ");
427
- throw new WorkflowValidationError(`Workflow has circular dependencies: ${cycleStr}`);
428
- }
429
- return result;
430
- }
431
- function findMinimalCycle(remaining, graph) {
432
- const visited = new Set;
433
- const recursionStack = new Set;
434
- const path = [];
435
- function dfs(node) {
436
- if (recursionStack.has(node)) {
437
- const cycleStart = path.indexOf(node);
438
- return path.slice(cycleStart).concat([node]);
439
- }
440
- if (visited.has(node)) {
441
- return null;
442
- }
443
- visited.add(node);
444
- recursionStack.add(node);
445
- path.push(node);
446
- const dependencies = graph.get(node) || [];
447
- for (const dep of dependencies) {
448
- if (remaining.includes(dep)) {
449
- const cycle = dfs(dep);
450
- if (cycle) {
451
- return cycle;
452
- }
453
- }
454
- }
455
- path.pop();
456
- recursionStack.delete(node);
457
- return null;
458
- }
459
- for (const node of remaining) {
460
- if (!visited.has(node)) {
461
- const cycle = dfs(node);
462
- if (cycle) {
463
- return cycle;
464
- }
465
- }
466
- }
467
- return null;
468
- }
469
- function validateExecutionPlan(plan) {
470
- const { steps, order } = plan;
471
- const stepMap = new Map(steps.map((s) => [s.id, s]));
472
- const availableDatasets = new Set;
473
- for (const stepId of order) {
474
- const step = stepMap.get(stepId);
475
- if (!step) {
476
- throw new WorkflowValidationError(`Step not found in plan: ${stepId}`);
477
- }
478
- const dependencies = step.needs || [];
479
- for (const dep of dependencies) {
480
- const depStep = stepMap.get(dep);
481
- if (!depStep || !depStep.out) {
482
- throw new WorkflowValidationError(`Step ${stepId} depends on ${dep} which doesn't produce output`, stepId);
483
- }
484
- if (!availableDatasets.has(depStep.out)) {
485
- throw new WorkflowValidationError(`Step ${stepId} depends on dataset ${depStep.out} which is not yet available`, stepId);
486
- }
487
- }
488
- validateStepRequirements(step, availableDatasets);
489
- if (step.out) {
490
- availableDatasets.add(step.out);
491
- }
492
- }
493
- }
494
- function validateStepRequirements(step, availableDatasets) {
495
- switch (step.type) {
496
- case "source":
497
- if (step.source.mode === "map") {
498
- const mapFrom = step.source.mapFrom;
499
- if (!mapFrom || !availableDatasets.has(mapFrom)) {
500
- throw new WorkflowValidationError(`Step ${step.id} map mode requires dataset ${mapFrom} to be available`, step.id);
501
- }
502
- }
503
- break;
504
- case "query":
505
- if (step.from && !availableDatasets.has(step.from)) {
506
- throw new WorkflowValidationError(`Step ${step.id} references unavailable dataset: ${step.from}`, step.id);
507
- }
508
- break;
509
- case "output":
510
- break;
511
- }
512
- }
513
- function getStepDependencies(step, plan) {
514
- const { steps, order } = plan;
515
- const stepMap = new Map(steps.map((s) => [s.id, s]));
516
- const dependencies = [];
517
- const stepIndex = order.indexOf(step.id);
518
- if (stepIndex === -1) {
519
- throw new WorkflowValidationError(`Step ${step.id} not found in execution plan`);
520
- }
521
- for (let i = 0;i < stepIndex; i++) {
522
- const stepIdAtIndex = order[i];
523
- if (!stepIdAtIndex)
524
- continue;
525
- const depStep = stepMap.get(stepIdAtIndex);
526
- if (depStep && (step.needs || []).includes(depStep.id)) {
527
- dependencies.push(depStep);
528
- }
529
- }
530
- return dependencies;
531
- }
532
- function getParallelGroups(plan) {
533
- const { steps, order } = plan;
534
- const stepMap = new Map(steps.map((s) => [s.id, s]));
535
- const groups = [];
536
- const processed = new Set;
537
- for (const stepId of order) {
538
- if (processed.has(stepId))
539
- continue;
540
- const step = stepMap.get(stepId);
541
- const group = [step];
542
- processed.add(stepId);
543
- for (const otherStepId of order) {
544
- if (processed.has(otherStepId))
545
- continue;
546
- const otherStep = stepMap.get(otherStepId);
547
- if (canRunInParallel(step, otherStep, stepMap)) {
548
- group.push(otherStep);
549
- processed.add(otherStepId);
550
- }
551
- }
552
- groups.push(group);
553
- }
554
- return groups;
555
- }
556
- function canRunInParallel(stepA, stepB, stepMap) {
557
- const depsA = new Set(stepA.needs || []);
558
- const depsB = new Set(stepB.needs || []);
559
- if (depsA.has(stepB.id) || depsB.has(stepA.id)) {
560
- return false;
561
- }
562
- if (stepA.out && depsB.has(stepA.id))
563
- return false;
564
- if (stepB.out && depsA.has(stepB.id))
565
- return false;
566
- if (stepA.type === "source" && stepA.source.mode === "map") {
567
- if (stepA.source.mapFrom === stepB.out)
568
- return false;
569
- }
570
- if (stepB.type === "source" && stepB.source.mode === "map") {
571
- if (stepB.source.mapFrom === stepA.out)
572
- return false;
573
- }
574
- return true;
575
- }
576
- // src/workflows/runners.ts
577
- import { writeFile, mkdir, readFile } from "fs/promises";
578
- import { dirname, resolve } from "path";
579
-
580
- // src/store/eav-store.ts
581
- function* flatten(obj, base = "") {
582
- if (Array.isArray(obj)) {
583
- for (const v of obj) {
584
- yield* flatten(v, base);
585
- }
586
- } else if (obj && typeof obj === "object") {
587
- for (const [k, v] of Object.entries(obj)) {
588
- yield* flatten(v, base ? `${base}.${k}` : k);
589
- }
590
- } else {
591
- yield [base, obj];
592
- }
593
- }
594
- function jsonEntityFacts(entityId, root, type) {
595
- const facts = [{ e: entityId, a: "type", v: type }];
596
- for (const [a, v] of flatten(root)) {
597
- if (v === undefined || v === null)
598
- continue;
599
- if (Array.isArray(v)) {
600
- for (const el of v) {
601
- facts.push({ e: entityId, a, v: el });
602
- }
603
- } else if (typeof v === "object") {} else {
604
- facts.push({ e: entityId, a, v });
605
- }
606
- }
607
- return facts;
608
- }
609
-
610
- class EAVStore {
611
- facts = [];
612
- links = [];
613
- catalog = new Map;
614
- eavIndex = new Map;
615
- aevIndex = new Map;
616
- aveIndex = new Map;
617
- linkIndex = new Map;
618
- linkReverseIndex = new Map;
619
- linkAttrIndex = new Map;
620
- distinct = new Map;
621
- addFacts(facts) {
622
- for (let i = 0;i < facts.length; i++) {
623
- const fact = facts[i];
624
- if (fact) {
625
- this.facts.push(fact);
626
- this.updateIndexes(fact, this.facts.length - 1);
627
- this.updateCatalog(fact);
628
- }
629
- }
630
- }
631
- addLinks(links) {
632
- for (const link of links) {
633
- this.links.push(link);
634
- this.updateLinkIndexes(link);
635
- }
636
- }
637
- deleteFacts(factsToDelete) {
638
- for (const fact of factsToDelete) {
639
- const valueKey = this.valueKey(fact.v);
640
- const indices = this.aveIndex.get(fact.a)?.get(valueKey);
641
- if (!indices)
642
- continue;
643
- let foundIdx = -1;
644
- for (const idx of indices) {
645
- const storedFact = this.facts[idx];
646
- if (storedFact && storedFact.e === fact.e && storedFact.a === fact.a) {
647
- foundIdx = idx;
648
- break;
649
- }
650
- }
651
- if (foundIdx !== -1) {
652
- this.facts[foundIdx] = undefined;
653
- this.eavIndex.get(fact.e)?.get(fact.a)?.delete(foundIdx);
654
- this.aevIndex.get(fact.a)?.get(fact.e)?.delete(foundIdx);
655
- this.aveIndex.get(fact.a)?.get(valueKey)?.delete(foundIdx);
656
- const entry = this.catalog.get(fact.a);
657
- if (entry) {}
658
- }
659
- }
660
- }
661
- deleteLinks(linksToDelete) {
662
- for (const link of linksToDelete) {
663
- const initialLen = this.links.length;
664
- this.links = this.links.filter((l) => !(l.e1 === link.e1 && l.a === link.a && l.e2 === link.e2));
665
- if (this.links.length < initialLen) {
666
- this.linkIndex.get(link.e1)?.get(link.a)?.delete(link.e2);
667
- this.linkReverseIndex.get(link.e2)?.get(link.a)?.delete(link.e1);
668
- const attrPairs = this.linkAttrIndex.get(link.a);
669
- if (attrPairs) {
670
- for (const pair of attrPairs) {
671
- if (pair[0] === link.e1 && pair[1] === link.e2) {
672
- attrPairs.delete(pair);
673
- break;
674
- }
675
- }
676
- }
677
- }
678
- }
679
- }
680
- updateIndexes(fact, index) {
681
- if (!this.eavIndex.has(fact.e)) {
682
- this.eavIndex.set(fact.e, new Map);
683
- }
684
- if (!this.eavIndex.get(fact.e).has(fact.a)) {
685
- this.eavIndex.get(fact.e).set(fact.a, new Set);
686
- }
687
- this.eavIndex.get(fact.e).get(fact.a).add(index);
688
- if (!this.aevIndex.has(fact.a)) {
689
- this.aevIndex.set(fact.a, new Map);
690
- }
691
- if (!this.aevIndex.get(fact.a).has(fact.e)) {
692
- this.aevIndex.get(fact.a).set(fact.e, new Set);
693
- }
694
- this.aevIndex.get(fact.a).get(fact.e).add(index);
695
- if (!this.aveIndex.has(fact.a)) {
696
- this.aveIndex.set(fact.a, new Map);
697
- }
698
- const valueKey = this.valueKey(fact.v);
699
- if (!this.aveIndex.get(fact.a).has(valueKey)) {
700
- this.aveIndex.get(fact.a).set(valueKey, new Set);
701
- }
702
- this.aveIndex.get(fact.a).get(valueKey).add(index);
703
- }
704
- updateLinkIndexes(link) {
705
- if (!this.linkIndex.has(link.e1)) {
706
- this.linkIndex.set(link.e1, new Map);
707
- }
708
- const e1Attrs = this.linkIndex.get(link.e1);
709
- if (!e1Attrs.has(link.a)) {
710
- e1Attrs.set(link.a, new Set);
711
- }
712
- e1Attrs.get(link.a).add(link.e2);
713
- if (!this.linkReverseIndex.has(link.e2)) {
714
- this.linkReverseIndex.set(link.e2, new Map);
715
- }
716
- const e2Attrs = this.linkReverseIndex.get(link.e2);
717
- if (!e2Attrs.has(link.a)) {
718
- e2Attrs.set(link.a, new Set);
719
- }
720
- e2Attrs.get(link.a).add(link.e1);
721
- if (!this.linkAttrIndex.has(link.a)) {
722
- this.linkAttrIndex.set(link.a, new Set);
723
- }
724
- this.linkAttrIndex.get(link.a).add([link.e1, link.e2]);
725
- }
726
- valueKey(v) {
727
- if (v instanceof Date)
728
- return `date:${v.toISOString()}`;
729
- return `${typeof v}:${v}`;
730
- }
731
- updateCatalog(fact) {
732
- const entry = this.catalog.get(fact.a) || {
733
- attribute: fact.a,
734
- type: this.inferType(fact.v),
735
- cardinality: "one",
736
- distinctCount: 0,
737
- examples: []
738
- };
739
- const factType = this.inferType(fact.v);
740
- if (entry.type !== factType && entry.type !== "mixed") {
741
- entry.type = "mixed";
742
- }
743
- const entityAttrs = this.eavIndex.get(fact.e)?.get(fact.a);
744
- if (entityAttrs && entityAttrs.size > 1) {
745
- entry.cardinality = "many";
746
- }
747
- const k = this.valueKey(fact.v);
748
- const s = this.distinct.get(fact.a) || (this.distinct.set(fact.a, new Set), this.distinct.get(fact.a));
749
- s.add(k);
750
- entry.distinctCount = s.size;
751
- if (entry.examples.length < 5 && !entry.examples.includes(fact.v)) {
752
- entry.examples.push(fact.v);
753
- }
754
- if (typeof fact.v === "number") {
755
- entry.min = Math.min(entry.min ?? fact.v, fact.v);
756
- entry.max = Math.max(entry.max ?? fact.v, fact.v);
757
- }
758
- this.catalog.set(fact.a, entry);
759
- }
760
- inferType(v) {
761
- if (typeof v === "string")
762
- return "string";
763
- if (typeof v === "number")
764
- return "number";
765
- if (typeof v === "boolean")
766
- return "boolean";
767
- if (v instanceof Date)
768
- return "date";
769
- return "mixed";
770
- }
771
- getFactsByEntity(entity) {
772
- const indices = this.eavIndex.get(entity);
773
- if (!indices)
774
- return [];
775
- const result = [];
776
- for (const attrIndices of indices.values()) {
777
- for (const idx of attrIndices) {
778
- const fact = this.facts[idx];
779
- if (fact) {
780
- result.push(fact);
781
- }
782
- }
783
- }
784
- return result;
785
- }
786
- getFactsByAttribute(attribute) {
787
- const indices = this.aevIndex.get(attribute);
788
- if (!indices)
789
- return [];
790
- const result = [];
791
- for (const entityIndices of indices.values()) {
792
- for (const idx of entityIndices) {
793
- const fact = this.facts[idx];
794
- if (fact) {
795
- result.push(fact);
796
- }
797
- }
798
- }
799
- return result;
800
- }
801
- getFactsByValue(attribute, value) {
802
- const indices = this.aveIndex.get(attribute)?.get(this.valueKey(value));
803
- if (!indices)
804
- return [];
805
- return Array.from(indices).map((idx) => this.facts[idx]).filter((fact) => fact !== undefined);
806
- }
807
- getCatalog() {
808
- return Array.from(this.catalog.values());
809
- }
810
- getCatalogEntry(attribute) {
811
- return this.catalog.get(attribute);
812
- }
813
- getAllFacts() {
814
- return this.facts.filter((f) => f !== undefined);
815
- }
816
- getAllLinks() {
817
- return [...this.links];
818
- }
819
- getLinksByEntity(entity) {
820
- const results = [];
821
- const forwardLinks = this.linkIndex.get(entity);
822
- if (forwardLinks) {
823
- for (const [attr, targets] of forwardLinks) {
824
- for (const target of targets) {
825
- results.push({ e1: entity, a: attr, e2: target });
826
- }
827
- }
828
- }
829
- const reverseLinks = this.linkReverseIndex.get(entity);
830
- if (reverseLinks) {
831
- for (const [attr, sources] of reverseLinks) {
832
- for (const source of sources) {
833
- results.push({ e1: source, a: attr, e2: entity });
834
- }
835
- }
836
- }
837
- return results;
838
- }
839
- getLinksByAttribute(attribute) {
840
- const results = [];
841
- const links = this.linkAttrIndex.get(attribute);
842
- if (links) {
843
- for (const [e1, e2] of links) {
844
- results.push({ e1, a: attribute, e2 });
845
- }
846
- }
847
- return results;
848
- }
849
- getLinksByEntityAndAttribute(entity, attribute) {
850
- const results = [];
851
- const attrs = this.linkIndex.get(entity);
852
- if (attrs) {
853
- const targets = attrs.get(attribute);
854
- if (targets) {
855
- for (const target of targets) {
856
- results.push({ e1: entity, a: attribute, e2: target });
857
- }
858
- }
859
- }
860
- return results;
861
- }
862
- getStats() {
863
- return {
864
- totalFacts: this.facts.length,
865
- totalLinks: this.links.length,
866
- uniqueEntities: this.eavIndex.size,
867
- uniqueAttributes: this.aevIndex.size,
868
- catalogEntries: this.catalog.size
869
- };
870
- }
871
- snapshot() {
872
- return {
873
- facts: this.facts.filter((f) => f !== undefined),
874
- links: [...this.links],
875
- catalog: this.getCatalog()
876
- };
877
- }
878
- restore(snapshot) {
879
- this.facts = [];
880
- this.links = [];
881
- this.catalog.clear();
882
- this.eavIndex.clear();
883
- this.aevIndex.clear();
884
- this.aveIndex.clear();
885
- this.linkIndex.clear();
886
- this.linkReverseIndex.clear();
887
- this.linkAttrIndex.clear();
888
- this.distinct.clear();
889
- this.addFacts(snapshot.facts);
890
- this.addLinks(snapshot.links);
891
- if (snapshot.catalog) {
892
- for (const entry of snapshot.catalog) {
893
- this.catalog.set(entry.attribute, entry);
894
- }
895
- }
896
- }
897
- }
898
-
899
- // src/query/datalog-evaluator.ts
900
- class ExternalPredicates {
901
- static regex(str, pattern) {
902
- if (typeof pattern === "string") {
903
- try {
904
- const regexMatch = pattern.match(/^\/(.*)\/([gimuy]*)$/);
905
- if (regexMatch) {
906
- const [, regexPattern, flags] = regexMatch;
907
- const regex = new RegExp(regexPattern, flags || "i");
908
- return regex.test(str);
909
- }
910
- return new RegExp(pattern, "i").test(str);
911
- } catch (e) {
912
- console.warn(`Invalid regex pattern: ${pattern}`, e);
913
- return str.toLowerCase().includes(pattern.toLowerCase());
914
- }
915
- }
916
- return pattern.test(str);
917
- }
918
- static gt(a, b) {
919
- return a > b;
920
- }
921
- static lt(a, b) {
922
- return a < b;
923
- }
924
- static between(val, min, max) {
925
- return val >= min && val <= max;
926
- }
927
- static contains(str, substr) {
928
- return str.toLowerCase().includes(substr.toLowerCase());
929
- }
930
- static after(a, b) {
931
- return a > b;
932
- }
933
- static betweenDate(d, start, end) {
934
- return d >= start && d <= end;
935
- }
936
- static sum(values) {
937
- return values.reduce((a, b) => a + b, 0);
938
- }
939
- static count(values) {
940
- return values.length;
941
- }
942
- static avg(values) {
943
- return values.length > 0 ? this.sum(values) / values.length : 0;
944
- }
945
- }
946
-
947
- class DatalogEvaluator {
948
- store;
949
- rules = [];
950
- ws = new Map;
951
- constructor(store) {
952
- this.store = store;
953
- }
954
- addRule(rule) {
955
- this.rules.push(rule);
956
- }
957
- seedBaseFacts() {
958
- const attrRows = [];
959
- for (const f of this.store.getAllFacts()) {
960
- if (f) {
961
- attrRows.push([f.e, f.a, f.v]);
962
- }
963
- }
964
- this.ws.set("attr", attrRows);
965
- const linkRows = [];
966
- for (const link of this.store.getAllLinks()) {
967
- linkRows.push([link.e1, link.a, link.e2]);
968
- }
969
- this.ws.set("link", linkRows);
970
- }
971
- pushDerived(predicate, tuple) {
972
- const bucket = this.ws.get(predicate) || [];
973
- if (!this.ws.has(predicate)) {
974
- this.ws.set(predicate, bucket);
975
- }
976
- const key = JSON.stringify(tuple);
977
- if (!bucket._keys) {
978
- bucket._keys = new Set;
979
- }
980
- const keys = bucket._keys;
981
- if (keys.has(key)) {
982
- return false;
983
- }
984
- bucket.push(tuple);
985
- keys.add(key);
986
- return true;
987
- }
988
- evaluate(query, limit) {
989
- const startTime = performance.now();
990
- const trace = [];
991
- this.seedBaseFacts();
992
- let added = true;
993
- let iterations = 0;
994
- const maxIterations = 100;
995
- while (added && iterations < maxIterations) {
996
- added = false;
997
- for (const rule of this.rules) {
998
- const bindings2 = this.findBindingsOverWS(rule.body);
999
- for (const binding of bindings2) {
1000
- const head = this.substitute(rule.head, binding);
1001
- const tuple = head.terms.map((term) => this.resolveTerm(term, binding));
1002
- if (this.pushDerived(head.predicate, tuple)) {
1003
- added = true;
1004
- }
1005
- }
1006
- }
1007
- iterations++;
1008
- }
1009
- const bindings = this.findBindingsOverWS(query.goals, trace, limit);
1010
- return {
1011
- bindings,
1012
- executionTime: performance.now() - startTime,
1013
- plan: `Semi-naive evaluation: ${iterations} iterations, ${this.getTotalFacts()} facts`,
1014
- trace
1015
- };
1016
- }
1017
- getTotalFacts() {
1018
- let total = 0;
1019
- for (const tuples of this.ws.values()) {
1020
- total += tuples.length;
1021
- }
1022
- return total;
1023
- }
1024
- findBindingsOverWS(goals, trace, limit) {
1025
- if (goals.length === 0) {
1026
- return [{}];
1027
- }
1028
- let bindings = [{}];
1029
- for (const goal of goals) {
1030
- const goalStartTime = performance.now();
1031
- const newBindings = [];
1032
- outer:
1033
- for (const binding of bindings) {
1034
- const goalBindings = this.evaluateGoal(goal, binding);
1035
- for (const goalBinding of goalBindings) {
1036
- const merged = { ...binding, ...goalBinding };
1037
- let hasConflict = false;
1038
- for (const key in merged) {
1039
- if (binding[key] !== undefined && goalBinding[key] !== undefined && binding[key] !== goalBinding[key]) {
1040
- hasConflict = true;
1041
- break;
1042
- }
1043
- }
1044
- if (!hasConflict) {
1045
- newBindings.push(merged);
1046
- if (limit !== undefined && newBindings.length >= limit)
1047
- break outer;
1048
- }
1049
- }
1050
- }
1051
- bindings = newBindings;
1052
- if (trace) {
1053
- trace.push({
1054
- goal: `${goal.predicate}(${goal.terms.join(", ")})`,
1055
- bindingsCount: bindings.length,
1056
- durationMs: performance.now() - goalStartTime
1057
- });
1058
- }
1059
- }
1060
- const uniqueBindings = new Map;
1061
- for (const binding of bindings) {
1062
- const key = JSON.stringify(binding);
1063
- uniqueBindings.set(key, binding);
1064
- }
1065
- return Array.from(uniqueBindings.values());
1066
- }
1067
- evaluateGoal(goal, binding) {
1068
- const { predicate, terms } = goal;
1069
- if (predicate === "not") {
1070
- const inner = goal.terms[0];
1071
- const res = this.evaluateGoal(inner, binding);
1072
- return res.length === 0 ? [binding] : [];
1073
- }
1074
- if (predicate === "attr") {
1075
- return this.evaluateAttrPredicate(terms, binding);
1076
- }
1077
- if (predicate === "link") {
1078
- return this.evaluateLinkPredicate(terms, binding);
1079
- }
1080
- if (predicate === "gt" || predicate === "lt" || predicate === "between" || predicate === ">" || predicate === "<" || predicate === ">=" || predicate === "<=" || predicate === "=" || predicate === "!=") {
1081
- return this.evaluateComparisonPredicate(goal, binding);
1082
- }
1083
- if (predicate === "regex" || predicate === "contains") {
1084
- return this.evaluateStringPredicate(goal, binding);
1085
- }
1086
- if (predicate === "after" || predicate === "betweenDate") {
1087
- return this.evaluateDatePredicate(goal, binding);
1088
- }
1089
- if (predicate.startsWith("ext_")) {
1090
- return this.evaluateExternalPredicate(goal, binding);
1091
- }
1092
- return this.evalPredicateFromWS(predicate, terms, binding);
1093
- }
1094
- evalPredicateFromWS(predicate, terms, binding) {
1095
- const rows = this.ws.get(predicate) || [];
1096
- const results = [];
1097
- rowloop:
1098
- for (const row of rows) {
1099
- const newBinding = { ...binding };
1100
- for (let i = 0;i < terms.length; i++) {
1101
- const term = terms[i];
1102
- const val = row[i];
1103
- if (typeof term === "string" && term.startsWith("?")) {
1104
- const bound = newBinding[term];
1105
- if (bound !== undefined && bound !== val) {
1106
- continue rowloop;
1107
- }
1108
- newBinding[term] = val;
1109
- } else {
1110
- if (term !== val) {
1111
- continue rowloop;
1112
- }
1113
- }
1114
- }
1115
- results.push(newBinding);
1116
- }
1117
- return results;
1118
- }
1119
- evaluateAttrPredicate(terms, binding) {
1120
- if (terms.length !== 3)
1121
- return [];
1122
- const [entity, attribute, value] = terms.map((term) => this.resolveTerm(term, binding));
1123
- const results = [];
1124
- if (typeof entity === "string" && !entity.startsWith("?") && typeof attribute === "string" && !attribute.startsWith("?") && (typeof value !== "string" || !value.startsWith("?"))) {
1125
- const facts = this.store.getFactsByValue(attribute, value);
1126
- for (const fact of facts) {
1127
- if (fact.e === entity) {
1128
- results.push({});
1129
- }
1130
- }
1131
- return results;
1132
- }
1133
- if (typeof entity === "string" && !entity.startsWith("?") && typeof attribute === "string" && !attribute.startsWith("?")) {
1134
- const facts = this.store.getFactsByEntity(entity);
1135
- for (const fact of facts) {
1136
- if (fact.a === attribute) {
1137
- const newBinding = { ...binding };
1138
- if (typeof value === "string" && value.startsWith("?")) {
1139
- newBinding[value] = fact.v;
1140
- results.push(newBinding);
1141
- } else if (fact.v === value) {
1142
- results.push(newBinding);
1143
- }
1144
- }
1145
- }
1146
- return results;
1147
- }
1148
- if (typeof attribute === "string" && !attribute.startsWith("?")) {
1149
- const facts = this.store.getFactsByAttribute(attribute);
1150
- for (const fact of facts) {
1151
- const newBinding = { ...binding };
1152
- if (typeof entity === "string" && !entity.startsWith("?") && fact.e !== entity) {
1153
- continue;
1154
- }
1155
- if ((typeof value !== "string" || !value.startsWith("?")) && fact.v !== value) {
1156
- continue;
1157
- }
1158
- if (typeof entity === "string" && entity.startsWith("?")) {
1159
- newBinding[entity] = fact.e;
1160
- }
1161
- if (typeof value === "string" && value.startsWith("?")) {
1162
- newBinding[value] = fact.v;
1163
- }
1164
- results.push(newBinding);
1165
- }
1166
- return results;
1167
- }
1168
- return [];
1169
- }
1170
- evaluateLinkPredicate(terms, binding) {
1171
- if (terms.length !== 3)
1172
- return [];
1173
- const [e1, a, e2] = terms;
1174
- const results = [];
1175
- const links = this.store.getAllLinks();
1176
- for (const link of links) {
1177
- const newBinding = { ...binding };
1178
- let matches = true;
1179
- if (typeof e1 === "string" && !e1.startsWith("?")) {
1180
- if (link.e1 !== e1)
1181
- continue;
1182
- } else if (typeof e1 === "string" && e1.startsWith("?")) {
1183
- newBinding[e1] = link.e1;
1184
- }
1185
- if (typeof a === "string" && !a.startsWith("?")) {
1186
- if (link.a !== a)
1187
- continue;
1188
- } else if (typeof a === "string" && a.startsWith("?")) {
1189
- newBinding[a] = link.a;
1190
- }
1191
- if (typeof e2 === "string" && !e2.startsWith("?")) {
1192
- if (link.e2 !== e2)
1193
- continue;
1194
- } else if (typeof e2 === "string" && e2.startsWith("?")) {
1195
- newBinding[e2] = link.e2;
1196
- }
1197
- if (matches) {
1198
- results.push(newBinding);
1199
- }
1200
- }
1201
- return results;
1202
- }
1203
- evaluateComparisonPredicate(goal, binding) {
1204
- const { predicate, terms } = goal;
1205
- if (terms.length < 2)
1206
- return [];
1207
- const left = this.resolveTerm(terms[0], binding);
1208
- const right = this.resolveTerm(terms[1], binding);
1209
- let leftNum = left;
1210
- let rightNum = right;
1211
- if (typeof left === "string" && !isNaN(Number(left))) {
1212
- leftNum = Number(left);
1213
- }
1214
- if (typeof right === "string" && !isNaN(Number(right))) {
1215
- rightNum = Number(right);
1216
- }
1217
- if (typeof leftNum !== "number" || typeof rightNum !== "number")
1218
- return [];
1219
- let result = false;
1220
- switch (predicate) {
1221
- case "gt":
1222
- case ">":
1223
- result = ExternalPredicates.gt(leftNum, rightNum);
1224
- break;
1225
- case "lt":
1226
- case "<":
1227
- result = ExternalPredicates.lt(leftNum, rightNum);
1228
- break;
1229
- case ">=":
1230
- result = leftNum >= rightNum;
1231
- break;
1232
- case "<=":
1233
- result = leftNum <= rightNum;
1234
- break;
1235
- case "=":
1236
- result = leftNum === rightNum;
1237
- break;
1238
- case "!=":
1239
- result = leftNum !== rightNum;
1240
- break;
1241
- case "between":
1242
- if (terms.length >= 3) {
1243
- const max = this.resolveTerm(terms[2], binding);
1244
- let maxNum = max;
1245
- if (typeof max === "string" && !isNaN(Number(max))) {
1246
- maxNum = Number(max);
1247
- }
1248
- if (typeof maxNum === "number") {
1249
- result = ExternalPredicates.between(leftNum, rightNum, maxNum);
1250
- }
1251
- }
1252
- break;
1253
- }
1254
- return result ? [{}] : [];
1255
- }
1256
- evaluateStringPredicate(goal, binding) {
1257
- const { predicate, terms } = goal;
1258
- if (terms.length < 2)
1259
- return [];
1260
- const str = this.resolveTerm(terms[0], binding);
1261
- const pattern = this.resolveTerm(terms[1], binding);
1262
- if (typeof str !== "string" || typeof pattern !== "string")
1263
- return [];
1264
- let result = false;
1265
- switch (predicate) {
1266
- case "regex":
1267
- result = ExternalPredicates.regex(str, pattern);
1268
- break;
1269
- case "contains":
1270
- result = ExternalPredicates.contains(str, pattern);
1271
- break;
1272
- }
1273
- return result ? [{}] : [];
1274
- }
1275
- evaluateDatePredicate(goal, binding) {
1276
- const { predicate, terms } = goal;
1277
- if (terms.length < 2)
1278
- return [];
1279
- const left = this.resolveTerm(terms[0], binding);
1280
- const right = this.resolveTerm(terms[1], binding);
1281
- if (!(left instanceof Date) || !(right instanceof Date))
1282
- return [];
1283
- let result = false;
1284
- switch (predicate) {
1285
- case "after":
1286
- result = ExternalPredicates.after(left, right);
1287
- break;
1288
- case "betweenDate":
1289
- if (terms.length >= 3) {
1290
- const end = this.resolveTerm(terms[2], binding);
1291
- if (end instanceof Date) {
1292
- result = ExternalPredicates.betweenDate(left, right, end);
1293
- }
1294
- }
1295
- break;
1296
- }
1297
- return result ? [{}] : [];
1298
- }
1299
- evaluateExternalPredicate(goal, binding) {
1300
- const { predicate, terms } = goal;
1301
- const resolvedTerms = terms.map((term) => this.resolveTerm(term, binding));
1302
- let result = false;
1303
- switch (predicate) {
1304
- case "ext_regex":
1305
- if (resolvedTerms.length >= 2 && typeof resolvedTerms[0] === "string") {
1306
- result = ExternalPredicates.regex(resolvedTerms[0], resolvedTerms[1]);
1307
- }
1308
- break;
1309
- case "ext_gt":
1310
- if (resolvedTerms.length >= 2 && typeof resolvedTerms[0] === "number" && typeof resolvedTerms[1] === "number") {
1311
- result = ExternalPredicates.gt(resolvedTerms[0], resolvedTerms[1]);
1312
- }
1313
- break;
1314
- case "ext_between":
1315
- if (resolvedTerms.length >= 3 && typeof resolvedTerms[0] === "number" && typeof resolvedTerms[1] === "number" && typeof resolvedTerms[2] === "number") {
1316
- result = ExternalPredicates.between(resolvedTerms[0], resolvedTerms[1], resolvedTerms[2]);
1317
- }
1318
- break;
1319
- case "ext_contains":
1320
- if (resolvedTerms.length >= 2 && typeof resolvedTerms[0] === "string") {
1321
- result = ExternalPredicates.contains(resolvedTerms[0], resolvedTerms[1]);
1322
- }
1323
- break;
1324
- }
1325
- return result ? [{}] : [];
1326
- }
1327
- resolveTerm(term, binding) {
1328
- if (typeof term === "string" && term.startsWith("?")) {
1329
- return binding[term] || term;
1330
- }
1331
- return term;
1332
- }
1333
- substitute(atom, binding) {
1334
- return {
1335
- predicate: atom.predicate,
1336
- terms: atom.terms.map((term) => this.resolveTerm(term, binding))
1337
- };
1338
- }
1339
- }
1340
-
1341
- // src/query/attribute-resolver.ts
1342
- class AttributeResolver {
1343
- schema = {};
1344
- buildSchema(catalog) {
1345
- this.schema = {};
1346
- for (const entry of catalog) {
1347
- const entityType = "default";
1348
- const attributeName = entry.attribute;
1349
- if (!this.schema[entityType]) {
1350
- this.schema[entityType] = {};
1351
- }
1352
- this.schema[entityType][attributeName] = {
1353
- type: entry.type,
1354
- distinctCount: entry.distinctCount,
1355
- examples: entry.examples
1356
- };
1357
- }
1358
- }
1359
- resolveAttribute(entityType, queryAttribute) {
1360
- const entitySchema = this.schema[entityType];
1361
- if (!entitySchema) {
1362
- return null;
1363
- }
1364
- const queryLower = queryAttribute.toLowerCase();
1365
- if (entitySchema[queryAttribute]) {
1366
- return queryAttribute;
1367
- }
1368
- for (const [actualAttribute] of Object.entries(entitySchema)) {
1369
- if (actualAttribute.toLowerCase() === queryLower) {
1370
- return actualAttribute;
1371
- }
1372
- }
1373
- return null;
1374
- }
1375
- validateQuery(entityType, attributes) {
1376
- const errors = [];
1377
- const resolved = new Map;
1378
- for (const attr of attributes) {
1379
- const resolvedAttr = this.resolveAttribute(entityType, attr);
1380
- if (resolvedAttr) {
1381
- resolved.set(attr, resolvedAttr);
1382
- } else {
1383
- errors.push(`Unknown attribute '${attr}' for entity type '${entityType}'. Available attributes: ${Object.keys(this.schema[entityType] || {}).join(", ")}`);
1384
- }
1385
- }
1386
- return {
1387
- valid: errors.length === 0,
1388
- errors,
1389
- resolved
1390
- };
1391
- }
1392
- getAvailableAttributes(entityType) {
1393
- return Object.keys(this.schema[entityType] || {});
1394
- }
1395
- getSchema() {
1396
- return this.schema;
1397
- }
1398
- }
1399
-
1400
- // src/query/query-optimizer.ts
1401
- class QueryOptimizer {
1402
- catalog;
1403
- constructor(catalog = []) {
1404
- this.catalog = catalog;
1405
- }
1406
- optimize(query) {
1407
- if (query.goals.length <= 1)
1408
- return query;
1409
- const optimizedGoals = [];
1410
- const remainingGoals = [...query.goals];
1411
- const boundVars = new Set;
1412
- const typeGoalIdx = remainingGoals.findIndex((g) => g.predicate === "attr" && g.terms[1] === "type");
1413
- if (typeGoalIdx !== -1) {
1414
- const typeGoal = remainingGoals.splice(typeGoalIdx, 1)[0];
1415
- optimizedGoals.push(typeGoal);
1416
- this.collectVars(typeGoal, boundVars);
1417
- }
1418
- while (remainingGoals.length > 0) {
1419
- const bestIdx = this.findBestNextGoal(remainingGoals, boundVars);
1420
- if (bestIdx === -1) {
1421
- const goal = remainingGoals.splice(0, 1)[0];
1422
- optimizedGoals.push(goal);
1423
- this.collectVars(goal, boundVars);
1424
- } else {
1425
- const goal = remainingGoals.splice(bestIdx, 1)[0];
1426
- optimizedGoals.push(goal);
1427
- this.collectVars(goal, boundVars);
1428
- }
1429
- let pushdownPossible = true;
1430
- while (pushdownPossible) {
1431
- const filterIdx = remainingGoals.findIndex((g) => this.isFilter(g) && this.isSatisfied(g, boundVars));
1432
- if (filterIdx !== -1) {
1433
- const filter = remainingGoals.splice(filterIdx, 1)[0];
1434
- optimizedGoals.push(filter);
1435
- } else {
1436
- pushdownPossible = false;
1437
- }
1438
- }
1439
- }
1440
- return {
1441
- ...query,
1442
- goals: optimizedGoals
1443
- };
1444
- }
1445
- findBestNextGoal(goals, boundVars) {
1446
- let bestIdx = -1;
1447
- let bestScore = -1;
1448
- const filterVars = new Set;
1449
- for (const goal of goals) {
1450
- if (this.isFilter(goal)) {
1451
- for (const term of goal.terms) {
1452
- if (typeof term === "string" && term.startsWith("?")) {
1453
- filterVars.add(term);
1454
- }
1455
- }
1456
- }
1457
- }
1458
- for (let i = 0;i < goals.length; i++) {
1459
- const goal = goals[i];
1460
- if (this.isFilter(goal))
1461
- continue;
1462
- let score = this.calculateRestrictiveness(goal, boundVars);
1463
- for (const term of goal.terms) {
1464
- if (typeof term === "string" && term.startsWith("?") && !boundVars.has(term) && filterVars.has(term)) {
1465
- score += 25;
1466
- }
1467
- }
1468
- if (score > bestScore) {
1469
- bestScore = score;
1470
- bestIdx = i;
1471
- }
1472
- }
1473
- return bestIdx;
1474
- }
1475
- calculateRestrictiveness(goal, boundVars) {
1476
- let score = 0;
1477
- const terms = goal.terms;
1478
- for (const term of terms) {
1479
- if (typeof term !== "string" || !term.startsWith("?")) {
1480
- score += 100;
1481
- } else if (boundVars.has(term)) {
1482
- score += 50;
1483
- }
1484
- }
1485
- if (goal.predicate === "attr" && typeof terms[1] === "string") {
1486
- const entry = this.catalog.find((e) => e.attribute === terms[1]);
1487
- if (entry) {
1488
- if (entry.cardinality === "one") {
1489
- score += 20;
1490
- }
1491
- score -= Math.min(10, entry.distinctCount / 100);
1492
- }
1493
- }
1494
- return score;
1495
- }
1496
- isFilter(goal) {
1497
- const filters = new Set([
1498
- "gt",
1499
- "lt",
1500
- "between",
1501
- "regex",
1502
- "contains",
1503
- ">",
1504
- "<",
1505
- ">=",
1506
- "<=",
1507
- "=",
1508
- "!=",
1509
- "after",
1510
- "betweenDate"
1511
- ]);
1512
- return filters.has(goal.predicate) || goal.predicate.startsWith("ext_");
1513
- }
1514
- isSatisfied(goal, boundVars) {
1515
- return goal.terms.every((term) => {
1516
- if (typeof term === "string" && term.startsWith("?")) {
1517
- return boundVars.has(term);
1518
- }
1519
- return true;
1520
- });
1521
- }
1522
- collectVars(goal, boundVars) {
1523
- for (const term of goal.terms) {
1524
- if (typeof term === "string" && term.startsWith("?")) {
1525
- boundVars.add(term);
1526
- }
1527
- }
1528
- }
1529
- }
1530
-
1531
- // src/query/eqls-parser.ts
1532
- class EQLSParser {
1533
- tokens = [];
1534
- current = 0;
1535
- errors = [];
1536
- static KEYWORDS = new Set([
1537
- "FIND",
1538
- "AS",
1539
- "WHERE",
1540
- "AND",
1541
- "OR",
1542
- "RETURN",
1543
- "ORDER",
1544
- "BY",
1545
- "LIMIT",
1546
- "ASC",
1547
- "DESC",
1548
- "BETWEEN",
1549
- "CONTAINS",
1550
- "MATCHES",
1551
- "IN"
1552
- ]);
1553
- static SINGLE_CHAR_OPERATORS = new Set(["=", ">", "<"]);
1554
- static MULTI_CHAR_OPERATORS = new Set([
1555
- "CONTAINS",
1556
- "MATCHES",
1557
- "BETWEEN",
1558
- "IN"
1559
- ]);
1560
- parse(query) {
1561
- this.tokens = this.tokenize(query);
1562
- this.current = 0;
1563
- this.errors = [];
1564
- try {
1565
- const parsed = this.parseQuery();
1566
- if (this.errors.length > 0) {
1567
- return { errors: this.errors };
1568
- }
1569
- return { query: parsed, errors: [] };
1570
- } catch (error) {
1571
- this.errors.push({
1572
- line: 1,
1573
- column: 1,
1574
- message: `Parse error: ${error instanceof Error ? error.message : "Unknown error"}`
1575
- });
1576
- return { errors: this.errors };
1577
- }
1578
- }
1579
- tokenize(input) {
1580
- const tokens = [];
1581
- const lines = input.split(`
1582
- `);
1583
- for (let lineNum = 0;lineNum < lines.length; lineNum++) {
1584
- const line = lines[lineNum];
1585
- const trimmed = line.trim();
1586
- if (!trimmed || trimmed.startsWith("--"))
1587
- continue;
1588
- let pos = 0;
1589
- while (pos < line.length) {
1590
- const char = line[pos];
1591
- if (char === " ") {
1592
- pos++;
1593
- continue;
1594
- }
1595
- if (char === '"') {
1596
- const start = pos;
1597
- pos++;
1598
- while (pos < line.length && line[pos] !== '"') {
1599
- if (line[pos] === "\\" && pos + 1 < line.length) {
1600
- pos += 2;
1601
- } else {
1602
- pos++;
1603
- }
1604
- }
1605
- if (pos < line.length) {
1606
- pos++;
1607
- const value = line.slice(start + 1, pos - 1);
1608
- tokens.push({
1609
- type: "STRING",
1610
- value,
1611
- line: lineNum + 1,
1612
- column: start + 1
1613
- });
1614
- } else {
1615
- this.errors.push({
1616
- line: lineNum + 1,
1617
- column: start + 1,
1618
- message: "Unterminated string literal"
1619
- });
1620
- break;
1621
- }
1622
- } else if (char === "/" && pos + 1 < line.length) {
1623
- const start = pos;
1624
- pos++;
1625
- while (pos < line.length && line[pos] !== "/") {
1626
- if (line[pos] === "\\" && pos + 1 < line.length) {
1627
- pos += 2;
1628
- } else {
1629
- pos++;
1630
- }
1631
- }
1632
- if (pos < line.length) {
1633
- pos++;
1634
- const pattern = line.slice(start, pos);
1635
- tokens.push({
1636
- type: "REGEX",
1637
- value: pattern,
1638
- line: lineNum + 1,
1639
- column: start + 1
1640
- });
1641
- } else {
1642
- this.errors.push({
1643
- line: lineNum + 1,
1644
- column: start + 1,
1645
- message: "Unterminated regex literal"
1646
- });
1647
- break;
1648
- }
1649
- } else if (char.match(/[A-Za-z_@]/)) {
1650
- const start = pos;
1651
- while (pos < line.length && line[pos].match(/[A-Za-z0-9_:@-]/)) {
1652
- pos++;
1653
- }
1654
- const value = line.slice(start, pos);
1655
- const upperValue = value.toUpperCase();
1656
- let type = "IDENTIFIER";
1657
- let tokenValue = value;
1658
- if (EQLSParser.KEYWORDS.has(upperValue)) {
1659
- type = upperValue;
1660
- tokenValue = upperValue;
1661
- } else if (EQLSParser.MULTI_CHAR_OPERATORS.has(upperValue)) {
1662
- type = "OPERATOR";
1663
- tokenValue = upperValue;
1664
- }
1665
- tokens.push({
1666
- type,
1667
- value: tokenValue,
1668
- line: lineNum + 1,
1669
- column: start + 1
1670
- });
1671
- } else if (char.match(/[0-9]/)) {
1672
- const start = pos;
1673
- let hasDecimal = false;
1674
- while (pos < line.length) {
1675
- const nextChar = line[pos];
1676
- if (nextChar.match(/[0-9]/)) {
1677
- pos++;
1678
- } else if (nextChar === "." && !hasDecimal && pos + 1 < line.length && line[pos + 1].match(/[0-9]/)) {
1679
- hasDecimal = true;
1680
- pos++;
1681
- } else {
1682
- break;
1683
- }
1684
- }
1685
- const value = line.slice(start, pos);
1686
- const numValue = value.includes(".") ? parseFloat(value) : parseInt(value, 10);
1687
- tokens.push({
1688
- type: "NUMBER",
1689
- value: numValue,
1690
- line: lineNum + 1,
1691
- column: start + 1
1692
- });
1693
- } else if (char === ".") {
1694
- tokens.push({
1695
- type: "DOT",
1696
- value: ".",
1697
- line: lineNum + 1,
1698
- column: pos + 1
1699
- });
1700
- pos++;
1701
- } else if (char === "?") {
1702
- const start = pos;
1703
- pos++;
1704
- while (pos < line.length && line[pos].match(/[A-Za-z0-9_]/)) {
1705
- pos++;
1706
- }
1707
- const value = line.slice(start, pos);
1708
- tokens.push({
1709
- type: "VARIABLE",
1710
- value,
1711
- line: lineNum + 1,
1712
- column: start + 1
1713
- });
1714
- } else if (EQLSParser.SINGLE_CHAR_OPERATORS.has(char) || char === "!" && pos + 1 < line.length && line[pos + 1] === "=" || char === ">" && pos + 1 < line.length && line[pos + 1] === "=" || char === "<" && pos + 1 < line.length && line[pos + 1] === "=" || char === "=" && pos + 1 < line.length && line[pos + 1] === "=") {
1715
- const start = pos;
1716
- if (char === "!" || char === ">" || char === "<" || char === "=") {
1717
- pos += 2;
1718
- } else {
1719
- pos++;
1720
- }
1721
- const value = line.slice(start, pos);
1722
- tokens.push({
1723
- type: "OPERATOR",
1724
- value,
1725
- line: lineNum + 1,
1726
- column: start + 1
1727
- });
1728
- } else if (char === ",") {
1729
- tokens.push({
1730
- type: "COMMA",
1731
- value: ",",
1732
- line: lineNum + 1,
1733
- column: pos + 1
1734
- });
1735
- pos++;
1736
- } else if (char === "(") {
1737
- tokens.push({
1738
- type: "LPAREN",
1739
- value: "(",
1740
- line: lineNum + 1,
1741
- column: pos + 1
1742
- });
1743
- pos++;
1744
- } else if (char === ")") {
1745
- tokens.push({
1746
- type: "RPAREN",
1747
- value: ")",
1748
- line: lineNum + 1,
1749
- column: pos + 1
1750
- });
1751
- pos++;
1752
- } else {
1753
- this.errors.push({
1754
- line: lineNum + 1,
1755
- column: pos + 1,
1756
- message: `Unexpected character '${char}'`,
1757
- expected: ["identifier", "string", "number", "operator"]
1758
- });
1759
- pos++;
1760
- }
1761
- }
1762
- }
1763
- return tokens;
1764
- }
1765
- parseQuery() {
1766
- this.expect("FIND");
1767
- const find = this.expect("IDENTIFIER").value;
1768
- this.expect("AS");
1769
- const as = this.expect("VARIABLE").value;
1770
- let where;
1771
- if (this.match("WHERE")) {
1772
- where = this.parseExpression();
1773
- }
1774
- let returnFields;
1775
- if (this.match("RETURN")) {
1776
- returnFields = this.parseReturnFields();
1777
- }
1778
- let orderBy;
1779
- if (this.match("ORDER")) {
1780
- this.expect("BY");
1781
- const field = this.parseAttributeReference();
1782
- const direction = this.match("DESC") ? "DESC" : this.match("ASC") ? "ASC" : "ASC";
1783
- orderBy = { field, direction };
1784
- }
1785
- let limit;
1786
- if (this.match("LIMIT")) {
1787
- limit = this.expect("NUMBER").value;
1788
- }
1789
- return { find, as, where, return: returnFields, orderBy, limit };
1790
- }
1791
- parseExpression() {
1792
- let left = this.parseTerm();
1793
- while (this.match("AND") || this.match("OR")) {
1794
- const op = this.previous().value;
1795
- const right = this.parseTerm();
1796
- left = { op, left, right };
1797
- }
1798
- return left;
1799
- }
1800
- parseTerm() {
1801
- if (this.match("LPAREN")) {
1802
- const expr = this.parseExpression();
1803
- this.expect("RPAREN");
1804
- return expr;
1805
- }
1806
- if ((this.check("STRING") || this.check("NUMBER") || this.check("IDENTIFIER")) && this.tokens[this.current + 1]?.type === "IN") {
1807
- const value = this.parseValue();
1808
- this.expect("IN");
1809
- const field = this.parseAttributeReference();
1810
- return { type: "MEMBERSHIP", value, field };
1811
- }
1812
- return this.parsePredicate();
1813
- }
1814
- parsePredicate() {
1815
- const field = this.parseAttributeReference();
1816
- if (this.match("BETWEEN")) {
1817
- const min = this.expect("NUMBER").value;
1818
- this.expect("AND");
1819
- const max = this.expect("NUMBER").value;
1820
- return { type: "BETWEEN", field, min, max };
1821
- }
1822
- if (this.match("CONTAINS")) {
1823
- const pattern = this.expect("STRING").value;
1824
- return { type: "CONTAINS", field, pattern };
1825
- }
1826
- if (this.match("MATCHES")) {
1827
- const regex = this.expect("REGEX").value;
1828
- return { type: "MATCHES", field, regex };
1829
- }
1830
- if (this.match("IN")) {
1831
- const value = this.parseValue();
1832
- return { type: "MEMBERSHIP", value, field };
1833
- }
1834
- const op = this.expect("OPERATOR").value.trim();
1835
- const right = this.parseValue();
1836
- if (op === "=" || op === "==") {
1837
- return { type: "EQUALS", field, value: right };
1838
- } else {
1839
- return { type: "COMP", left: field, op, right };
1840
- }
1841
- }
1842
- parseAttributeReference() {
1843
- const variable = this.expect("VARIABLE").value;
1844
- const attributeParts = [];
1845
- while (this.check("DOT")) {
1846
- this.advance();
1847
- const attributePart = this.expect("IDENTIFIER").value;
1848
- attributeParts.push(this.toCamelCase(attributePart));
1849
- }
1850
- if (attributeParts.length > 0) {
1851
- return `${variable}.${attributeParts.join(".")}`;
1852
- }
1853
- return variable;
1854
- }
1855
- toCamelCase(str) {
1856
- return str;
1857
- }
1858
- parseValue() {
1859
- if (this.match("STRING"))
1860
- return this.previous().value;
1861
- if (this.match("NUMBER"))
1862
- return this.previous().value;
1863
- if (this.match("IDENTIFIER")) {
1864
- const value = this.previous().value;
1865
- if (value === "true")
1866
- return true;
1867
- if (value === "false")
1868
- return false;
1869
- return value;
1870
- }
1871
- if (this.match("VARIABLE"))
1872
- return this.previous().value;
1873
- throw new Error(`Expected value, got ${this.peek().type}`);
1874
- }
1875
- parseReturnFields() {
1876
- const fields = [];
1877
- do {
1878
- const field = this.parseAttributeReference();
1879
- fields.push(field);
1880
- } while (this.match("COMMA"));
1881
- return fields;
1882
- }
1883
- extractContainsFields(expr) {
1884
- const fields = [];
1885
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
1886
- fields.push(...this.extractContainsFields(expr.left));
1887
- fields.push(...this.extractContainsFields(expr.right));
1888
- } else if ("type" in expr && expr.type === "CONTAINS" && "field" in expr) {
1889
- fields.push(expr.field);
1890
- }
1891
- return fields;
1892
- }
1893
- match(type) {
1894
- if (this.check(type)) {
1895
- this.advance();
1896
- return true;
1897
- }
1898
- return false;
1899
- }
1900
- check(type) {
1901
- if (this.isAtEnd())
1902
- return false;
1903
- return this.peek().type === type;
1904
- }
1905
- advance() {
1906
- if (!this.isAtEnd())
1907
- this.current++;
1908
- return this.previous();
1909
- }
1910
- isAtEnd() {
1911
- return this.peek().type === "EOF";
1912
- }
1913
- peek() {
1914
- return this.tokens[this.current] || {
1915
- type: "EOF",
1916
- value: "",
1917
- line: 0,
1918
- column: 0
1919
- };
1920
- }
1921
- previous() {
1922
- return this.tokens[this.current - 1] || {
1923
- type: "EOF",
1924
- value: "",
1925
- line: 0,
1926
- column: 0
1927
- };
1928
- }
1929
- expect(type) {
1930
- if (this.check(type)) {
1931
- return this.advance();
1932
- }
1933
- const token = this.peek();
1934
- this.errors.push({
1935
- line: token.line,
1936
- column: token.column,
1937
- message: `Expected ${type}, got ${token.type}`,
1938
- expected: [type]
1939
- });
1940
- throw new Error(`Expected ${type}, got ${token.type}`);
1941
- }
1942
- }
1943
-
1944
- class EQLSCompiler {
1945
- projectionMap = new Map;
1946
- tempCounter = 0;
1947
- compileAll(eqlsQuery) {
1948
- const baseGoals = [];
1949
- const baseVariables = new Set;
1950
- this.projectionMap.clear();
1951
- this.tempCounter = 0;
1952
- baseGoals.push({
1953
- predicate: "attr",
1954
- terms: [eqlsQuery.as, "type", eqlsQuery.find]
1955
- });
1956
- baseVariables.add(eqlsQuery.as.substring(1));
1957
- const returnGoals = [];
1958
- const returnVars = new Set;
1959
- if (eqlsQuery.return) {
1960
- for (const field of eqlsQuery.return) {
1961
- if (this.isAttributeReference(field)) {
1962
- const [entityVar, attributePath] = this.splitAttributeReference(field);
1963
- const outputVar = this.generateTempVar();
1964
- returnVars.add(outputVar);
1965
- returnGoals.push({
1966
- predicate: "attr",
1967
- terms: [entityVar, attributePath, `?${outputVar}`]
1968
- });
1969
- this.projectionMap.set(field, `?${outputVar}`);
1970
- } else {
1971
- returnVars.add(field.substring(1));
1972
- this.projectionMap.set(field, field);
1973
- }
1974
- }
1975
- }
1976
- const clauses = eqlsQuery.where ? this.toDNF(eqlsQuery.where) : [[]];
1977
- const compiledQueries = [];
1978
- for (const clause of clauses) {
1979
- const goals = [...baseGoals];
1980
- const variables = new Set(baseVariables);
1981
- for (const pred of clause) {
1982
- this.compilePredicate(pred, goals, variables);
1983
- }
1984
- for (const g of returnGoals)
1985
- goals.push(g);
1986
- for (const v of returnVars)
1987
- variables.add(v);
1988
- compiledQueries.push({ goals, variables });
1989
- }
1990
- return compiledQueries;
1991
- }
1992
- compile(eqlsQuery) {
1993
- const all = this.compileAll(eqlsQuery);
1994
- return all[0] || { goals: [], variables: new Set };
1995
- }
1996
- getProjectionMap() {
1997
- return this.projectionMap;
1998
- }
1999
- isAttributeReference(field) {
2000
- return field.includes(".") && field.startsWith("?");
2001
- }
2002
- splitAttributeReference(field) {
2003
- const parts = field.split(".");
2004
- if (parts.length < 2) {
2005
- throw new Error(`Invalid attribute reference: ${field}`);
2006
- }
2007
- const entityVar = parts[0];
2008
- const attributePath = parts.slice(1).join(".");
2009
- return [entityVar, attributePath];
2010
- }
2011
- compileExpression(expr, goals, variables) {
2012
- if (!expr || typeof expr !== "object") {
2013
- throw new Error(`Invalid expression: ${expr}`);
2014
- }
2015
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
2016
- this.compileExpression(expr.left, goals, variables);
2017
- this.compileExpression(expr.right, goals, variables);
2018
- } else {
2019
- this.compilePredicate(expr, goals, variables);
2020
- }
2021
- }
2022
- compilePredicate(pred, goals, variables) {
2023
- switch (pred.type) {
2024
- case "EQUALS":
2025
- goals.push({
2026
- predicate: "attr",
2027
- terms: [
2028
- this.extractEntityVar(pred.field),
2029
- this.extractAttributePath(pred.field),
2030
- pred.value
2031
- ]
2032
- });
2033
- break;
2034
- case "MEMBERSHIP":
2035
- goals.push({
2036
- predicate: "attr",
2037
- terms: [
2038
- this.extractEntityVar(pred.field),
2039
- this.extractAttributePath(pred.field),
2040
- pred.value
2041
- ]
2042
- });
2043
- break;
2044
- case "COMP":
2045
- const tempVar = this.generateTempVar();
2046
- variables.add(tempVar);
2047
- goals.push({
2048
- predicate: "attr",
2049
- terms: [
2050
- this.extractEntityVar(pred.left),
2051
- this.extractAttributePath(pred.left),
2052
- `?${tempVar}`
2053
- ]
2054
- });
2055
- goals.push({
2056
- predicate: pred.op.toLowerCase(),
2057
- terms: [`?${tempVar}`, pred.right]
2058
- });
2059
- break;
2060
- case "BETWEEN":
2061
- const tempVar2 = this.generateTempVar();
2062
- variables.add(tempVar2);
2063
- goals.push({
2064
- predicate: "attr",
2065
- terms: [
2066
- this.extractEntityVar(pred.field),
2067
- this.extractAttributePath(pred.field),
2068
- `?${tempVar2}`
2069
- ]
2070
- });
2071
- goals.push({
2072
- predicate: "between",
2073
- terms: [`?${tempVar2}`, pred.min, pred.max]
2074
- });
2075
- break;
2076
- case "CONTAINS":
2077
- const tempVar3 = this.generateTempVar();
2078
- variables.add(tempVar3);
2079
- goals.push({
2080
- predicate: "attr",
2081
- terms: [
2082
- this.extractEntityVar(pred.field),
2083
- this.extractAttributePath(pred.field),
2084
- `?${tempVar3}`
2085
- ]
2086
- });
2087
- goals.push({
2088
- predicate: "contains",
2089
- terms: [`?${tempVar3}`, pred.pattern]
2090
- });
2091
- break;
2092
- case "MATCHES":
2093
- const tempVar4 = this.generateTempVar();
2094
- variables.add(tempVar4);
2095
- const attributePath = this.extractAttributePath(pred.field);
2096
- const entityVar = this.extractEntityVar(pred.field);
2097
- goals.push({
2098
- predicate: "attr",
2099
- terms: [entityVar, attributePath, `?${tempVar4}`]
2100
- });
2101
- goals.push({
2102
- predicate: "regex",
2103
- terms: [`?${tempVar4}`, pred.regex]
2104
- });
2105
- break;
2106
- }
2107
- }
2108
- extractEntityVar(field) {
2109
- const parts = field.split(".");
2110
- return parts[0];
2111
- }
2112
- extractAttributePath(field) {
2113
- const parts = field.split(".");
2114
- if (parts.length > 1) {
2115
- return parts.slice(1).join(".");
2116
- }
2117
- return field.substring(1);
2118
- }
2119
- generateTempVar() {
2120
- this.tempCounter += 1;
2121
- return `temp${this.tempCounter}`;
2122
- }
2123
- toDNF(expr) {
2124
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
2125
- const left = this.toDNF(expr.left);
2126
- const right = this.toDNF(expr.right);
2127
- if (expr.op === "OR") {
2128
- return [...left, ...right];
2129
- }
2130
- const combined = [];
2131
- for (const l of left) {
2132
- for (const r of right) {
2133
- combined.push([...l, ...r]);
2134
- }
2135
- }
2136
- return combined;
2137
- }
2138
- return [[expr]];
2139
- }
2140
- }
2141
-
2142
- class EQLSProcessor {
2143
- parser = new EQLSParser;
2144
- compiler = new EQLSCompiler;
2145
- attributeResolver = new AttributeResolver;
2146
- catalog = [];
2147
- setSchema(catalog) {
2148
- this.catalog = catalog;
2149
- this.attributeResolver.buildSchema(catalog);
2150
- }
2151
- process(query) {
2152
- const parseResult = this.parser.parse(query);
2153
- if (parseResult.errors.length > 0) {
2154
- return { errors: parseResult.errors };
2155
- }
2156
- this.ensureFieldsInProjection(parseResult.query);
2157
- if (Object.keys(this.attributeResolver.getSchema()).length > 0) {
2158
- const entityType = "default";
2159
- const attributes = this.extractAttributes(parseResult.query);
2160
- const validation = this.attributeResolver.validateQuery(entityType, attributes);
2161
- if (!validation.valid) {
2162
- return {
2163
- errors: validation.errors.map((msg) => ({
2164
- message: msg,
2165
- line: 1,
2166
- column: 1
2167
- }))
2168
- };
2169
- }
2170
- this.resolveAttributesInQuery(parseResult.query, validation.resolved);
2171
- }
2172
- const compiledQueries = this.compiler.compileAll(parseResult.query);
2173
- const optimizer = new QueryOptimizer(this.catalog);
2174
- const optimizedQueries = compiledQueries.map((q) => optimizer.optimize(q));
2175
- const projectionMap = this.compiler.getProjectionMap();
2176
- return {
2177
- query: optimizedQueries[0],
2178
- queries: optimizedQueries,
2179
- errors: [],
2180
- projectionMap,
2181
- meta: {
2182
- orderBy: parseResult.query.orderBy,
2183
- limit: parseResult.query.limit
2184
- }
2185
- };
2186
- }
2187
- ensureFieldsInProjection(eqlsQuery) {
2188
- if (!eqlsQuery.return) {
2189
- eqlsQuery.return = [];
2190
- }
2191
- if (eqlsQuery.where) {
2192
- const matchesFields = this.extractMatchesFields(eqlsQuery.where);
2193
- for (const field of matchesFields) {
2194
- if (!eqlsQuery.return.includes(field)) {
2195
- eqlsQuery.return.push(field);
2196
- }
2197
- }
2198
- const containsFields = this.extractContainsFields(eqlsQuery.where);
2199
- for (const field of containsFields) {
2200
- if (!eqlsQuery.return.includes(field)) {
2201
- eqlsQuery.return.push(field);
2202
- }
2203
- }
2204
- }
2205
- if (eqlsQuery.orderBy?.field) {
2206
- const field = eqlsQuery.orderBy.field;
2207
- if (!eqlsQuery.return.includes(field)) {
2208
- eqlsQuery.return.push(field);
2209
- }
2210
- }
2211
- }
2212
- extractMatchesFields(expr) {
2213
- const fields = [];
2214
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
2215
- fields.push(...this.extractMatchesFields(expr.left));
2216
- fields.push(...this.extractMatchesFields(expr.right));
2217
- } else if ("type" in expr && expr.type === "MATCHES" && "field" in expr) {
2218
- fields.push(expr.field);
2219
- }
2220
- return fields;
2221
- }
2222
- extractContainsFields(expr) {
2223
- const fields = [];
2224
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
2225
- fields.push(...this.extractContainsFields(expr.left));
2226
- fields.push(...this.extractContainsFields(expr.right));
2227
- } else if ("type" in expr && expr.type === "CONTAINS" && "field" in expr) {
2228
- fields.push(expr.field);
2229
- }
2230
- return fields;
2231
- }
2232
- extractAttributes(eqlsQuery) {
2233
- const attributes = new Set;
2234
- if (eqlsQuery.where) {
2235
- this.extractAttributesFromExpression(eqlsQuery.where, attributes);
2236
- }
2237
- if (eqlsQuery.return) {
2238
- for (const field of eqlsQuery.return) {
2239
- if (this.isAttributeReference(field)) {
2240
- const [, attribute] = this.splitAttributeReference(field);
2241
- attributes.add(attribute);
2242
- }
2243
- }
2244
- }
2245
- return Array.from(attributes);
2246
- }
2247
- extractAttributesFromExpression(expr, attributes) {
2248
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
2249
- this.extractAttributesFromExpression(expr.left, attributes);
2250
- this.extractAttributesFromExpression(expr.right, attributes);
2251
- } else if ("field" in expr) {
2252
- if (this.isAttributeReference(expr.field)) {
2253
- const [, attribute] = this.splitAttributeReference(expr.field);
2254
- attributes.add(attribute);
2255
- }
2256
- } else if ("left" in expr && "right" in expr) {
2257
- if (typeof expr.left === "string" && this.isAttributeReference(expr.left)) {
2258
- const [, attribute] = this.splitAttributeReference(expr.left);
2259
- attributes.add(attribute);
2260
- }
2261
- }
2262
- }
2263
- resolveAttributesInQuery(eqlsQuery, resolved) {
2264
- if (eqlsQuery.where) {
2265
- this.resolveAttributesInExpression(eqlsQuery.where, resolved);
2266
- }
2267
- if (eqlsQuery.return) {
2268
- for (let i = 0;i < eqlsQuery.return.length; i++) {
2269
- const field = eqlsQuery.return[i];
2270
- if (this.isAttributeReference(field)) {
2271
- const [entityVar, attribute] = this.splitAttributeReference(field);
2272
- const resolvedAttr = resolved.get(attribute);
2273
- if (resolvedAttr) {
2274
- eqlsQuery.return[i] = `${entityVar}.${resolvedAttr}`;
2275
- }
2276
- }
2277
- }
2278
- }
2279
- }
2280
- resolveAttributesInExpression(expr, resolved) {
2281
- if ("op" in expr && (expr.op === "AND" || expr.op === "OR")) {
2282
- this.resolveAttributesInExpression(expr.left, resolved);
2283
- this.resolveAttributesInExpression(expr.right, resolved);
2284
- } else if ("field" in expr) {
2285
- if (this.isAttributeReference(expr.field)) {
2286
- const [entityVar, attribute] = this.splitAttributeReference(expr.field);
2287
- const resolvedAttr = resolved.get(attribute);
2288
- if (resolvedAttr) {
2289
- expr.field = `${entityVar}.${resolvedAttr}`;
2290
- }
2291
- }
2292
- } else if ("left" in expr && "right" in expr) {
2293
- if (typeof expr.left === "string" && this.isAttributeReference(expr.left)) {
2294
- const [entityVar, attribute] = this.splitAttributeReference(expr.left);
2295
- const resolvedAttr = resolved.get(attribute);
2296
- if (resolvedAttr) {
2297
- expr.left = `${entityVar}.${resolvedAttr}`;
2298
- }
2299
- }
2300
- }
2301
- }
2302
- isAttributeReference(field) {
2303
- return field.includes(".");
2304
- }
2305
- splitAttributeReference(field) {
2306
- const parts = field.split(".");
2307
- return [parts[0], parts.slice(1).join(".")];
2308
- }
2309
- }
2310
-
2311
- // src/workflows/runners.ts
2312
- class HttpSourceRunner {
2313
- validate(spec) {
2314
- if (!spec.url) {
2315
- throw new WorkflowRuntimeError("HTTP source requires url");
2316
- }
2317
- if (spec.mode === "map" && !spec.mapFrom) {
2318
- throw new WorkflowRuntimeError("Map mode requires mapFrom dataset");
2319
- }
2320
- }
2321
- async run(spec, ctx) {
2322
- this.validate(spec);
2323
- const templateContext = {
2324
- env: ctx.env,
2325
- vars: ctx.vars
2326
- };
2327
- switch (spec.mode) {
2328
- case "batch":
2329
- return this.runBatch(spec, templateContext, ctx);
2330
- case "map":
2331
- return this.runMap(spec, templateContext, ctx);
2332
- default:
2333
- throw new WorkflowRuntimeError(`Unsupported HTTP mode: ${spec.mode}`);
2334
- }
2335
- }
2336
- async runBatch(spec, templateContext, ctx) {
2337
- const interpolatedSpec = interpolateObject(spec, templateContext);
2338
- const url = interpolatedSpec.url;
2339
- ctx.log({ message: `HTTP GET ${url}` });
2340
- try {
2341
- const response = await this.fetchWithTimeout(url, {
2342
- headers: interpolatedSpec.headers || {}
2343
- });
2344
- if (!response.ok) {
2345
- throw new WorkflowRuntimeError(`HTTP ${response.status}: ${response.statusText}`);
2346
- }
2347
- const data = await response.json();
2348
- const rows = this.extractDataArray(data);
2349
- const limitedRows = ctx.dry && ctx.limit ? rows.slice(0, ctx.limit) : rows;
2350
- return {
2351
- name: spec.mapFrom || "batch_result",
2352
- rows: limitedRows
2353
- };
2354
- } catch (error) {
2355
- if (error instanceof WorkflowRuntimeError) {
2356
- throw error;
2357
- }
2358
- throw new WorkflowRuntimeError(`HTTP request failed: ${error instanceof Error ? error.message : "Unknown error"}`);
2359
- }
2360
- }
2361
- async runMap(spec, templateContext, ctx) {
2362
- if (!spec.mapFrom) {
2363
- throw new WorkflowRuntimeError("Map mode requires mapFrom dataset");
2364
- }
2365
- const sourceDataset = ctx.getDataset(spec.mapFrom);
2366
- if (!sourceDataset) {
2367
- throw new WorkflowRuntimeError(`Dataset not found: ${spec.mapFrom}`);
2368
- }
2369
- const allRows = [];
2370
- const sourceRows = ctx.dry && ctx.limit ? sourceDataset.rows.slice(0, ctx.limit) : sourceDataset.rows;
2371
- for (const row of sourceRows) {
2372
- const rowContext = {
2373
- ...templateContext,
2374
- row
2375
- };
2376
- const interpolatedSpec = interpolateObject(spec, rowContext);
2377
- const url = interpolatedSpec.url;
2378
- try {
2379
- const response = await this.fetchWithTimeout(url, {
2380
- headers: interpolatedSpec.headers || {}
2381
- });
2382
- if (!response.ok) {
2383
- throw new WorkflowRuntimeError(`HTTP ${response.status} for row ${JSON.stringify(row)}: ${response.statusText}`);
2384
- }
2385
- const data = await response.json();
2386
- const rows = this.extractDataArray(data);
2387
- allRows.push(...rows);
2388
- } catch (error) {
2389
- if (error instanceof WorkflowRuntimeError) {
2390
- throw error;
2391
- }
2392
- throw new WorkflowRuntimeError(`HTTP request failed for row ${JSON.stringify(row)}: ${error instanceof Error ? error.message : "Unknown error"}`);
2393
- }
2394
- }
2395
- return {
2396
- name: `${spec.mapFrom}_mapped`,
2397
- rows: allRows
2398
- };
2399
- }
2400
- async fetchWithTimeout(url, options = {}, timeoutMs = 15000) {
2401
- const maxBytes = parseInt(process.env.TQL_HTTP_MAX_BYTES || "10485760");
2402
- const controller = new AbortController;
2403
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
2404
- try {
2405
- const response = await fetch(url, {
2406
- ...options,
2407
- signal: controller.signal
2408
- });
2409
- const contentLength = response.headers.get("content-length");
2410
- if (contentLength && parseInt(contentLength) > maxBytes) {
2411
- throw new WorkflowRuntimeError(`Response too large: ${contentLength} bytes (max: ${maxBytes})`);
2412
- }
2413
- return response;
2414
- } finally {
2415
- clearTimeout(timeoutId);
2416
- }
2417
- }
2418
- extractDataArray(data) {
2419
- if (data === null || data === undefined) {
2420
- return [];
2421
- }
2422
- if (Array.isArray(data)) {
2423
- return data;
2424
- }
2425
- for (const key of ["items", "data", "results", "rows"]) {
2426
- if (data[key] && Array.isArray(data[key])) {
2427
- return data[key];
2428
- }
2429
- }
2430
- return [data];
2431
- }
2432
- }
2433
-
2434
- class FileSourceRunner {
2435
- validate(spec) {
2436
- if (!spec.path) {
2437
- throw new WorkflowRuntimeError("File source requires path");
2438
- }
2439
- }
2440
- async run(spec, ctx) {
2441
- this.validate(spec);
2442
- const filePath = resolve(ctx.workingDir, spec.path);
2443
- ctx.log({ message: `Reading file: ${filePath}` });
2444
- try {
2445
- const content = await readFile(filePath, "utf-8");
2446
- const format = spec.format || "json";
2447
- const rows = format === "json" ? this.parseJSON(content) : this.parseCSV(content);
2448
- const limitedRows = ctx.dry && ctx.limit ? rows.slice(0, ctx.limit) : rows;
2449
- return {
2450
- name: "file_data",
2451
- rows: limitedRows
2452
- };
2453
- } catch (error) {
2454
- throw new WorkflowRuntimeError(`Failed to read file ${spec.path}: ${error instanceof Error ? error.message : "Unknown error"}`);
2455
- }
2456
- }
2457
- parseJSON(content) {
2458
- const data = JSON.parse(content);
2459
- if (Array.isArray(data)) {
2460
- return data;
2461
- }
2462
- if (data.rows && Array.isArray(data.rows)) {
2463
- return data.rows;
2464
- }
2465
- return [data];
2466
- }
2467
- parseCSV(content) {
2468
- const lines = content.trim().split(`
2469
- `);
2470
- if (lines.length === 0)
2471
- return [];
2472
- const headers = lines[0].split(",").map((h) => h.trim());
2473
- const rows = [];
2474
- for (let i = 1;i < lines.length; i++) {
2475
- const values = lines[i].split(",").map((v) => v.trim());
2476
- const row = {};
2477
- for (let j = 0;j < headers.length; j++) {
2478
- row[headers[j]] = values[j] || "";
2479
- }
2480
- rows.push(row);
2481
- }
2482
- return rows;
2483
- }
2484
- }
2485
-
2486
- class QueryRunner {
2487
- validate(spec) {
2488
- if (!spec.eqls || spec.eqls.trim().length === 0) {
2489
- throw new WorkflowRuntimeError("Query step requires non-empty eqls");
2490
- }
2491
- if (!spec.needs || spec.needs.length === 0) {
2492
- throw new WorkflowRuntimeError("Query step requires at least one dependency");
2493
- }
2494
- }
2495
- async run(spec, ctx) {
2496
- this.validate(spec);
2497
- const store = new EAVStore;
2498
- const inputDatasets = this.resolveInputDatasets(spec, ctx);
2499
- const entityType = inputDatasets[0]?.name || "item";
2500
- for (const dataset of inputDatasets) {
2501
- if (!dataset)
2502
- continue;
2503
- for (let i = 0;i < dataset.rows.length; i++) {
2504
- const row = dataset.rows[i];
2505
- const entityId = `${dataset.name}:${i}`;
2506
- const facts = jsonEntityFacts(entityId, row, dataset.name);
2507
- store.addFacts(facts);
2508
- }
2509
- }
2510
- let eqls = spec.eqls;
2511
- if (entityType !== "item") {
2512
- eqls = this.transformEQLForType(eqls, entityType, ctx.verbose);
2513
- }
2514
- try {
2515
- const processor = new EQLSProcessor;
2516
- const catalog = store.getCatalog();
2517
- processor.setSchema(catalog);
2518
- const evaluator = new DatalogEvaluator(store);
2519
- const parseResult = processor.process(eqls);
2520
- if (parseResult.errors.length > 0) {
2521
- throw new WorkflowRuntimeError(`Query parsing failed: ${parseResult.errors.map((e) => e.message).join("; ")}`);
2522
- }
2523
- const results = evaluator.evaluate(parseResult.query, parseResult.meta?.limit);
2524
- const rows = this.resultsToRows(results, parseResult.projectionMap);
2525
- const limitedRows = ctx.dry && ctx.limit ? rows.slice(0, ctx.limit) : rows;
2526
- return {
2527
- name: spec.out,
2528
- rows: limitedRows
2529
- };
2530
- } catch (error) {
2531
- throw new WorkflowRuntimeError(`Query execution failed: ${error instanceof Error ? error.message : "Unknown error"}`);
2532
- }
2533
- }
2534
- transformEQLForType(eqls, entityType, verbose) {
2535
- let transformed = eqls.replace(/FIND\s+item\s+AS/g, `FIND ${entityType} AS`);
2536
- if (verbose && transformed !== eqls) {
2537
- console.log(` [QUERY TRANSFORM] 'item' -> '${entityType}'`);
2538
- }
2539
- return transformed;
2540
- }
2541
- resultsToRows(results, projectionMap) {
2542
- if (!results || !results.bindings || !Array.isArray(results.bindings)) {
2543
- return [];
2544
- }
2545
- const inverseMap = new Map;
2546
- if (projectionMap) {
2547
- for (const [original, bound] of projectionMap.entries()) {
2548
- inverseMap.set(bound, original);
2549
- }
2550
- }
2551
- return results.bindings.map((binding) => {
2552
- const row = {};
2553
- for (const [key, value] of Object.entries(binding)) {
2554
- const originalField = inverseMap.get(key) || key;
2555
- const cleanKey = originalField.replace(/^\?/, "");
2556
- row[cleanKey] = value;
2557
- }
2558
- return row;
2559
- });
2560
- }
2561
- resolveInputDatasets(spec, ctx) {
2562
- const datasets = [];
2563
- const seen = new Set;
2564
- if (spec.from) {
2565
- const dataset = ctx.getDataset(spec.from);
2566
- if (!dataset) {
2567
- throw new WorkflowRuntimeError(`Dataset '${spec.from}' not found for query step`);
2568
- }
2569
- datasets.push(dataset);
2570
- seen.add(dataset.name ?? spec.from);
2571
- ctx.log({ message: `from: ${spec.from}` });
2572
- } else {
2573
- for (const dependency of spec.needs) {
2574
- const dataset = ctx.getDatasetByStepId(dependency) || ctx.getDataset(dependency);
2575
- if (!dataset) {
2576
- throw new WorkflowRuntimeError(`Dependency '${dependency}' has no available dataset for query step`);
2577
- }
2578
- const key = dataset.name || dependency;
2579
- if (!seen.has(key)) {
2580
- datasets.push(dataset);
2581
- seen.add(key);
2582
- }
2583
- }
2584
- }
2585
- if (datasets.length === 0) {
2586
- const available = Object.keys(ctx.datasets).join(", ") || "none";
2587
- throw new WorkflowRuntimeError(`No input datasets resolved for query step. Available datasets: ${available}`);
2588
- }
2589
- return datasets;
2590
- }
2591
- }
2592
-
2593
- class OutputRunner {
2594
- validate(spec) {
2595
- if (!spec.output) {
2596
- throw new WorkflowRuntimeError("Output step requires output configuration");
2597
- }
2598
- if (!spec.needs || spec.needs.length === 0) {
2599
- throw new WorkflowRuntimeError("Output step requires at least one dependency");
2600
- }
2601
- }
2602
- async run(spec, ctx) {
2603
- this.validate(spec);
2604
- const inputDataset = this.resolveOutputDataset(spec, ctx);
2605
- ctx.log({
2606
- message: `Using dataset '${inputDataset.name}' with ${inputDataset.rows.length} rows`
2607
- });
2608
- const allRows = inputDataset.rows;
2609
- const content = spec.output.format === "json" ? this.formatJSON(allRows) : this.formatCSV(allRows);
2610
- if (spec.output.kind === "file") {
2611
- await this.writeToFile(spec.output.path, content, ctx);
2612
- } else {
2613
- console.log(content);
2614
- }
2615
- }
2616
- formatJSON(rows) {
2617
- return JSON.stringify(rows, null, 2);
2618
- }
2619
- formatCSV(rows) {
2620
- if (rows.length === 0) {
2621
- return "";
2622
- }
2623
- const allKeys = new Set;
2624
- for (const row of rows) {
2625
- Object.keys(row).forEach((key) => allKeys.add(key));
2626
- }
2627
- const headers = Array.from(allKeys);
2628
- const csvRows = [headers.join(",")];
2629
- for (const row of rows) {
2630
- const values = headers.map((header) => {
2631
- const value = row[header];
2632
- if (value === null || value === undefined) {
2633
- return "";
2634
- }
2635
- const stringValue = String(value);
2636
- if (stringValue.includes(",") || stringValue.includes('"') || stringValue.includes(`
2637
- `)) {
2638
- return `"${stringValue.replace(/"/g, '""')}"`;
2639
- }
2640
- return stringValue;
2641
- });
2642
- csvRows.push(values.join(","));
2643
- }
2644
- return csvRows.join(`
2645
- `);
2646
- }
2647
- async writeToFile(path, content, ctx) {
2648
- try {
2649
- const dir = dirname(path);
2650
- await mkdir(dir, { recursive: true });
2651
- await writeFile(path, content, "utf-8");
2652
- } catch (error) {
2653
- throw new WorkflowRuntimeError(`Failed to write file ${path}: ${error instanceof Error ? error.message : "Unknown error"}`);
2654
- }
2655
- }
2656
- resolveOutputDataset(spec, ctx) {
2657
- const candidates = [];
2658
- for (const dependency of spec.needs) {
2659
- const datasetName = ctx.stepOutputs[dependency];
2660
- const dataset = datasetName && ctx.getDatasetByName(datasetName) || ctx.getDatasetByStepId(dependency) || ctx.getDataset(dependency);
2661
- if (dataset) {
2662
- candidates.push(dataset);
2663
- }
2664
- }
2665
- if (candidates.length === 0) {
2666
- const available = Object.keys(ctx.datasets).join(", ") || "none";
2667
- throw new WorkflowRuntimeError(`No datasets available for output step. Available datasets: ${available}`);
2668
- }
2669
- return candidates[candidates.length - 1];
2670
- }
2671
- }
2672
- var BUILTIN_RUNNERS = {
2673
- "source:http": new HttpSourceRunner,
2674
- "source:file": new FileSourceRunner,
2675
- "query:eqls": new QueryRunner,
2676
- "output:file": new OutputRunner,
2677
- "output:stdout": new OutputRunner
2678
- };
2679
- function getRunner(stepType, stepSpec) {
2680
- switch (stepType) {
2681
- case "source":
2682
- if (stepSpec.source?.kind === "http") {
2683
- return BUILTIN_RUNNERS["source:http"];
2684
- }
2685
- if (stepSpec.source?.kind === "file") {
2686
- return BUILTIN_RUNNERS["source:file"];
2687
- }
2688
- break;
2689
- case "query":
2690
- return BUILTIN_RUNNERS["query:eqls"];
2691
- case "output":
2692
- return BUILTIN_RUNNERS["output:file"];
2693
- }
2694
- throw new WorkflowRuntimeError(`No runner found for step type: ${stepType}`);
2695
- }
2696
- // src/workflows/cache.ts
2697
- import { createHash } from "crypto";
2698
- import { readFile as readFile2, writeFile as writeFile2, mkdir as mkdir2 } from "fs/promises";
2699
- import { existsSync } from "fs";
2700
- import { dirname as dirname2, join } from "path";
2701
- import { gzip, gunzip } from "zlib";
2702
- import { promisify } from "util";
2703
- var gzipAsync = promisify(gzip);
2704
- var gunzipAsync = promisify(gunzip);
2705
-
2706
- class FileCacheManager {
2707
- cacheDir;
2708
- constructor(cacheDir = ".tql-cache") {
2709
- this.cacheDir = cacheDir;
2710
- }
2711
- async get(key) {
2712
- try {
2713
- const filePath = this.getCacheFilePath(key);
2714
- if (!existsSync(filePath)) {
2715
- return null;
2716
- }
2717
- const compressed = await readFile2(filePath);
2718
- const decompressed = await gunzipAsync(compressed);
2719
- const dataset = JSON.parse(decompressed.toString("utf-8"));
2720
- return dataset;
2721
- } catch (error) {
2722
- return null;
2723
- }
2724
- }
2725
- async set(key, dataset) {
2726
- try {
2727
- const filePath = this.getCacheFilePath(key);
2728
- await mkdir2(dirname2(filePath), { recursive: true });
2729
- const json = JSON.stringify(dataset);
2730
- const compressed = await gzipAsync(Buffer.from(json, "utf-8"));
2731
- await writeFile2(filePath, compressed);
2732
- } catch (error) {
2733
- console.warn(`Cache write failed for key ${key}:`, error);
2734
- }
2735
- }
2736
- getCacheFilePath(key) {
2737
- return join(this.cacheDir, `${key}.bin`);
2738
- }
2739
- }
2740
-
2741
- class NoCacheManager {
2742
- async get() {
2743
- return null;
2744
- }
2745
- async set() {}
2746
- }
2747
-
2748
- class ReadOnlyCacheManager {
2749
- delegate;
2750
- constructor(delegate) {
2751
- this.delegate = delegate;
2752
- }
2753
- async get(key) {
2754
- return this.delegate.get(key);
2755
- }
2756
- async set() {}
2757
- }
2758
- function createCacheKey(stepSpec, inputDatasetsHash, secretsHash) {
2759
- if (!stepSpec || typeof stepSpec !== "object") {
2760
- throw new Error("Invalid step specification for cache key");
2761
- }
2762
- if (!inputDatasetsHash || !/^[a-f0-9]+$/.test(inputDatasetsHash)) {
2763
- throw new Error("Invalid input datasets hash for cache key");
2764
- }
2765
- if (secretsHash && !/^[a-f0-9]+$/.test(secretsHash)) {
2766
- throw new Error("Invalid secrets hash for cache key");
2767
- }
2768
- const normalizedSpec = normalizeSpecForCache(stepSpec);
2769
- const specHash = createHash("sha256").update(JSON.stringify(normalizedSpec)).digest("hex").substring(0, 16);
2770
- const inputHash = inputDatasetsHash.substring(0, 16);
2771
- const secretsPart = secretsHash ? `_${secretsHash.substring(0, 8)}` : "";
2772
- const cacheKey = `${specHash}_${inputHash}${secretsPart}`;
2773
- if (!/^[a-f0-9_]+$/.test(cacheKey)) {
2774
- throw new Error("Generated cache key contains invalid characters");
2775
- }
2776
- return cacheKey;
2777
- }
2778
- function createInputDatasetsHash(datasets) {
2779
- const sortedNames = Object.keys(datasets).sort();
2780
- const dataToHash = sortedNames.map((name) => {
2781
- const dataset = datasets[name];
2782
- return {
2783
- name: dataset?.name,
2784
- rowCount: dataset?.rows.length,
2785
- sample: dataset?.rows.slice(0, 3)
2786
- };
2787
- });
2788
- return createHash("sha256").update(JSON.stringify(dataToHash)).digest("hex");
2789
- }
2790
- function createTemplateVarsHash(env, vars) {
2791
- const nonSecretVars = { ...vars };
2792
- for (const [key, value] of Object.entries(env)) {
2793
- if (!isSecretKey(key)) {
2794
- nonSecretVars[key] = value;
2795
- }
2796
- }
2797
- const sortedEntries = Object.entries(nonSecretVars).sort(([a], [b]) => a.localeCompare(b));
2798
- return createHash("sha256").update(JSON.stringify(sortedEntries)).digest("hex");
2799
- }
2800
- function isSecretKey(key) {
2801
- const secretPatterns = [
2802
- /^.*key$/i,
2803
- /^.*secret$/i,
2804
- /^.*token$/i,
2805
- /^.*password$/i,
2806
- /^.*pass$/i,
2807
- /^api_/i,
2808
- /^auth_/i
2809
- ];
2810
- return secretPatterns.some((pattern) => pattern.test(key));
2811
- }
2812
- function normalizeSpecForCache(spec) {
2813
- if (spec === null || spec === undefined) {
2814
- return spec;
2815
- }
2816
- if (Array.isArray(spec)) {
2817
- return spec.map(normalizeSpecForCache);
2818
- }
2819
- if (typeof spec === "object") {
2820
- const normalized = {};
2821
- const sortedKeys = Object.keys(spec).sort();
2822
- for (const key of sortedKeys) {
2823
- const value = spec[key];
2824
- if (key === "url" && typeof value === "string") {
2825
- normalized[key] = value.replace(/(\$)?\{\{\s*[^}]+\s*\}\}/g, "{{TEMPLATE_VAR}}");
2826
- } else {
2827
- normalized[key] = normalizeSpecForCache(value);
2828
- }
2829
- }
2830
- return normalized;
2831
- }
2832
- return spec;
2833
- }
2834
- function createCacheManager(mode, cacheDir) {
2835
- switch (mode) {
2836
- case "off":
2837
- return new NoCacheManager;
2838
- case "read":
2839
- return new ReadOnlyCacheManager(new FileCacheManager(cacheDir));
2840
- case "write":
2841
- default:
2842
- return new FileCacheManager(cacheDir);
2843
- }
2844
- }
2845
- async function withCache(cacheManager, cacheKey, operation, logger) {
2846
- const cached = await cacheManager.get(cacheKey);
2847
- if (cached) {
2848
- logger?.({ cache: "hit" });
2849
- return { result: cached, cacheHit: true };
2850
- }
2851
- logger?.({ cache: "miss" });
2852
- const result = await operation();
2853
- if (result && typeof result === "object" && "name" in result && "rows" in result) {
2854
- await cacheManager.set(cacheKey, result);
2855
- logger?.({ cache: "write" });
2856
- }
2857
- return { result, cacheHit: false };
2858
- }
2859
- // src/workflows/engine.ts
2860
- import { readFile as readFile3 } from "fs/promises";
2861
- import { dirname as dirname3, resolve as resolve2 } from "path";
2862
-
2863
- // src/workflows/log-levels.ts
2864
/**
 * Bracketed tags that prefix every line of the human-readable workflow log.
 */
var LOG_LEVELS = {
  // Dry-run banner.
  DRY: "[DRY]",
  // Workflow-level start.
  START: "[START]",
  // Step-level start.
  RUN: "[RUN]",
  // Workflow or step completed.
  DONE: "[DONE]",
  // Workflow or step failed.
  FAIL: "[FAIL]",
  // Informational events.
  INFO: "[INFO]",
  // Fallback for any other event name.
  LOG: "[LOG]",
};
2873
-
2874
- // src/workflows/engine.ts
2875
/**
 * Runs a workflow specification step by step, keeping the datasets each step
 * produces and recording a structured event log.
 *
 * Steps are executed sequentially in the order given by the execution plan.
 * Each step goes through `withCache`, keyed by the step, the current datasets
 * and the template variables.
 *
 * NOTE: `executeWorkflow` terminates the process via `process.exit` when the
 * workflow fails (exit code 1 for `WorkflowValidationError`, 2 otherwise).
 */
class WorkflowEngine {
  options;
  datasetsByName = {};
  datasetsByStepId = {};
  stepOutputNames = {};
  runId;
  cacheManager;
  events = [];
  workingDir = process.cwd();

  /**
   * @param {object} [options] - Engine options. Fields read by this class:
   *   `cache`, `dry`, `limit`, `vars`, `verbose`, `log`.
   * @param {string} [cacheDir] - Forwarded to `createCacheManager`.
   */
  constructor(options = {}, cacheDir) {
    this.options = options;
    // slice(2, 8) takes the same six characters the deprecated substr(2, 6) did.
    this.runId = `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.cacheManager = createCacheManager(options.cache || "write", cacheDir);
  }

  /**
   * Reads a workflow file, parses it and executes it. The file's directory
   * becomes the working directory handed to step runners.
   *
   * @param {string} filePath - Path of the workflow file.
   * @returns {Promise<void>}
   */
  async executeWorkflowFile(filePath) {
    this.workingDir = dirname3(resolve2(filePath));
    const yamlContent = await readFile3(filePath, "utf-8");
    const spec = parseWorkflow(yamlContent);
    await this.executeWorkflow(spec);
  }

  /**
   * Validates the spec, builds the execution plan and runs every step in
   * plan order. On any failure a "failed" workflow event is logged and the
   * process exits (1 = validation error, 2 = anything else).
   *
   * @param {object} spec - Parsed workflow specification.
   * @returns {Promise<void>}
   */
  async executeWorkflow(spec) {
    if (this.options.dry) {
      const limit = this.options.limit || 50;
      const mapCap = 20;
      console.log(`${LOG_LEVELS.DRY} DRY RUN (limit=${limit}, map cap=${mapCap})`);
      // This preview runs before validation, so it must tolerate a malformed
      // spec; the validation below is what reports the actual problem.
      const steps = Array.isArray(spec?.steps) ? spec.steps : [];
      const mapSources = steps.filter(
        (step) => step?.type === "source" && step.source?.mode === "map"
      );
      for (const source of mapSources) {
        console.log(` mapFrom=${source.source.mapFrom} cap=${mapCap}`);
      }
    }
    this.logEvent("workflow", "started", {});
    try {
      validateWorkflowSemantics(spec);
      const plan = createExecutionPlan(spec);
      validateExecutionPlan(plan);
      // Later sources win: process env < workflow env < caller-supplied vars.
      const env = {
        ...process.env,
        ...spec.env,
        ...this.options.vars,
      };
      for (const stepId of plan.order) {
        const step = plan.steps.find((s) => s.id === stepId);
        if (!step) {
          throw new WorkflowRuntimeError(`Step not found: ${stepId}`);
        }
        await this.executeStep(step, env);
      }
      this.logEvent("workflow", "completed", {});
    } catch (error) {
      this.logEvent("workflow", "failed", {
        error: error instanceof Error ? error.message : "Unknown error",
      });
      process.exit(error instanceof WorkflowValidationError ? 1 : 2);
    }
  }

  /**
   * Executes one step through the cache, stores its dataset (when the step
   * declares `out`) and logs started/completed/failed events. Errors are
   * logged and re-thrown.
   *
   * @param {object} step - Step from the execution plan.
   * @param {object} env - Merged environment passed to the runner context.
   * @returns {Promise<void>}
   */
  async executeStep(step, env) {
    const startTime = Date.now();
    this.logEvent(step.id, "started", {});
    try {
      const ctx = {
        datasets: this.datasetsByName,
        stepOutputs: { ...this.stepOutputNames },
        env,
        vars: this.options.vars || {},
        runId: this.runId,
        dry: this.options.dry || false,
        limit: this.options.limit,
        cacheMode: this.options.cache || "write",
        workingDir: this.workingDir,
        verbose: this.options.verbose || false,
        cache: this.cacheManager,
        getDataset: (ref) => this.resolveDataset(ref),
        getDatasetByName: (name) => this.datasetsByName[name],
        getDatasetByStepId: (stepId) => this.datasetsByStepId[stepId],
        log: (event) => this.logEvent(step.id, "info", event),
      };
      if (this.options.verbose) {
        this.logVerboseInput(step);
      }
      const inputDatasetsHash = createInputDatasetsHash(this.datasetsByName);
      const templateVarsHash = createTemplateVarsHash(env, ctx.vars);
      const cacheKey = createCacheKey(step, inputDatasetsHash, templateVarsHash);
      const { result, cacheHit } = await withCache(
        this.cacheManager,
        cacheKey,
        async () => {
          const runner = getRunner(step.type, step);
          // Source runners receive the nested `source` spec; every other
          // step type receives the step itself.
          const runnerSpec = step.type === "source" ? step.source : step;
          return runner.run(runnerSpec, ctx);
        },
        (event) => this.logEvent(step.id, "cache", event)
      );
      if (result && step.out) {
        const dataset = result;
        dataset.name = step.out;
        this.datasetsByName[step.out] = dataset;
        this.datasetsByStepId[step.id] = dataset;
        this.stepOutputNames[step.id] = step.out;
      }
      if (this.options.verbose) {
        this.logVerboseOutput(step, result);
      }
      const duration = Date.now() - startTime;
      const inputRows = this.getInputRowCount(step);
      const outputRows =
        result && typeof result === "object" && "rows" in result ? result.rows.length : 0;
      this.logEvent(step.id, "completed", {
        durationMs: duration,
        inputRows,
        outputRows,
        cache: cacheHit ? "hit" : "miss",
        cacheKey,
      });
    } catch (error) {
      const duration = Date.now() - startTime;
      this.logEvent(step.id, "failed", {
        durationMs: duration,
        error: error instanceof Error ? error.message : "Unknown error",
      });
      throw error;
    }
  }

  /**
   * Sums the row counts of every dataset listed in `step.needs`. Each need is
   * looked up by step id first, then by dataset name; unknown references and
   * datasets without `rows` count as 0.
   *
   * @param {object} step - Step whose `needs` are inspected.
   * @returns {number} Total number of input rows.
   */
  getInputRowCount(step) {
    if (!step.needs || step.needs.length === 0) {
      return 0;
    }
    return step.needs.reduce((total, need) => {
      const dataset = this.datasetsByStepId[need] || this.datasetsByName[need];
      // A stored result is not guaranteed to carry `rows`; counting must not
      // fail a step that has already run successfully.
      return total + (dataset?.rows?.length ?? 0);
    }, 0);
  }

  /**
   * Resolves a dataset reference, trying dataset names before step ids.
   *
   * @param {string} ref - Dataset name or step id.
   * @returns {object|undefined} The dataset, if any.
   */
  resolveDataset(ref) {
    return this.datasetsByName[ref] || this.datasetsByStepId[ref];
  }

  /**
   * Records an event and prints it, as JSON when `options.log === "json"`
   * and in the pretty format otherwise.
   *
   * @param {string} stepId - Step id, or "workflow" for workflow-level events.
   * @param {string} event - Event name.
   * @param {object} [data] - Extra fields spread onto the event record.
   */
  logEvent(stepId, event, data = {}) {
    const workflowEvent = {
      runId: this.runId,
      stepId,
      event,
      timestamp: Date.now(),
      ...data,
    };
    this.events.push(workflowEvent);
    if (this.options.log === "json") {
      console.log(JSON.stringify(workflowEvent));
    } else {
      this.logPretty(workflowEvent);
    }
  }

  /**
   * Placeholder: always reports "miss", regardless of the step.
   *
   * @param {string} stepId - Currently unused.
   * @returns {string} Always "miss".
   */
  getCacheStatus(stepId) {
    return "miss";
  }

  /**
   * Prints one event in the human-readable format. "cache" events are
   * recorded but intentionally not printed.
   *
   * @param {object} event - Event record as built by `logEvent`.
   */
  logPretty(event) {
    const timestamp = new Date(event.timestamp).toLocaleTimeString();
    switch (event.event) {
      case "started": {
        if (event.stepId === "workflow") {
          console.log(`${LOG_LEVELS.START} [${timestamp}] Starting workflow (${event.runId})`);
        } else {
          console.log(`${LOG_LEVELS.RUN} [${timestamp}] ${event.stepId}: Starting...`);
        }
        break;
      }
      case "completed": {
        if (event.stepId === "workflow") {
          console.log(`${LOG_LEVELS.DONE} [${timestamp}] Workflow completed`);
        } else {
          // Compare against undefined so a legitimate 0 ms duration is still
          // printed, matching how the row counts are handled.
          const duration = event.durationMs !== undefined ? `${event.durationMs}ms` : "";
          const inputRows = event.inputRows !== undefined ? `${event.inputRows} in` : "";
          const outputRows = event.outputRows !== undefined ? `${event.outputRows} out` : "";
          const cache = event.cache ? `(${event.cache})` : "";
          const cacheKey = event.cacheKey ? `[${event.cacheKey.slice(0, 8)}]` : "";
          const details = [duration, inputRows, outputRows, cache, cacheKey]
            .filter(Boolean)
            .join(", ");
          console.log(`${LOG_LEVELS.DONE} [${timestamp}] ${event.stepId}: ${details}`);
        }
        break;
      }
      case "failed": {
        const duration = event.durationMs !== undefined ? ` (${event.durationMs}ms)` : "";
        console.log(`${LOG_LEVELS.FAIL} [${timestamp}] ${event.stepId}: Failed${duration}`);
        if (event.error) {
          console.log(` Error: ${event.error}`);
        }
        break;
      }
      case "cache":
        break;
      case "info": {
        if ("message" in event && typeof event.message === "string") {
          console.log(`${LOG_LEVELS.INFO} [${timestamp}] ${event.stepId}: ${event.message}`);
        } else {
          console.log(`${LOG_LEVELS.INFO} [${timestamp}] ${event.stepId}: ${JSON.stringify(event)}`);
        }
        break;
      }
      default:
        console.log(`${LOG_LEVELS.LOG} [${timestamp}] ${event.stepId}: ${event.event}`);
    }
  }

  /**
   * Verbose mode: prints row count, inferred schema and a truncated sample
   * for every dataset the step depends on.
   *
   * @param {object} step - Step about to run.
   */
  logVerboseInput(step) {
    if (!step.needs || step.needs.length === 0) {
      console.log(` \uD83D\uDCE5 INPUT: (no dependencies)`);
      return;
    }
    for (const need of step.needs) {
      const dataset = this.datasetsByStepId[need] || this.datasetsByName[need];
      // A dataset without `rows` is reported like an empty one instead of
      // throwing from a diagnostics-only code path.
      if (dataset?.rows?.length > 0) {
        const sample = dataset.rows.slice(0, 3);
        const schema = this.inferSchema(dataset.rows);
        console.log(` \uD83D\uDCE5 INPUT [${need}]: ${dataset.rows.length} rows`);
        console.log(` Schema: ${JSON.stringify(schema)}`);
        console.log(` Sample: ${JSON.stringify(sample).slice(0, 200)}`);
      } else {
        console.log(` \uD83D\uDCE5 INPUT [${need}]: 0 rows (empty or not found)`);
      }
    }
  }

  /**
   * Verbose mode: prints row count, inferred schema and a truncated sample
   * of the step result, or a note when there is no row data.
   *
   * @param {object} step - Step that just ran (not read by this method).
   * @param {*} result - Value produced by the step.
   */
  logVerboseOutput(step, result) {
    if (!result || typeof result !== "object" || !("rows" in result)) {
      console.log(` \uD83D\uDCE4 OUTPUT: (no data)`);
      return;
    }
    const dataset = result;
    if (dataset.rows.length === 0) {
      console.log(` \uD83D\uDCE4 OUTPUT: 0 rows (EMPTY - check for issues!)`);
      return;
    }
    const sample = dataset.rows.slice(0, 3);
    const schema = this.inferSchema(dataset.rows);
    console.log(` \uD83D\uDCE4 OUTPUT: ${dataset.rows.length} rows`);
    console.log(` Schema: ${JSON.stringify(schema)}`);
    console.log(` Sample: ${JSON.stringify(sample).slice(0, 200)}`);
  }

  /**
   * Infers a `{ column: typeName }` map from the first row only. `null`
   * values are reported as "null" rather than `typeof`'s "object". An empty
   * list or a null/undefined first row yields an empty schema.
   *
   * @param {Array<object>} rows - Dataset rows.
   * @returns {Object<string, string>} Column name to type name.
   */
  inferSchema(rows) {
    const schema = {};
    // `== null` is deliberate: it covers both null and undefined first rows,
    // which would otherwise make Object.entries throw.
    if (rows.length === 0 || rows[0] == null) {
      return schema;
    }
    for (const [key, value] of Object.entries(rows[0])) {
      schema[key] = value === null ? "null" : typeof value;
    }
    return schema;
  }

  /**
   * @returns {Array<object>} Shallow copy of the recorded events.
   */
  getEvents() {
    return [...this.events];
  }

  /**
   * @returns {Object<string, object>} Shallow copy of the datasets by name.
   */
  getDatasets() {
    return { ...this.datasetsByName };
  }
}
3132
- export {
3133
- withCache,
3134
- validateWorkflowSemantics,
3135
- validateExecutionPlan,
3136
- parseWorkflow,
3137
- interpolateTemplate,
3138
- interpolateObject,
3139
- getStepDependencies,
3140
- getRunner,
3141
- getParallelGroups,
3142
- WorkflowEngine as default,
3143
- createTemplateVarsHash,
3144
- createInputDatasetsHash,
3145
- createExecutionPlan,
3146
- createCacheManager,
3147
- createCacheKey,
3148
- WorkflowValidationError,
3149
- WorkflowRuntimeError,
3150
- WorkflowEngine,
3151
- WORKFLOW_SCHEMA,
3152
- ReadOnlyCacheManager,
3153
- QueryRunner,
3154
- OutputRunner,
3155
- NoCacheManager,
3156
- HttpSourceRunner,
3157
- FileSourceRunner,
3158
- FileCacheManager,
3159
- BUILTIN_RUNNERS
3160
- };