edsger 0.60.0 → 0.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/env-store.js +3 -0
- package/dist/commands/data-flow/index.d.ts +17 -0
- package/dist/commands/data-flow/index.js +46 -0
- package/dist/commands/screen-flow/index.d.ts +4 -4
- package/dist/commands/screen-flow/index.js +5 -5
- package/dist/commands/sync-aws/index.d.ts +16 -0
- package/dist/commands/sync-aws/index.js +184 -0
- package/dist/commands/sync-datadog/index.d.ts +16 -0
- package/dist/commands/sync-datadog/index.js +199 -0
- package/dist/commands/sync-org-repos/index.d.ts +11 -0
- package/dist/commands/sync-org-repos/index.js +59 -0
- package/dist/commands/sync-terraform/index.d.ts +16 -0
- package/dist/commands/sync-terraform/index.js +211 -0
- package/dist/index.js +111 -2
- package/dist/phases/data-flow/index.d.ts +25 -0
- package/dist/phases/data-flow/index.js +257 -0
- package/dist/phases/data-flow/mcp-server.d.ts +85 -0
- package/dist/phases/data-flow/mcp-server.js +140 -0
- package/dist/phases/data-flow/prompts.d.ts +14 -0
- package/dist/phases/data-flow/prompts.js +36 -0
- package/dist/phases/data-flow/types.d.ts +71 -0
- package/dist/phases/data-flow/types.js +86 -0
- package/dist/phases/output-contracts.js +71 -0
- package/dist/phases/screen-flow/index.d.ts +2 -2
- package/dist/phases/screen-flow/index.js +27 -15
- package/dist/phases/screen-flow/mcp-server.d.ts +1 -1
- package/dist/phases/sync-org-repos/index.d.ts +24 -0
- package/dist/phases/sync-org-repos/index.js +143 -0
- package/dist/skills/phase/data-flow/SKILL.md +82 -0
- package/package.json +3 -3
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * In-process MCP server for the data-flow phase. Exposes the primary tool —
|
|
3
|
+
* `submit_data_flow` — that the agent calls with the structured extraction,
|
|
4
|
+
* plus `record_progress` for streaming status messages.
|
|
5
|
+
*
|
|
6
|
+
* Mirrors the shape of phases/screen-flow/mcp-server.ts; see that file for
|
|
7
|
+
* the design rationale (zod schema + cross-field consistency + capture state).
|
|
8
|
+
*/
|
|
9
|
+
import { z } from 'zod';
|
|
10
|
+
import type { DataFlowExtraction } from './types.js';
|
|
11
|
+
export interface DataFlowCaptureState {
|
|
12
|
+
captured: DataFlowExtraction | null;
|
|
13
|
+
}
|
|
14
|
+
/** Creates a new capture state whose `captured` field starts as `null`. */
export declare function createDataFlowCaptureState(): DataFlowCaptureState;
|
|
15
|
+
export type DataFlowProgressSink = (event: {
|
|
16
|
+
phase: 'detection' | 'enumeration' | 'nodes' | 'edges' | 'submission';
|
|
17
|
+
message: string;
|
|
18
|
+
}) => void;
|
|
19
|
+
export declare function validateConsistency(extraction: DataFlowExtraction): {
|
|
20
|
+
error: string | null;
|
|
21
|
+
};
|
|
22
|
+
export declare function createSubmitDataFlowTool(state: DataFlowCaptureState): import("@anthropic-ai/claude-agent-sdk").SdkMcpToolDefinition<{
|
|
23
|
+
summary: z.ZodString;
|
|
24
|
+
nodes: z.ZodArray<z.ZodObject<{
|
|
25
|
+
slug: z.ZodString;
|
|
26
|
+
name: z.ZodString;
|
|
27
|
+
kind: z.ZodEnum<{
|
|
28
|
+
model: "model";
|
|
29
|
+
source: "source";
|
|
30
|
+
dataset: "dataset";
|
|
31
|
+
transform: "transform";
|
|
32
|
+
sink: "sink";
|
|
33
|
+
queue: "queue";
|
|
34
|
+
}>;
|
|
35
|
+
file: z.ZodOptional<z.ZodString>;
|
|
36
|
+
description: z.ZodOptional<z.ZodString>;
|
|
37
|
+
tech: z.ZodOptional<z.ZodString>;
|
|
38
|
+
schedule: z.ZodOptional<z.ZodString>;
|
|
39
|
+
inputs: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
40
|
+
name: z.ZodString;
|
|
41
|
+
type: z.ZodOptional<z.ZodString>;
|
|
42
|
+
required: z.ZodOptional<z.ZodBoolean>;
|
|
43
|
+
description: z.ZodOptional<z.ZodString>;
|
|
44
|
+
}, z.core.$strip>>>;
|
|
45
|
+
outputs: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
46
|
+
name: z.ZodString;
|
|
47
|
+
type: z.ZodOptional<z.ZodString>;
|
|
48
|
+
required: z.ZodOptional<z.ZodBoolean>;
|
|
49
|
+
description: z.ZodOptional<z.ZodString>;
|
|
50
|
+
}, z.core.$strip>>>;
|
|
51
|
+
sample: z.ZodOptional<z.ZodObject<{
|
|
52
|
+
columns: z.ZodArray<z.ZodString>;
|
|
53
|
+
rows: z.ZodArray<z.ZodArray<z.ZodString>>;
|
|
54
|
+
}, z.core.$strip>>;
|
|
55
|
+
stats: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
56
|
+
label: z.ZodString;
|
|
57
|
+
value: z.ZodString;
|
|
58
|
+
}, z.core.$strip>>>;
|
|
59
|
+
}, z.core.$strip>>;
|
|
60
|
+
edges: z.ZodArray<z.ZodObject<{
|
|
61
|
+
fromSlug: z.ZodString;
|
|
62
|
+
toSlug: z.ZodString;
|
|
63
|
+
kind: z.ZodEnum<{
|
|
64
|
+
data: "data";
|
|
65
|
+
event: "event";
|
|
66
|
+
control: "control";
|
|
67
|
+
derives: "derives";
|
|
68
|
+
}>;
|
|
69
|
+
label: z.ZodOptional<z.ZodString>;
|
|
70
|
+
sourceFile: z.ZodOptional<z.ZodString>;
|
|
71
|
+
}, z.core.$strip>>;
|
|
72
|
+
}>;
|
|
73
|
+
export declare function createRecordProgressTool(sink?: DataFlowProgressSink): import("@anthropic-ai/claude-agent-sdk").SdkMcpToolDefinition<{
|
|
74
|
+
phase: z.ZodEnum<{
|
|
75
|
+
nodes: "nodes";
|
|
76
|
+
edges: "edges";
|
|
77
|
+
detection: "detection";
|
|
78
|
+
enumeration: "enumeration";
|
|
79
|
+
submission: "submission";
|
|
80
|
+
}>;
|
|
81
|
+
message: z.ZodString;
|
|
82
|
+
}>;
|
|
83
|
+
export declare function createDataFlowMcpServer(state: DataFlowCaptureState, options?: {
|
|
84
|
+
onProgress?: DataFlowProgressSink;
|
|
85
|
+
}): import("@anthropic-ai/claude-agent-sdk").McpSdkServerConfigWithInstance;
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * In-process MCP server for the data-flow phase. Exposes the primary tool —
|
|
3
|
+
* `submit_data_flow` — that the agent calls with the structured extraction,
|
|
4
|
+
* plus `record_progress` for streaming status messages.
|
|
5
|
+
*
|
|
6
|
+
* Mirrors the shape of phases/screen-flow/mcp-server.ts; see that file for
|
|
7
|
+
* the design rationale (zod schema + cross-field consistency + capture state).
|
|
8
|
+
*/
|
|
9
|
+
import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
|
|
10
|
+
import { z } from 'zod';
|
|
11
|
+
/**
 * Creates a fresh capture state with nothing captured yet.
 *
 * @returns A new object whose `captured` field is `null`.
 */
export function createDataFlowCaptureState() {
    const emptyState = { captured: null };
    return emptyState;
}
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Zod schemas (mirror types.ts)
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
/** Builds a fresh optional string field. */
const optionalText = () => z.string().optional();
/** Builds a fresh required string field that rejects the empty string. */
const nonEmptyText = () => z.string().min(1);

/** One input/output field of a node: a required name plus optional metadata. */
const dataFieldSchema = z.object({
    name: nonEmptyText(),
    type: optionalText(),
    required: z.boolean().optional(),
    description: optionalText(),
});

/** Tabular sample: column names and rows of string cells. */
const dataSampleSchema = z.object({
    columns: z.array(z.string()),
    rows: z.array(z.array(z.string())),
});

/** A single label/value pair. */
const dataStatSchema = z.object({
    label: z.string(),
    value: z.string(),
});

/** A data node: slug, name and kind are required; everything else is optional. */
const dataNodeSchema = z.object({
    slug: nonEmptyText(),
    name: nonEmptyText(),
    kind: z.enum(['source', 'dataset', 'transform', 'sink', 'queue', 'model']),
    file: optionalText(),
    description: optionalText(),
    tech: optionalText(),
    schedule: optionalText(),
    inputs: z.array(dataFieldSchema).optional(),
    outputs: z.array(dataFieldSchema).optional(),
    sample: dataSampleSchema.optional(),
    stats: z.array(dataStatSchema).optional(),
});

/** A connection between two nodes, referenced by their slugs. */
const dataEdgeSchema = z.object({
    fromSlug: nonEmptyText(),
    toSlug: nonEmptyText(),
    kind: z.enum(['data', 'event', 'control', 'derives']),
    label: optionalText(),
    sourceFile: optionalText(),
});
|
|
51
|
+
/**
 * Checks cross-field consistency of an extraction: node slugs must be unique
 * and every edge endpoint must reference a slug present in `nodes`.
 *
 * Only the first problem found is reported. The message is written for the
 * agent that submitted the data, so it names the exact endpoint field
 * (`fromSlug` or `toSlug`) that failed to resolve.
 *
 * @param extraction Object with `nodes` (each having a `slug`) and `edges`
 *   (each having `fromSlug` and `toSlug`).
 * @returns `{ error: null }` when consistent, otherwise `{ error: <message> }`.
 */
export function validateConsistency(extraction) {
    const slugs = new Set();
    for (const node of extraction.nodes) {
        if (slugs.has(node.slug)) {
            return {
                error: `Duplicate node slug "${node.slug}". Each node.slug MUST be unique within the flow. Re-call submit_data_flow with deduplicated nodes.`,
            };
        }
        slugs.add(node.slug);
    }
    for (const edge of extraction.edges) {
        // fromSlug is checked before toSlug, so a doubly-dangling edge reports fromSlug first.
        let missingField = null;
        if (!slugs.has(edge.fromSlug)) {
            missingField = 'fromSlug';
        }
        else if (!slugs.has(edge.toSlug)) {
            missingField = 'toSlug';
        }
        if (missingField !== null) {
            return {
                error: `Edge "${edge.fromSlug}" → "${edge.toSlug}": ${missingField} "${edge[missingField]}" does not match any node slug. Either add the missing node or drop the edge, then re-call submit_data_flow.`,
            };
        }
    }
    return { error: null };
}
|
|
75
|
+
/**
 * Builds the `submit_data_flow` tool definition.
 *
 * The handler assembles an extraction from the tool arguments and runs
 * `validateConsistency` on it. On failure it returns the error text with
 * `isError: true` and leaves `state.captured` untouched; on success it stores
 * the extraction in `state.captured` and returns a short confirmation.
 *
 * @param state Capture state object; its `captured` field is overwritten on
 *   every successful submission.
 * @returns The value produced by the SDK `tool(...)` helper.
 */
export function createSubmitDataFlowTool(state) {
    const description = [
        'Submit the final data flow extraction. Call this EXACTLY once,',
        'when you have finished mapping every data node and connection. Pass',
        'the full structured flow as the argument. After this call succeeds,',
        'end your turn — do NOT also paste the same data as a fenced code',
        'block. If validation fails, the error message tells you what to fix;',
        'call the tool again with corrected data.',
    ].join(' ');
    const inputShape = {
        summary: z
            .string()
            .min(1)
            .describe('1-3 sentence narrative of what this system does with data and the primary pipelines.'),
        nodes: z
            .array(dataNodeSchema)
            .describe('Every data node: source / dataset / transform / sink / queue / model. node.slug MUST be unique within the flow.'),
        edges: z
            .array(dataEdgeSchema)
            .describe('Connections. fromSlug = upstream, toSlug = downstream. Every fromSlug / toSlug MUST reference a slug present in nodes; drop edges whose endpoints you did not emit.'),
    };
    const handleSubmit = async (args) => {
        const extraction = {
            summary: args.summary,
            nodes: args.nodes,
            edges: args.edges,
        };
        const consistency = validateConsistency(extraction);
        if (consistency.error) {
            // Reject without capturing so the agent can resubmit corrected data.
            return {
                content: [{ type: 'text', text: consistency.error }],
                isError: true,
            };
        }
        state.captured = extraction;
        const confirmation = `Captured ${extraction.nodes.length} data nodes / ${extraction.edges.length} connections. End your turn now.`;
        return {
            content: [{ type: 'text', text: confirmation }],
        };
    };
    return tool('submit_data_flow', description, inputShape, handleSubmit);
}
|
|
118
|
+
/**
 * Builds the `record_progress` tool definition.
 *
 * The handler forwards `{ phase, message }` to `sink` when one was supplied
 * and always replies with the text `ok`.
 *
 * @param sink Optional callback receiving each progress event.
 * @returns The value produced by the SDK `tool(...)` helper.
 */
export function createRecordProgressTool(sink) {
    const description = 'Send a short status update to the user. Does not affect the extraction. Call it at each phase boundary so the user sees progress.';
    const inputShape = {
        phase: z
            .enum(['detection', 'enumeration', 'nodes', 'edges', 'submission'])
            .describe('Which phase the message belongs to.'),
        message: z.string().min(1).describe('Human-readable status update.'),
    };
    const handleProgress = async (args) => {
        // Skip only when no sink was given (null / undefined), like optional call.
        if (sink !== undefined && sink !== null) {
            sink({ phase: args.phase, message: args.message });
        }
        return {
            content: [{ type: 'text', text: 'ok' }],
        };
    };
    return tool('record_progress', description, inputShape, handleProgress);
}
|
|
131
|
+
/**
 * Creates the `data-flow` MCP server (version `1.0.0`) with two tools:
 * the submit tool bound to `state` and the progress tool bound to
 * `options.onProgress` (if provided).
 *
 * @param state Capture state handed to the submit tool.
 * @param options Optional settings; only `onProgress` is read.
 * @returns The value produced by `createSdkMcpServer(...)`.
 */
export function createDataFlowMcpServer(state, options) {
    const submitTool = createSubmitDataFlowTool(state);
    const progressSink = options === undefined || options === null ? undefined : options.onProgress;
    const progressTool = createRecordProgressTool(progressSink);
    return createSdkMcpServer({
        name: 'data-flow',
        version: '1.0.0',
        tools: [submitTool, progressTool],
    });
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompts for the data-flow phase. Loads the system prompt body from
|
|
3
|
+
* `skills/phase/data-flow/SKILL.md` (with optional project override) and
|
|
4
|
+
* appends the JSON output contract.
|
|
5
|
+
*/
|
|
6
|
+
export declare function createDataFlowSystemPrompt(options?: {
|
|
7
|
+
projectDir?: string;
|
|
8
|
+
hasCodebase?: boolean;
|
|
9
|
+
}): Promise<string>;
|
|
10
|
+
export declare function createDataFlowUserPrompt(args: {
|
|
11
|
+
productName: string;
|
|
12
|
+
productDescription?: string;
|
|
13
|
+
guidance?: string;
|
|
14
|
+
}): string;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompts for the data-flow phase. Loads the system prompt body from
|
|
3
|
+
* `skills/phase/data-flow/SKILL.md` (with optional project override) and
|
|
4
|
+
* appends the JSON output contract.
|
|
5
|
+
*/
|
|
6
|
+
import { processConditionals, resolveSkill, } from '../../services/skill-resolver.js';
|
|
7
|
+
import { OUTPUT_CONTRACTS } from '../output-contracts.js';
|
|
8
|
+
/**
 * Assembles the system prompt for the data-flow phase.
 *
 * Resolves the `phase/data-flow` skill (passing `options.projectDir` through),
 * runs its prompt through `processConditionals` with `hasCodebase`
 * (defaulting to `true`), and appends the `data-flow` output contract after
 * a blank line.
 *
 * @param options Optional `{ projectDir, hasCodebase }`.
 * @returns The combined prompt text.
 * @throws Error when the skill cannot be resolved.
 */
export async function createDataFlowSystemPrompt(options) {
    const skill = await resolveSkill('phase/data-flow', {
        projectDir: options?.projectDir,
    });
    if (!skill) {
        throw new Error('Failed to load skill: phase/data-flow');
    }
    const hasCodebase = options?.hasCodebase ?? true;
    const body = processConditionals(skill.prompt, { hasCodebase });
    const contract = OUTPUT_CONTRACTS['data-flow'];
    return `${body}\n\n${contract}`;
}
|
|
22
|
+
/**
 * Builds the user prompt that kicks off a data-flow run.
 *
 * The first paragraph names the product and, when present (truthy), appends
 * the product description and the human guidance. Three fixed instruction
 * paragraphs follow, each separated by a blank line.
 *
 * @param args `{ productName, productDescription?, guidance? }`.
 * @returns The prompt text.
 */
export function createDataFlowUserPrompt(args) {
    let guidanceBlock = '';
    if (args.guidance) {
        guidanceBlock = `\n\n**Human guidance for this run** (focus or exclude as instructed):\n${args.guidance}`;
    }
    let descBlock = '';
    if (args.productDescription) {
        descBlock = `\n**Product description**: ${args.productDescription}`;
    }
    const paragraphs = [
        `Map the data flow for **${args.productName}**.${descBlock}${guidanceBlock}`,
        'Start by detecting the stack (check package.json / pyproject.toml / go.mod / Cargo.toml / requirements.txt etc.), then look for: ETL/pipeline definitions, database migrations or schema files, queue/topic configs, model invocation sites, file ingest scripts. Read just enough source per node to fill in a useful DataNodeSchema — do not need to read everything.',
        'Call `mcp__data-flow__record_progress` at each phase boundary so the user can see your progress (otherwise the CLI looks frozen).',
        'When you are done, return the result by **calling the `mcp__data-flow__submit_data_flow` tool exactly once** with `summary`, `nodes`, and `edges` as arguments. Do not paste the JSON as a fenced text block — the tool call is the deliverable. If the tool returns an error, fix the issue it describes and call the tool again.',
    ];
    return paragraphs.join('\n\n');
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data Flow domain types.
|
|
3
|
+
*
|
|
4
|
+
* A DataNodeSchema is a structured description of one unit of data state or
|
|
5
|
+
* computation in a product — a source, dataset, transform, sink, queue, or
|
|
6
|
+
* model. The CLI extracts these from source code (pipeline definitions,
|
|
7
|
+
* schema files, queue handlers, etc.) and the desktop renders them with a
|
|
8
|
+
* unified <DataNodePreview> component.
|
|
9
|
+
*
|
|
10
|
+
* Companion to ScreenSchema: same flow-graph shape (nodes + edges with a
|
|
11
|
+
* shared `flows` table storing the JSONB schema), different domain. Data
|
|
12
|
+
* flow edges describe how data moves between nodes, not user navigation.
|
|
13
|
+
*/
|
|
14
|
+
export type DataNodeKind = 'source' | 'dataset' | 'transform' | 'sink' | 'queue' | 'model';
|
|
15
|
+
export interface DataField {
|
|
16
|
+
name: string;
|
|
17
|
+
type?: string;
|
|
18
|
+
required?: boolean;
|
|
19
|
+
description?: string;
|
|
20
|
+
}
|
|
21
|
+
export interface DataSample {
|
|
22
|
+
columns: string[];
|
|
23
|
+
rows: string[][];
|
|
24
|
+
}
|
|
25
|
+
export interface DataStat {
|
|
26
|
+
label: string;
|
|
27
|
+
value: string;
|
|
28
|
+
}
|
|
29
|
+
export interface DataNodeSchema {
|
|
30
|
+
/** Stable slug within the flow (e.g. 'raw-events', 'enrich-user'). */
|
|
31
|
+
slug: string;
|
|
32
|
+
/** Human-readable name. */
|
|
33
|
+
name: string;
|
|
34
|
+
kind: DataNodeKind;
|
|
35
|
+
/** Source file path (jump anchor); for datasets, the schema/migration file. */
|
|
36
|
+
file?: string;
|
|
37
|
+
/** One-sentence description. */
|
|
38
|
+
description?: string;
|
|
39
|
+
/** Technology/format hint: 'postgres', 'parquet', 'kafka', 'openai-api', etc. */
|
|
40
|
+
tech?: string;
|
|
41
|
+
/** For transforms: 'cron 0 0 * * *', 'on-event', 'manual', 'continuous'. */
|
|
42
|
+
schedule?: string;
|
|
43
|
+
/** Schema of inputs the node consumes. */
|
|
44
|
+
inputs?: DataField[];
|
|
45
|
+
/** Schema of outputs the node produces. */
|
|
46
|
+
outputs?: DataField[];
|
|
47
|
+
/** Tiny realistic sample (≤ 4 rows) for datasets. */
|
|
48
|
+
sample?: DataSample;
|
|
49
|
+
/** Volume / latency hints — free-form key/value pairs. */
|
|
50
|
+
stats?: DataStat[];
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Edge kinds. Direction is always "data movement": fromSlug = upstream,
|
|
54
|
+
* toSlug = downstream. The kind describes the *nature* of the connection.
|
|
55
|
+
*/
|
|
56
|
+
export type DataEdgeKind = 'data' | 'event' | 'control' | 'derives';
|
|
57
|
+
export interface DataEdge {
|
|
58
|
+
fromSlug: string;
|
|
59
|
+
toSlug: string;
|
|
60
|
+
kind: DataEdgeKind;
|
|
61
|
+
/** Free-form descriptor: 'nightly batch', 'on user signup', 'embedding'. */
|
|
62
|
+
label?: string;
|
|
63
|
+
/** File containing the connection definition (when distinct from from-node's file). */
|
|
64
|
+
sourceFile?: string;
|
|
65
|
+
}
|
|
66
|
+
export interface DataFlowExtraction {
|
|
67
|
+
summary: string;
|
|
68
|
+
nodes: DataNodeSchema[];
|
|
69
|
+
edges: DataEdge[];
|
|
70
|
+
}
|
|
71
|
+
/**
 * Type guard for extraction payloads: true when `value` is an object with a
 * string `summary` and `nodes` / `edges` arrays whose entries each pass the
 * runtime node / edge shape checks.
 */
export declare function isDataFlowExtraction(value: unknown): value is DataFlowExtraction;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data Flow domain types.
|
|
3
|
+
*
|
|
4
|
+
* A DataNodeSchema is a structured description of one unit of data state or
|
|
5
|
+
* computation in a product — a source, dataset, transform, sink, queue, or
|
|
6
|
+
* model. The CLI extracts these from source code (pipeline definitions,
|
|
7
|
+
* schema files, queue handlers, etc.) and the desktop renders them with a
|
|
8
|
+
* unified <DataNodePreview> component.
|
|
9
|
+
*
|
|
10
|
+
* Companion to ScreenSchema: same flow-graph shape (nodes + edges with a
|
|
11
|
+
* shared `flows` table storing the JSONB schema), different domain. Data
|
|
12
|
+
* flow edges describe how data moves between nodes, not user navigation.
|
|
13
|
+
*/
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// Runtime validation for AI-produced extraction
|
|
16
|
+
// ============================================================================
|
|
17
|
+
/** Accepted values for a node's `kind`. */
const NODE_KINDS = new Set(['source', 'dataset', 'transform', 'sink', 'queue', 'model']);
/** Accepted values for an edge's `kind`. */
const EDGE_KINDS = new Set(['data', 'event', 'control', 'derives']);
|
|
31
|
+
/**
 * Tells whether `value` is a non-null object (arrays included).
 *
 * @param value Anything.
 * @returns `true` for objects other than `null`, `false` otherwise.
 */
function isRecord(value) {
    if (value === null) {
        return false;
    }
    return typeof value === 'object';
}
|
|
34
|
+
/**
 * Shape check for one node: must be an object with non-empty string `slug`
 * and `name`, and a string `kind` contained in `NODE_KINDS`. Optional fields
 * are not inspected.
 *
 * @param value Candidate node.
 * @returns `true` when the required fields are well-formed.
 */
function isDataNodeSchema(value) {
    const isNonEmptyString = (candidate) => typeof candidate === 'string' && candidate.length !== 0;
    return (isRecord(value) &&
        isNonEmptyString(value.slug) &&
        isNonEmptyString(value.name) &&
        typeof value.kind === 'string' &&
        NODE_KINDS.has(value.kind));
}
|
|
50
|
+
/**
 * Shape check for one edge: must be an object with string `fromSlug` and
 * `toSlug` (empty strings are accepted here) and a string `kind` contained in
 * `EDGE_KINDS`. Optional fields are not inspected.
 *
 * @param value Candidate edge.
 * @returns `true` when the required fields are well-formed.
 */
function isDataEdge(value) {
    return (isRecord(value) &&
        typeof value.fromSlug === 'string' &&
        typeof value.toSlug === 'string' &&
        typeof value.kind === 'string' &&
        EDGE_KINDS.has(value.kind));
}
|
|
66
|
+
/**
 * Runtime guard for a whole extraction: an object with a string `summary`,
 * a `nodes` array whose entries all pass `isDataNodeSchema`, and an `edges`
 * array whose entries all pass `isDataEdge`.
 *
 * Only per-entry shapes are checked here; this function does not verify
 * that slugs are unique or that edges point at existing nodes.
 *
 * @param value Candidate extraction.
 * @returns `true` when the structure is well-formed.
 */
export function isDataFlowExtraction(value) {
    return (isRecord(value) &&
        typeof value.summary === 'string' &&
        Array.isArray(value.nodes) &&
        Array.isArray(value.edges) &&
        value.nodes.every(isDataNodeSchema) &&
        value.edges.every(isDataEdge));
}
|
|
@@ -949,5 +949,76 @@ submit_screen_flow({
|
|
|
949
949
|
]
|
|
950
950
|
})
|
|
951
951
|
\`\`\`
|
|
952
|
+
`,
|
|
953
|
+
'data-flow': `
|
|
954
|
+
**CRITICAL — How to return the result**:
|
|
955
|
+
|
|
956
|
+
Return the extraction by calling the MCP tool
|
|
957
|
+
\`mcp__data-flow__submit_data_flow\` **exactly once** with three arguments:
|
|
958
|
+
|
|
959
|
+
- \`summary\` — 1-3 sentence narrative of what this system does with data and its primary pipelines
|
|
960
|
+
- \`nodes\` — array of DataNodeSchema objects (every source / dataset / transform / sink / queue / model)
|
|
961
|
+
- \`edges\` — array of DataEdge objects (connections, with direction = data movement)
|
|
962
|
+
|
|
963
|
+
The tool validates the arguments against the schema. If it returns an error,
|
|
964
|
+
fix the issue it describes and call the tool again. After a successful call,
|
|
965
|
+
end your turn — do not also paste the same data as a fenced text block.
|
|
966
|
+
|
|
967
|
+
You can also call \`mcp__data-flow__record_progress({ phase, message })\` at
|
|
968
|
+
each phase boundary (detection / enumeration / nodes / edges / submission)
|
|
969
|
+
to keep the user informed during long runs. This is observability only — it
|
|
970
|
+
does not affect the extraction.
|
|
971
|
+
|
|
972
|
+
DataNodeSchema fields:
|
|
973
|
+
- \`slug\` (unique within the flow), \`name\`, \`kind\`, \`file?\`
|
|
974
|
+
- \`kind\`: one of \`source\`, \`dataset\`, \`transform\`, \`sink\`, \`queue\`, \`model\`
|
|
975
|
+
- \`description?\`: one-sentence summary
|
|
976
|
+
- \`tech?\`: technology / format hint (e.g. \`postgres\`, \`parquet\`, \`kafka\`, \`openai-api\`)
|
|
977
|
+
- \`schedule?\`: for transforms (e.g. \`cron 0 0 * * *\`, \`on-event\`, \`manual\`, \`continuous\`)
|
|
978
|
+
- \`inputs?\` / \`outputs?\`: arrays of \`{ name, type?, required?, description? }\`
|
|
979
|
+
- \`sample?\`: \`{ columns: [string], rows: [[string]] }\` — at most 4 sample rows for datasets
|
|
980
|
+
- \`stats?\`: array of \`{ label, value }\` (volume, latency, count hints)
|
|
981
|
+
|
|
982
|
+
DataEdge fields:
|
|
983
|
+
- \`fromSlug\` (upstream), \`toSlug\` (downstream) — both MUST appear in nodes
|
|
984
|
+
- \`kind\`: one of \`data\`, \`event\`, \`control\`, \`derives\`
|
|
985
|
+
- \`label?\`: free-form descriptor (e.g. \`nightly batch\`, \`on user signup\`)
|
|
986
|
+
- \`sourceFile?\`: file containing the read/write/trigger code
|
|
987
|
+
|
|
988
|
+
Edge direction convention: fromSlug is upstream (data origin), toSlug is downstream
|
|
989
|
+
(data destination). A transform that reads from a dataset and writes to a queue
|
|
990
|
+
produces two edges: \`dataset → transform\` (kind: data) and \`transform → queue\`
|
|
991
|
+
(kind: event).
|
|
992
|
+
|
|
993
|
+
Schematic example of the tool call:
|
|
994
|
+
|
|
995
|
+
\`\`\`
|
|
996
|
+
submit_data_flow({
|
|
997
|
+
summary: "Nightly product-feed pipeline: scrape vendor sites, normalize, write to Postgres, publish change events to Kafka.",
|
|
998
|
+
nodes: [
|
|
999
|
+
{ slug: "vendor-scrape", name: "Vendor scraper", kind: "source",
|
|
1000
|
+
file: "src/scrape/vendor.ts", tech: "playwright",
|
|
1001
|
+
schedule: "cron 0 0 * * *",
|
|
1002
|
+
outputs: [{ name: "html", type: "string" }, { name: "url", type: "string" }] },
|
|
1003
|
+
{ slug: "normalize", name: "Normalize products", kind: "transform",
|
|
1004
|
+
file: "src/etl/normalize.ts", tech: "node",
|
|
1005
|
+
inputs: [{ name: "html", type: "string" }],
|
|
1006
|
+
outputs: [{ name: "sku", type: "string" }, { name: "price", type: "decimal" }] },
|
|
1007
|
+
{ slug: "products", name: "products", kind: "dataset",
|
|
1008
|
+
file: "supabase/migrations/0001_products.sql", tech: "postgres",
|
|
1009
|
+
sample: { columns: ["sku", "price"], rows: [["ABC-1", "9.99"]] } },
|
|
1010
|
+
{ slug: "change-events", name: "product.changed", kind: "queue",
|
|
1011
|
+
file: "src/queues/products.ts", tech: "kafka" }
|
|
1012
|
+
],
|
|
1013
|
+
edges: [
|
|
1014
|
+
{ fromSlug: "vendor-scrape", toSlug: "normalize", kind: "data",
|
|
1015
|
+
label: "raw HTML", sourceFile: "src/etl/normalize.ts" },
|
|
1016
|
+
{ fromSlug: "normalize", toSlug: "products", kind: "data",
|
|
1017
|
+
label: "upsert", sourceFile: "src/etl/normalize.ts" },
|
|
1018
|
+
{ fromSlug: "products", toSlug: "change-events", kind: "event",
|
|
1019
|
+
label: "on row change", sourceFile: "src/queues/products.ts" }
|
|
1020
|
+
]
|
|
1021
|
+
})
|
|
1022
|
+
\`\`\`
|
|
952
1023
|
`,
|
|
953
1024
|
};
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* screen-flow phase: clone the product's repo, ask Claude to map every
|
|
3
3
|
* user-facing screen and the transitions between them into a structured
|
|
4
|
-
* ScreenFlowExtraction, then persist the result to
|
|
5
|
-
*
|
|
4
|
+
* ScreenFlowExtraction, then persist the result to flows / flow_nodes /
|
|
5
|
+
* flow_edges (rows tagged `type = 'screen'`) via the Supabase SDK.
|
|
6
6
|
*
|
|
7
7
|
* Companion to find-architecture / find-bugs / find-features. Same workspace
|
|
8
8
|
* pattern, but writes to its own tables rather than filing issues.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* screen-flow phase: clone the product's repo, ask Claude to map every
|
|
3
3
|
* user-facing screen and the transitions between them into a structured
|
|
4
|
-
* ScreenFlowExtraction, then persist the result to
|
|
5
|
-
*
|
|
4
|
+
* ScreenFlowExtraction, then persist the result to flows / flow_nodes /
|
|
5
|
+
* flow_edges (rows tagged `type = 'screen'`) via the Supabase SDK.
|
|
6
6
|
*
|
|
7
7
|
* Companion to find-architecture / find-bugs / find-features. Same workspace
|
|
8
8
|
* pattern, but writes to its own tables rather than filing issues.
|
|
@@ -186,7 +186,7 @@ function tryFallbackParse(resultMessage, assistantText) {
|
|
|
186
186
|
// ============================================================================
|
|
187
187
|
async function markFlowRunning(supabase, flowId) {
|
|
188
188
|
const { error } = await supabase
|
|
189
|
-
.from('
|
|
189
|
+
.from('flows')
|
|
190
190
|
.update({ status: 'running', error: null })
|
|
191
191
|
.eq('id', flowId);
|
|
192
192
|
if (error) {
|
|
@@ -195,7 +195,7 @@ async function markFlowRunning(supabase, flowId) {
|
|
|
195
195
|
}
|
|
196
196
|
async function markFlowFailed(supabase, flowId, errorMessage) {
|
|
197
197
|
await supabase
|
|
198
|
-
.from('
|
|
198
|
+
.from('flows')
|
|
199
199
|
.update({
|
|
200
200
|
status: 'failed',
|
|
201
201
|
error: errorMessage,
|
|
@@ -204,9 +204,23 @@ async function markFlowFailed(supabase, flowId, errorMessage) {
|
|
|
204
204
|
.eq('id', flowId);
|
|
205
205
|
}
|
|
206
206
|
async function persistTheme(supabase, flowId, theme) {
|
|
207
|
+
// Theme is screen-flow-specific; stash it inside the generic options JSONB.
|
|
208
|
+
const { data, error: readError } = await supabase
|
|
209
|
+
.from('flows')
|
|
210
|
+
.select('options')
|
|
211
|
+
.eq('id', flowId)
|
|
212
|
+
.single();
|
|
213
|
+
if (readError) {
|
|
214
|
+
logWarning(`Could not read flow options: ${readError.message}`);
|
|
215
|
+
return;
|
|
216
|
+
}
|
|
217
|
+
const nextOptions = {
|
|
218
|
+
...(data?.options ?? {}),
|
|
219
|
+
theme,
|
|
220
|
+
};
|
|
207
221
|
const { error } = await supabase
|
|
208
|
-
.from('
|
|
209
|
-
.update({
|
|
222
|
+
.from('flows')
|
|
223
|
+
.update({ options: nextOptions })
|
|
210
224
|
.eq('id', flowId);
|
|
211
225
|
if (error) {
|
|
212
226
|
logWarning(`Could not persist extracted theme: ${error.message}`);
|
|
@@ -214,7 +228,7 @@ async function persistTheme(supabase, flowId, theme) {
|
|
|
214
228
|
}
|
|
215
229
|
async function markFlowSuccess(supabase, flowId, summary) {
|
|
216
230
|
await supabase
|
|
217
|
-
.from('
|
|
231
|
+
.from('flows')
|
|
218
232
|
.update({
|
|
219
233
|
status: 'success',
|
|
220
234
|
summary,
|
|
@@ -225,14 +239,14 @@ async function markFlowSuccess(supabase, flowId, summary) {
|
|
|
225
239
|
}
|
|
226
240
|
async function persistFlow(supabase, flowId, extraction) {
|
|
227
241
|
// Re-runs replace prior content for the same flow row.
|
|
228
|
-
await supabase.from('
|
|
229
|
-
await supabase.from('
|
|
242
|
+
await supabase.from('flow_edges').delete().eq('flow_id', flowId);
|
|
243
|
+
await supabase.from('flow_nodes').delete().eq('flow_id', flowId);
|
|
230
244
|
if (extraction.nodes.length === 0) {
|
|
231
245
|
return { nodesCreated: 0, edgesCreated: 0 };
|
|
232
246
|
}
|
|
233
247
|
const nodeRows = extraction.nodes.map((n, i) => buildNodeRow(flowId, n, i));
|
|
234
248
|
const { data: insertedNodes, error: nodesError } = await supabase
|
|
235
|
-
.from('
|
|
249
|
+
.from('flow_nodes')
|
|
236
250
|
.insert(nodeRows)
|
|
237
251
|
.select('id, slug');
|
|
238
252
|
if (nodesError) {
|
|
@@ -244,7 +258,7 @@ async function persistFlow(supabase, flowId, extraction) {
|
|
|
244
258
|
.filter((e) => e !== null);
|
|
245
259
|
if (edgeRows.length > 0) {
|
|
246
260
|
const { error: edgesError } = await supabase
|
|
247
|
-
.from('
|
|
261
|
+
.from('flow_edges')
|
|
248
262
|
.insert(edgeRows);
|
|
249
263
|
if (edgesError) {
|
|
250
264
|
throw new Error(`Failed to insert edges: ${edgesError.message}`);
|
|
@@ -260,8 +274,6 @@ function buildNodeRow(flowId, node, index) {
|
|
|
260
274
|
flow_id: flowId,
|
|
261
275
|
slug: node.slug,
|
|
262
276
|
name: node.name,
|
|
263
|
-
route: node.route ?? null,
|
|
264
|
-
file: node.file ?? null,
|
|
265
277
|
kind: node.kind,
|
|
266
278
|
schema: node,
|
|
267
279
|
position_x: (index % COLUMNS) * COLUMN_WIDTH,
|
|
@@ -278,8 +290,8 @@ function buildEdgeRow(flowId, edge, slugToId) {
|
|
|
278
290
|
flow_id: flowId,
|
|
279
291
|
from_node_id: fromId,
|
|
280
292
|
to_node_id: toId,
|
|
281
|
-
|
|
282
|
-
|
|
293
|
+
label: edge.triggerLabel,
|
|
294
|
+
source_anchor: edge.triggerFile ?? null,
|
|
283
295
|
kind: edge.kind,
|
|
284
296
|
};
|
|
285
297
|
}
|
|
@@ -183,10 +183,10 @@ export declare function createSubmitScreenFlowTool(state: ScreenFlowCaptureState
|
|
|
183
183
|
export declare function createRecordProgressTool(sink?: ScreenFlowProgressSink): import("@anthropic-ai/claude-agent-sdk").SdkMcpToolDefinition<{
|
|
184
184
|
phase: z.ZodEnum<{
|
|
185
185
|
detection: "detection";
|
|
186
|
+
submission: "submission";
|
|
186
187
|
routing: "routing";
|
|
187
188
|
screens: "screens";
|
|
188
189
|
transitions: "transitions";
|
|
189
|
-
submission: "submission";
|
|
190
190
|
}>;
|
|
191
191
|
message: z.ZodString;
|
|
192
192
|
}>;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: sync-org-repos
|
|
3
|
+
*
|
|
4
|
+
* Fetches all repositories from a GitHub organization using the local `gh` CLI,
|
|
5
|
+
* then creates a product for each repo that isn't already linked to one
|
|
6
|
+
* within the same team.
|
|
7
|
+
*
|
|
8
|
+
* Uses `gh api --paginate` for truly unlimited pagination (no hardcoded cap).
|
|
9
|
+
* Forks and archived repos are filtered out client-side.
|
|
10
|
+
*/
|
|
11
|
+
export interface SyncOrgReposResult {
|
|
12
|
+
status: 'success' | 'error';
|
|
13
|
+
message: string;
|
|
14
|
+
total: number;
|
|
15
|
+
created: number;
|
|
16
|
+
skipped: number;
|
|
17
|
+
repos?: string[];
|
|
18
|
+
}
|
|
19
|
+
export declare function syncOrgRepos(opts: {
|
|
20
|
+
teamId: string;
|
|
21
|
+
orgLogin: string;
|
|
22
|
+
userId: string;
|
|
23
|
+
verbose?: boolean;
|
|
24
|
+
}): Promise<SyncOrgReposResult>;
|