edsger 0.59.0 → 0.61.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/auth/env-store.js +3 -0
  2. package/dist/commands/data-flow/index.d.ts +17 -0
  3. package/dist/commands/data-flow/index.js +46 -0
  4. package/dist/commands/recipes/index.d.ts +15 -0
  5. package/dist/commands/recipes/index.js +34 -0
  6. package/dist/commands/screen-flow/index.d.ts +4 -4
  7. package/dist/commands/screen-flow/index.js +5 -5
  8. package/dist/commands/sync-aws/index.d.ts +16 -0
  9. package/dist/commands/sync-aws/index.js +184 -0
  10. package/dist/commands/sync-datadog/index.d.ts +16 -0
  11. package/dist/commands/sync-datadog/index.js +199 -0
  12. package/dist/commands/sync-terraform/index.d.ts +16 -0
  13. package/dist/commands/sync-terraform/index.js +211 -0
  14. package/dist/index.js +99 -8
  15. package/dist/phases/data-flow/index.d.ts +25 -0
  16. package/dist/phases/data-flow/index.js +257 -0
  17. package/dist/phases/data-flow/mcp-server.d.ts +85 -0
  18. package/dist/phases/data-flow/mcp-server.js +140 -0
  19. package/dist/phases/data-flow/prompts.d.ts +14 -0
  20. package/dist/phases/data-flow/prompts.js +36 -0
  21. package/dist/phases/data-flow/types.d.ts +71 -0
  22. package/dist/phases/data-flow/types.js +86 -0
  23. package/dist/phases/output-contracts.js +71 -0
  24. package/dist/phases/recipes/index.d.ts +56 -0
  25. package/dist/phases/recipes/index.js +301 -0
  26. package/dist/phases/recipes/mcp-server.d.ts +63 -0
  27. package/dist/phases/recipes/mcp-server.js +204 -0
  28. package/dist/phases/recipes/prompts.d.ts +35 -0
  29. package/dist/phases/recipes/prompts.js +105 -0
  30. package/dist/phases/recipes/types.d.ts +42 -0
  31. package/dist/phases/recipes/types.js +16 -0
  32. package/dist/phases/screen-flow/index.d.ts +2 -2
  33. package/dist/phases/screen-flow/index.js +27 -15
  34. package/dist/phases/screen-flow/mcp-server.d.ts +1 -1
  35. package/dist/skills/phase/data-flow/SKILL.md +82 -0
  36. package/package.json +3 -3
  37. package/vitest.config.ts +1 -1
@@ -0,0 +1,85 @@
1
+ /**
2
+ * In-process MCP server for the data-flow phase. Exposes two tools:
3
+ * `submit_data_flow`, which the agent calls with the structured extraction,
4
+ * and `record_progress` for streaming status messages.
5
+ *
6
+ * Mirrors the shape of phases/screen-flow/mcp-server.ts; see that file for
7
+ * the design rationale (zod schema + cross-field consistency + capture state).
8
+ */
9
+ import { z } from 'zod';
10
+ import type { DataFlowExtraction } from './types.js';
11
+ export interface DataFlowCaptureState {
12
+ captured: DataFlowExtraction | null;
13
+ }
14
+ export declare function createDataFlowCaptureState(): DataFlowCaptureState;
15
+ export type DataFlowProgressSink = (event: {
16
+ phase: 'detection' | 'enumeration' | 'nodes' | 'edges' | 'submission';
17
+ message: string;
18
+ }) => void;
19
+ export declare function validateConsistency(extraction: DataFlowExtraction): {
20
+ error: string | null;
21
+ };
22
+ export declare function createSubmitDataFlowTool(state: DataFlowCaptureState): import("@anthropic-ai/claude-agent-sdk").SdkMcpToolDefinition<{
23
+ summary: z.ZodString;
24
+ nodes: z.ZodArray<z.ZodObject<{
25
+ slug: z.ZodString;
26
+ name: z.ZodString;
27
+ kind: z.ZodEnum<{
28
+ model: "model";
29
+ source: "source";
30
+ dataset: "dataset";
31
+ transform: "transform";
32
+ sink: "sink";
33
+ queue: "queue";
34
+ }>;
35
+ file: z.ZodOptional<z.ZodString>;
36
+ description: z.ZodOptional<z.ZodString>;
37
+ tech: z.ZodOptional<z.ZodString>;
38
+ schedule: z.ZodOptional<z.ZodString>;
39
+ inputs: z.ZodOptional<z.ZodArray<z.ZodObject<{
40
+ name: z.ZodString;
41
+ type: z.ZodOptional<z.ZodString>;
42
+ required: z.ZodOptional<z.ZodBoolean>;
43
+ description: z.ZodOptional<z.ZodString>;
44
+ }, z.core.$strip>>>;
45
+ outputs: z.ZodOptional<z.ZodArray<z.ZodObject<{
46
+ name: z.ZodString;
47
+ type: z.ZodOptional<z.ZodString>;
48
+ required: z.ZodOptional<z.ZodBoolean>;
49
+ description: z.ZodOptional<z.ZodString>;
50
+ }, z.core.$strip>>>;
51
+ sample: z.ZodOptional<z.ZodObject<{
52
+ columns: z.ZodArray<z.ZodString>;
53
+ rows: z.ZodArray<z.ZodArray<z.ZodString>>;
54
+ }, z.core.$strip>>;
55
+ stats: z.ZodOptional<z.ZodArray<z.ZodObject<{
56
+ label: z.ZodString;
57
+ value: z.ZodString;
58
+ }, z.core.$strip>>>;
59
+ }, z.core.$strip>>;
60
+ edges: z.ZodArray<z.ZodObject<{
61
+ fromSlug: z.ZodString;
62
+ toSlug: z.ZodString;
63
+ kind: z.ZodEnum<{
64
+ data: "data";
65
+ event: "event";
66
+ control: "control";
67
+ derives: "derives";
68
+ }>;
69
+ label: z.ZodOptional<z.ZodString>;
70
+ sourceFile: z.ZodOptional<z.ZodString>;
71
+ }, z.core.$strip>>;
72
+ }>;
73
+ export declare function createRecordProgressTool(sink?: DataFlowProgressSink): import("@anthropic-ai/claude-agent-sdk").SdkMcpToolDefinition<{
74
+ phase: z.ZodEnum<{
75
+ nodes: "nodes";
76
+ edges: "edges";
77
+ detection: "detection";
78
+ enumeration: "enumeration";
79
+ submission: "submission";
80
+ }>;
81
+ message: z.ZodString;
82
+ }>;
83
+ export declare function createDataFlowMcpServer(state: DataFlowCaptureState, options?: {
84
+ onProgress?: DataFlowProgressSink;
85
+ }): import("@anthropic-ai/claude-agent-sdk").McpSdkServerConfigWithInstance;
@@ -0,0 +1,140 @@
1
+ /**
2
+ * In-process MCP server for the data-flow phase. Exposes two tools:
3
+ * `submit_data_flow`, which the agent calls with the structured extraction,
4
+ * and `record_progress` for streaming status messages.
5
+ *
6
+ * Mirrors the shape of phases/screen-flow/mcp-server.ts; see that file for
7
+ * the design rationale (zod schema + cross-field consistency + capture state).
8
+ */
9
+ import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
10
+ import { z } from 'zod';
/**
 * Build the mutable holder that the `submit_data_flow` tool writes into.
 *
 * @returns a fresh object whose `captured` slot starts out as `null`.
 */
export function createDataFlowCaptureState() {
    const initialState = { captured: null };
    return initialState;
}
// ---------------------------------------------------------------------------
// Zod schemas (mirror types.ts)
// ---------------------------------------------------------------------------
/** Fresh schema for a string that must contain at least one character. */
const requiredText = () => z.string().min(1);
/** Fresh schema for a free-form string that may be omitted. */
const optionalText = () => z.string().optional();

/** One named field in a node's `inputs` / `outputs` list. */
const dataFieldSchema = z.object({
    name: requiredText(),
    type: optionalText(),
    required: z.boolean().optional(),
    description: optionalText(),
});

/** Tabular sample: column names plus rows of string cells. */
const dataSampleSchema = z.object({
    columns: z.array(z.string()),
    rows: z.array(z.array(z.string())),
});

/** One label/value pair in a node's `stats` list. */
const dataStatSchema = z.object({
    label: z.string(),
    value: z.string(),
});

/** A single data node; only `slug`, `name` and `kind` are mandatory. */
const dataNodeSchema = z.object({
    slug: requiredText(),
    name: requiredText(),
    kind: z.enum(['source', 'dataset', 'transform', 'sink', 'queue', 'model']),
    file: optionalText(),
    description: optionalText(),
    tech: optionalText(),
    schedule: optionalText(),
    inputs: z.array(dataFieldSchema).optional(),
    outputs: z.array(dataFieldSchema).optional(),
    sample: dataSampleSchema.optional(),
    stats: z.array(dataStatSchema).optional(),
});

/** A directed connection between two node slugs. */
const dataEdgeSchema = z.object({
    fromSlug: requiredText(),
    toSlug: requiredText(),
    kind: z.enum(['data', 'event', 'control', 'derives']),
    label: optionalText(),
    sourceFile: optionalText(),
});
/**
 * Cross-field consistency check for a data-flow extraction.
 *
 * Rules are evaluated in order and the first violation is reported:
 *   1. every `node.slug` is unique within `extraction.nodes`;
 *   2. every edge's `fromSlug` and `toSlug` matches some node slug.
 *
 * @param extraction - object with `nodes` (each carrying `slug`) and
 *   `edges` (each carrying `fromSlug` / `toSlug`).
 * @returns `{ error: null }` when consistent, otherwise `{ error }` holding
 *   a human-readable message (`createSubmitDataFlowTool` sends it back as
 *   the tool's error text).
 */
export function validateConsistency(extraction) {
    const knownSlugs = new Set();
    for (const { slug } of extraction.nodes) {
        if (knownSlugs.has(slug)) {
            return {
                error: `Duplicate node slug "${slug}". Each node.slug MUST be unique within the flow. Re-call submit_data_flow with deduplicated nodes.`,
            };
        }
        knownSlugs.add(slug);
    }
    for (const { fromSlug, toSlug } of extraction.edges) {
        // Each message leads with the endpoint that is actually dangling so
        // the reader does not have to guess which side of the edge to fix.
        if (!knownSlugs.has(fromSlug)) {
            return {
                error: `Edge fromSlug "${fromSlug}" (→ toSlug "${toSlug}") does not match any node slug. Either add the missing node or drop the edge, then re-call submit_data_flow.`,
            };
        }
        if (!knownSlugs.has(toSlug)) {
            return {
                error: `Edge toSlug "${toSlug}" (← fromSlug "${fromSlug}") does not match any node slug. Either add the missing node or drop the edge, then re-call submit_data_flow.`,
            };
        }
    }
    return { error: null };
}
/**
 * Build the `submit_data_flow` tool definition.
 *
 * The handler assembles `{ summary, nodes, edges }` from the tool arguments,
 * runs `validateConsistency` on it, and either returns the validation
 * message as an error result or stores the extraction on `state.captured`
 * and returns a confirmation.
 *
 * @param state - capture holder; its `captured` property is overwritten on
 *   every successful submission.
 * @returns the value produced by the SDK's `tool(...)` helper.
 */
export function createSubmitDataFlowTool(state) {
    const description = [
        'Submit the final data flow extraction. Call this EXACTLY once,',
        'when you have finished mapping every data node and connection. Pass',
        'the full structured flow as the argument. After this call succeeds,',
        'end your turn — do NOT also paste the same data as a fenced code',
        'block. If validation fails, the error message tells you what to fix;',
        'call the tool again with corrected data.',
    ].join(' ');
    const inputShape = {
        summary: z
            .string()
            .min(1)
            .describe('1-3 sentence narrative of what this system does with data and the primary pipelines.'),
        nodes: z
            .array(dataNodeSchema)
            .describe('Every data node: source / dataset / transform / sink / queue / model. node.slug MUST be unique within the flow.'),
        edges: z
            .array(dataEdgeSchema)
            .describe('Connections. fromSlug = upstream, toSlug = downstream. Every fromSlug / toSlug MUST reference a slug present in nodes; drop edges whose endpoints you did not emit.'),
    };
    const handleSubmit = async (args) => {
        const submitted = {
            summary: args.summary,
            nodes: args.nodes,
            edges: args.edges,
        };
        const verdict = validateConsistency(submitted);
        if (verdict.error) {
            // Reject without touching the capture state.
            return {
                content: [{ type: 'text', text: verdict.error }],
                isError: true,
            };
        }
        state.captured = submitted;
        const confirmation = `Captured ${submitted.nodes.length} data nodes / ${submitted.edges.length} connections. End your turn now.`;
        return {
            content: [{ type: 'text', text: confirmation }],
        };
    };
    return tool('submit_data_flow', description, inputShape, handleSubmit);
}
/**
 * Build the `record_progress` tool definition.
 *
 * The handler forwards `{ phase, message }` to `sink` when one was supplied
 * and always answers with the text `ok`.
 *
 * @param sink - optional callback that receives each progress event.
 * @returns the value produced by the SDK's `tool(...)` helper.
 */
export function createRecordProgressTool(sink) {
    const description = 'Send a short status update to the user. Does not affect the extraction. Call it at each phase boundary so the user sees progress.';
    const inputShape = {
        phase: z
            .enum(['detection', 'enumeration', 'nodes', 'edges', 'submission'])
            .describe('Which phase the message belongs to.'),
        message: z.string().min(1).describe('Human-readable status update.'),
    };
    const handleProgress = async (args) => {
        if (sink != null) {
            sink({ phase: args.phase, message: args.message });
        }
        return {
            content: [{ type: 'text', text: 'ok' }],
        };
    };
    return tool('record_progress', description, inputShape, handleProgress);
}
/**
 * Assemble the `data-flow` MCP server from its two tools.
 *
 * @param state - capture holder handed to the submit tool.
 * @param options - optional settings; `options.onProgress`, when present,
 *   becomes the sink of the progress tool.
 * @returns the value produced by the SDK's `createSdkMcpServer(...)`.
 */
export function createDataFlowMcpServer(state, options) {
    const submitTool = createSubmitDataFlowTool(state);
    const progressTool = createRecordProgressTool(options?.onProgress);
    return createSdkMcpServer({
        name: 'data-flow',
        version: '1.0.0',
        tools: [submitTool, progressTool],
    });
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Prompts for the data-flow phase. Loads the system prompt body from
3
+ * `skills/phase/data-flow/SKILL.md` (with optional project override) and
4
+ * appends the JSON output contract.
5
+ */
6
+ export declare function createDataFlowSystemPrompt(options?: {
7
+ projectDir?: string;
8
+ hasCodebase?: boolean;
9
+ }): Promise<string>;
10
+ export declare function createDataFlowUserPrompt(args: {
11
+ productName: string;
12
+ productDescription?: string;
13
+ guidance?: string;
14
+ }): string;
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Prompts for the data-flow phase. Loads the system prompt body from
3
+ * `skills/phase/data-flow/SKILL.md` (with optional project override) and
4
+ * appends the JSON output contract.
5
+ */
6
+ import { processConditionals, resolveSkill, } from '../../services/skill-resolver.js';
7
+ import { OUTPUT_CONTRACTS } from '../output-contracts.js';
/**
 * Build the system prompt for the data-flow phase.
 *
 * Resolves the `phase/data-flow` skill (passing `options.projectDir`
 * through), runs its prompt through `processConditionals` with
 * `hasCodebase` defaulting to `true`, and appends the `data-flow` entry of
 * `OUTPUT_CONTRACTS` after a blank line.
 *
 * @param options - optional `{ projectDir, hasCodebase }`.
 * @returns promise of the assembled prompt text.
 * @throws Error when the skill cannot be resolved.
 */
export async function createDataFlowSystemPrompt(options) {
    const resolved = await resolveSkill('phase/data-flow', {
        projectDir: options?.projectDir,
    });
    if (!resolved) {
        throw new Error('Failed to load skill: phase/data-flow');
    }
    const conditionalFlags = { hasCodebase: options?.hasCodebase ?? true };
    const body = processConditionals(resolved.prompt, conditionalFlags);
    const contract = OUTPUT_CONTRACTS['data-flow'];
    return `${body}

${contract}`;
}
/**
 * Build the user prompt that kicks off a data-flow extraction run.
 *
 * @param args - `productName` is always interpolated; a truthy
 *   `productDescription` adds a description line and a truthy `guidance`
 *   adds a human-guidance paragraph right after the opening sentence.
 * @returns the prompt text.
 */
export function createDataFlowUserPrompt(args) {
    let guidanceBlock = '';
    if (args.guidance) {
        guidanceBlock = `\n\n**Human guidance for this run** (focus or exclude as instructed):\n${args.guidance}`;
    }
    let descBlock = '';
    if (args.productDescription) {
        descBlock = `\n**Product description**: ${args.productDescription}`;
    }
    const opening = `Map the data flow for **${args.productName}**.${descBlock}${guidanceBlock}`;
    return `${opening}

Start by detecting the stack (check package.json / pyproject.toml / go.mod / Cargo.toml / requirements.txt etc.), then look for: ETL/pipeline definitions, database migrations or schema files, queue/topic configs, model invocation sites, file ingest scripts. Read just enough source per node to fill in a useful DataNodeSchema — do not need to read everything.

Call \`mcp__data-flow__record_progress\` at each phase boundary so the user can see your progress (otherwise the CLI looks frozen).

When you are done, return the result by **calling the \`mcp__data-flow__submit_data_flow\` tool exactly once** with \`summary\`, \`nodes\`, and \`edges\` as arguments. Do not paste the JSON as a fenced text block — the tool call is the deliverable. If the tool returns an error, fix the issue it describes and call the tool again.`;
}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Data Flow domain types.
3
+ *
4
+ * A DataNodeSchema is a structured description of one unit of data state or
5
+ * computation in a product — a source, dataset, transform, sink, queue, or
6
+ * model. The CLI extracts these from source code (pipeline definitions,
7
+ * schema files, queue handlers, etc.) and the desktop renders them with a
8
+ * unified <DataNodePreview> component.
9
+ *
10
+ * Companion to ScreenSchema: same flow-graph shape (nodes + edges with a
11
+ * shared `flows` table storing the JSONB schema), different domain. Data
12
+ * flow edges describe how data moves between nodes, not user navigation.
13
+ */
14
+ export type DataNodeKind = 'source' | 'dataset' | 'transform' | 'sink' | 'queue' | 'model';
15
+ export interface DataField {
16
+ name: string;
17
+ type?: string;
18
+ required?: boolean;
19
+ description?: string;
20
+ }
21
+ export interface DataSample {
22
+ columns: string[];
23
+ rows: string[][];
24
+ }
25
+ export interface DataStat {
26
+ label: string;
27
+ value: string;
28
+ }
29
+ export interface DataNodeSchema {
30
+ /** Stable slug within the flow (e.g. 'raw-events', 'enrich-user'). */
31
+ slug: string;
32
+ /** Human-readable name. */
33
+ name: string;
34
+ kind: DataNodeKind;
35
+ /** Source file path (jump anchor); for datasets, the schema/migration file. */
36
+ file?: string;
37
+ /** One-sentence description. */
38
+ description?: string;
39
+ /** Technology/format hint: 'postgres', 'parquet', 'kafka', 'openai-api', etc. */
40
+ tech?: string;
41
+ /** For transforms: 'cron 0 0 * * *', 'on-event', 'manual', 'continuous'. */
42
+ schedule?: string;
43
+ /** Schema of inputs the node consumes. */
44
+ inputs?: DataField[];
45
+ /** Schema of outputs the node produces. */
46
+ outputs?: DataField[];
47
+ /** Tiny realistic sample (≤ 4 rows) for datasets. */
48
+ sample?: DataSample;
49
+ /** Volume / latency hints — free-form key/value pairs. */
50
+ stats?: DataStat[];
51
+ }
52
+ /**
53
+ * Edge kinds. Direction is always "data movement": fromSlug = upstream,
54
+ * toSlug = downstream. The kind describes the *nature* of the connection.
55
+ */
56
+ export type DataEdgeKind = 'data' | 'event' | 'control' | 'derives';
57
+ export interface DataEdge {
58
+ fromSlug: string;
59
+ toSlug: string;
60
+ kind: DataEdgeKind;
61
+ /** Free-form descriptor: 'nightly batch', 'on user signup', 'embedding'. */
62
+ label?: string;
63
+ /** File containing the connection definition (when distinct from from-node's file). */
64
+ sourceFile?: string;
65
+ }
66
+ export interface DataFlowExtraction {
67
+ summary: string;
68
+ nodes: DataNodeSchema[];
69
+ edges: DataEdge[];
70
+ }
71
+ export declare function isDataFlowExtraction(value: unknown): value is DataFlowExtraction;
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Data Flow domain types.
3
+ *
4
+ * A DataNodeSchema is a structured description of one unit of data state or
5
+ * computation in a product — a source, dataset, transform, sink, queue, or
6
+ * model. The CLI extracts these from source code (pipeline definitions,
7
+ * schema files, queue handlers, etc.) and the desktop renders them with a
8
+ * unified <DataNodePreview> component.
9
+ *
10
+ * Companion to ScreenSchema: same flow-graph shape (nodes + edges with a
11
+ * shared `flows` table storing the JSONB schema), different domain. Data
12
+ * flow edges describe how data moves between nodes, not user navigation.
13
+ */
// ============================================================================
// Runtime validation for AI-produced extraction
// ============================================================================
/** Accepted values of a node's `kind`. */
const NODE_KINDS = new Set([
    'source',
    'dataset',
    'transform',
    'sink',
    'queue',
    'model',
]);

/** Accepted values of an edge's `kind`. */
const EDGE_KINDS = new Set([
    'data',
    'event',
    'control',
    'derives',
]);

/** True for any non-null value whose `typeof` is `'object'` (arrays included). */
function isRecord(value) {
    return typeof value === 'object' && value !== null;
}

/** True when `value` is a string containing at least one character. */
function isNonEmptyString(value) {
    return typeof value === 'string' && value.length > 0;
}

/** Structural check of the mandatory node fields: `slug`, `name`, `kind`. */
function isDataNodeSchema(value) {
    return (isRecord(value) &&
        isNonEmptyString(value.slug) &&
        isNonEmptyString(value.name) &&
        typeof value.kind === 'string' &&
        NODE_KINDS.has(value.kind));
}

/**
 * Structural check of the mandatory edge fields: `fromSlug`, `toSlug`,
 * `kind`. Both slugs must be non-empty, matching the rule already applied
 * to `node.slug`: an empty edge slug could never refer to a valid node.
 */
function isDataEdge(value) {
    return (isRecord(value) &&
        isNonEmptyString(value.fromSlug) &&
        isNonEmptyString(value.toSlug) &&
        typeof value.kind === 'string' &&
        EDGE_KINDS.has(value.kind));
}

/**
 * Type guard for a data-flow extraction.
 *
 * Accepts a non-null object with a string `summary`, an array `nodes` whose
 * items all pass `isDataNodeSchema`, and an array `edges` whose items all
 * pass `isDataEdge`. Optional node/edge fields are not inspected, and edges
 * are not checked against the node list.
 *
 * @param value - arbitrary input.
 * @returns `true` when `value` has the required shape, otherwise `false`.
 */
export function isDataFlowExtraction(value) {
    if (!isRecord(value) || typeof value.summary !== 'string') {
        return false;
    }
    if (!Array.isArray(value.nodes) || !Array.isArray(value.edges)) {
        return false;
    }
    return value.nodes.every(isDataNodeSchema) && value.edges.every(isDataEdge);
}
@@ -949,5 +949,76 @@ submit_screen_flow({
949
949
  ]
950
950
  })
951
951
  \`\`\`
952
+ `,
953
+ 'data-flow': `
954
+ **CRITICAL — How to return the result**:
955
+
956
+ Return the extraction by calling the MCP tool
957
+ \`mcp__data-flow__submit_data_flow\` **exactly once** with three arguments:
958
+
959
+ - \`summary\` — 1-3 sentence narrative of what this system does with data and its primary pipelines
960
+ - \`nodes\` — array of DataNodeSchema objects (every source / dataset / transform / sink / queue / model)
961
+ - \`edges\` — array of DataEdge objects (connections, with direction = data movement)
962
+
963
+ The tool validates the arguments against the schema. If it returns an error,
964
+ fix the issue it describes and call the tool again. After a successful call,
965
+ end your turn — do not also paste the same data as a fenced text block.
966
+
967
+ You can also call \`mcp__data-flow__record_progress({ phase, message })\` at
968
+ each phase boundary (detection / enumeration / nodes / edges / submission)
969
+ to keep the user informed during long runs. This is observability only — it
970
+ does not affect the extraction.
971
+
972
+ DataNodeSchema fields:
973
+ - \`slug\` (unique within the flow), \`name\`, \`kind\`, \`file?\`
974
+ - \`kind\`: one of \`source\`, \`dataset\`, \`transform\`, \`sink\`, \`queue\`, \`model\`
975
+ - \`description?\`: one-sentence summary
976
+ - \`tech?\`: technology / format hint (e.g. \`postgres\`, \`parquet\`, \`kafka\`, \`openai-api\`)
977
+ - \`schedule?\`: for transforms (e.g. \`cron 0 0 * * *\`, \`on-event\`, \`manual\`, \`continuous\`)
978
+ - \`inputs?\` / \`outputs?\`: arrays of \`{ name, type?, required?, description? }\`
979
+ - \`sample?\`: \`{ columns: [string], rows: [[string]] }\` — at most 4 sample rows for datasets
980
+ - \`stats?\`: array of \`{ label, value }\` (volume, latency, count hints)
981
+
982
+ DataEdge fields:
983
+ - \`fromSlug\` (upstream), \`toSlug\` (downstream) — both MUST appear in nodes
984
+ - \`kind\`: one of \`data\`, \`event\`, \`control\`, \`derives\`
985
+ - \`label?\`: free-form descriptor (e.g. \`nightly batch\`, \`on user signup\`)
986
+ - \`sourceFile?\`: file containing the read/write/trigger code
987
+
988
+ Edge direction convention: fromSlug is upstream (data origin), toSlug is downstream
989
+ (data destination). A transform that reads from a dataset and writes to a queue
990
+ produces two edges: \`dataset → transform\` (kind: data) and \`transform → queue\`
991
+ (kind: event).
992
+
993
+ Schematic example of the tool call:
994
+
995
+ \`\`\`
996
+ submit_data_flow({
997
+ summary: "Nightly product-feed pipeline: scrape vendor sites, normalize, write to Postgres, publish change events to Kafka.",
998
+ nodes: [
999
+ { slug: "vendor-scrape", name: "Vendor scraper", kind: "source",
1000
+ file: "src/scrape/vendor.ts", tech: "playwright",
1001
+ schedule: "cron 0 0 * * *",
1002
+ outputs: [{ name: "html", type: "string" }, { name: "url", type: "string" }] },
1003
+ { slug: "normalize", name: "Normalize products", kind: "transform",
1004
+ file: "src/etl/normalize.ts", tech: "node",
1005
+ inputs: [{ name: "html", type: "string" }],
1006
+ outputs: [{ name: "sku", type: "string" }, { name: "price", type: "decimal" }] },
1007
+ { slug: "products", name: "products", kind: "dataset",
1008
+ file: "supabase/migrations/0001_products.sql", tech: "postgres",
1009
+ sample: { columns: ["sku", "price"], rows: [["ABC-1", "9.99"]] } },
1010
+ { slug: "change-events", name: "product.changed", kind: "queue",
1011
+ file: "src/queues/products.ts", tech: "kafka" }
1012
+ ],
1013
+ edges: [
1014
+ { fromSlug: "vendor-scrape", toSlug: "normalize", kind: "data",
1015
+ label: "raw HTML", sourceFile: "src/etl/normalize.ts" },
1016
+ { fromSlug: "normalize", toSlug: "products", kind: "data",
1017
+ label: "upsert", sourceFile: "src/etl/normalize.ts" },
1018
+ { fromSlug: "products", toSlug: "change-events", kind: "event",
1019
+ label: "on row change", sourceFile: "src/queues/products.ts" }
1020
+ ]
1021
+ })
1022
+ \`\`\`
952
1023
  `,
953
1024
  };
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Recipes phase: clone the product's repo, ask Claude to identify each
3
+ * non-trivial capability the product implements, and persist HOW it's built
4
+ * via the recipes / product_recipes tables.
5
+ *
6
+ * Production-grade behaviours layered on top of the basic agent loop:
7
+ *
8
+ * - Heartbeat: `last_heartbeat_at` on the recipe_scans row is refreshed
9
+ * on every assistant message so the reader can detect stalled / crashed
10
+ * runs (see desktop-app/.../services/db/recipe-scans.ts for the lazy
11
+ * reaper).
12
+ * - Cancellation-safe writes: markRunning / markSuccess / markFailed only
13
+ * touch rows whose status is in {pending, running}. If the user clicked
14
+ * Stop and the row is now 'cancelled', the final write no-ops.
15
+ * - Per-call MCP writes: agent commits each create / update / link /
16
+ * unlink as it goes. There is no "submit at the end" buffer — partial
17
+ * progress survives even if the agent later errors out.
18
+ */
19
+ import type { SupabaseClient } from '@supabase/supabase-js';
20
+ import type { RecipeSummary } from './types.js';
21
+ export interface RecipesPhaseOptions {
22
+ productId: string;
23
+ scanId: string;
24
+ guidance?: string;
25
+ verbose?: boolean;
26
+ }
27
+ export interface RecipesPhaseResult {
28
+ status: 'success' | 'error' | 'cancelled';
29
+ message: string;
30
+ counts?: {
31
+ created: number;
32
+ updated: number;
33
+ linked: number;
34
+ unlinked: number;
35
+ };
36
+ }
37
+ export declare function runRecipesPhase(options: RecipesPhaseOptions): Promise<RecipesPhaseResult>;
38
+ export declare function getProductTeamId(supabase: SupabaseClient, productId: string): Promise<string | null>;
39
+ export declare function getScanCreator(supabase: SupabaseClient, scanId: string): Promise<{
40
+ created_by: string;
41
+ } | null>;
42
+ export declare function listTeamRecipes(supabase: SupabaseClient, teamId: string): Promise<RecipeSummary[]>;
43
+ export declare function listProductRecipeLinks(supabase: SupabaseClient, productId: string): Promise<{
44
+ recipe_id: string;
45
+ name: string;
46
+ }[]>;
47
+ /**
48
+ * Claim the row by flipping `pending` → `running`. Returns true on success
49
+ * (we won the claim) and false when the row has already moved on (e.g. user
50
+ * cancelled before the CLI started). Bounded by the status filter so we
51
+ * can't accidentally resurrect a 'cancelled' row.
52
+ */
53
+ export declare function markRunning(supabase: SupabaseClient, scanId: string): Promise<boolean>;
54
+ export declare function heartbeat(supabase: SupabaseClient, scanId: string): Promise<void>;
55
+ export declare function markFailed(supabase: SupabaseClient, scanId: string, errorMessage: string): Promise<boolean>;
56
+ export declare function markSuccess(supabase: SupabaseClient, scanId: string): Promise<boolean>;