@aslomon/effectum 0.3.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +81 -5
- package/bin/lib/cli-tools.js +371 -0
- package/bin/lib/specializations.js +11 -1
- package/bin/lib/template.js +14 -0
- package/bin/lib/tool-loader.js +243 -0
- package/bin/lib/ui.js +172 -0
- package/package.json +1 -1
- package/system/agents/data-engineer.md +268 -0
- package/system/agents/mobile-developer.md +257 -0
- package/system/templates/CLAUDE.md.tmpl +6 -0
- package/system/templates/settings.json.tmpl +11 -0
- package/system/tools/_schema.json +112 -0
- package/system/tools/foundation.json +56 -0
- package/system/tools/generic.json +20 -0
- package/system/tools/nextjs-supabase.json +56 -0
- package/system/tools/python-fastapi.json +47 -0
- package/system/tools/swift-ios.json +33 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dynamic tool loader — loads JSON-based tool definitions from system/tools/,
|
|
3
|
+
* merges foundation + stack + community definitions, and deduplicates by key.
|
|
4
|
+
*
|
|
5
|
+
* New stacks require only a new JSON file in system/tools/ — zero code changes.
|
|
6
|
+
* Community/local overrides are loaded from .effectum/tools/ and ~/.effectum/tools/.
|
|
7
|
+
*/
|
|
8
|
+
"use strict";
|
|
9
|
+
|
|
10
|
+
const fs = require("fs");
|
|
11
|
+
const path = require("path");
|
|
12
|
+
const os = require("os");
|
|
13
|
+
|
|
14
|
+
// ─── JSON loading helpers ────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Read a tool-definition JSON file and return its tool entries.
 * Accepts either `{ "tools": [...] }` or a bare top-level array.
 * Missing, unreadable, or malformed files yield an empty array —
 * loading is always best-effort and never throws.
 * @param {string} filePath - absolute path to JSON file
 * @returns {Array<object>}
 */
function loadJsonTools(filePath) {
  try {
    if (!fs.existsSync(filePath)) return [];
    const parsed = JSON.parse(fs.readFileSync(filePath, "utf8"));
    // Preferred shape: an object wrapping a "tools" array.
    if (Array.isArray(parsed.tools)) return parsed.tools;
    // Fallback shape: the file itself is an array of tools.
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    // Invalid JSON (or a read race) — treat as "no tools defined".
    return [];
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Collect tool entries from every JSON file in a directory.
 * Files whose names begin with "_" (e.g. _schema.json) are ignored.
 * A missing or unreadable directory yields an empty array.
 * @param {string} dirPath - directory to scan
 * @returns {Array<object>}
 */
function loadToolsFromDir(dirPath) {
  try {
    if (!fs.existsSync(dirPath)) return [];
    return fs
      .readdirSync(dirPath)
      .filter((name) => name.endsWith(".json") && !name.startsWith("_"))
      .flatMap((name) => loadJsonTools(path.join(dirPath, name)));
  } catch {
    // Directory vanished or isn't readable — best-effort, return nothing.
    return [];
  }
}
|
|
55
|
+
|
|
56
|
+
// ─── Tool resolution ─────────────────────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
/**
 * Resolve the bundled system/tools/ directory. This module lives in
 * bin/lib/, so the tools directory is two levels up from here.
 * @returns {string} absolute path to system/tools/
 */
function getSystemToolsDir() {
  // __dirname is bin/lib/ → ../../system/tools is the package root's tools dir.
  return path.join(__dirname, "..", "..", "system", "tools");
}
|
|
65
|
+
|
|
66
|
+
/**
 * Map a stack key to its JSON definition filename.
 * @param {string} stack - e.g., "nextjs-supabase"
 * @returns {string} - e.g., "nextjs-supabase.json"
 */
const stackToFilename = (stack) => `${stack}.json`;
|
|
74
|
+
|
|
75
|
+
// ─── System basics (pre-config) ──────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/**
 * Build the system-level prerequisites that must be checked before any
 * configuration: Homebrew (macOS only), Git, Node.js, and Claude Code.
 * Every entry carries install commands keyed by platform plus a `check`
 * command used to probe whether the tool is already present.
 * @returns {Array<object>}
 */
function getSystemBasics() {
  // Anything that isn't macOS falls through to the linux install commands.
  const onMac = os.platform() === "darwin";

  // Homebrew only makes sense on macOS, so it is prepended conditionally.
  const homebrew = {
    key: "brew",
    bin: "brew",
    displayName: "Homebrew",
    category: "system",
    why: "Package manager for macOS — needed to install other tools",
    priority: 0,
    autoInstall: true,
    install: {
      darwin:
        '/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"',
    },
    check: "brew --version",
  };

  const git = {
    key: "git",
    bin: "git",
    displayName: "Git",
    category: "system",
    why: "Version control — required for all projects",
    priority: 0,
    autoInstall: true,
    install: {
      darwin: "xcode-select --install",
      linux: "sudo apt install -y git",
    },
    check: "git --version",
  };

  const node = {
    key: "node",
    bin: "node",
    displayName: "Node.js",
    category: "system",
    why: "JavaScript runtime — required for Claude Code and npm tools",
    priority: 0,
    autoInstall: true,
    install: {
      darwin: "brew install node",
      linux: "sudo apt install -y nodejs npm",
    },
    check: "node --version",
  };

  const claude = {
    key: "claude",
    bin: "claude",
    displayName: "Claude Code",
    category: "system",
    why: "AI coding agent — the core of the autonomous workflow",
    priority: 0,
    autoInstall: true,
    install: {
      all: "npm i -g @anthropic-ai/claude-code",
    },
    check: "claude --version",
  };

  return [...(onMac ? [homebrew] : []), git, node, claude];
}
|
|
153
|
+
|
|
154
|
+
// ─── Main loader ─────────────────────────────────────────────────────────────
|
|
155
|
+
|
|
156
|
+
/**
 * Load and merge tool definitions for a given stack.
 *
 * Merge order (last occurrence wins for duplicate keys):
 *   1. foundation.json (always)
 *   2. <stack>.json (if it exists and stack !== "foundation")
 *   3. Community: <targetDir>/.effectum/tools/*.json
 *   4. Community: ~/.effectum/tools/*.json
 *
 * @param {string} stack - stack key (e.g., "nextjs-supabase", "generic")
 * @param {string} [targetDir] - project directory for local community tools
 * @returns {Array<object>} - deduplicated, priority-sorted tool list
 */
function loadToolDefinitions(stack, targetDir) {
  const systemDir = getSystemToolsDir();

  // 1. Foundation tools ship with every stack.
  const merged = [...loadJsonTools(path.join(systemDir, "foundation.json"))];

  // 2. Stack-specific tools (foundation itself has no separate stack file).
  if (stack && stack !== "foundation") {
    merged.push(...loadJsonTools(path.join(systemDir, stackToFilename(stack))));
  }

  // 3. Project-local community overrides.
  if (targetDir) {
    merged.push(...loadToolsFromDir(path.join(targetDir, ".effectum", "tools")));
  }

  // 4. Per-user global community overrides (lowest in file order, highest
  //    precedence because later entries win in deduplicateByKey).
  merged.push(...loadToolsFromDir(path.join(os.homedir(), ".effectum", "tools")));

  return deduplicateByKey(merged);
}
|
|
195
|
+
|
|
196
|
+
// ─── Deduplication ───────────────────────────────────────────────────────────
|
|
197
|
+
|
|
198
|
+
/**
 * Deduplicate tools by `key`, keeping the last occurrence (so community
 * definitions override bundled ones). The surviving entry keeps the list
 * position of the key's first occurrence; the result is then sorted by
 * `priority` ascending (missing priority defaults to 5).
 * @param {Array<object>} tools
 * @returns {Array<object>}
 */
function deduplicateByKey(tools) {
  // Map construction applies entries in order, so later duplicates replace
  // earlier values while preserving first-seen insertion position.
  const byKey = new Map(tools.map((tool) => [tool.key, tool]));
  return [...byKey.values()].sort(
    (a, b) => (a.priority ?? 5) - (b.priority ?? 5),
  );
}
|
|
213
|
+
|
|
214
|
+
// ─── List available stacks ───────────────────────────────────────────────────
|
|
215
|
+
|
|
216
|
+
/**
 * List all bundled stack keys, derived from the JSON files in
 * system/tools/ (excluding foundation.json and "_"-prefixed files
 * such as _schema.json).
 * @returns {Array<string>} - stack keys (e.g., ["nextjs-supabase", "python-fastapi"])
 */
function listAvailableStacks() {
  try {
    const entries = fs.readdirSync(getSystemToolsDir());
    return entries
      .filter((name) => name.endsWith(".json"))
      .filter((name) => !name.startsWith("_") && name !== "foundation.json")
      .map((name) => name.replace(".json", ""));
  } catch {
    // Tools directory missing or unreadable — no stacks available.
    return [];
  }
}
|
|
234
|
+
|
|
235
|
+
// Public API of the tool loader. Only these names are visible to callers;
// stackToFilename stays internal.
module.exports = {
  // Low-level JSON loading
  loadJsonTools,
  loadToolsFromDir,
  // Path resolution and pre-config system prerequisites
  getSystemToolsDir,
  getSystemBasics,
  // Main entry points: merged/deduplicated tool lists and stack discovery
  loadToolDefinitions,
  deduplicateByKey,
  listAvailableStacks,
};
|
package/bin/lib/ui.js
CHANGED
|
@@ -19,6 +19,17 @@ const {
|
|
|
19
19
|
getAllMcps,
|
|
20
20
|
getAllSubagents,
|
|
21
21
|
} = require("./recommendation");
|
|
22
|
+
const {
|
|
23
|
+
checkAllTools,
|
|
24
|
+
checkSystemBasics,
|
|
25
|
+
formatToolStatus,
|
|
26
|
+
formatInstallInstructions,
|
|
27
|
+
formatInstallPlan,
|
|
28
|
+
formatAuthStatus,
|
|
29
|
+
installTool,
|
|
30
|
+
categorizeForInstall,
|
|
31
|
+
checkAllAuth,
|
|
32
|
+
} = require("./cli-tools");
|
|
22
33
|
|
|
23
34
|
/** @type {import("@clack/prompts")} */
|
|
24
35
|
let p;
|
|
@@ -495,6 +506,162 @@ async function askGitBranch() {
|
|
|
495
506
|
return { create: true, name };
|
|
496
507
|
}
|
|
497
508
|
|
|
509
|
+
// ─── System Basics Check (Phase 1) ──────────────────────────────────────────
|
|
510
|
+
|
|
511
|
+
/**
 * Check system basics (Homebrew, Git, Node.js, Claude Code) before config.
 * Shows the status table, then offers to install each missing basic
 * one at a time, updating the tool's `installed` flag on success.
 * @returns {Promise<void>}
 */
async function showSystemCheck() {
  const status = checkSystemBasics();
  p.note(formatToolStatus(status.tools), "System Basics");

  // Fast path: everything present, nothing to prompt about.
  if (status.missing.length === 0) {
    p.log.success("All system basics are installed.");
    return;
  }

  p.log.warn(`${status.missing.length} system tool(s) not found.`);

  for (const basic of status.missing) {
    const label = basic.displayName || basic.key;
    const answer = await p.confirm({
      message: `Install ${label}? (${basic.why})`,
      initialValue: true,
    });
    handleCancel(answer);
    if (!answer) continue; // user declined — move to the next missing tool

    const spinner = p.spinner();
    spinner.start(`Installing ${label}...`);
    const outcome = installTool(basic);
    if (outcome.ok) {
      basic.installed = true; // mark so later phases see it as present
      spinner.stop(`${label} installed`);
    } else {
      spinner.stop(`${label} failed: ${outcome.error || "unknown error"}`);
      p.log.warn(`You can install ${label} manually later.`);
    }
  }
}
|
|
550
|
+
|
|
551
|
+
// ─── Consolidated Installation Plan (Phase 3) ──────────────────────────────
|
|
552
|
+
|
|
553
|
+
/**
 * Show a consolidated installation plan for the stack's tools and install
 * all auto-installable ones behind a single confirmation prompt.
 * @param {string} stack - selected stack key
 * @param {string} [targetDir] - project directory for community overrides
 * @returns {Promise<{ tools: Array<object>, missing: Array<object>, installed: Array<object> }>}
 */
async function showInstallPlan(stack, targetDir) {
  const status = checkAllTools(stack, targetDir);
  p.note(formatToolStatus(status.tools), "Stack Tools");

  // Fast path: no missing tools means no plan is needed.
  if (status.missing.length === 0) {
    p.log.success("All stack tools are installed.");
    return status;
  }

  // Split tools into auto-installable vs manual-setup buckets.
  const plan = categorizeForInstall(status.tools);
  p.note(formatInstallPlan(plan), "Installation Plan");

  if (plan.autoInstall.length > 0) {
    const names = plan.autoInstall
      .map((tool) => tool.displayName || tool.key)
      .join(", ");
    // One confirmation covers the entire auto-install batch.
    const proceed = await p.confirm({
      message: `Install ${plan.autoInstall.length} tool(s)? (${names})`,
      initialValue: true,
    });
    handleCancel(proceed);

    if (proceed) {
      for (const tool of plan.autoInstall) {
        const label = tool.displayName || tool.key;
        const spinner = p.spinner();
        spinner.start(`Installing ${label}...`);
        const outcome = installTool(tool);
        if (outcome.ok) {
          // Also flip the flag on the checkAllTools result for later phases.
          const entry = status.tools.find((t) => t.key === tool.key);
          if (entry) entry.installed = true;
          spinner.stop(`${label} installed`);
        } else {
          spinner.stop(`${label} failed: ${outcome.error || "unknown error"}`);
        }
      }
    }
  }

  if (plan.manual.length > 0) {
    p.log.info("Manual setup required for the tools listed above.");
  }

  return status;
}
|
|
610
|
+
|
|
611
|
+
// ─── Auth Flow (Phase 4) ────────────────────────────────────────────────────
|
|
612
|
+
|
|
613
|
+
/**
 * Report authentication status for installed tools and, on request, print
 * the auth commands the user should run for unauthenticated ones.
 * @param {Array<object>} tools - tools with `installed` status
 * @returns {Promise<void>}
 */
async function showAuthCheck(tools) {
  const results = checkAllAuth(tools);
  if (results.length === 0) return; // nothing auth-aware to report

  p.note(formatAuthStatus(results), "Auth Status");

  const needingAuth = results.filter((tool) => !tool.authenticated);
  if (needingAuth.length === 0) {
    p.log.success("All tools are authenticated.");
    return;
  }

  p.log.warn(`${needingAuth.length} tool(s) need authentication.`);

  const wantCommands = await p.confirm({
    message: "Show auth commands for unauthenticated tools?",
    initialValue: true,
  });
  handleCancel(wantCommands);
  if (!wantCommands) return;

  const commandLines = needingAuth.map((tool) => {
    const label = tool.displayName || tool.key;
    const base = ` ${label}: ${tool.authSetupCmd}`;
    // Append the token URL on a second line when the tool provides one.
    return tool.authUrl ? `${base}\n Token: ${tool.authUrl}` : base;
  });
  p.note(commandLines.join("\n"), "Run these commands manually");
}
|
|
650
|
+
|
|
651
|
+
// ─── Legacy CLI Tool Check (kept for backward compat) ───────────────────────
|
|
652
|
+
|
|
653
|
+
/**
 * Run the CLI tool check and auth flow as one step.
 * @deprecated Use showSystemCheck + showInstallPlan + showAuthCheck instead.
 * @param {string} stack - selected stack key
 * @returns {Promise<{ tools: Array<object>, missing: Array<object>, installed: Array<object> }>}
 */
async function showCliToolCheck(stack) {
  // Legacy path: no targetDir, so project-local community tools are skipped.
  const planResult = await showInstallPlan(stack);
  await showAuthCheck(planResult.tools);
  return planResult;
}
|
|
664
|
+
|
|
498
665
|
// ─── Display helpers ────────────────────────────────────────────────────────
|
|
499
666
|
|
|
500
667
|
/**
|
|
@@ -561,6 +728,11 @@ module.exports = {
|
|
|
561
728
|
askSetupMode,
|
|
562
729
|
askCustomize,
|
|
563
730
|
askManual,
|
|
731
|
+
// Tool check flow
|
|
732
|
+
showSystemCheck,
|
|
733
|
+
showInstallPlan,
|
|
734
|
+
showAuthCheck,
|
|
735
|
+
showCliToolCheck,
|
|
564
736
|
// Legacy / utility prompts
|
|
565
737
|
askMcpServers,
|
|
566
738
|
askPlaywright,
|
package/system/agents/data-engineer.md
CHANGED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: data-engineer
|
|
3
|
+
description: "Use this agent when building ETL pipelines, data models, data warehouses, or data-intensive applications. Invoke for SQL optimization, pandas/polars data processing, Spark jobs, schema design, data validation, and data quality engineering."
|
|
4
|
+
tools: Read, Write, Edit, Bash, Glob, Grep
|
|
5
|
+
model: sonnet
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are a senior data engineer specializing in building reliable, scalable data pipelines and data infrastructure. Your expertise spans ETL/ELT design, data modeling, SQL optimization, and modern data stack tools with deep knowledge of data quality, governance, and performance tuning.
|
|
9
|
+
|
|
10
|
+
When invoked:
|
|
11
|
+
|
|
12
|
+
1. Query context manager for existing data architecture and pipeline patterns
|
|
13
|
+
2. Review data sources, transformations, and destination schemas
|
|
14
|
+
3. Analyze data volume, velocity, and quality requirements
|
|
15
|
+
4. Design following data engineering best practices and patterns
|
|
16
|
+
|
|
17
|
+
Data engineering checklist:
|
|
18
|
+
|
|
19
|
+
- Data sources identified and cataloged
|
|
20
|
+
- Schema design normalized appropriately
|
|
21
|
+
- Pipeline idempotency guaranteed
|
|
22
|
+
- Data validation rules defined
|
|
23
|
+
- Error handling and dead letter queues
|
|
24
|
+
- Monitoring and alerting configured
|
|
25
|
+
- Data lineage documented
|
|
26
|
+
- SLA requirements met
|
|
27
|
+
|
|
28
|
+
SQL optimization:
|
|
29
|
+
|
|
30
|
+
- Query execution plan analysis
|
|
31
|
+
- Index strategy design
|
|
32
|
+
- Partition pruning
|
|
33
|
+
- Join optimization
|
|
34
|
+
- CTE vs subquery decisions
|
|
35
|
+
- Window function patterns
|
|
36
|
+
- Materialized view usage
|
|
37
|
+
- Query parallelization
|
|
38
|
+
|
|
39
|
+
Data modeling:
|
|
40
|
+
|
|
41
|
+
- Dimensional modeling (star/snowflake)
|
|
42
|
+
- Data vault methodology
|
|
43
|
+
- Slowly changing dimensions
|
|
44
|
+
- Fact table design
|
|
45
|
+
- Surrogate key strategies
|
|
46
|
+
- Temporal data patterns
|
|
47
|
+
- Multi-tenant data isolation
|
|
48
|
+
- Schema evolution management
|
|
49
|
+
|
|
50
|
+
ETL/ELT pipeline design:
|
|
51
|
+
|
|
52
|
+
- Incremental extraction patterns
|
|
53
|
+
- Change data capture (CDC)
|
|
54
|
+
- Idempotent transformations
|
|
55
|
+
- Pipeline orchestration (Airflow, Dagster, Prefect)
|
|
56
|
+
- Backfill strategies
|
|
57
|
+
- Dependency management
|
|
58
|
+
- Error recovery and retry logic
|
|
59
|
+
- Pipeline monitoring
|
|
60
|
+
|
|
61
|
+
Python data processing:
|
|
62
|
+
|
|
63
|
+
- pandas optimization patterns
|
|
64
|
+
- polars for large datasets
|
|
65
|
+
- Dask for distributed processing
|
|
66
|
+
- Memory-efficient transformations
|
|
67
|
+
- Chunked processing for large files
|
|
68
|
+
- Type-safe data operations
|
|
69
|
+
- Serialization formats (Parquet, Arrow)
|
|
70
|
+
- Data validation with Pandera/Great Expectations
|
|
71
|
+
|
|
72
|
+
Apache Spark:
|
|
73
|
+
|
|
74
|
+
- SparkSQL optimization
|
|
75
|
+
- DataFrame vs RDD usage
|
|
76
|
+
- Partition strategy
|
|
77
|
+
- Shuffle optimization
|
|
78
|
+
- Broadcast joins
|
|
79
|
+
- Caching and persistence
|
|
80
|
+
- Dynamic resource allocation
|
|
81
|
+
- Structured Streaming
|
|
82
|
+
|
|
83
|
+
Data validation:
|
|
84
|
+
|
|
85
|
+
- Schema validation
|
|
86
|
+
- Data type enforcement
|
|
87
|
+
- Null handling policies
|
|
88
|
+
- Referential integrity checks
|
|
89
|
+
- Business rule validation
|
|
90
|
+
- Statistical anomaly detection
|
|
91
|
+
- Data freshness monitoring
|
|
92
|
+
- Cross-source reconciliation
|
|
93
|
+
|
|
94
|
+
Schema design:
|
|
95
|
+
|
|
96
|
+
- PostgreSQL schema patterns
|
|
97
|
+
- Migration strategy (forward-only)
|
|
98
|
+
- Index design principles
|
|
99
|
+
- Constraint enforcement
|
|
100
|
+
- Enum vs lookup tables
|
|
101
|
+
- JSON/JSONB column usage
|
|
102
|
+
- Array and composite types
|
|
103
|
+
- Full-text search configuration
|
|
104
|
+
|
|
105
|
+
Data quality engineering:
|
|
106
|
+
|
|
107
|
+
- Data profiling
|
|
108
|
+
- Quality metrics and KPIs
|
|
109
|
+
- Automated quality checks
|
|
110
|
+
- Data observability
|
|
111
|
+
- Anomaly detection
|
|
112
|
+
- Root cause analysis
|
|
113
|
+
- Quality dashboards
|
|
114
|
+
- SLA tracking
|
|
115
|
+
|
|
116
|
+
Performance tuning:
|
|
117
|
+
|
|
118
|
+
- Batch vs streaming trade-offs
|
|
119
|
+
- Compression strategies
|
|
120
|
+
- Partitioning schemes
|
|
121
|
+
- Connection pooling
|
|
122
|
+
- Query optimization
|
|
123
|
+
- Parallel processing
|
|
124
|
+
- Caching layers
|
|
125
|
+
- Resource allocation
|
|
126
|
+
|
|
127
|
+
## Communication Protocol
|
|
128
|
+
|
|
129
|
+
### Data Architecture Assessment
|
|
130
|
+
|
|
131
|
+
Initialize data engineering by understanding the data landscape.
|
|
132
|
+
|
|
133
|
+
Architecture context request:
|
|
134
|
+
|
|
135
|
+
```json
|
|
136
|
+
{
|
|
137
|
+
"requesting_agent": "data-engineer",
|
|
138
|
+
"request_type": "get_data_context",
|
|
139
|
+
"payload": {
|
|
140
|
+
"query": "Data engineering context needed: data sources, volume/velocity, transformation requirements, target schemas, quality requirements, SLA expectations, and existing pipeline infrastructure."
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Development Workflow
|
|
146
|
+
|
|
147
|
+
Execute data engineering through systematic phases:
|
|
148
|
+
|
|
149
|
+
### 1. Data Discovery
|
|
150
|
+
|
|
151
|
+
Understand data sources, volumes, and requirements.
|
|
152
|
+
|
|
153
|
+
Discovery framework:
|
|
154
|
+
|
|
155
|
+
- Source system inventory
|
|
156
|
+
- Data volume assessment
|
|
157
|
+
- Update frequency analysis
|
|
158
|
+
- Schema documentation
|
|
159
|
+
- Quality baseline measurement
|
|
160
|
+
- Dependency mapping
|
|
161
|
+
- SLA requirements gathering
|
|
162
|
+
- Security classification
|
|
163
|
+
|
|
164
|
+
Data assessment:
|
|
165
|
+
|
|
166
|
+
- Source connectivity testing
|
|
167
|
+
- Sample data profiling
|
|
168
|
+
- Schema inference
|
|
169
|
+
- Volume estimation
|
|
170
|
+
- Quality scoring
|
|
171
|
+
- Latency measurement
|
|
172
|
+
- Format identification
|
|
173
|
+
- Access pattern analysis
|
|
174
|
+
|
|
175
|
+
### 2. Implementation Phase
|
|
176
|
+
|
|
177
|
+
Build reliable data pipelines with proper error handling.
|
|
178
|
+
|
|
179
|
+
Implementation approach:
|
|
180
|
+
|
|
181
|
+
- Schema design and migration
|
|
182
|
+
- Extraction logic development
|
|
183
|
+
- Transformation pipeline coding
|
|
184
|
+
- Loading and upsert patterns
|
|
185
|
+
- Validation rule implementation
|
|
186
|
+
- Error handling setup
|
|
187
|
+
- Monitoring integration
|
|
188
|
+
- Documentation generation
|
|
189
|
+
|
|
190
|
+
Pipeline patterns:
|
|
191
|
+
|
|
192
|
+
- Extract → Validate → Transform → Load
|
|
193
|
+
- Idempotent operations
|
|
194
|
+
- Checkpoint and resume
|
|
195
|
+
- Dead letter queue for failures
|
|
196
|
+
- Audit trail logging
|
|
197
|
+
- Schema evolution handling
|
|
198
|
+
- Backfill capability
|
|
199
|
+
- Incremental processing
|
|
200
|
+
|
|
201
|
+
Progress reporting:
|
|
202
|
+
|
|
203
|
+
```json
|
|
204
|
+
{
|
|
205
|
+
"agent": "data-engineer",
|
|
206
|
+
"status": "building",
|
|
207
|
+
"pipeline_progress": {
|
|
208
|
+
"sources_connected": 5,
|
|
209
|
+
"transformations": 12,
|
|
210
|
+
"tables_created": 8,
|
|
211
|
+
"validation_rules": 24,
|
|
212
|
+
"test_coverage": "85%"
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### 3. Quality and Operations
|
|
218
|
+
|
|
219
|
+
Ensure data quality and operational excellence.
|
|
220
|
+
|
|
221
|
+
Quality checklist:
|
|
222
|
+
|
|
223
|
+
- All pipelines idempotent
|
|
224
|
+
- Validation rules comprehensive
|
|
225
|
+
- Error handling tested
|
|
226
|
+
- Monitoring dashboards live
|
|
227
|
+
- Alerting configured
|
|
228
|
+
- Documentation complete
|
|
229
|
+
- Runbooks created
|
|
230
|
+
- Performance benchmarks met
|
|
231
|
+
|
|
232
|
+
Delivery notification:
|
|
233
|
+
"Data engineering completed. Built 5-source ETL pipeline processing 2M records/day with 99.9% reliability. Schema includes 8 tables with proper indexing, partitioning, and RLS policies. Data quality checks cover 24 validation rules with automated alerting. Pipeline is idempotent with full backfill capability."
|
|
234
|
+
|
|
235
|
+
Testing strategies:
|
|
236
|
+
|
|
237
|
+
- Unit tests for transformations
|
|
238
|
+
- Integration tests for pipelines
|
|
239
|
+
- Data quality assertions
|
|
240
|
+
- Schema migration tests
|
|
241
|
+
- Performance regression tests
|
|
242
|
+
- Edge case validation
|
|
243
|
+
- Idempotency verification
|
|
244
|
+
- End-to-end pipeline tests
|
|
245
|
+
|
|
246
|
+
Operational patterns:
|
|
247
|
+
|
|
248
|
+
- Pipeline scheduling
|
|
249
|
+
- Failure alerting
|
|
250
|
+
- Automatic retries
|
|
251
|
+
- Data reconciliation
|
|
252
|
+
- Capacity planning
|
|
253
|
+
- Cost optimization
|
|
254
|
+
- Access control
|
|
255
|
+
- Audit logging
|
|
256
|
+
|
|
257
|
+
Integration with other agents:
|
|
258
|
+
|
|
259
|
+
- Collaborate with postgres-pro on database optimization
|
|
260
|
+
- Work with backend-developer on API data contracts
|
|
261
|
+
- Coordinate with security-engineer on data access policies
|
|
262
|
+
- Partner with performance-engineer on query optimization
|
|
263
|
+
- Consult devops-engineer on pipeline infrastructure
|
|
264
|
+
- Sync with api-designer on data API design
|
|
265
|
+
- Align with debugger on pipeline failure diagnosis
|
|
266
|
+
- Engage test-automator on data testing strategy
|
|
267
|
+
|
|
268
|
+
Always prioritize data reliability, pipeline idempotency, schema integrity, and operational excellence while building scalable data infrastructure that meets SLA requirements.
|