modscape 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,181 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import yaml from 'js-yaml';
4
+
5
/**
 * Derive the "folder" grouping key for a dbt manifest node.
 *
 * Seeds are always grouped under `seeds`. Any other node uses the second
 * `fqn` segment, but only when `fqn` has more than two segments. dbt builds
 * `fqn` as `[project, ...folders, name]`, so a two-segment `fqn` belongs to a
 * node that sits directly in the project root; there `fqn[1]` is the node's
 * own name, not a folder, and the node is grouped under `default`.
 *
 * @param {{resource_type?: string, fqn?: string[]}} node - dbt manifest node.
 * @returns {string} `'seeds'`, the folder name, or `'default'`.
 */
const getFolderKey = (node) => {
  if (node.resource_type === 'seed') return 'seeds';

  const fqn = Array.isArray(node.fqn) ? node.fqn : [];
  // With only [project, name] there is no folder segment to report.
  return (fqn.length > 2 && fqn[1]) || 'default';
};
9
+
10
/**
 * Make a split key safe to use as a file-name stem.
 *
 * Split keys come from dbt schema names, tags or folder names and are used to
 * build the output file name, so path separators, leading dots and characters
 * that are invalid in file names on common platforms are replaced with `_`.
 *
 * @param {unknown} key - Raw split key.
 * @returns {string} Sanitized key, or `'default'` when nothing usable remains.
 */
const toSplitFileStem = (key) => {
  const stem = String(key ?? '')
    .replace(/[\\\/:*?"<>|]/g, '_')
    .replace(/^\.+/, '_')
    .trim();
  return stem || 'default';
};

/**
 * Import a dbt project's `target/manifest.json` into Modscape YAML model(s).
 *
 * Models, seeds, snapshots and sources become tables; `depends_on.nodes`
 * becomes `lineage.upstream`; the folder key of each node becomes a domain.
 * Without `splitBy` a single `dbt-model.yaml` is written; with it, one YAML
 * file per split key is written and a self-contained rate (share of lineage
 * edges that stay inside the file) is reported for each.
 *
 * Failures are reported on stderr; the function does not throw.
 *
 * @param {string} [projectDir] - dbt project directory (default: cwd).
 * @param {{output?: string, splitBy?: string}} [options] - `output` is the
 *   target directory (default `modscape-<project-name>`); `splitBy` is
 *   `"schema"`, `"tag"` or `"folder"`.
 * @returns {Promise<void>}
 */
export async function importDbt(projectDir, options) {
  const { splitBy: requestedSplit, output } = options ?? {};
  const splitBy = requestedSplit || null;
  const resolvedDir = path.resolve(projectDir || '.');
  const manifestPath = path.join(resolvedDir, 'target', 'manifest.json');

  try {
    // Take the project name from dbt_project.yml, falling back to the
    // directory name. Done inside the try so a malformed or empty file is
    // reported instead of rejecting the returned promise.
    let projectName = path.basename(resolvedDir);
    const dbtProjectPath = path.join(resolvedDir, 'dbt_project.yml');
    if (fs.existsSync(dbtProjectPath)) {
      const dbtProject = yaml.load(fs.readFileSync(dbtProjectPath, 'utf8'));
      projectName = dbtProject?.name || projectName;
    }

    const outputDir = output || `modscape-${projectName}`;

    if (!fs.existsSync(manifestPath)) {
      console.error(` ❌ manifest.json not found at: ${manifestPath}`);
      console.error(` 💡 Run 'dbt parse' in your dbt project first.`);
      return;
    }

    if (splitBy && !['schema', 'tag', 'folder'].includes(splitBy)) {
      // Unknown values keep working as before (folder split), but say so.
      console.warn(` ⚠️  Unknown --split-by value "${splitBy}"; splitting by folder.`);
    }

    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    console.log(` 🔍 Parsing dbt manifest: ${manifestPath}`);

    const importableTypes = ['model', 'seed', 'snapshot', 'source'];
    const allNodes = { ...manifest.nodes, ...manifest.sources };

    const tables = [];
    const domainsMap = new Map();
    const tableSplitKeyMap = new Map();

    for (const node of Object.values(allNodes)) {
      if (!importableTypes.includes(node.resource_type)) continue;

      const tableId = node.unique_id;

      const columns = Object.values(node.columns || {}).map((col) => ({
        id: col.name,
        logical: {
          name: col.name,
          type: col.data_type || 'unknown',
          description: col.description || ''
        }
      }));

      // Lineage only needs the manifest itself, so it is resolved in the same
      // pass; upstream ids that the manifest does not contain are dropped.
      const upstream = (node.depends_on?.nodes || []).filter((id) => allNodes[id]);

      tables.push({
        id: tableId,
        name: node.name,
        logical_name: node.name,
        physical_name: node.alias || node.name,
        appearance: { type: 'table' },
        conceptual: { description: node.description || '' },
        columns,
        lineage: { upstream }
      });

      // Decide the split key (folder unless schema/tag was requested).
      const folderKey = getFolderKey(node);
      let splitKey = folderKey;
      if (splitBy === 'schema') {
        splitKey = node.schema || node.config?.schema || 'default';
      } else if (splitBy === 'tag') {
        const tags = node.tags || node.config?.tags || [];
        splitKey = Array.isArray(tags) ? (tags[0] || 'untagged') : (tags || 'untagged');
      }
      // Sanitized here so keys that map to the same file name share one file
      // instead of overwriting each other.
      tableSplitKeyMap.set(tableId, toSplitFileStem(splitKey));

      // Domains are always built from the folder key, whatever the split mode.
      if (folderKey && folderKey !== 'default') {
        if (!domainsMap.has(folderKey)) {
          domainsMap.set(folderKey, { id: folderKey, name: folderKey, tables: [] });
        }
        domainsMap.get(folderKey).tables.push(tableId);
      }
    }

    // Create the output directory.
    fs.mkdirSync(outputDir, { recursive: true });

    if (splitBy) {
      const splitMap = new Map();
      for (const table of tables) {
        const key = tableSplitKeyMap.get(table.id) || 'default';
        if (!splitMap.has(key)) splitMap.set(key, []);
        splitMap.get(key).push(table);
      }

      let fileCount = 0;
      for (const [key, splitTables] of splitMap.entries()) {
        const tableIds = new Set(splitTables.map((t) => t.id));

        // Self-contained rate: lineage edges whose upstream is in this file.
        let internal = 0;
        let external = 0;
        for (const table of splitTables) {
          for (const upstreamId of table.lineage?.upstream || []) {
            if (tableIds.has(upstreamId)) internal++;
            else external++;
          }
        }
        const total = internal + external;
        const rate = total > 0 ? Math.round(internal / total * 100) : 100;

        // Keep only the domains (and domain members) present in this file.
        const splitDomains = [];
        for (const domain of domainsMap.values()) {
          const members = domain.tables.filter((tid) => tableIds.has(tid));
          if (members.length > 0) splitDomains.push({ ...domain, tables: members });
        }

        const splitOutputPath = path.join(outputDir, `${key}.yaml`);
        const outputModel = {
          tables: splitTables,
          relationships: [],
          domains: splitDomains
        };
        fs.writeFileSync(splitOutputPath, yaml.dump(outputModel), 'utf8');

        const icon = rate >= 80 ? '✅' : '⚠️ ';
        const warning = rate < 80 ? ` (${external} cross-file lineage edges missing)` : '';
        console.log(` ${icon} ${splitOutputPath} (${splitTables.length} tables, ${rate}% self-contained${warning})`);
        fileCount++;
      }

      console.log(`\n 📦 Split into ${fileCount} files → ${outputDir}/`);
      console.log(` 🚀 Run 'modscape dev ${outputDir}' to visualize.`);
    } else {
      const outputPath = path.join(outputDir, 'dbt-model.yaml');
      const outputModel = {
        tables,
        relationships: [],
        domains: Array.from(domainsMap.values())
      };

      fs.writeFileSync(outputPath, yaml.dump(outputModel), 'utf8');
      console.log(` ✅ Successfully imported ${tables.length} tables → ${outputPath}`);
      console.log(` 🚀 Run 'modscape dev ${outputDir}' to visualize.`);
    }
  } catch (error) {
    console.error(` ❌ Failed to import dbt metadata: ${error.message}`);
  }
}
package/src/index.js CHANGED
@@ -8,8 +8,11 @@ import { build } from './build.js';
8
8
  import { initProject } from './init.js';
9
9
  import { exportModel } from './export.js';
10
10
  import { createModel } from './create.js';
11
+ import { importDbt } from './import-dbt.js';
12
+ import { syncDbt } from './sync-dbt.js';
11
13
  import { createRequire } from 'module';
12
-
14
+ import { mergeModels } from './merge.js';
15
+
13
16
  const require = createRequire(import.meta.url);
14
17
  const pkg = require('../package.json');
15
18
 
@@ -67,4 +70,38 @@ program
67
70
  exportModel(paths, options);
68
71
  });
69
72
 
73
/**
 * Wrap a command handler so that a thrown error or rejected promise is
 * reported on stderr and reflected in the process exit code.
 *
 * `importDbt` and `syncDbt` are async; calling them without awaiting leaves
 * the returned promise floating, so a rejection would surface as an unhandled
 * rejection rather than a readable CLI error.
 *
 * @param {(target: any, options: object) => unknown} handler - Command implementation.
 * @returns {(target: any, options: object) => Promise<void>} Commander action.
 */
const withErrorReport = (handler) => async (target, options) => {
  try {
    await handler(target, options);
  } catch (error) {
    console.error(` ❌ ${error.message}`);
    process.exitCode = 1;
  }
};

const dbtCommand = program
  .command('dbt')
  .description('dbt integration commands');

dbtCommand
  .command('import')
  .description('Import dbt project into Modscape YAML models')
  .argument('[project-dir]', 'path to dbt project directory (default: current directory)')
  .option('-o, --output <dir>', 'output directory (default: modscape-<project-name>)')
  .option('--split-by <key>', 'split output by "schema", "tag", or "folder"')
  .action(withErrorReport(importDbt));

// Also registered under dbtCommand.
dbtCommand
  .command('sync')
  .description('Sync dbt project changes into existing Modscape YAML models')
  .argument('[project-dir]', 'path to dbt project directory (default: current directory)')
  .option('-o, --output <dir>', 'output directory (default: modscape-<project-name>)')
  .action(withErrorReport(syncDbt));

program
  .command('merge')
  .description('Merge multiple YAML models into one')
  .argument('<paths...>', 'YAML files or directories to merge')
  .option('-o, --output <path>', 'output file path', 'merged.yaml')
  .action(withErrorReport(mergeModels));
106
+
70
107
  program.parse();
package/src/init.js CHANGED
@@ -64,25 +64,35 @@ export async function initProject(options = {}) {
64
64
 
65
65
  console.log('\n Scaffolding modeling rules and commands...');
66
66
 
67
- // 1. Create .modscape/rules.md
67
+ // 1. Create .modscape/rules.md and .modscape/codegen-rules.md
68
68
  const rulesTemplatePath = path.join(__dirname, 'templates/rules.md');
69
69
  const rulesTemplate = fs.readFileSync(rulesTemplatePath, 'utf8');
70
70
  await safeWriteFile('.modscape/rules.md', rulesTemplate);
71
71
 
72
+ const codegenRulesTemplatePath = path.join(__dirname, 'templates/codegen-rules.md');
73
+ const codegenRulesTemplate = fs.readFileSync(codegenRulesTemplatePath, 'utf8');
74
+ await safeWriteFile('.modscape/codegen-rules.md', codegenRulesTemplate);
75
+
72
76
  // 2. Create agent-specific files
73
77
  if (agents.includes('gemini')) {
74
- const skillTemplate = fs.readFileSync(path.join(__dirname, 'templates/gemini/SKILL.md'), 'utf8');
75
- await safeWriteFile('.gemini/skills/modscape/SKILL.md', skillTemplate);
78
+ const modelingTemplate = fs.readFileSync(path.join(__dirname, 'templates/gemini/modscape-modeling/SKILL.md'), 'utf8');
79
+ await safeWriteFile('.gemini/skills/modscape-modeling/SKILL.md', modelingTemplate);
80
+ const codegenTemplate = fs.readFileSync(path.join(__dirname, 'templates/gemini/modscape-codegen/SKILL.md'), 'utf8');
81
+ await safeWriteFile('.gemini/skills/modscape-codegen/SKILL.md', codegenTemplate);
76
82
  }
77
83
 
78
84
  if (agents.includes('codex')) {
79
- const skillTemplate = fs.readFileSync(path.join(__dirname, 'templates/codex/SKILL.md'), 'utf8');
80
- await safeWriteFile('.codex/skills/modscape-modeling/SKILL.md', skillTemplate);
85
+ const modelingTemplate = fs.readFileSync(path.join(__dirname, 'templates/codex/modscape-modeling/SKILL.md'), 'utf8');
86
+ await safeWriteFile('.codex/skills/modscape-modeling/SKILL.md', modelingTemplate);
87
+ const codegenTemplate = fs.readFileSync(path.join(__dirname, 'templates/codex/modscape-codegen/SKILL.md'), 'utf8');
88
+ await safeWriteFile('.codex/skills/modscape-codegen/SKILL.md', codegenTemplate);
81
89
  }
82
90
 
83
91
  if (agents.includes('claude')) {
84
- const commandTemplate = fs.readFileSync(path.join(__dirname, 'templates/claude/command.md'), 'utf8');
85
- await safeWriteFile('.claude/commands/modscape/modeling.md', commandTemplate);
92
+ const modelingTemplate = fs.readFileSync(path.join(__dirname, 'templates/claude/modeling.md'), 'utf8');
93
+ await safeWriteFile('.claude/commands/modscape/modeling.md', modelingTemplate);
94
+ const codegenTemplate = fs.readFileSync(path.join(__dirname, 'templates/claude/codegen.md'), 'utf8');
95
+ await safeWriteFile('.claude/commands/modscape/codegen.md', codegenTemplate);
86
96
  }
87
97
 
88
98
  console.log('\n ✅ Initialization complete! Customize ".modscape/rules.md" to match your project standards.\n');
package/src/merge.js ADDED
@@ -0,0 +1,74 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import yaml from 'js-yaml';
4
+
5
/**
 * Expand one input path into the YAML files it denotes.
 *
 * A directory yields its direct `.yaml` / `.yml` entries (not recursive),
 * sorted by name so that callers relying on "first occurrence wins" behave
 * the same on every platform. Sub-directories are skipped even when their
 * name ends in a YAML extension. Any other path is returned unchanged,
 * whatever its extension, because the caller named it explicitly.
 *
 * @param {string} inputPath - File or directory path.
 * @returns {string[]} Paths of the files to read.
 * @throws {Error} Propagates the `fs.statSync` error when `inputPath` does not exist.
 */
const collectYamlFiles = (inputPath) => {
  if (!fs.statSync(inputPath).isDirectory()) return [inputPath];

  return fs.readdirSync(inputPath, { withFileTypes: true })
    .filter((entry) => !entry.isDirectory())
    .map((entry) => entry.name)
    .filter((name) => name.endsWith('.yaml') || name.endsWith('.yml'))
    .sort()
    .map((name) => path.join(inputPath, name));
};
14
+
15
/**
 * Merge several Modscape YAML models into a single model file.
 *
 * - Tables: the first occurrence of each `id` wins; later duplicates are dropped.
 * - Relationships: concatenated as-is.
 * - Domains: merged by `id`. The first occurrence supplies the domain's
 *   fields and the `tables` lists of later occurrences are unioned into it,
 *   so a domain spread across several split files keeps all its members.
 *
 * Unreadable inputs are reported and skipped. Nothing is written when no
 * input could be loaded.
 *
 * @param {string[]} inputs - YAML files and/or directories containing them.
 * @param {{output?: string}} [options] - `output` is the target file
 *   (default `merged.yaml`).
 * @returns {void}
 */
export function mergeModels(inputs, options) {
  const outputPath = options?.output || 'merged.yaml';
  const resolvedOutput = path.resolve(outputPath);

  // Expand every input path into concrete files.
  const allFiles = [];
  const seenFiles = new Set();
  for (const input of inputs) {
    let expanded;
    try {
      expanded = collectYamlFiles(input);
    } catch (e) {
      // A missing or unreadable path should not abort the whole merge.
      console.error(` ❌ Failed to read ${input}: ${e.message}`);
      continue;
    }

    for (const filePath of expanded) {
      const resolved = path.resolve(filePath);
      // A previous merge result sitting inside an input directory must not
      // be merged into itself; an explicitly named file is always honoured.
      if (filePath !== input && resolved === resolvedOutput) continue;
      // The same file reached twice would duplicate its relationships.
      if (seenFiles.has(resolved)) continue;
      seenFiles.add(resolved);
      allFiles.push(filePath);
    }
  }

  if (allFiles.length === 0) {
    console.error(` ❌ No YAML files found`);
    return;
  }

  const mergedTables = [];
  const mergedRelationships = [];
  const domainsById = new Map();
  const seenTableIds = new Set();
  let loadedCount = 0;

  for (const filePath of allFiles) {
    try {
      const data = yaml.load(fs.readFileSync(filePath, 'utf8'));
      if (!data) continue;

      // tables: skip ids that were already merged.
      for (const table of data.tables || []) {
        if (!seenTableIds.has(table.id)) {
          mergedTables.push(table);
          seenTableIds.add(table.id);
        }
      }

      // relationships: append everything unchanged.
      mergedRelationships.push(...(data.relationships || []));

      // domains: one entry per id, with member tables unioned across files.
      for (const domain of data.domains || []) {
        const known = domainsById.get(domain.id);
        if (!known) {
          // Copy so the union below never mutates the parsed input.
          domainsById.set(
            domain.id,
            Array.isArray(domain.tables) ? { ...domain, tables: [...domain.tables] } : { ...domain }
          );
          continue;
        }
        const current = known.tables || [];
        const extra = (domain.tables || []).filter((tid) => !current.includes(tid));
        if (extra.length > 0) known.tables = [...current, ...extra];
      }

      loadedCount++;
      console.log(` 📄 ${filePath} (${(data.tables || []).length} tables)`);
    } catch (e) {
      console.error(` ❌ Failed to read ${filePath}: ${e.message}`);
    }
  }

  if (loadedCount === 0) {
    // Do not overwrite an existing output with an empty model.
    console.error(` ❌ No YAML files found`);
    return;
  }

  const outputModel = {
    tables: mergedTables,
    relationships: mergedRelationships,
    domains: Array.from(domainsById.values())
  };

  try {
    fs.writeFileSync(outputPath, yaml.dump(outputModel), 'utf8');
  } catch (e) {
    console.error(` ❌ Failed to write ${outputPath}: ${e.message}`);
    return;
  }

  console.log(`\n ✅ Merged ${loadedCount} files → ${outputPath} (${mergedTables.length} tables)`);
  console.log(` 🚀 Run 'modscape dev ${outputPath}' to visualize.`);
}
@@ -0,0 +1,146 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import yaml from 'js-yaml';
4
+
5
/**
 * Derive the "folder" grouping key for a dbt manifest node.
 *
 * Seeds are always grouped under `seeds`. Any other node uses the second
 * `fqn` segment, but only when `fqn` has more than two segments. dbt builds
 * `fqn` as `[project, ...folders, name]`, so a two-segment `fqn` belongs to a
 * node that sits directly in the project root; there `fqn[1]` is the node's
 * own name, not a folder, and the node is grouped under `default`.
 *
 * NOTE(review): no call to this helper is visible in the sync code below —
 * confirm whether it is still needed in this module.
 *
 * @param {{resource_type?: string, fqn?: string[]}} node - dbt manifest node.
 * @returns {string} `'seeds'`, the folder name, or `'default'`.
 */
const getFolderKey = (node) => {
  if (node.resource_type === 'seed') return 'seeds';

  const fqn = Array.isArray(node.fqn) ? node.fqn : [];
  // With only [project, name] there is no folder segment to report.
  return (fqn.length > 2 && fqn[1]) || 'default';
};
9
+
10
/**
 * Overlay freshly imported columns on the columns already in a YAML table.
 *
 * The result follows the dbt column list (order and membership). When a
 * column with the same `id` already exists, keys that dbt does not supply are
 * kept and dbt-derived values win on conflict.
 *
 * @param {unknown} existingColumns - `columns` value currently in the YAML.
 * @param {Array<{id: string, logical: object}>} latestColumns - Columns built from the manifest.
 * @returns {object[]} Columns to write back.
 */
const mergeSyncedColumns = (existingColumns, latestColumns) => {
  const previousById = new Map();
  for (const col of Array.isArray(existingColumns) ? existingColumns : []) {
    if (col && !previousById.has(col.id)) previousById.set(col.id, col);
  }

  return latestColumns.map((latest) => {
    const previous = previousById.get(latest.id);
    if (!previous) return latest;
    return {
      ...previous,
      ...latest,
      logical: { ...previous.logical, ...latest.logical }
    };
  });
};

/**
 * Refresh existing Modscape YAML models from a dbt project's manifest.
 *
 * Every table in the output directory whose `id` matches a manifest node gets
 * its names, description, columns and lineage refreshed; keys the manifest
 * does not provide are left untouched. Manifest nodes that appear in no YAML
 * file are appended to the first model file (by file name). Tables that no
 * longer exist in the manifest are kept as they are.
 *
 * Failures are reported on stderr; the function does not throw.
 *
 * @param {string} [projectDir] - dbt project directory (default: cwd).
 * @param {{output?: string}} [options] - `output` is the directory holding the
 *   YAML models (default `modscape-<project-name>`).
 * @returns {Promise<void>}
 */
export async function syncDbt(projectDir, options) {
  const { output } = options ?? {};
  const resolvedDir = path.resolve(projectDir || '.');
  const manifestPath = path.join(resolvedDir, 'target', 'manifest.json');

  try {
    // Inside the try so a malformed or empty dbt_project.yml is reported
    // instead of rejecting the returned promise.
    let projectName = path.basename(resolvedDir);
    const dbtProjectPath = path.join(resolvedDir, 'dbt_project.yml');
    if (fs.existsSync(dbtProjectPath)) {
      const dbtProject = yaml.load(fs.readFileSync(dbtProjectPath, 'utf8'));
      projectName = dbtProject?.name || projectName;
    }

    const outputDir = output || `modscape-${projectName}`;

    if (!fs.existsSync(manifestPath)) {
      console.error(` ❌ manifest.json not found at: ${manifestPath}`);
      console.error(` 💡 Run 'dbt parse' in your dbt project first.`);
      return;
    }

    if (!fs.existsSync(outputDir)) {
      console.error(` ❌ Output directory not found: ${outputDir}`);
      console.error(` 💡 Run 'modscape dbt import' first.`);
      return;
    }

    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    console.log(` 🔍 Parsing dbt manifest: ${manifestPath}`);

    const syncableTypes = ['model', 'seed', 'snapshot', 'source'];
    const allNodes = { ...manifest.nodes, ...manifest.sources };
    const latestTablesMap = new Map();

    for (const node of Object.values(allNodes)) {
      if (!syncableTypes.includes(node.resource_type)) continue;

      const columns = Object.values(node.columns || {}).map((col) => ({
        id: col.name,
        logical: {
          name: col.name,
          type: col.data_type || 'unknown',
          description: col.description || ''
        }
      }));

      latestTablesMap.set(node.unique_id, {
        id: node.unique_id,
        name: node.name,
        logical_name: node.name,
        physical_name: node.alias || node.name,
        appearance: { type: 'table' },
        conceptual: { description: node.description || '' },
        columns,
        // Upstream ids the manifest does not contain are dropped.
        lineage: { upstream: (node.depends_on?.nodes || []).filter((id) => allNodes[id]) }
      });
    }

    // Sorted so "the first file" is the same on every platform.
    const yamlFiles = fs.readdirSync(outputDir)
      .filter((f) => f.endsWith('.yaml') || f.endsWith('.yml'))
      .sort();

    if (yamlFiles.length === 0) {
      console.error(` ❌ No YAML files found in: ${outputDir}`);
      return;
    }

    let addedCount = 0;
    let updatedCount = 0;
    const processedTableIds = new Set();
    // First YAML file that really is a model (has a `tables` list); new
    // tables are appended there.
    let firstModel = null;

    for (const yamlFile of yamlFiles) {
      const yamlPath = path.join(outputDir, yamlFile);
      const existing = yaml.load(fs.readFileSync(yamlPath, 'utf8'));

      // Skip YAML files that are not Modscape models.
      if (!existing || !Array.isArray(existing.tables)) continue;

      const refreshedTables = existing.tables.map((table) => {
        const latest = latestTablesMap.get(table.id);
        if (!latest) return table;

        processedTableIds.add(table.id);
        updatedCount++;

        // `appearance` and any other user-authored keys come from `table`;
        // only dbt-derived fields are refreshed.
        return {
          ...table,
          name: latest.name,
          logical_name: latest.logical_name,
          physical_name: latest.physical_name,
          conceptual: { ...table.conceptual, ...latest.conceptual },
          columns: mergeSyncedColumns(table.columns, latest.columns),
          lineage: { ...table.lineage, ...latest.lineage }
        };
      });

      const updated = { ...existing, tables: refreshedTables };
      fs.writeFileSync(yamlPath, yaml.dump(updated), 'utf8');
      console.log(` 📄 Updated: ${yamlPath}`);

      if (!firstModel) firstModel = { file: yamlFile, path: yamlPath, doc: updated };
    }

    const newTables = [];
    for (const [tableId, latest] of latestTablesMap.entries()) {
      if (!processedTableIds.has(tableId)) {
        newTables.push(latest);
        addedCount++;
      }
    }

    if (newTables.length > 0) {
      if (!firstModel) {
        console.error(` ❌ No YAML model with a "tables" list found in: ${outputDir}`);
        console.error(` 💡 Run 'modscape dbt import' first.`);
        return;
      }
      const withAdditions = {
        ...firstModel.doc,
        tables: [...firstModel.doc.tables, ...newTables]
      };
      fs.writeFileSync(firstModel.path, yaml.dump(withAdditions), 'utf8');
      console.log(` ➕ Added ${addedCount} new tables → ${firstModel.file}`);
    }

    console.log(` ✅ Sync complete: ${updatedCount} updated, ${addedCount} added`);
    console.log(` 🚀 Run 'modscape dev ${outputDir}' to visualize.`);
  } catch (error) {
    console.error(` ❌ Failed to sync dbt metadata: ${error.message}`);
  }
}
@@ -0,0 +1,15 @@
1
+ Generate implementation code from a Modscape YAML model.
2
+
3
+ ## Instructions
4
+ 1. FIRST, read `.modscape/codegen-rules.md` to understand how to interpret the YAML.
5
+ 2. SECOND, read the target YAML file specified by the user (default: `model.yaml`).
6
+ 3. Ask the user which tool to target if not specified (dbt / SQLMesh / Spark SQL / plain SQL).
7
+ 4. Generate models in dependency order (upstream first) based on `lineage.upstream`.
8
+ 5. Add `-- TODO:` comments wherever the YAML does not provide enough information to generate definitive code.
9
+
10
+ ## Usage
11
+ ```
12
+ /modscape:codegen
13
+ /modscape:codegen path/to/model.yaml
14
+ /modscape:codegen path/to/model.yaml --target dbt
15
+ ```
@@ -1,5 +1,3 @@
1
- # /modscape:modeling
2
-
3
1
  Start an interactive data modeling session.
4
2
 
5
3
  ## Instructions
@@ -0,0 +1,138 @@
1
+ # Modscape Code Generation Rules
2
+
3
+ This file defines how to interpret a Modscape `model.yaml` when generating implementation code (dbt, SQLMesh, Spark, etc.).
4
+
5
+ Read this file alongside `.modscape/rules.md` (which defines the YAML schema) before generating any code.
6
+
7
+ ---
8
+
9
+ ## 1. Dependency Order (DAG)
10
+
11
+ Use `lineage.upstream` to determine build order. Always generate upstream models before downstream ones.
12
+
13
+ ```yaml
14
+ lineage:
15
+ upstream: [stg_orders, stg_order_items] # these must be generated first
16
+ ```
17
+
18
+ In dbt this becomes `{{ ref('stg_orders') }}`. In SQLMesh, reference the upstream model by name directly in the query (e.g. `FROM staging.stg_orders`); SQLMesh infers the dependency from the SQL. Apply the equivalent pattern for your target tool.
19
+
20
+ ---
21
+
22
+ ## 2. Materialization Strategy
23
+
24
+ Map `implementation.materialization` directly to your target tool's config block. If `implementation` is absent, fall back to `appearance.type` as a hint:
25
+
26
+ | `appearance.type` | Default materialization |
27
+ |-------------------|------------------------|
28
+ | `fact` | `incremental` |
29
+ | `dimension` | `table` |
30
+ | `mart` | `table` |
31
+ | `hub` / `link` / `satellite` | `table` |
32
+ | `table` / `staging` | `view` |
33
+
34
+ ```yaml
35
+ # Explicit — always prefer this over the default
36
+ implementation:
37
+ materialization: incremental
38
+ incremental_strategy: merge
39
+ unique_key: order_id
40
+ partition_by: { field: order_date, granularity: day }
41
+ cluster_by: [customer_id]
42
+ ```
43
+
44
+ ---
45
+
46
+ ## 3. JOIN Conditions
47
+
48
+ Derive JOIN keys from two sources:
49
+
50
+ 1. **`relationships`** — explicit FK links between tables
51
+ 2. **`columns[].logical.isForeignKey: true`** — columns that carry FK values
52
+
53
+ Use matching column names across tables to infer the ON clause. When a column is `isForeignKey: true` and shares a name with another table's `isPrimaryKey: true` column, that is the join key.
54
+
55
+ ---
56
+
57
+ ## 4. SCD Type 2
58
+
59
+ When `appearance.scd: type2`, the dimension requires historical tracking:
60
+
61
+ - In **dbt**: generate a snapshot (`dbt snapshot`) driven by the `updated_at` column, then build the dimension model on top of the snapshot.
62
+ - In other tools: apply the equivalent row-versioning pattern.
63
+ - Always expose `valid_from`, `valid_to`, and `is_current` columns (defined in the YAML).
64
+ - When joining to a SCD type2 dimension from a fact table, filter `WHERE is_current = true` unless the query is point-in-time.
65
+
66
+ ---
67
+
68
+ ## 5. Mart Aggregations
69
+
70
+ When `implementation.grain` and `implementation.measures` are defined, use them directly:
71
+
72
+ ```yaml
73
+ implementation:
74
+ grain: [month_key, region_id] # → GROUP BY
75
+ measures:
76
+ - column: total_revenue
77
+ agg: sum
78
+ source_column: fct_sales.amount # → SUM(s.amount) AS total_revenue
79
+ - column: order_count
80
+ agg: count_distinct
81
+ source_column: fct_orders.order_id
82
+ ```
83
+
84
+ When `grain`/`measures` are **not** defined, infer from column metadata:
85
+ - `isPrimaryKey: true` non-measure columns → likely GROUP BY candidates
86
+ - `additivity: fully` → safe to use `SUM` or `COUNT`
87
+ - `additivity: semi` → summable across some dimensions only (typically not across time); across the others use `AVG`, `MIN`, `MAX` or a similar non-summing aggregation
88
+ - Column name patterns (`_count`, `_revenue`, `_total`, `_avg`) provide additional hints
89
+
90
+ ---
91
+
92
+ ## 6. Column Mapping
93
+
94
+ | YAML field | Code generation use |
95
+ |-----------|-------------------|
96
+ | `logical.name` | Column alias / documentation |
97
+ | `physical.name` | Actual column name in SQL (use this if present, otherwise use `id`) |
98
+ | `isPrimaryKey: true` | Declare as primary key constraint or unique test |
99
+ | `isForeignKey: true` | JOIN key candidate |
100
+ | `isPartitionKey: true` | Confirm as partition column |
101
+ | `isMetadata: true` | Audit/system column — include last, or exclude from business logic |
102
+ | `additivity` | Aggregation function choice (see section 5) |
103
+ | `physical.type` | Override the logical type for DDL generation |
104
+ | `physical.constraints` | Add NOT NULL / UNIQUE constraints where supported |
105
+
106
+ ---
107
+
108
+ ## 7. TODO Comments for Inferred Logic
109
+
110
+ When you must make an assumption that cannot be derived from the YAML, leave a `TODO` comment so the user can review it. Keep generated code runnable; do not leave placeholders that cause syntax errors.
111
+
112
+ Common TODO patterns:
113
+
114
+ ```sql
115
+ -- TODO: verify surrogate key generation method (currently using row_number — consider hash if order is non-deterministic)
116
+ -- TODO: confirm incremental watermark column (currently using 'updated_at' — adjust if your source uses a different audit column)
117
+ -- TODO: verify date range for date dimension (currently 2020-01-01 to 2030-12-31)
118
+ -- TODO: grain/measures not defined in YAML — aggregations inferred from column names; verify before production use
119
+ -- TODO: source schema assumed to be 'raw' — update {{ source(...) }} references to match your project
120
+ ```
121
+
122
+ ---
123
+
124
+ ## 8. Physical Table Names
125
+
126
+ When `physical_name` is set on a table, use it as the actual table name in DDL or config blocks. The `id` field is the logical reference name used in `ref()` calls and `lineage.upstream`.
127
+
128
+ ---
129
+
130
+ ## 9. What Modscape Does Not Define (Always TODO)
131
+
132
+ The following are intentionally out of scope for the YAML. Always emit a TODO when you encounter them:
133
+
134
+ - Surrogate key generation strategy (hash vs. sequence vs. row_number)
135
+ - Incremental filter / watermark logic
136
+ - Source system schema names
137
+ - Date dimension generation range and method
138
+ - Database-specific SQL dialect quirks
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: modscape-codegen
3
+ description: Generate implementation code (dbt, SQLMesh, Spark SQL, etc.) from a Modscape YAML model.
4
+ ---
5
+
6
+ # Code Generation from Modscape YAML
7
+
8
+ You are a data pipeline engineer. Your task is to generate implementation code from a Modscape `model.yaml`.
9
+
10
+ BEFORE generating any code, you MUST read `.modscape/codegen-rules.md` to understand how to interpret the YAML.
11
+
12
+ ## Steps
13
+ 1. READ `.modscape/codegen-rules.md`.
14
+ 2. READ the target YAML file specified by the user (default: `model.yaml`).
15
+ 3. ASK which tool to target if not specified (dbt / SQLMesh / Spark SQL / plain SQL).
16
+ 4. GENERATE models in dependency order (upstream first) based on `lineage.upstream`.
17
+ 5. ADD `-- TODO:` comments wherever the YAML does not provide enough information to generate definitive code.
18
+
19
+ ## COMMAND: /modscape:codegen
20
+ Usage: `/modscape:codegen [path/to/model.yaml] [--target dbt|sqlmesh|spark|sql]`