forgecraft-mcp 1.2.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +525 -525
- package/dist/cli/help.js +44 -44
- package/dist/registry/renderer-skeletons.js +92 -92
- package/dist/shared/gs-score-logger.js +6 -6
- package/dist/tools/add-module.js +123 -123
- package/dist/tools/advice-registry.js +18 -18
- package/dist/tools/check-cascade-report.js +64 -64
- package/dist/tools/configure-mcp.d.ts +3 -0
- package/dist/tools/configure-mcp.d.ts.map +1 -1
- package/dist/tools/configure-mcp.js +10 -0
- package/dist/tools/configure-mcp.js.map +1 -1
- package/dist/tools/forgecraft-dispatch.d.ts.map +1 -1
- package/dist/tools/forgecraft-dispatch.js +3 -0
- package/dist/tools/forgecraft-dispatch.js.map +1 -1
- package/dist/tools/forgecraft-schema-params.d.ts +9 -0
- package/dist/tools/forgecraft-schema-params.d.ts.map +1 -1
- package/dist/tools/forgecraft-schema-params.js +21 -0
- package/dist/tools/forgecraft-schema-params.js.map +1 -1
- package/dist/tools/forgecraft-schema.d.ts +9 -0
- package/dist/tools/forgecraft-schema.d.ts.map +1 -1
- package/dist/tools/refresh-output.js +14 -14
- package/dist/tools/scaffold-spec-stubs.js +115 -115
- package/dist/tools/scaffold-templates.js +62 -62
- package/dist/tools/setup-artifact-writers.d.ts +30 -0
- package/dist/tools/setup-artifact-writers.d.ts.map +1 -1
- package/dist/tools/setup-artifact-writers.js +120 -8
- package/dist/tools/setup-artifact-writers.js.map +1 -1
- package/dist/tools/setup-phase1.d.ts +3 -0
- package/dist/tools/setup-phase1.d.ts.map +1 -1
- package/dist/tools/setup-phase1.js +79 -35
- package/dist/tools/setup-phase1.js.map +1 -1
- package/dist/tools/setup-phase2.d.ts +2 -0
- package/dist/tools/setup-phase2.d.ts.map +1 -1
- package/dist/tools/setup-phase2.js +10 -1
- package/dist/tools/setup-phase2.js.map +1 -1
- package/dist/tools/setup-project.d.ts +18 -0
- package/dist/tools/setup-project.d.ts.map +1 -1
- package/dist/tools/setup-project.js +77 -1
- package/dist/tools/setup-project.js.map +1 -1
- package/dist/tools/spec-parser-tags.d.ts +9 -0
- package/dist/tools/spec-parser-tags.d.ts.map +1 -1
- package/dist/tools/spec-parser-tags.js +92 -0
- package/dist/tools/spec-parser-tags.js.map +1 -1
- package/package.json +89 -86
- package/templates/analytics/instructions.yaml +37 -37
- package/templates/analytics/mcp-servers.yaml +11 -11
- package/templates/analytics/structure.yaml +25 -25
- package/templates/api/instructions.yaml +231 -231
- package/templates/api/mcp-servers.yaml +22 -13
- package/templates/api/nfr.yaml +23 -23
- package/templates/api/review.yaml +103 -103
- package/templates/api/structure.yaml +34 -34
- package/templates/api/verification.yaml +132 -132
- package/templates/cli/instructions.yaml +31 -31
- package/templates/cli/mcp-servers.yaml +11 -11
- package/templates/cli/review.yaml +53 -53
- package/templates/cli/structure.yaml +16 -16
- package/templates/data-lineage/instructions.yaml +28 -28
- package/templates/data-lineage/mcp-servers.yaml +22 -22
- package/templates/data-pipeline/instructions.yaml +84 -84
- package/templates/data-pipeline/mcp-servers.yaml +13 -13
- package/templates/data-pipeline/nfr.yaml +39 -39
- package/templates/data-pipeline/structure.yaml +23 -23
- package/templates/fintech/hooks.yaml +55 -55
- package/templates/fintech/instructions.yaml +112 -112
- package/templates/fintech/mcp-servers.yaml +13 -13
- package/templates/fintech/nfr.yaml +46 -46
- package/templates/fintech/playbook.yaml +210 -210
- package/templates/fintech/verification.yaml +239 -239
- package/templates/game/instructions.yaml +289 -289
- package/templates/game/mcp-servers.yaml +38 -38
- package/templates/game/nfr.yaml +64 -64
- package/templates/game/playbook.yaml +214 -214
- package/templates/game/review.yaml +97 -97
- package/templates/game/structure.yaml +67 -67
- package/templates/game/verification.yaml +174 -174
- package/templates/healthcare/instructions.yaml +42 -42
- package/templates/healthcare/mcp-servers.yaml +13 -13
- package/templates/healthcare/nfr.yaml +47 -47
- package/templates/hipaa/instructions.yaml +41 -41
- package/templates/hipaa/mcp-servers.yaml +13 -13
- package/templates/infra/instructions.yaml +104 -104
- package/templates/infra/mcp-servers.yaml +20 -20
- package/templates/infra/nfr.yaml +46 -46
- package/templates/infra/review.yaml +65 -65
- package/templates/infra/structure.yaml +25 -25
- package/templates/library/instructions.yaml +36 -36
- package/templates/library/mcp-servers.yaml +20 -20
- package/templates/library/review.yaml +56 -56
- package/templates/library/structure.yaml +19 -19
- package/templates/medallion-architecture/instructions.yaml +41 -41
- package/templates/medallion-architecture/mcp-servers.yaml +22 -22
- package/templates/ml/instructions.yaml +85 -85
- package/templates/ml/mcp-servers.yaml +11 -11
- package/templates/ml/nfr.yaml +39 -39
- package/templates/ml/structure.yaml +25 -25
- package/templates/ml/verification.yaml +156 -156
- package/templates/mobile/instructions.yaml +44 -44
- package/templates/mobile/mcp-servers.yaml +11 -11
- package/templates/mobile/nfr.yaml +49 -49
- package/templates/mobile/structure.yaml +27 -27
- package/templates/mobile/verification.yaml +121 -121
- package/templates/observability-xray/instructions.yaml +40 -40
- package/templates/observability-xray/mcp-servers.yaml +15 -15
- package/templates/realtime/instructions.yaml +42 -42
- package/templates/realtime/mcp-servers.yaml +13 -13
- package/templates/soc2/instructions.yaml +41 -41
- package/templates/soc2/mcp-servers.yaml +24 -24
- package/templates/social/instructions.yaml +43 -43
- package/templates/social/mcp-servers.yaml +24 -24
- package/templates/state-machine/instructions.yaml +42 -42
- package/templates/state-machine/mcp-servers.yaml +11 -11
- package/templates/tools-registry.yaml +164 -164
- package/templates/universal/hooks.yaml +531 -531
- package/templates/universal/instructions.yaml +1692 -1692
- package/templates/universal/mcp-servers.yaml +50 -50
- package/templates/universal/nfr.yaml +197 -197
- package/templates/universal/reference.yaml +326 -326
- package/templates/universal/review.yaml +204 -204
- package/templates/universal/skills.yaml +262 -262
- package/templates/universal/structure.yaml +67 -67
- package/templates/universal/verification.yaml +416 -416
- package/templates/web-react/hooks.yaml +44 -44
- package/templates/web-react/instructions.yaml +207 -207
- package/templates/web-react/mcp-servers.yaml +20 -20
- package/templates/web-react/nfr.yaml +27 -27
- package/templates/web-react/review.yaml +94 -94
- package/templates/web-react/structure.yaml +46 -46
- package/templates/web-react/verification.yaml +126 -126
- package/templates/web-static/instructions.yaml +115 -115
- package/templates/web-static/mcp-servers.yaml +20 -20
- package/templates/web3/instructions.yaml +44 -44
- package/templates/web3/mcp-servers.yaml +11 -11
- package/templates/web3/verification.yaml +159 -159
- package/templates/zero-trust/instructions.yaml +41 -41
- package/templates/zero-trust/mcp-servers.yaml +15 -15
|
@@ -1,53 +1,53 @@
|
|
|
1
|
-
tag: CLI
|
|
2
|
-
section: review
|
|
3
|
-
blocks:
|
|
4
|
-
- id: cli-architecture-review
|
|
5
|
-
tier: recommended
|
|
6
|
-
dimension: architecture
|
|
7
|
-
title: "CLI Architecture Review"
|
|
8
|
-
description: |
|
|
9
|
-
Evaluate CLI-specific patterns: argument parsing, output formatting, and exit codes.
|
|
10
|
-
checklist:
|
|
11
|
-
- id: cli-argument-parsing
|
|
12
|
-
description: "Arguments parsed with a proper library (yargs, commander, clap). No manual argv slicing."
|
|
13
|
-
severity: important
|
|
14
|
-
- id: cli-exit-codes
|
|
15
|
-
description: "Exit codes follow convention: 0 = success, 1 = general error, 2 = usage error. Documented."
|
|
16
|
-
severity: important
|
|
17
|
-
- id: cli-help-output
|
|
18
|
-
description: "--help flag yields clear usage with examples. Every subcommand has a description."
|
|
19
|
-
severity: important
|
|
20
|
-
- id: cli-stdin-stdout
|
|
21
|
-
description: "Supports stdin/stdout piping for composability. Machine-readable output (JSON) via --json flag."
|
|
22
|
-
severity: nice-to-have
|
|
23
|
-
|
|
24
|
-
- id: cli-code-quality-review
|
|
25
|
-
tier: recommended
|
|
26
|
-
dimension: code-quality
|
|
27
|
-
title: "CLI Code Quality Review"
|
|
28
|
-
description: |
|
|
29
|
-
Evaluate CLI UX and error handling.
|
|
30
|
-
checklist:
|
|
31
|
-
- id: cli-error-messages
|
|
32
|
-
description: "Errors show what went wrong, why, and how to fix it. Include the failing input value."
|
|
33
|
-
severity: critical
|
|
34
|
-
- id: cli-progress-feedback
|
|
35
|
-
description: "Long operations show progress (spinner, progress bar). Silent mode available via --quiet."
|
|
36
|
-
severity: important
|
|
37
|
-
- id: cli-destructive-confirm
|
|
38
|
-
description: "Destructive operations require confirmation unless --force or --yes flag is passed."
|
|
39
|
-
severity: important
|
|
40
|
-
|
|
41
|
-
- id: cli-test-review
|
|
42
|
-
tier: recommended
|
|
43
|
-
dimension: tests
|
|
44
|
-
title: "CLI Test Review"
|
|
45
|
-
description: |
|
|
46
|
-
Evaluate CLI-specific testing patterns.
|
|
47
|
-
checklist:
|
|
48
|
-
- id: cli-integration-tests
|
|
49
|
-
description: "End-to-end tests invoke the CLI binary and assert on stdout, stderr, and exit code."
|
|
50
|
-
severity: critical
|
|
51
|
-
- id: cli-snapshot-output
|
|
52
|
-
description: "Help text and formatted output tested via snapshots to catch unintended changes."
|
|
53
|
-
severity: nice-to-have
|
|
1
|
+
tag: CLI
|
|
2
|
+
section: review
|
|
3
|
+
blocks:
|
|
4
|
+
- id: cli-architecture-review
|
|
5
|
+
tier: recommended
|
|
6
|
+
dimension: architecture
|
|
7
|
+
title: "CLI Architecture Review"
|
|
8
|
+
description: |
|
|
9
|
+
Evaluate CLI-specific patterns: argument parsing, output formatting, and exit codes.
|
|
10
|
+
checklist:
|
|
11
|
+
- id: cli-argument-parsing
|
|
12
|
+
description: "Arguments parsed with a proper library (yargs, commander, clap). No manual argv slicing."
|
|
13
|
+
severity: important
|
|
14
|
+
- id: cli-exit-codes
|
|
15
|
+
description: "Exit codes follow convention: 0 = success, 1 = general error, 2 = usage error. Documented."
|
|
16
|
+
severity: important
|
|
17
|
+
- id: cli-help-output
|
|
18
|
+
description: "--help flag yields clear usage with examples. Every subcommand has a description."
|
|
19
|
+
severity: important
|
|
20
|
+
- id: cli-stdin-stdout
|
|
21
|
+
description: "Supports stdin/stdout piping for composability. Machine-readable output (JSON) via --json flag."
|
|
22
|
+
severity: nice-to-have
|
|
23
|
+
|
|
24
|
+
- id: cli-code-quality-review
|
|
25
|
+
tier: recommended
|
|
26
|
+
dimension: code-quality
|
|
27
|
+
title: "CLI Code Quality Review"
|
|
28
|
+
description: |
|
|
29
|
+
Evaluate CLI UX and error handling.
|
|
30
|
+
checklist:
|
|
31
|
+
- id: cli-error-messages
|
|
32
|
+
description: "Errors show what went wrong, why, and how to fix it. Include the failing input value."
|
|
33
|
+
severity: critical
|
|
34
|
+
- id: cli-progress-feedback
|
|
35
|
+
description: "Long operations show progress (spinner, progress bar). Silent mode available via --quiet."
|
|
36
|
+
severity: important
|
|
37
|
+
- id: cli-destructive-confirm
|
|
38
|
+
description: "Destructive operations require confirmation unless --force or --yes flag is passed."
|
|
39
|
+
severity: important
|
|
40
|
+
|
|
41
|
+
- id: cli-test-review
|
|
42
|
+
tier: recommended
|
|
43
|
+
dimension: tests
|
|
44
|
+
title: "CLI Test Review"
|
|
45
|
+
description: |
|
|
46
|
+
Evaluate CLI-specific testing patterns.
|
|
47
|
+
checklist:
|
|
48
|
+
- id: cli-integration-tests
|
|
49
|
+
description: "End-to-end tests invoke the CLI binary and assert on stdout, stderr, and exit code."
|
|
50
|
+
severity: critical
|
|
51
|
+
- id: cli-snapshot-output
|
|
52
|
+
description: "Help text and formatted output tested via snapshots to catch unintended changes."
|
|
53
|
+
severity: nice-to-have
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
tag: CLI
|
|
2
|
-
section: structure
|
|
3
|
-
language: typescript
|
|
4
|
-
entries:
|
|
5
|
-
- path: src/index.ts
|
|
6
|
-
type: file
|
|
7
|
-
description: "Entry point with shebang (#!/usr/bin/env node)"
|
|
8
|
-
- path: src/cli
|
|
9
|
-
type: directory
|
|
10
|
-
description: "CLI command definitions and argument parsing"
|
|
11
|
-
- path: src/shared/config
|
|
12
|
-
type: directory
|
|
13
|
-
description: "CLI configuration and env handling"
|
|
14
|
-
- path: src/shared/errors
|
|
15
|
-
type: directory
|
|
16
|
-
description: "Error hierarchy with user-friendly messages"
|
|
1
|
+
tag: CLI
|
|
2
|
+
section: structure
|
|
3
|
+
language: typescript
|
|
4
|
+
entries:
|
|
5
|
+
- path: src/index.ts
|
|
6
|
+
type: file
|
|
7
|
+
description: "Entry point with shebang (#!/usr/bin/env node)"
|
|
8
|
+
- path: src/cli
|
|
9
|
+
type: directory
|
|
10
|
+
description: "CLI command definitions and argument parsing"
|
|
11
|
+
- path: src/shared/config
|
|
12
|
+
type: directory
|
|
13
|
+
description: "CLI configuration and env handling"
|
|
14
|
+
- path: src/shared/errors
|
|
15
|
+
type: directory
|
|
16
|
+
description: "Error hierarchy with user-friendly messages"
|
|
@@ -1,28 +1,28 @@
|
|
|
1
|
-
tag: DATA-LINEAGE
|
|
2
|
-
section: instructions
|
|
3
|
-
blocks:
|
|
4
|
-
- id: field-coverage
|
|
5
|
-
tier: recommended
|
|
6
|
-
title: "100% Field Coverage Enforcement"
|
|
7
|
-
content: |
|
|
8
|
-
## 100% Field Coverage Enforcement
|
|
9
|
-
|
|
10
|
-
- Every data field must have a documented origin (source system, table, column) and destination (target system, table, column).
|
|
11
|
-
- Maintain a field-level lineage registry: no field enters production without a lineage entry. Enforce this in CI.
|
|
12
|
-
- Track field transformations explicitly: document every rename, type cast, aggregation, join, and filter applied to each field.
|
|
13
|
-
- Implement automated coverage checks: compare the lineage registry against actual schema definitions. Flag any untracked fields as errors.
|
|
14
|
-
- Version lineage metadata alongside code: lineage definitions live in the repository, not in external wikis or spreadsheets.
|
|
15
|
-
- Generate lineage reports per pipeline run: which fields were read, transformed, and written, with row counts and data quality metrics.
|
|
16
|
-
|
|
17
|
-
- id: lineage-tracking-decorators
|
|
18
|
-
tier: recommended
|
|
19
|
-
title: "Lineage Tracking Decorators & Annotations"
|
|
20
|
-
content: |
|
|
21
|
-
## Lineage Tracking Decorators & Annotations
|
|
22
|
-
|
|
23
|
-
- Use decorators or annotations on transformation functions to declare input fields, output fields, and transformation type.
|
|
24
|
-
- Standardize lineage metadata format: source, transformation, destination, timestamp, pipeline_id, run_id.
|
|
25
|
-
- Emit lineage events at runtime: every transformation step publishes a lineage event to a centralized lineage store.
|
|
26
|
-
- Support both code-level lineage (decorators on functions) and config-level lineage (YAML/JSON transformation specs).
|
|
27
|
-
- Integrate lineage with data quality: when a quality check fails, trace back through lineage to identify the source of bad data.
|
|
28
|
-
- Visualize lineage graphs: generate dependency diagrams showing field-level flow from source to consumption.
|
|
1
|
+
tag: DATA-LINEAGE
|
|
2
|
+
section: instructions
|
|
3
|
+
blocks:
|
|
4
|
+
- id: field-coverage
|
|
5
|
+
tier: recommended
|
|
6
|
+
title: "100% Field Coverage Enforcement"
|
|
7
|
+
content: |
|
|
8
|
+
## 100% Field Coverage Enforcement
|
|
9
|
+
|
|
10
|
+
- Every data field must have a documented origin (source system, table, column) and destination (target system, table, column).
|
|
11
|
+
- Maintain a field-level lineage registry: no field enters production without a lineage entry. Enforce this in CI.
|
|
12
|
+
- Track field transformations explicitly: document every rename, type cast, aggregation, join, and filter applied to each field.
|
|
13
|
+
- Implement automated coverage checks: compare the lineage registry against actual schema definitions. Flag any untracked fields as errors.
|
|
14
|
+
- Version lineage metadata alongside code: lineage definitions live in the repository, not in external wikis or spreadsheets.
|
|
15
|
+
- Generate lineage reports per pipeline run: which fields were read, transformed, and written, with row counts and data quality metrics.
|
|
16
|
+
|
|
17
|
+
- id: lineage-tracking-decorators
|
|
18
|
+
tier: recommended
|
|
19
|
+
title: "Lineage Tracking Decorators & Annotations"
|
|
20
|
+
content: |
|
|
21
|
+
## Lineage Tracking Decorators & Annotations
|
|
22
|
+
|
|
23
|
+
- Use decorators or annotations on transformation functions to declare input fields, output fields, and transformation type.
|
|
24
|
+
- Standardize lineage metadata format: source, transformation, destination, timestamp, pipeline_id, run_id.
|
|
25
|
+
- Emit lineage events at runtime: every transformation step publishes a lineage event to a centralized lineage store.
|
|
26
|
+
- Support both code-level lineage (decorators on functions) and config-level lineage (YAML/JSON transformation specs).
|
|
27
|
+
- Integrate lineage with data quality: when a quality check fails, trace back through lineage to identify the source of bad data.
|
|
28
|
+
- Visualize lineage graphs: generate dependency diagrams showing field-level flow from source to consumption.
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
tag: DATA-LINEAGE
|
|
2
|
-
section: mcp-servers
|
|
3
|
-
servers:
|
|
4
|
-
- name: postgres
|
|
5
|
-
description: "PostgreSQL database inspection — query lineage metadata and schema definitions"
|
|
6
|
-
command: npx
|
|
7
|
-
args: ["-y", "@modelcontextprotocol/server-postgres"]
|
|
8
|
-
tags: [DATA-LINEAGE, DATA-PIPELINE]
|
|
9
|
-
category: database
|
|
10
|
-
tier: recommended
|
|
11
|
-
env:
|
|
12
|
-
POSTGRES_CONNECTION_STRING: ""
|
|
13
|
-
url: "https://github.com/modelcontextprotocol/servers/tree/main/src/postgres"
|
|
14
|
-
|
|
15
|
-
- name: filesystem
|
|
16
|
-
description: "Filesystem access for lineage definition files and pipeline configurations"
|
|
17
|
-
command: npx
|
|
18
|
-
args: ["-y", "@modelcontextprotocol/server-filesystem"]
|
|
19
|
-
tags: [DATA-LINEAGE, UNIVERSAL]
|
|
20
|
-
category: filesystem
|
|
21
|
-
tier: optional
|
|
22
|
-
url: "https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem"
|
|
1
|
+
tag: DATA-LINEAGE
|
|
2
|
+
section: mcp-servers
|
|
3
|
+
servers:
|
|
4
|
+
- name: postgres
|
|
5
|
+
description: "PostgreSQL database inspection — query lineage metadata and schema definitions"
|
|
6
|
+
command: npx
|
|
7
|
+
args: ["-y", "@modelcontextprotocol/server-postgres"]
|
|
8
|
+
tags: [DATA-LINEAGE, DATA-PIPELINE]
|
|
9
|
+
category: database
|
|
10
|
+
tier: recommended
|
|
11
|
+
env:
|
|
12
|
+
POSTGRES_CONNECTION_STRING: ""
|
|
13
|
+
url: "https://github.com/modelcontextprotocol/servers/tree/main/src/postgres"
|
|
14
|
+
|
|
15
|
+
- name: filesystem
|
|
16
|
+
description: "Filesystem access for lineage definition files and pipeline configurations"
|
|
17
|
+
command: npx
|
|
18
|
+
args: ["-y", "@modelcontextprotocol/server-filesystem"]
|
|
19
|
+
tags: [DATA-LINEAGE, UNIVERSAL]
|
|
20
|
+
category: filesystem
|
|
21
|
+
tier: optional
|
|
22
|
+
url: "https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem"
|
|
@@ -1,84 +1,84 @@
|
|
|
1
|
-
tag: DATA-PIPELINE
|
|
2
|
-
section: instructions
|
|
3
|
-
blocks:
|
|
4
|
-
- id: etl-patterns
|
|
5
|
-
tier: recommended
|
|
6
|
-
title: "ETL & Pipeline Design"
|
|
7
|
-
content: |
|
|
8
|
-
## ETL & Pipeline Orchestration
|
|
9
|
-
|
|
10
|
-
- Design every pipeline stage to be idempotent. Re-running a stage with the same input must produce the same output without side effects or duplicates.
|
|
11
|
-
- Use a DAG-based orchestrator (e.g., Airflow, Dagster, Prefect) to define task dependencies, schedule runs, and handle retries with exponential backoff.
|
|
12
|
-
- Partition data by time (daily/hourly) or logical key so that re-processing a failed partition does not require reprocessing the entire dataset.
|
|
13
|
-
- Implement the Extract-Load-Transform (ELT) pattern when the target warehouse supports compute; push transformations into SQL/dbt models for transparency and version control.
|
|
14
|
-
- Tag every pipeline run with a unique run ID. Propagate this ID through all stages and into output metadata for end-to-end lineage tracking.
|
|
15
|
-
- Keep extraction, transformation, and loading logic in separate, independently testable modules. Avoid monolithic scripts that mix concerns.
|
|
16
|
-
- Define SLAs for each pipeline (e.g., "daily sales pipeline completes by 06:00 UTC"). Alert on SLA breach, not just on failure.
|
|
17
|
-
|
|
18
|
-
- id: data-validation
|
|
19
|
-
tier: recommended
|
|
20
|
-
title: "Data Validation & Quality"
|
|
21
|
-
content: |
|
|
22
|
-
## Data Validation & Quality Gates
|
|
23
|
-
|
|
24
|
-
- Validate data at every boundary: after extraction (schema conformance), after transformation (business rules), and before loading (referential integrity).
|
|
25
|
-
- Use schema contracts (e.g., JSON Schema, Avro, Protobuf, or Great Expectations suites) to enforce column types, nullability, and allowed value ranges.
|
|
26
|
-
- Implement row-count and distribution checks between stages. A sudden drop or spike in row count (> 20% deviation from baseline) should trigger an alert and pause downstream processing.
|
|
27
|
-
- Quarantine invalid records into a dead-letter table rather than dropping them silently. Include the original record, the validation error, and the run ID.
|
|
28
|
-
- Maintain a data quality dashboard that tracks freshness, completeness, uniqueness, and accuracy metrics per dataset over time.
|
|
29
|
-
- Write unit tests for transformation logic using fixed input fixtures. Write integration tests that run a mini-pipeline against a test database.
|
|
30
|
-
|
|
31
|
-
- id: reliability-patterns
|
|
32
|
-
tier: recommended
|
|
33
|
-
title: "Reliability & Error Handling"
|
|
34
|
-
content: |
|
|
35
|
-
## Reliability, Retry & Recovery
|
|
36
|
-
|
|
37
|
-
- Configure retries with exponential backoff and jitter for transient failures (network timeouts, rate limits, temporary unavailability). Cap retries at 3-5 attempts.
|
|
38
|
-
- Use checkpointing or write-ahead logs for long-running pipelines so that a restart resumes from the last successful checkpoint, not from the beginning.
|
|
39
|
-
- Implement circuit breakers on external API calls to prevent cascading failures when a source system is degraded.
|
|
40
|
-
- Log structured events (JSON) for every stage: start, success, failure, retry, and skip. Include record counts, duration, and error details.
|
|
41
|
-
- Design for exactly-once semantics where possible using upserts (INSERT ... ON CONFLICT UPDATE) or deduplication keys in the target store.
|
|
42
|
-
- Maintain a pipeline runbook that documents failure modes, recovery steps, and escalation contacts. Review and update it quarterly.
|
|
43
|
-
|
|
44
|
-
- id: data-pipeline-testing
|
|
45
|
-
tier: recommended
|
|
46
|
-
title: "Data Pipeline Testing Requirements"
|
|
47
|
-
content: |
|
|
48
|
-
## Data Pipeline Testing Requirements
|
|
49
|
-
|
|
50
|
-
### Data Quality Assertions at Every Stage
|
|
51
|
-
Assert at each pipeline boundary — after extraction, after transformation, before loading:
|
|
52
|
-
- **Row count** — within expected range or ± % of prior run baseline.
|
|
53
|
-
- **Null rate** — per column, against stated threshold (e.g., `customer_id` must never be null).
|
|
54
|
-
- **Schema compliance** — column names, types, nullable flags match the declared schema.
|
|
55
|
-
- **Referential integrity** — foreign key values exist in the referenced table.
|
|
56
|
-
- **Distribution checks** — value ranges and cardinality within expected bounds. Alert on sudden shifts (> 20% deviation from trailing 7-day average).
|
|
57
|
-
Use Great Expectations, dbt tests, or equivalent. Assertions are first-class artifacts — committed, versioned, and enforced in CI.
|
|
58
|
-
|
|
59
|
-
### Idempotency Tests
|
|
60
|
-
Re-running the pipeline with the same input must produce identical output with no side effects. Test explicitly:
|
|
61
|
-
- Run pipeline N once; capture output row count and checksum.
|
|
62
|
-
- Run pipeline N again with the same source data.
|
|
63
|
-
- Assert: output row count and checksum are identical; no duplicates in target; no reprocessed records in dead-letter queue.
|
|
64
|
-
Idempotency is a contract, not an assumption. If a stage is not idempotent by design, that decision requires an ADR.
|
|
65
|
-
|
|
66
|
-
### Backfill Correctness Tests
|
|
67
|
-
Historical data processed against current transformation logic must produce correct results:
|
|
68
|
-
- Define a fixed historical fixture (date range, known input, known output).
|
|
69
|
-
- Run current pipeline logic against the fixture.
|
|
70
|
-
- Assert output matches expected historical output.
|
|
71
|
-
Any change to transformation logic that causes a backfill regression is a breaking change requiring an ADR.
|
|
72
|
-
|
|
73
|
-
### Dead-Letter Queue Draining Tests
|
|
74
|
-
Malformed records must be handled — not silently dropped and not blocking the pipeline:
|
|
75
|
-
- Inject a record that violates the schema; assert it routes to the dead-letter table/queue.
|
|
76
|
-
- Inject a record that passes schema but fails a business rule; assert correct routing.
|
|
77
|
-
- Assert the DLQ record contains: original payload, validation error, run ID, timestamp.
|
|
78
|
-
- Test DLQ drain procedure: records can be corrected, resubmitted, and processed successfully.
|
|
79
|
-
|
|
80
|
-
### Volume / Scale Tests at Staging
|
|
81
|
-
Unit-scale fixtures are insufficient for pipeline correctness. Run at representative data volume:
|
|
82
|
-
- At staging: use a realistic data volume (minimum 10% of production record count, or a known large-batch scenario).
|
|
83
|
-
- Assert: completion within SLA window; no memory exhaustion; partition output correct.
|
|
84
|
-
- Document the scale test fixture size and the SLA threshold in the spec.
|
|
1
|
+
tag: DATA-PIPELINE
|
|
2
|
+
section: instructions
|
|
3
|
+
blocks:
|
|
4
|
+
- id: etl-patterns
|
|
5
|
+
tier: recommended
|
|
6
|
+
title: "ETL & Pipeline Design"
|
|
7
|
+
content: |
|
|
8
|
+
## ETL & Pipeline Orchestration
|
|
9
|
+
|
|
10
|
+
- Design every pipeline stage to be idempotent. Re-running a stage with the same input must produce the same output without side effects or duplicates.
|
|
11
|
+
- Use a DAG-based orchestrator (e.g., Airflow, Dagster, Prefect) to define task dependencies, schedule runs, and handle retries with exponential backoff.
|
|
12
|
+
- Partition data by time (daily/hourly) or logical key so that re-processing a failed partition does not require reprocessing the entire dataset.
|
|
13
|
+
- Implement the Extract-Load-Transform (ELT) pattern when the target warehouse supports compute; push transformations into SQL/dbt models for transparency and version control.
|
|
14
|
+
- Tag every pipeline run with a unique run ID. Propagate this ID through all stages and into output metadata for end-to-end lineage tracking.
|
|
15
|
+
- Keep extraction, transformation, and loading logic in separate, independently testable modules. Avoid monolithic scripts that mix concerns.
|
|
16
|
+
- Define SLAs for each pipeline (e.g., "daily sales pipeline completes by 06:00 UTC"). Alert on SLA breach, not just on failure.
|
|
17
|
+
|
|
18
|
+
- id: data-validation
|
|
19
|
+
tier: recommended
|
|
20
|
+
title: "Data Validation & Quality"
|
|
21
|
+
content: |
|
|
22
|
+
## Data Validation & Quality Gates
|
|
23
|
+
|
|
24
|
+
- Validate data at every boundary: after extraction (schema conformance), after transformation (business rules), and before loading (referential integrity).
|
|
25
|
+
- Use schema contracts (e.g., JSON Schema, Avro, Protobuf, or Great Expectations suites) to enforce column types, nullability, and allowed value ranges.
|
|
26
|
+
- Implement row-count and distribution checks between stages. A sudden drop or spike in row count (> 20% deviation from baseline) should trigger an alert and pause downstream processing.
|
|
27
|
+
- Quarantine invalid records into a dead-letter table rather than dropping them silently. Include the original record, the validation error, and the run ID.
|
|
28
|
+
- Maintain a data quality dashboard that tracks freshness, completeness, uniqueness, and accuracy metrics per dataset over time.
|
|
29
|
+
- Write unit tests for transformation logic using fixed input fixtures. Write integration tests that run a mini-pipeline against a test database.
|
|
30
|
+
|
|
31
|
+
- id: reliability-patterns
|
|
32
|
+
tier: recommended
|
|
33
|
+
title: "Reliability & Error Handling"
|
|
34
|
+
content: |
|
|
35
|
+
## Reliability, Retry & Recovery
|
|
36
|
+
|
|
37
|
+
- Configure retries with exponential backoff and jitter for transient failures (network timeouts, rate limits, temporary unavailability). Cap retries at 3-5 attempts.
|
|
38
|
+
- Use checkpointing or write-ahead logs for long-running pipelines so that a restart resumes from the last successful checkpoint, not from the beginning.
|
|
39
|
+
- Implement circuit breakers on external API calls to prevent cascading failures when a source system is degraded.
|
|
40
|
+
- Log structured events (JSON) for every stage: start, success, failure, retry, and skip. Include record counts, duration, and error details.
|
|
41
|
+
- Design for exactly-once semantics where possible using upserts (INSERT ... ON CONFLICT DO UPDATE) or deduplication keys in the target store.
|
|
42
|
+
- Maintain a pipeline runbook that documents failure modes, recovery steps, and escalation contacts. Review and update it quarterly.
|
|
43
|
+
|
|
44
|
+
- id: data-pipeline-testing
|
|
45
|
+
tier: recommended
|
|
46
|
+
title: "Data Pipeline Testing Requirements"
|
|
47
|
+
content: |
|
|
48
|
+
## Data Pipeline Testing Requirements
|
|
49
|
+
|
|
50
|
+
### Data Quality Assertions at Every Stage
|
|
51
|
+
Assert at each pipeline boundary — after extraction, after transformation, before loading:
|
|
52
|
+
- **Row count** — within the expected range, or within a stated ± % tolerance of the prior run's baseline (e.g., ± 20%).
|
|
53
|
+
- **Null rate** — per column, against stated threshold (e.g., `customer_id` must never be null).
|
|
54
|
+
- **Schema compliance** — column names, types, nullable flags match the declared schema.
|
|
55
|
+
- **Referential integrity** — foreign key values exist in the referenced table.
|
|
56
|
+
- **Distribution checks** — value ranges and cardinality within expected bounds. Alert on sudden shifts (> 20% deviation from trailing 7-day average).
|
|
57
|
+
Use Great Expectations, dbt tests, or equivalent. Assertions are first-class artifacts — committed, versioned, and enforced in CI.
|
|
58
|
+
|
|
59
|
+
### Idempotency Tests
|
|
60
|
+
Re-running the pipeline with the same input must produce identical output with no side effects. Test explicitly:
|
|
61
|
+
- Run pipeline N once; capture output row count and checksum.
|
|
62
|
+
- Run pipeline N again with the same source data.
|
|
63
|
+
- Assert: output row count and checksum are identical; no duplicates in target; no reprocessed records in dead-letter queue.
|
|
64
|
+
Idempotency is a contract, not an assumption. If a stage is not idempotent by design, that decision requires an ADR.
|
|
65
|
+
|
|
66
|
+
### Backfill Correctness Tests
|
|
67
|
+
Historical data processed against current transformation logic must produce correct results:
|
|
68
|
+
- Define a fixed historical fixture (date range, known input, known output).
|
|
69
|
+
- Run current pipeline logic against the fixture.
|
|
70
|
+
- Assert output matches expected historical output.
|
|
71
|
+
Any change to transformation logic that causes a backfill regression is a breaking change requiring an ADR.
|
|
72
|
+
|
|
73
|
+
### Dead-Letter Queue Draining Tests
|
|
74
|
+
Malformed records must be handled — not silently dropped and not blocking the pipeline:
|
|
75
|
+
- Inject a record that violates the schema; assert it routes to the dead-letter table/queue.
|
|
76
|
+
- Inject a record that passes schema but fails a business rule; assert correct routing.
|
|
77
|
+
- Assert the DLQ record contains: original payload, validation error, run ID, timestamp.
|
|
78
|
+
- Test DLQ drain procedure: records can be corrected, resubmitted, and processed successfully.
|
|
79
|
+
|
|
80
|
+
### Volume / Scale Tests at Staging
|
|
81
|
+
Unit-scale fixtures are insufficient for pipeline correctness. Run at representative data volume:
|
|
82
|
+
- At staging: use a realistic data volume (minimum 10% of production record count, or a known large-batch scenario).
|
|
83
|
+
- Assert: completion within SLA window; no memory exhaustion; partition output correct.
|
|
84
|
+
- Document the scale test fixture size and the SLA threshold in the spec.
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
tag: DATA-PIPELINE
|
|
2
|
-
section: mcp-servers
|
|
3
|
-
servers:
|
|
4
|
-
- name: postgres
|
|
5
|
-
description: "PostgreSQL database inspection, queries, and schema management"
|
|
6
|
-
command: npx
|
|
7
|
-
args: ["-y", "@modelcontextprotocol/server-postgres"]
|
|
8
|
-
tags: [DATA-PIPELINE, API]
|
|
9
|
-
category: database
|
|
10
|
-
tier: recommended
|
|
11
|
-
env:
|
|
12
|
-
POSTGRES_CONNECTION_STRING: ""
|
|
13
|
-
url: "https://github.com/modelcontextprotocol/servers/tree/main/src/postgres"
|
|
1
|
+
tag: DATA-PIPELINE
|
|
2
|
+
section: mcp-servers
|
|
3
|
+
servers:
|
|
4
|
+
- name: postgres
|
|
5
|
+
description: "PostgreSQL database inspection, queries, and schema management"
|
|
6
|
+
command: npx
|
|
7
|
+
args: ["-y", "@modelcontextprotocol/server-postgres"]
|
|
8
|
+
tags: [DATA-PIPELINE, API]
|
|
9
|
+
category: database
|
|
10
|
+
tier: recommended
|
|
11
|
+
env:
|
|
12
|
+
POSTGRES_CONNECTION_STRING: ""
|
|
13
|
+
url: "https://github.com/modelcontextprotocol/servers/tree/main/src/postgres"
|
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
tag: DATA-PIPELINE
|
|
2
|
-
section: nfr
|
|
3
|
-
blocks:
|
|
4
|
-
- id: pipeline-reliability
|
|
5
|
-
tier: recommended
|
|
6
|
-
title: "Pipeline Reliability"
|
|
7
|
-
content: |
|
|
8
|
-
## NFR: Pipeline Reliability
|
|
9
|
-
|
|
10
|
-
### SLAs
|
|
11
|
-
- Data freshness SLA: {{freshness_sla | default: 1 hour}} from source to destination.
|
|
12
|
-
- Pipeline success rate: ≥ {{pipeline_success_rate | default: 99%}} per day.
|
|
13
|
-
- Alert on SLA breach within 15 minutes.
|
|
14
|
-
|
|
15
|
-
### Idempotency
|
|
16
|
-
- Every pipeline step is idempotent — safe to rerun without duplicating data.
|
|
17
|
-
- Backfill supported: reprocess historical date ranges without manual intervention.
|
|
18
|
-
- Partial failures resume from last checkpoint, not from scratch.
|
|
19
|
-
|
|
20
|
-
### Data Quality
|
|
21
|
-
- Schema validation at ingestion. Reject or quarantine malformed records.
|
|
22
|
-
- Row count, null rate, uniqueness, and distribution checks after each stage.
|
|
23
|
-
- Data quality metrics tracked over time. Alert on anomalies.
|
|
24
|
-
|
|
25
|
-
- id: pipeline-performance
|
|
26
|
-
tier: recommended
|
|
27
|
-
title: "Pipeline Performance"
|
|
28
|
-
content: |
|
|
29
|
-
## NFR: Pipeline Performance
|
|
30
|
-
|
|
31
|
-
### Throughput
|
|
32
|
-
- Target throughput: {{pipeline_throughput | default: 100K records/minute}}.
|
|
33
|
-
- Batch vs. streaming decision documented with rationale.
|
|
34
|
-
- Partitioning strategy aligned with query patterns.
|
|
35
|
-
|
|
36
|
-
### Resource Management
|
|
37
|
-
- Compute resources auto-scale with data volume. No over-provisioning for peak.
|
|
38
|
-
- Cost per GB processed tracked. Budget alerts at 80% threshold.
|
|
39
|
-
- Temporary/staging data cleaned up automatically after pipeline completion.
|
|
1
|
+
tag: DATA-PIPELINE
|
|
2
|
+
section: nfr
|
|
3
|
+
blocks:
|
|
4
|
+
- id: pipeline-reliability
|
|
5
|
+
tier: recommended
|
|
6
|
+
title: "Pipeline Reliability"
|
|
7
|
+
content: |
|
|
8
|
+
## NFR: Pipeline Reliability
|
|
9
|
+
|
|
10
|
+
### SLAs
|
|
11
|
+
- Data freshness SLA: {{freshness_sla | default: 1 hour}} from source to destination.
|
|
12
|
+
- Pipeline success rate: ≥ {{pipeline_success_rate | default: 99%}} per day.
|
|
13
|
+
- Alert on SLA breach within 15 minutes.
|
|
14
|
+
|
|
15
|
+
### Idempotency
|
|
16
|
+
- Every pipeline step is idempotent — safe to rerun without duplicating data.
|
|
17
|
+
- Backfill supported: reprocess historical date ranges without manual intervention.
|
|
18
|
+
- Partial failures resume from last checkpoint, not from scratch.
|
|
19
|
+
|
|
20
|
+
### Data Quality
|
|
21
|
+
- Schema validation at ingestion. Reject or quarantine malformed records.
|
|
22
|
+
- Row count, null rate, uniqueness, and distribution checks after each stage.
|
|
23
|
+
- Data quality metrics tracked over time. Alert on anomalies.
|
|
24
|
+
|
|
25
|
+
- id: pipeline-performance
|
|
26
|
+
tier: recommended
|
|
27
|
+
title: "Pipeline Performance"
|
|
28
|
+
content: |
|
|
29
|
+
## NFR: Pipeline Performance
|
|
30
|
+
|
|
31
|
+
### Throughput
|
|
32
|
+
- Target throughput: {{pipeline_throughput | default: 100K records/minute}}.
|
|
33
|
+
- Batch vs. streaming decision documented with rationale.
|
|
34
|
+
- Partitioning strategy aligned with query patterns.
|
|
35
|
+
|
|
36
|
+
### Resource Management
|
|
37
|
+
- Compute resources auto-scale with data volume. No over-provisioning for peak.
|
|
38
|
+
- Cost per GB processed tracked. Budget alerts at 80% threshold.
|
|
39
|
+
- Temporary/staging data cleaned up automatically after pipeline completion.
|
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
tag: DATA-PIPELINE
|
|
2
|
-
section: structure
|
|
3
|
-
entries:
|
|
4
|
-
- path: dags/
|
|
5
|
-
description: "Pipeline orchestration definitions (Airflow DAGs, Prefect flows, Dagster jobs)"
|
|
6
|
-
- path: src/extractors/
|
|
7
|
-
description: "Data source connectors: API, database, file extractors"
|
|
8
|
-
- path: src/transformers/
|
|
9
|
-
description: "Data transformation logic: cleaning, enrichment, aggregation"
|
|
10
|
-
- path: src/loaders/
|
|
11
|
-
description: "Data sinks: warehouse loaders, file writers, API publishers"
|
|
12
|
-
- path: src/validators/
|
|
13
|
-
description: "Data quality checks: schema validation, business rules, anomaly detection"
|
|
14
|
-
- path: src/config/
|
|
15
|
-
description: "Pipeline configuration: sources, schedules, thresholds (YAML)"
|
|
16
|
-
- path: tests/
|
|
17
|
-
description: "Unit tests for transformers, validators, and business logic"
|
|
18
|
-
- path: tests/fixtures/
|
|
19
|
-
description: "Sample data files for deterministic testing"
|
|
20
|
-
- path: sql/
|
|
21
|
-
description: "SQL transformations and DDL for warehouse tables"
|
|
22
|
-
- path: scripts/
|
|
23
|
-
description: "Operational scripts: backfill, reprocess, data repair"
|
|
1
|
+
tag: DATA-PIPELINE
|
|
2
|
+
section: structure
|
|
3
|
+
entries:
|
|
4
|
+
- path: dags/
|
|
5
|
+
description: "Pipeline orchestration definitions (Airflow DAGs, Prefect flows, Dagster jobs)"
|
|
6
|
+
- path: src/extractors/
|
|
7
|
+
description: "Data source connectors: API, database, file extractors"
|
|
8
|
+
- path: src/transformers/
|
|
9
|
+
description: "Data transformation logic: cleaning, enrichment, aggregation"
|
|
10
|
+
- path: src/loaders/
|
|
11
|
+
description: "Data sinks: warehouse loaders, file writers, API publishers"
|
|
12
|
+
- path: src/validators/
|
|
13
|
+
description: "Data quality checks: schema validation, business rules, anomaly detection"
|
|
14
|
+
- path: src/config/
|
|
15
|
+
description: "Pipeline configuration: sources, schedules, thresholds (YAML)"
|
|
16
|
+
- path: tests/
|
|
17
|
+
description: "Unit tests for transformers, validators, and business logic"
|
|
18
|
+
- path: tests/fixtures/
|
|
19
|
+
description: "Sample data files for deterministic testing"
|
|
20
|
+
- path: sql/
|
|
21
|
+
description: "SQL transformations and DDL for warehouse tables"
|
|
22
|
+
- path: scripts/
|
|
23
|
+
description: "Operational scripts: backfill, reprocess, data repair"
|