dbt-meta 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_meta-0.1.0/.gitignore +43 -0
- dbt_meta-0.1.0/CHANGELOG.md +109 -0
- dbt_meta-0.1.0/CLAUDE.md +492 -0
- dbt_meta-0.1.0/LICENSE +201 -0
- dbt_meta-0.1.0/PKG-INFO +452 -0
- dbt_meta-0.1.0/README.md +418 -0
- dbt_meta-0.1.0/pyproject.toml +92 -0
- dbt_meta-0.1.0/src/dbt_meta/__init__.py +8 -0
- dbt_meta-0.1.0/src/dbt_meta/catalog/__init__.py +8 -0
- dbt_meta-0.1.0/src/dbt_meta/catalog/parser.py +242 -0
- dbt_meta-0.1.0/src/dbt_meta/cli.py +1134 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/__init__.py +0 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/base.py +252 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/children.py +140 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/columns.py +407 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/config.py +122 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/deps.py +70 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/info.py +135 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/lineage_utils.py +141 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/parents.py +140 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/path.py +176 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/schema.py +202 -0
- dbt_meta-0.1.0/src/dbt_meta/command_impl/sql.py +73 -0
- dbt_meta-0.1.0/src/dbt_meta/commands.py +475 -0
- dbt_meta-0.1.0/src/dbt_meta/config.py +482 -0
- dbt_meta-0.1.0/src/dbt_meta/errors.py +207 -0
- dbt_meta-0.1.0/src/dbt_meta/fallback.py +249 -0
- dbt_meta-0.1.0/src/dbt_meta/manifest/__init__.py +6 -0
- dbt_meta-0.1.0/src/dbt_meta/manifest/finder.py +106 -0
- dbt_meta-0.1.0/src/dbt_meta/manifest/parser.py +153 -0
- dbt_meta-0.1.0/src/dbt_meta/templates/dbt-meta.toml +430 -0
- dbt_meta-0.1.0/src/dbt_meta/utils/__init__.py +96 -0
- dbt_meta-0.1.0/src/dbt_meta/utils/bigquery.py +308 -0
- dbt_meta-0.1.0/src/dbt_meta/utils/dev.py +257 -0
- dbt_meta-0.1.0/src/dbt_meta/utils/git.py +463 -0
- dbt_meta-0.1.0/src/dbt_meta/utils/model_state.py +160 -0
- dbt_meta-0.1.0/tests/__init__.py +1 -0
- dbt_meta-0.1.0/tests/conftest.py +165 -0
- dbt_meta-0.1.0/tests/integration/__init__.py +0 -0
- dbt_meta-0.1.0/tests/integration/conftest.py +59 -0
- dbt_meta-0.1.0/tests/integration/test_cli_e2e.py +528 -0
- dbt_meta-0.1.0/tests/test_bigquery_final_coverage.py +105 -0
- dbt_meta-0.1.0/tests/test_bigquery_retry.py +261 -0
- dbt_meta-0.1.0/tests/test_catalog.py +323 -0
- dbt_meta-0.1.0/tests/test_columns_deps_final.py +106 -0
- dbt_meta-0.1.0/tests/test_command_coverage.py +236 -0
- dbt_meta-0.1.0/tests/test_commands.py +1846 -0
- dbt_meta-0.1.0/tests/test_config.py +548 -0
- dbt_meta-0.1.0/tests/test_decision_tree_scenarios.py +526 -0
- dbt_meta-0.1.0/tests/test_edge_cases.py +470 -0
- dbt_meta-0.1.0/tests/test_errors.py +238 -0
- dbt_meta-0.1.0/tests/test_exception_handling.py +175 -0
- dbt_meta-0.1.0/tests/test_fallback.py +303 -0
- dbt_meta-0.1.0/tests/test_final_95_push.py +144 -0
- dbt_meta-0.1.0/tests/test_final_coverage_push.py +147 -0
- dbt_meta-0.1.0/tests/test_git_edge_coverage.py +168 -0
- dbt_meta-0.1.0/tests/test_git_safety.py +160 -0
- dbt_meta-0.1.0/tests/test_infrastructure.py +1214 -0
- dbt_meta-0.1.0/tests/test_model_states.py +206 -0
- dbt_meta-0.1.0/tests/test_path_bigquery_coverage.py +110 -0
- dbt_meta-0.1.0/tests/test_remaining_gaps.py +180 -0
- dbt_meta-0.1.0/tests/test_table_resolution.py +191 -0
- dbt_meta-0.1.0/tests/test_utils_coverage.py +187 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# macOS
|
|
2
|
+
.DS_Store
|
|
3
|
+
.AppleDouble
|
|
4
|
+
.LSOverride
|
|
5
|
+
|
|
6
|
+
# Vim
|
|
7
|
+
*.swp
|
|
8
|
+
*.swo
|
|
9
|
+
*~
|
|
10
|
+
|
|
11
|
+
# Emacs
|
|
12
|
+
*~
|
|
13
|
+
\#*\#
|
|
14
|
+
.\#*
|
|
15
|
+
|
|
16
|
+
# Backup files
|
|
17
|
+
*.bak
|
|
18
|
+
*.backup
|
|
19
|
+
|
|
20
|
+
# Logs
|
|
21
|
+
*.log
|
|
22
|
+
|
|
23
|
+
# Temporary files
|
|
24
|
+
tmp/
|
|
25
|
+
temp/
|
|
26
|
+
|
|
27
|
+
# Ignore all dotfiles and dotfolders
|
|
28
|
+
.*
|
|
29
|
+
|
|
30
|
+
# But keep these
|
|
31
|
+
!.gitignore
|
|
32
|
+
|
|
33
|
+
# Python cache and test artifacts
|
|
34
|
+
__pycache__/
|
|
35
|
+
*.py[cod]
|
|
36
|
+
*$py.class
|
|
37
|
+
htmlcov/
|
|
38
|
+
.coverage
|
|
39
|
+
.pytest_cache/
|
|
40
|
+
.benchmarks/
|
|
41
|
+
|
|
42
|
+
# Project-specific development files
|
|
43
|
+
CLAUDE.md*.json
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2025-11-25
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
#### TOML Configuration Support
|
|
13
|
+
- **Modern configuration files** with XDG Base Directory compliance
|
|
14
|
+
- Config locations: `./.dbt-meta.toml`, `~/.config/dbt-meta/config.toml`, `~/.dbt-meta.toml`
|
|
15
|
+
- Priority: CLI flags > TOML config > Environment variables > Defaults
|
|
16
|
+
- Template-based initialization with inline documentation
|
|
17
|
+
- TOML parsing: `tomllib` (Python 3.11+) or `tomli` (Python <3.11)
|
|
18
|
+
|
|
19
|
+
#### Settings Management Commands
|
|
20
|
+
- **`meta settings init`** - Create config file from template
|
|
21
|
+
- **`meta settings show`** - Display current merged configuration (text or JSON)
|
|
22
|
+
- **`meta settings validate`** - Validate config file syntax and values
|
|
23
|
+
- **`meta settings path`** - Show path to active config file
|
|
24
|
+
|
|
25
|
+
#### Configuration System
|
|
26
|
+
- **`Config.from_toml()`** - Load configuration from TOML file
|
|
27
|
+
- **`Config.from_config_or_env()`** - Load from TOML with env var fallback
|
|
28
|
+
- **`Config.from_env()`** - Load from environment variables only
|
|
29
|
+
- **`Config.find_config_file()`** - Auto-discover config file
|
|
30
|
+
- Full configuration dataclass with type hints and validation
|
|
31
|
+
- Automatic path expansion (~/ to home directory)
|
|
32
|
+
- Boolean parsing with sensible defaults
|
|
33
|
+
|
|
34
|
+
#### CLI Improvements
|
|
35
|
+
- **`-h` short flag support** - Both `-h` and `--help` work for all commands
|
|
36
|
+
- Enabled via `context_settings={"help_option_names": ["-h", "--help"]}`
|
|
37
|
+
- Works for main app and all subcommands (settings, etc.)
|
|
38
|
+
- **Simplified `schema` command output**
|
|
39
|
+
- Text mode: Just the full table name (e.g., `admirals-bi-dwh.core_client.client_info`)
|
|
40
|
+
- JSON mode: `{"model_name": "...", "full_name": "..."}`
|
|
41
|
+
- Optimized for shell scripting and AI consumption
|
|
42
|
+
|
|
43
|
+
#### Username Sanitization
|
|
44
|
+
- **Improved BigQuery dataset compatibility**
|
|
45
|
+
- Replaces every character that is not a letter, digit, or underscore (not just dots/hyphens)
|
|
46
|
+
- Uses regex: `re.sub(r'[^a-zA-Z0-9_]', '_', username)`
|
|
47
|
+
- Examples: `pavel.filianin` → `pavel_filianin`, `user@example.com` → `user_example_com`
|
|
48
|
+
- Ensures valid BigQuery dataset names (letters, numbers, underscores only)
|
|
49
|
+
|
|
50
|
+
#### Core Metadata Commands
|
|
51
|
+
- **`info`** - Model summary (name, schema, table, materialization, tags)
|
|
52
|
+
- **`schema`** - Full table name (database.schema.table)
|
|
53
|
+
- **`path`** - Relative file path to .sql file
|
|
54
|
+
- **`columns`** - Column names and types with catalog/BigQuery fallback
|
|
55
|
+
- **`sql`** - Compiled SQL (default) or raw SQL with `--jinja` flag
|
|
56
|
+
- **`docs`** - Column names, types, and descriptions
|
|
57
|
+
- **`config`** - Full dbt config (partition_by, cluster_by, etc.)
|
|
58
|
+
|
|
59
|
+
#### Dependency Navigation
|
|
60
|
+
- **`deps`** - Dependencies by type (refs, sources, macros)
|
|
61
|
+
- **`parents`** - Upstream dependencies (direct or all ancestors with `-a`)
|
|
62
|
+
- **`children`** - Downstream dependencies (direct or all descendants with `-a`)
|
|
63
|
+
|
|
64
|
+
#### Search and Discovery
|
|
65
|
+
- **`list [pattern]`** - List all models (optionally filter by pattern)
|
|
66
|
+
- **`search <query>`** - Search models by name or description
|
|
67
|
+
|
|
68
|
+
#### Fallback System
|
|
69
|
+
- **3-level fallback** - Production manifest → Dev manifest → Catalog.json
|
|
70
|
+
- **Catalog.json support** - Fallback to catalog when manifest columns empty
|
|
71
|
+
- **Environment variables** - `DBT_FALLBACK_TARGET`, `DBT_FALLBACK_CATALOG` (default: `true`)
|
|
72
|
+
- **Intelligent warnings** - Automatic git change detection with helpful suggestions
|
|
73
|
+
|
|
74
|
+
#### Output Modes
|
|
75
|
+
- **`--json, -j`** - JSON output for all commands (AI-friendly)
|
|
76
|
+
- **Rich formatted output** - Colored tables and text (default)
|
|
77
|
+
- **Combined flags** - Use `-dj`, `-ajd`, `-jm` for faster typing
|
|
78
|
+
|
|
79
|
+
#### Flexible Naming Configuration
|
|
80
|
+
- **Production table naming** - `DBT_PROD_TABLE_NAME` (alias_or_name, name, alias)
|
|
81
|
+
- **Production schema/database** - `DBT_PROD_SCHEMA_SOURCE` (config_or_model, model, config)
|
|
82
|
+
- **Dev schema naming** - `DBT_DEV_SCHEMA` (direct override, default: `personal_{username}`)
|
|
83
|
+
- **Username override** - `DBT_USER` (default: `$USER`)
|
|
84
|
+
|
|
85
|
+
#### Manifest Discovery
|
|
86
|
+
- **Auto-discovery** - Searches for manifest.json automatically
|
|
87
|
+
- **Simple Mode** - Works out-of-box with `./target/manifest.json` after `dbt compile`
|
|
88
|
+
- **Production Mode** - `DBT_PROD_MANIFEST_PATH` for central manifest location
|
|
89
|
+
- **Dev Mode** - `DBT_DEV_MANIFEST_PATH` (default: `./target/manifest.json`)
|
|
90
|
+
- **`-m, --manifest` flag** - Explicit manifest path override
|
|
91
|
+
|
|
92
|
+
#### Testing
|
|
93
|
+
- **416 comprehensive tests** - All passing
|
|
94
|
+
- **91.67% code coverage** - Exceeds 90% requirement
|
|
95
|
+
- Test categories: unit, integration, performance benchmarks
|
|
96
|
+
- Edge case testing: empty strings, null values, special characters
|
|
97
|
+
|
|
98
|
+
#### Performance
|
|
99
|
+
- **LRU caching** - Manifest parser cached for sub-10ms responses
|
|
100
|
+
- **orjson** - Fast JSON parsing (6-20x faster than stdlib)
|
|
101
|
+
- **Lazy loading** - Manifest loaded only when needed
|
|
102
|
+
|
|
103
|
+
#### Documentation
|
|
104
|
+
- Comprehensive README with examples and use cases
|
|
105
|
+
- Environment variables reference
|
|
106
|
+
- CLAUDE.md for AI agent integration
|
|
107
|
+
- Apache 2.0 license
|
|
108
|
+
|
|
109
|
+
[0.1.0]: https://github.com/Filianin/dbt-meta/releases/tag/v0.1.0
|
dbt_meta-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
**dbt-meta** is an AI-first CLI tool for extracting metadata from dbt's `manifest.json`.
|
|
6
|
+
|
|
7
|
+
**Key Design Principles:**
|
|
8
|
+
- Performance-first: LRU caching, orjson parser, lazy loading
|
|
9
|
+
- AI-optimized: JSON output mode, deterministic responses
|
|
10
|
+
- Production-first: Automatically prioritizes production manifest
|
|
11
|
+
- Fallback-enabled: BigQuery fallback when models are missing from the manifest
|
|
12
|
+
|
|
13
|
+
## Development Setup
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Install in development mode
|
|
17
|
+
pip install -e ".[dev]"
|
|
18
|
+
|
|
19
|
+
# Run tests (95%+ coverage required)
|
|
20
|
+
pytest --cov=dbt_meta
|
|
21
|
+
|
|
22
|
+
# Type checking + linting
|
|
23
|
+
mypy src/dbt_meta && ruff check src/dbt_meta
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Development Guidelines
|
|
27
|
+
|
|
28
|
+
### File Management
|
|
29
|
+
|
|
30
|
+
- **NO temporary files in `/tmp/`** - Save all files in project root instead
|
|
31
|
+
- Temporary files should be visible in git (easy to review and discard)
|
|
32
|
+
- Test scripts, debug files, analysis files - all go in project root
|
|
33
|
+
- Example: `./test_catalog_fallback.sh` instead of `/tmp/test_catalog_fallback.sh`
|
|
34
|
+
|
|
35
|
+
## Architecture
|
|
36
|
+
|
|
37
|
+
### Module Structure
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
src/dbt_meta/
|
|
41
|
+
├── cli.py # Typer CLI + Rich formatting
|
|
42
|
+
├── commands.py # Command implementations + BigQuery fallback
|
|
43
|
+
├── errors.py # Exception hierarchy
|
|
44
|
+
├── config.py # Configuration management
|
|
45
|
+
├── fallback.py # 3-level fallback strategy
|
|
46
|
+
├── utils/ # Utility modules
|
|
47
|
+
│ ├── __init__.py # Parser caching, warnings
|
|
48
|
+
│ └── git.py # Git operations
|
|
49
|
+
└── manifest/
|
|
50
|
+
├── parser.py # Fast manifest parsing (orjson + caching)
|
|
51
|
+
└── finder.py # 4-level manifest discovery
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Key Patterns
|
|
55
|
+
|
|
56
|
+
#### 1. Three-Level Caching Strategy
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
# Level 1: Parser instance caching (commands.py:20-34)
|
|
60
|
+
@lru_cache(maxsize=1)
|
|
61
|
+
def _get_cached_parser(manifest_path: str) -> ManifestParser
|
|
62
|
+
|
|
63
|
+
# Level 2: Manifest lazy loading (manifest/parser.py:28-58)
|
|
64
|
+
@cached_property
|
|
65
|
+
def manifest(self) -> Dict[str, Any]
|
|
66
|
+
|
|
67
|
+
# Level 3: orjson for fast parsing (6-20x faster than stdlib)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Result:** Sub-10ms response times after first command.
|
|
71
|
+
|
|
72
|
+
**CRITICAL:** Always use `_get_cached_parser()`, never instantiate `ManifestParser` directly.
|
|
73
|
+
|
|
74
|
+
#### 2. Manifest Discovery (3-level priority)
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
1. --manifest PATH (explicit CLI flag - highest priority)
|
|
78
|
+
2. DBT_DEV_MANIFEST_PATH (when --dev flag used, default: ./target/manifest.json)
|
|
79
|
+
3. DBT_PROD_MANIFEST_PATH (production, default: ~/dbt-state/manifest.json)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**Critical distinction:**
|
|
83
|
+
- Production manifest uses `config.alias` for table names
|
|
84
|
+
- Dev manifest uses SQL filename for table names
|
|
85
|
+
- When both `--manifest` and `--dev` are used, `--dev` is ignored with a warning
|
|
86
|
+
- Always use `--dev` flag for dev tables
|
|
87
|
+
|
|
88
|
+
#### 3. BigQuery Fallback Pattern
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
model = parser.get_model(model_name)
|
|
92
|
+
if not model:
|
|
93
|
+
if os.environ.get('DBT_FALLBACK_BIGQUERY', 'true').lower() in ('true', '1', 'yes'):
|
|
94
|
+
dataset, table = _infer_table_parts(model_name)
|
|
95
|
+
bq_metadata = _fetch_table_metadata_from_bigquery(dataset, table)
|
|
96
|
+
# Return partial metadata with warning to stderr
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**Supported:** `schema`, `columns`, `info`, `config`
|
|
100
|
+
**Not supported:** `deps`, `sql`, `parents`, `children` (dbt-specific)
|
|
101
|
+
|
|
102
|
+
#### 4. Dev Schema Resolution (2-level priority)
|
|
103
|
+
|
|
104
|
+
**Dev schema resolution (2-level):**
|
|
105
|
+
|
|
106
|
+
```text
|
|
107
|
+
1. DBT_DEV_SCHEMA - Direct schema name (highest priority)
|
|
108
|
+
2. Default: "personal_{username}"
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Backward compatibility:** Old variables (`DBT_DEV_DATASET`, `DBT_DEV_SCHEMA_TEMPLATE`, `DBT_DEV_SCHEMA_PREFIX`) trigger a deprecation warning.
|
|
112
|
+
|
|
113
|
+
Location: `config.py:28-35`, `utils/dev.py:81-93`
|
|
114
|
+
|
|
115
|
+
#### 5. Exception Hierarchy
|
|
116
|
+
|
|
117
|
+
**Consistent error handling with typed exceptions**:
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
# src/dbt_meta/errors.py
|
|
121
|
+
|
|
122
|
+
DbtMetaError (base)
|
|
123
|
+
├── ModelNotFoundError # Model not in manifest/BigQuery
|
|
124
|
+
├── ManifestNotFoundError # manifest.json not found
|
|
125
|
+
├── ManifestParseError # Invalid JSON in manifest
|
|
126
|
+
├── BigQueryError # BigQuery operation failed
|
|
127
|
+
├── GitOperationError # Git command failed
|
|
128
|
+
└── ConfigurationError # Invalid configuration
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
**All exceptions include:**
|
|
132
|
+
- `message`: Human-readable error description
|
|
133
|
+
- `suggestion`: Actionable fix (optional)
|
|
134
|
+
- Structured data for programmatic handling
|
|
135
|
+
|
|
136
|
+
**CLI error handling** (`cli.py:45-66`):
|
|
137
|
+
```python
|
|
138
|
+
try:
|
|
139
|
+
result = commands.schema(manifest_path, model_name)
|
|
140
|
+
# ... handle result
|
|
141
|
+
except DbtMetaError as e:
|
|
142
|
+
handle_error(e) # Rich formatted output with suggestion
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
**Example error output:**
|
|
146
|
+
```
|
|
147
|
+
Error: Model 'core__clients' not found
|
|
148
|
+
|
|
149
|
+
Suggestion: Searched in: production manifest, dev manifest
|
|
150
|
+
Try: meta list core
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Benefits:**
|
|
154
|
+
- Consistent error messages across all commands
|
|
155
|
+
- Actionable suggestions for users
|
|
156
|
+
- Easy to catch and handle in tests
|
|
157
|
+
- AI-friendly structured errors
|
|
158
|
+
|
|
159
|
+
#### 6. Configuration Management
|
|
160
|
+
|
|
161
|
+
**Centralized configuration with TOML and env var support** (v0.1.0):
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
# src/dbt_meta/config.py
|
|
165
|
+
|
|
166
|
+
from dbt_meta.config import Config
|
|
167
|
+
|
|
168
|
+
# Load from TOML config file (recommended)
|
|
169
|
+
config = Config.from_config_or_env()
|
|
170
|
+
# Priority: TOML config > Environment variables > Defaults
|
|
171
|
+
# Searches: ./.dbt-meta.toml, ~/.config/dbt-meta/config.toml, ~/.dbt-meta.toml
|
|
172
|
+
|
|
173
|
+
# Load from environment variables only
|
|
174
|
+
config = Config.from_env()
|
|
175
|
+
|
|
176
|
+
# Load from TOML file directly
|
|
177
|
+
config = Config.from_toml("/path/to/config.toml")
|
|
178
|
+
|
|
179
|
+
# Access configuration
|
|
180
|
+
config.prod_manifest_path # ~/dbt-state/manifest.json
|
|
181
|
+
config.dev_manifest_path # ./target/manifest.json
|
|
182
|
+
config.fallback_dev_enabled # True/False
|
|
183
|
+
config.fallback_bigquery_enabled # True/False
|
|
184
|
+
config.dev_dataset # personal_username (sanitized)
|
|
185
|
+
config.prod_table_name_strategy # alias_or_name | name | alias
|
|
186
|
+
config.prod_schema_source # config_or_model | model | config
|
|
187
|
+
|
|
188
|
+
# Validate configuration
|
|
189
|
+
warnings = config.validate()
|
|
190
|
+
for warning in warnings:
|
|
191
|
+
print(f"Warning: {warning}")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**Key features:**
|
|
195
|
+
- **TOML configuration** - Modern config files with XDG compliance
|
|
196
|
+
- **Priority system** - CLI flags > TOML > Env vars > Defaults
|
|
197
|
+
- Single source of truth for all configuration
|
|
198
|
+
- Automatic path expansion (~ to home directory)
|
|
199
|
+
- Boolean parsing with sensible defaults
|
|
200
|
+
- Validation with helpful warnings
|
|
201
|
+
- Type-safe dataclass with full type hints
|
|
202
|
+
- Username sanitization for BigQuery compatibility (replaces every character except letters, digits, and underscores)
|
|
203
|
+
|
|
204
|
+
**Dev schema resolution** (2-level):
|
|
205
|
+
```python
|
|
206
|
+
# Priority 1: Direct schema name
|
|
207
|
+
DBT_DEV_SCHEMA = "my_custom_dev_schema"
|
|
208
|
+
|
|
209
|
+
# Priority 2: Default with username (fallback)
|
|
210
|
+
# personal_{username} (from USER env var, sanitized with re.sub(r'[^a-zA-Z0-9_]', '_', username))
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
**Config file locations** (priority order):
|
|
214
|
+
1. `./.dbt-meta.toml` - Project-local config
|
|
215
|
+
2. `~/.config/dbt-meta/config.toml` - User config (XDG)
|
|
216
|
+
3. `~/.dbt-meta.toml` - Fallback
|
|
217
|
+
|
|
218
|
+
**Settings commands** (v0.1.0):
|
|
219
|
+
- `meta settings init` - Create config file from template
|
|
220
|
+
- `meta settings show` - Display merged configuration
|
|
221
|
+
- `meta settings validate` - Validate config file
|
|
222
|
+
- `meta settings path` - Show active config file path
|
|
223
|
+
|
|
224
|
+
Location: `config.py:24-473`
|
|
225
|
+
|
|
226
|
+
#### 7. CLI and User Experience
|
|
227
|
+
|
|
228
|
+
**Help system improvements** (v0.1.0):
|
|
229
|
+
```python
|
|
230
|
+
# Enable -h short flag for all commands and subcommands
|
|
231
|
+
app = typer.Typer(
|
|
232
|
+
name="dbt-meta",
|
|
233
|
+
context_settings={"help_option_names": ["-h", "--help"]},
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
settings_app = typer.Typer(
|
|
237
|
+
help="CLI settings management",
|
|
238
|
+
context_settings={"help_option_names": ["-h", "--help"]},
|
|
239
|
+
)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
**Benefits:**
|
|
243
|
+
- Both `-h` and `--help` work for all commands
|
|
244
|
+
- Consistent UX across main app and subcommands
|
|
245
|
+
- Standard CLI convention
|
|
246
|
+
|
|
247
|
+
**Schema command output** (v0.1.0):
|
|
248
|
+
```bash
|
|
249
|
+
# Text mode - simple table name
|
|
250
|
+
meta schema core_client__client_info
|
|
251
|
+
# → admirals-bi-dwh.core_client.client_info
|
|
252
|
+
|
|
253
|
+
# JSON mode - structured output
|
|
254
|
+
meta schema -j core_client__client_info
|
|
255
|
+
# → {"model_name": "core_client__client_info", "full_name": "admirals-bi-dwh.core_client.client_info"}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
**Purpose:** Minimalist output optimized for shell scripting and AI consumption
|
|
259
|
+
|
|
260
|
+
**Username sanitization** (v0.1.0):
|
|
261
|
+
```python
|
|
262
|
+
# config.py:36-57 - _calculate_dev_schema()
|
|
263
|
+
username_sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', username)
|
|
264
|
+
# Replaces every character except letters, digits, and underscores (dots, hyphens, @, etc.)
|
|
265
|
+
# Examples:
|
|
266
|
+
# "pavel.filianin" → "pavel_filianin"
|
|
267
|
+
# "john-doe" → "john_doe"
|
|
268
|
+
# "user@example.com" → "user_example_com"
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Why:** BigQuery dataset names only allow letters, numbers, and underscores
|
|
272
|
+
|
|
273
|
+
Location: `cli.py:26-39`, `config.py:36-57`
|
|
274
|
+
|
|
275
|
+
#### 8. Fallback Strategy
|
|
276
|
+
|
|
277
|
+
**3-level fallback system with clean interface**:
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
# src/dbt_meta/fallback.py
|
|
281
|
+
|
|
282
|
+
from dbt_meta.fallback import FallbackStrategy, FallbackLevel, FallbackResult
|
|
283
|
+
from dbt_meta.config import Config
|
|
284
|
+
|
|
285
|
+
config = Config.from_env()
|
|
286
|
+
strategy = FallbackStrategy(config)
|
|
287
|
+
|
|
288
|
+
# Try to get model with automatic fallback
|
|
289
|
+
result = strategy.get_model(
|
|
290
|
+
model_name="core__clients",
|
|
291
|
+
prod_parser=parser,
|
|
292
|
+
allowed_levels=[
|
|
293
|
+
FallbackLevel.PROD_MANIFEST,
|
|
294
|
+
FallbackLevel.DEV_MANIFEST,
|
|
295
|
+
FallbackLevel.BIGQUERY # Optional - exclude for deps/sql commands
|
|
296
|
+
]
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
if result.found:
|
|
300
|
+
print(f"Found in: {result.level.value}")
|
|
301
|
+
print(f"Data: {result.data}")
|
|
302
|
+
|
|
303
|
+
# Show warnings (e.g., "Using dev manifest")
|
|
304
|
+
for warning in result.warnings:
|
|
305
|
+
print(f"Warning: {warning}")
|
|
306
|
+
else:
|
|
307
|
+
# ModelNotFoundError raised if not found
|
|
308
|
+
pass
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
**Fallback levels (in priority order):**
|
|
312
|
+
1. `PROD_MANIFEST` - Production manifest (default source)
|
|
313
|
+
2. `DEV_MANIFEST` - Dev manifest (if enabled via `DBT_FALLBACK_TARGET`)
|
|
314
|
+
3. `BIGQUERY` - BigQuery metadata (if enabled via `DBT_FALLBACK_BIGQUERY`)
|
|
315
|
+
|
|
316
|
+
**Key features:**
|
|
317
|
+
- Consolidates logic previously duplicated across 10+ commands
|
|
318
|
+
- Automatic warning collection at each level
|
|
319
|
+
- Configurable allowed levels per command
|
|
320
|
+
- Clean error handling with `ModelNotFoundError`
|
|
321
|
+
- Type-safe enums and dataclasses
|
|
322
|
+
|
|
323
|
+
**Usage pattern for commands:**
|
|
324
|
+
```python
|
|
325
|
+
# commands with BigQuery support (schema, columns, info, config)
|
|
326
|
+
allowed_levels = [FallbackLevel.PROD_MANIFEST, FallbackLevel.DEV_MANIFEST, FallbackLevel.BIGQUERY]
|
|
327
|
+
|
|
328
|
+
# commands without BigQuery support (deps, sql, parents, children)
|
|
329
|
+
allowed_levels = [FallbackLevel.PROD_MANIFEST, FallbackLevel.DEV_MANIFEST]
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
Location: `fallback.py:18-198`
|
|
333
|
+
|
|
334
|
+
**Note:** BigQuery fallback (`_fetch_from_bigquery`) is currently a placeholder (returns None). Full implementation will be added when refactoring `commands.py` in Task 3.
|
|
335
|
+
|
|
336
|
+
## Adding a New Command
|
|
337
|
+
|
|
338
|
+
### 1. Add command function in `commands.py`
|
|
339
|
+
|
|
340
|
+
```python
|
|
341
|
+
def new_command(manifest_path: str, model_name: str) -> Optional[Dict]:
|
|
342
|
+
"""Extract metadata"""
|
|
343
|
+
parser = _get_cached_parser(manifest_path) # MUST use cached parser
|
|
344
|
+
model = parser.get_model(model_name)
|
|
345
|
+
|
|
346
|
+
if not model:
|
|
347
|
+
return None # Or add BigQuery fallback if applicable
|
|
348
|
+
|
|
349
|
+
return {'field': model.get('field', '')}
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
### 2. Add CLI command in `cli.py`
|
|
353
|
+
|
|
354
|
+
```python
|
|
355
|
+
@app.command()
|
|
356
|
+
def new_command(
|
|
357
|
+
model_name: str = typer.Argument(..., help="Model name"),
|
|
358
|
+
json_output: bool = typer.Option(False, "-j", "--json"),
|
|
359
|
+
):
|
|
360
|
+
"""Command description"""
|
|
361
|
+
manifest_path = get_manifest_path()
|
|
362
|
+
result = commands.new_command(manifest_path, model_name)
|
|
363
|
+
handle_command_output(result, json_output)
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
### 3. Add tests in `test_commands.py`
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
def test_new_command(prod_manifest, test_model):
|
|
370
|
+
result = commands.new_command(prod_manifest, test_model)
|
|
371
|
+
assert result is not None
|
|
372
|
+
assert 'field' in result
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### 4. Update help text in `cli.py`
|
|
376
|
+
|
|
377
|
+
Add to `_build_commands_panel()` if needed.
|
|
378
|
+
|
|
379
|
+
## Testing Strategy
|
|
380
|
+
|
|
381
|
+
**Coverage requirement:** 95%+ (pyproject.toml:79)
|
|
382
|
+
|
|
383
|
+
**Test markers:**
|
|
384
|
+
- `@pytest.mark.unit` - Fast unit tests
|
|
385
|
+
- `@pytest.mark.integration` - Integration tests
|
|
386
|
+
- `@pytest.mark.performance` - Performance benchmarks
|
|
387
|
+
|
|
388
|
+
**Test structure:**
|
|
389
|
+
- `test_commands.py` - Command implementations
|
|
390
|
+
- `test_infrastructure.py` - Manifest discovery + warnings
|
|
391
|
+
- `test_errors.py` - Exception hierarchy
|
|
392
|
+
- `test_config.py` - Configuration management
|
|
393
|
+
- `test_fallback.py` - Fallback strategy
|
|
394
|
+
- `conftest.py` - Shared fixtures (uses dynamic `test_model` fixture)
|
|
395
|
+
|
|
396
|
+
**Excluded from coverage:**
|
|
397
|
+
- `cli.py` - UI layer (tested manually)
|
|
398
|
+
- `manifest/finder.py` - Auto-discovery logic
|
|
399
|
+
|
|
400
|
+
## Important Code Locations
|
|
401
|
+
|
|
402
|
+
| Feature | Location |
|
|
403
|
+
|---------|----------|
|
|
404
|
+
| Exception hierarchy | `errors.py:13-203` |
|
|
405
|
+
| Error handler (CLI) | `cli.py:45-66` |
|
|
406
|
+
| Configuration management | `config.py:12-139` |
|
|
407
|
+
| Fallback strategy | `fallback.py:18-198` |
|
|
408
|
+
| Manifest discovery | `manifest/finder.py:26-89` |
|
|
409
|
+
| Parser caching | `commands.py:20-34`, `manifest/parser.py:28-58` |
|
|
410
|
+
| BigQuery fallback | `commands.py:399-446` |
|
|
411
|
+
| Dev schema resolution | `commands.py:934-1042` (deprecated, use `config.py`) |
|
|
412
|
+
| Prod table naming | `commands.py:452-493` |
|
|
413
|
+
| Lineage traversal | `commands.py:773-805` |
|
|
414
|
+
| Help formatting | `cli.py:43-157` |
|
|
415
|
+
|
|
416
|
+
## Environment Variables
|
|
417
|
+
|
|
418
|
+
**Preferred access:** Use `Config.from_env()` for centralized configuration management with validation.
|
|
419
|
+
|
|
420
|
+
**Manifest:**
|
|
421
|
+
- `DBT_PROD_MANIFEST_PATH` - Production manifest path (default: `~/dbt-state/manifest.json`)
|
|
422
|
+
- `DBT_DEV_MANIFEST_PATH` - Dev manifest path (default: `./target/manifest.json`)
|
|
423
|
+
|
|
424
|
+
**Fallback control:**
|
|
425
|
+
- `DBT_FALLBACK_TARGET` - Enable dev manifest fallback (default: `true`)
|
|
426
|
+
- `DBT_FALLBACK_BIGQUERY` - Enable BigQuery fallback (default: `true`)
|
|
427
|
+
|
|
428
|
+
**Naming:**
|
|
429
|
+
- `DBT_PROD_TABLE_NAME` - `alias_or_name` (default), `name`, `alias`
|
|
430
|
+
- `DBT_PROD_SCHEMA_SOURCE` - `config_or_model` (default), `model`, `config`
|
|
431
|
+
- `DBT_DEV_SCHEMA` - Direct dev schema name (overrides default `personal_{username}`)
|
|
432
|
+
- `DBT_USER` - Override username for dev schema (default: `$USER`)
|
|
433
|
+
|
|
434
|
+
**Deprecated:**
|
|
435
|
+
- `DBT_DEV_DATASET` - Use `DBT_DEV_SCHEMA` instead (backward compatible with warning)
|
|
436
|
+
- `DBT_DEV_SCHEMA_TEMPLATE` - Use `DBT_DEV_SCHEMA` instead (no longer supported)
|
|
437
|
+
- `DBT_DEV_SCHEMA_PREFIX` - Use `DBT_DEV_SCHEMA` instead (no longer supported)
|
|
438
|
+
|
|
439
|
+
## Type Checking
|
|
440
|
+
|
|
441
|
+
**Strict mode enabled** - All functions must have type hints.
|
|
442
|
+
|
|
443
|
+
```python
|
|
444
|
+
from typing import Dict, List, Optional, Any
|
|
445
|
+
|
|
446
|
+
def command(manifest_path: str, model_name: str) -> Optional[Dict[str, Any]]:
|
|
447
|
+
"""Returns None if model not found"""
|
|
448
|
+
...
|
|
449
|
+
|
|
450
|
+
def search(manifest_path: str, query: str) -> List[Dict[str, str]]:
|
|
451
|
+
"""Always returns list (empty if no results)"""
|
|
452
|
+
...
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
## Data Source Decision Logic
|
|
456
|
+
|
|
457
|
+
To understand how dbt-meta decides which data source to use (prod/dev manifest, BigQuery fallback), see the following:
|
|
458
|
+
|
|
459
|
+
**Reference documentation:**
|
|
460
|
+
- `.qa/decision_tree_visual.txt` - Visual decision tree with 5 critical scenarios
|
|
461
|
+
- `.qa/data_source_logic.md` - Detailed logic specification
|
|
462
|
+
|
|
463
|
+
**Key principle:**
|
|
464
|
+
When BigQuery fallback is needed (empty columns), ALWAYS use the schema from the FOUND model:
|
|
465
|
+
- Model found in dev manifest → use dev schema (`personal_user`)
|
|
466
|
+
- Model found in prod manifest → use prod schema
|
|
467
|
+
- Never re-search production manifest after model is found
|
|
468
|
+
|
|
469
|
+
**Implementation:**
|
|
470
|
+
- `command_impl/base.py:107-151` - Main fallback orchestration
|
|
471
|
+
- `command_impl/columns.py:71-87` - BigQuery fallback with correct schema
|
|
472
|
+
- `utils/git.py:77-169` - Git status detection and warnings
|
|
473
|
+
|
|
474
|
+
## Publishing Checklist
|
|
475
|
+
|
|
476
|
+
1. `pytest && mypy src/dbt_meta && ruff check src/dbt_meta`
|
|
477
|
+
2. Update version in `pyproject.toml`, `src/dbt_meta/__init__.py`
|
|
478
|
+
3. Update `CHANGELOG.md` with version and date
|
|
479
|
+
4. Test: `pip install -e . && meta --version`
|
|
480
|
+
5. Tag: `git tag v0.x.0`
|
|
481
|
+
|
|
482
|
+
## Performance Benchmarks
|
|
483
|
+
|
|
484
|
+
- First command: 30-60ms (manifest parsing + caching)
|
|
485
|
+
- Subsequent commands: 5-10ms (cached parser)
|
|
486
|
+
- 865+ models parsed in ~35ms median
|
|
487
|
+
|
|
488
|
+
**Optimization rules:**
|
|
489
|
+
1. Always use `_get_cached_parser()` - never instantiate `ManifestParser` directly
|
|
490
|
+
2. Cache results in local variables - avoid repeated manifest access
|
|
491
|
+
3. Use generator expressions over list comprehensions when possible
|
|
492
|
+
4. Use `@lru_cache` for expensive helper functions
|