mdbt 0.4.38__tar.gz → 0.4.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbt-0.4.38 → mdbt-0.4.39}/PKG-INFO +1 -1
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/build_dbt_docs_ai.py +93 -3
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt.egg-info/PKG-INFO +1 -1
- {mdbt-0.4.38 → mdbt-0.4.39}/setup.py +1 -1
- {mdbt-0.4.38 → mdbt-0.4.39}/LICENSE +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/README.md +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/__init__.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/ai_core.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/build_unit_test_data_ai.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/cmdline.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/core.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/expectations_output_builder.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/lightdash.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/main.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/precommit_format.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/prompts.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/recce.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/sort_yaml_fields.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt/sql_sorter.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt.egg-info/SOURCES.txt +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt.egg-info/dependency_links.txt +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt.egg-info/entry_points.txt +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt.egg-info/requires.txt +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/mdbt.egg-info/top_level.txt +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/setup.cfg +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/tests/test_main.py +0 -0
- {mdbt-0.4.38 → mdbt-0.4.39}/tests/test_sort_yaml_fields.py +0 -0
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import subprocess
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
import pyperclip
|
|
4
5
|
from dotenv import find_dotenv
|
|
5
6
|
from dotenv import load_dotenv
|
|
7
|
+
from ruamel.yaml import YAML
|
|
6
8
|
|
|
7
9
|
from mdbt.ai_core import AiCore
|
|
8
10
|
from mdbt.prompts import Prompts
|
|
@@ -50,8 +52,13 @@ class BuildDBTDocs(AiCore):
|
|
|
50
52
|
|
|
51
53
|
system_instructions = system_instructions + sample_data[model_name]
|
|
52
54
|
|
|
55
|
+
# Get existing column descriptions from neighboring YML files
|
|
56
|
+
existing_descriptions = self.get_existing_column_descriptions(original_file_path)
|
|
57
|
+
if existing_descriptions:
|
|
58
|
+
system_instructions += f'Here are some existing column descriptions from neighboring models. These may or may not be relevant to the current model. Assess, and use as a source if it seems to match the sample data: \n{existing_descriptions}'
|
|
59
|
+
|
|
53
60
|
# Might bring this back in the future.
|
|
54
|
-
extra_info =
|
|
61
|
+
extra_info = ''
|
|
55
62
|
|
|
56
63
|
if mode == 1:
|
|
57
64
|
# Build new documentation
|
|
@@ -113,11 +120,94 @@ class BuildDBTDocs(AiCore):
|
|
|
113
120
|
|
|
114
121
|
@staticmethod
|
|
115
122
|
def backup_existing_yml_file(yml_file_path):
|
|
116
|
-
with open(yml_file_path,
|
|
123
|
+
with open(yml_file_path, 'r') as file:
|
|
117
124
|
yml_content = file.read()
|
|
118
|
-
with open(yml_file_path +
|
|
125
|
+
with open(yml_file_path + '.bak', 'w') as file:
|
|
119
126
|
file.write(yml_content)
|
|
120
127
|
|
|
128
|
+
def get_existing_column_descriptions(
    self,
    model_file_path: str,
    max_columns: int = 300,
    max_description_length: int = 200,
) -> str:
    """
    Collect column descriptions from YML files that sit next to a model.

    Every '*.yml' file in the model's folder is read, except the YML that
    belongs to the model itself. For each column that has both a name and a
    description, the longest description seen for that name is kept. ID
    columns are dropped, the remainder is sorted by column name and capped,
    and the result is rendered as a text block for use in a prompt.

    Args:
        model_file_path: Path to the SQL model file.
        max_columns: Maximum number of columns to include. Columns are taken
            in alphabetical order of their name.
        max_description_length: Descriptions longer than this are cut to
            this many characters and suffixed with '...'.

    Returns:
        A formatted string containing existing column descriptions found
        in neighboring YML files, or an empty string when none are found.
    """
    model_path = Path(model_file_path)
    folder_path = model_path.parent
    # Compare Path objects rather than raw strings so that equivalent
    # spellings such as './models/a.sql' and 'models/a.sql' still match.
    # with_suffix() rejects an empty file name, hence the guard.
    current_yml_path = model_path.with_suffix('.yml') if model_path.name else None

    # Sorted so that ties between equally long descriptions are resolved the
    # same way on every run, independent of filesystem listing order.
    neighbor_yml_files = [
        yml_file
        for yml_file in sorted(folder_path.glob('*.yml'))
        if yml_file != current_yml_path
    ]
    if not neighbor_yml_files:
        return ''

    yaml = YAML()
    yaml.preserve_quotes = True

    column_descriptions = {}
    for yml_file in neighbor_yml_files:
        # Best effort: one unreadable neighbor must not abort doc generation.
        try:
            with open(yml_file, 'r', encoding='utf-8') as f:
                yml_content = yaml.load(f)
        except Exception as e:
            print(f'Warning: Could not parse {yml_file}: {e}')
            continue

        # Empty files load as None; non-mapping documents have no 'models'.
        if not isinstance(yml_content, dict):
            continue

        # 'models:' may be absent or present with a null value.
        models = yml_content.get('models')
        if not isinstance(models, list):
            continue

        for model in models:
            if not isinstance(model, dict):
                continue
            columns = model.get('columns')
            if not isinstance(columns, list):
                continue

            for column in columns:
                if not isinstance(column, dict):
                    continue
                col_name = column.get('name')
                col_desc = column.get('description')
                if not col_name or not col_desc:
                    continue

                # YAML scalars are not always strings (e.g. `name: 123`);
                # normalise so the filtering and sorting below cannot fail.
                col_name = str(col_name)
                col_desc = str(col_desc).strip()
                if not col_desc:
                    continue

                # Keep the first description seen, or a strictly longer one.
                if len(col_desc) > len(column_descriptions.get(col_name, '')):
                    column_descriptions[col_name] = col_desc

    # Filter out ID columns to reduce noise (columns ending in _id or named id)
    non_id_descriptions = {
        col_name: desc
        for col_name, desc in column_descriptions.items()
        if not col_name.lower().endswith('_id') and col_name.lower() != 'id'
    }

    # Cap the number of columns to avoid oversized prompts.
    limited_descriptions = dict(sorted(non_id_descriptions.items())[:max_columns])
    if not limited_descriptions:
        return ''

    # Format the output
    lines = [
        '\n--- Existing Column Descriptions from Neighboring Models ---',
        'Use these existing descriptions for consistency when the same '
        'or similar columns appear in the new model:\n',
    ]
    for col_name, description in limited_descriptions.items():
        # Truncate very long descriptions for context
        if len(description) > max_description_length:
            desc_preview = description[:max_description_length] + '...'
        else:
            desc_preview = description
        lines.append(f'  - {col_name}: {desc_preview}')

    lines.append('--- End of Existing Column Descriptions ---\n')
    return '\n'.join(lines)
|
|
210
|
+
|
|
121
211
|
def build_user_msg_mode_1(self, _sql_file_path: str, extra_info: str) -> str:
|
|
122
212
|
self.read_file(_sql_file_path)
|
|
123
213
|
model_name = _sql_file_path.split("/")[-1].split(".")[0]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|