mdbt 0.4.38__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mdbt-0.4.38 → mdbt-0.5.1}/PKG-INFO +5 -5
  2. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/build_dbt_docs_ai.py +93 -3
  3. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt.egg-info/PKG-INFO +5 -5
  4. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt.egg-info/requires.txt +4 -4
  5. {mdbt-0.4.38 → mdbt-0.5.1}/setup.py +5 -5
  6. {mdbt-0.4.38 → mdbt-0.5.1}/LICENSE +0 -0
  7. {mdbt-0.4.38 → mdbt-0.5.1}/README.md +0 -0
  8. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/__init__.py +0 -0
  9. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/ai_core.py +0 -0
  10. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/build_unit_test_data_ai.py +0 -0
  11. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/cmdline.py +0 -0
  12. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/core.py +0 -0
  13. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/expectations_output_builder.py +0 -0
  14. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/lightdash.py +0 -0
  15. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/main.py +0 -0
  16. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/precommit_format.py +0 -0
  17. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/prompts.py +0 -0
  18. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/recce.py +0 -0
  19. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/sort_yaml_fields.py +0 -0
  20. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt/sql_sorter.py +0 -0
  21. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt.egg-info/SOURCES.txt +0 -0
  22. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt.egg-info/dependency_links.txt +0 -0
  23. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt.egg-info/entry_points.txt +0 -0
  24. {mdbt-0.4.38 → mdbt-0.5.1}/mdbt.egg-info/top_level.txt +0 -0
  25. {mdbt-0.4.38 → mdbt-0.5.1}/setup.cfg +0 -0
  26. {mdbt-0.4.38 → mdbt-0.5.1}/tests/test_main.py +0 -0
  27. {mdbt-0.4.38 → mdbt-0.5.1}/tests/test_sort_yaml_fields.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdbt
3
- Version: 0.4.38
3
+ Version: 0.5.1
4
4
  Summary: A CLI tool to manage dbt builds with state handling and manifest management
5
5
  Author: Craig Lathrop
6
6
  Author-email: info@markimicrowave.com
@@ -12,10 +12,10 @@ License-File: LICENSE
12
12
  Requires-Dist: click<9.0.0,>=8.0.0
13
13
  Requires-Dist: pyperclip<2.0.0,>=1.8.0
14
14
  Requires-Dist: snowflake-connector-python[pandas]<4.10.0,>=3.17.0
15
- Requires-Dist: python-dotenv<1.2.0,>=1.0.0
16
- Requires-Dist: openai<3.0.0,>=2.6.0
17
- Requires-Dist: sqlfluff==3.5.0
18
- Requires-Dist: sqlfluff-templater-dbt==3.5.0
15
+ Requires-Dist: python-dotenv<2.2.0,>=1.0.0
16
+ Requires-Dist: openai<5.0.0,>=2.6.0
17
+ Requires-Dist: sqlfluff<4.2.0,>=3.5.0
18
+ Requires-Dist: sqlfluff-templater-dbt<4.2.0,>=3.5.0
19
19
  Requires-Dist: wordninja==2.0.0
20
20
  Requires-Dist: ruamel.yaml<0.18.0
21
21
  Requires-Dist: recce<=0.44.3
@@ -1,8 +1,10 @@
1
1
  import subprocess
2
+ from pathlib import Path
2
3
 
3
4
  import pyperclip
4
5
  from dotenv import find_dotenv
5
6
  from dotenv import load_dotenv
7
+ from ruamel.yaml import YAML
6
8
 
7
9
  from mdbt.ai_core import AiCore
8
10
  from mdbt.prompts import Prompts
@@ -50,8 +52,13 @@ class BuildDBTDocs(AiCore):
50
52
 
51
53
  system_instructions = system_instructions + sample_data[model_name]
52
54
 
55
+ # Get existing column descriptions from neighboring YML files
56
+ existing_descriptions = self.get_existing_column_descriptions(original_file_path)
57
+ if existing_descriptions:
58
+ system_instructions += f'Here are some existing column descriptions from neighboring models. These may or may not be relevant to the current model. Assess, and use as a source if it seems to match the sample data: \n{existing_descriptions}'
59
+
53
60
  # Might bring this back in the future.
54
- extra_info = ""
61
+ extra_info = ''
55
62
 
56
63
  if mode == 1:
57
64
  # Build new documentation
@@ -113,11 +120,94 @@ class BuildDBTDocs(AiCore):
113
120
 
114
121
  @staticmethod
115
122
  def backup_existing_yml_file(yml_file_path):
116
- with open(yml_file_path, "r") as file:
123
+ with open(yml_file_path, 'r') as file:
117
124
  yml_content = file.read()
118
- with open(yml_file_path + ".bak", "w") as file:
125
+ with open(yml_file_path + '.bak', 'w') as file:
119
126
  file.write(yml_content)
120
127
 
128
def get_existing_column_descriptions(self, model_file_path: str) -> str:
    """
    Collect column descriptions from YML files that sit next to a model.

    Every ``*.yml`` file in the model's folder, except the model's own YML
    file, is parsed and the ``description`` of each entry under
    ``models[].columns[]`` is gathered. When the same column name appears
    more than once, the longest (whitespace-stripped) description wins.
    Columns named ``id`` or ending in ``_id`` are dropped, at most 300
    columns (in alphabetical order) are kept, and each description is cut
    to 200 characters in the output.

    Args:
        model_file_path: Path to the SQL model file.

    Returns:
        A formatted, multi-line string listing the descriptions found, or
        an empty string when there is nothing to report.
    """
    model_path = Path(model_file_path)
    # Compare Path objects rather than raw strings: Path collapses './' and
    # doubled separators, so the model's own YML file is recognised even when
    # the caller passes a path that is not in normalised form. with_suffix()
    # also only touches the file extension, never a '.sql' that happens to
    # appear in a directory name.
    own_yml_path = model_path.with_suffix('.yml') if model_path.name else None

    # Sorted so that ties between equally long descriptions are resolved the
    # same way on every run (glob order is filesystem-dependent).
    neighbor_yml_files = sorted(
        yml_file for yml_file in model_path.parent.glob('*.yml')
        if yml_file != own_yml_path
    )
    if not neighbor_yml_files:
        return ''

    yaml = YAML()
    column_descriptions = {}

    for yml_file in neighbor_yml_files:
        # Deliberately best-effort: one unreadable or malformed neighbor must
        # not prevent documentation from being generated, so report and skip.
        try:
            with open(yml_file, 'r', encoding='utf-8') as f:
                yml_content = yaml.load(f)
        except Exception as e:
            print(f'Warning: Could not parse {yml_file}: {e}')
            continue

        # Empty files load as None; files whose top level is not a mapping
        # cannot contain a 'models' section.
        if not isinstance(yml_content, dict):
            continue

        # 'models:' / 'columns:' keys that are present but empty load as
        # None, hence the 'or []' instead of a .get() default.
        for model in yml_content.get('models') or []:
            if not isinstance(model, dict):
                continue
            for column in model.get('columns') or []:
                if not isinstance(column, dict):
                    continue
                raw_name = column.get('name')
                raw_desc = column.get('description')
                if not raw_name or not raw_desc:
                    continue

                # Names may be parsed as non-string scalars (e.g. integers);
                # normalise so the filtering and sorting below cannot fail.
                col_name = str(raw_name)
                col_desc = str(raw_desc).strip()
                if not col_desc:
                    continue

                # Keep the first description seen, unless a longer one turns up.
                if len(col_desc) > len(column_descriptions.get(col_name, '')):
                    column_descriptions[col_name] = col_desc

    if not column_descriptions:
        return ''

    # Filter out ID columns to reduce noise (columns ending in _id or named id)
    non_id_descriptions = {
        col_name: desc for col_name, desc in column_descriptions.items()
        if not col_name.lower().endswith('_id') and col_name.lower() != 'id'
    }

    # Limit to the first 300 columns (alphabetically) to avoid oversized prompts
    max_columns = 300
    limited_descriptions = dict(sorted(non_id_descriptions.items())[:max_columns])

    if not limited_descriptions:
        return ''

    # Format the output
    lines = [
        '\n--- Existing Column Descriptions from Neighboring Models ---',
        'Use these existing descriptions for consistency when the same '
        'or similar columns appear in the new model:\n'
    ]
    for col_name, description in limited_descriptions.items():
        # Truncate very long descriptions to keep the prompt compact
        if len(description) > 200:
            desc_preview = description[:200] + '...'
        else:
            desc_preview = description
        lines.append(f' - {col_name}: {desc_preview}')

    lines.append('--- End of Existing Column Descriptions ---\n')
    return '\n'.join(lines)
210
+
121
211
  def build_user_msg_mode_1(self, _sql_file_path: str, extra_info: str) -> str:
122
212
  self.read_file(_sql_file_path)
123
213
  model_name = _sql_file_path.split("/")[-1].split(".")[0]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdbt
3
- Version: 0.4.38
3
+ Version: 0.5.1
4
4
  Summary: A CLI tool to manage dbt builds with state handling and manifest management
5
5
  Author: Craig Lathrop
6
6
  Author-email: info@markimicrowave.com
@@ -12,10 +12,10 @@ License-File: LICENSE
12
12
  Requires-Dist: click<9.0.0,>=8.0.0
13
13
  Requires-Dist: pyperclip<2.0.0,>=1.8.0
14
14
  Requires-Dist: snowflake-connector-python[pandas]<4.10.0,>=3.17.0
15
- Requires-Dist: python-dotenv<1.2.0,>=1.0.0
16
- Requires-Dist: openai<3.0.0,>=2.6.0
17
- Requires-Dist: sqlfluff==3.5.0
18
- Requires-Dist: sqlfluff-templater-dbt==3.5.0
15
+ Requires-Dist: python-dotenv<2.2.0,>=1.0.0
16
+ Requires-Dist: openai<5.0.0,>=2.6.0
17
+ Requires-Dist: sqlfluff<4.2.0,>=3.5.0
18
+ Requires-Dist: sqlfluff-templater-dbt<4.2.0,>=3.5.0
19
19
  Requires-Dist: wordninja==2.0.0
20
20
  Requires-Dist: ruamel.yaml<0.18.0
21
21
  Requires-Dist: recce<=0.44.3
@@ -1,10 +1,10 @@
1
1
  click<9.0.0,>=8.0.0
2
2
  pyperclip<2.0.0,>=1.8.0
3
3
  snowflake-connector-python[pandas]<4.10.0,>=3.17.0
4
- python-dotenv<1.2.0,>=1.0.0
5
- openai<3.0.0,>=2.6.0
6
- sqlfluff==3.5.0
7
- sqlfluff-templater-dbt==3.5.0
4
+ python-dotenv<2.2.0,>=1.0.0
5
+ openai<5.0.0,>=2.6.0
6
+ sqlfluff<4.2.0,>=3.5.0
7
+ sqlfluff-templater-dbt<4.2.0,>=3.5.0
8
8
  wordninja==2.0.0
9
9
  ruamel.yaml<0.18.0
10
10
  recce<=0.44.3
@@ -3,7 +3,7 @@ from setuptools import setup
3
3
 
4
4
  setup(
5
5
  name="mdbt",
6
- version="0.4.38",
6
+ version="0.5.1",
7
7
  description="A CLI tool to manage dbt builds with state handling and manifest management",
8
8
  author="Craig Lathrop",
9
9
  author_email="info@markimicrowave.com",
@@ -12,10 +12,10 @@ setup(
12
12
  "click>=8.0.0,<9.0.0",
13
13
  "pyperclip>=1.8.0,<2.0.0",
14
14
  "snowflake-connector-python[pandas]>=3.17.0,<4.10.0",
15
- "python-dotenv>=1.0.0,<1.2.0",
16
- "openai>=2.6.0, <3.0.0",
17
- "sqlfluff==3.5.0",
18
- "sqlfluff-templater-dbt==3.5.0",
15
+ "python-dotenv>=1.0.0,<2.2.0",
16
+ "openai>=2.6.0, <5.0.0",
17
+ "sqlfluff>=3.5.0, <4.2.0",
18
+ "sqlfluff-templater-dbt>=3.5.0, <4.2.0",
19
19
  "wordninja==2.0.0",
20
20
  "ruamel.yaml<0.18.0",
21
21
  "recce<=0.44.3",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes