mdbt 0.4.39__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- mdbt/build_dbt_docs_ai.py +42 -32
- mdbt/cmdline.py +4 -49
- mdbt/core.py +16 -13
- mdbt/prompts.py +4 -4
- mdbt/sort_yaml_fields.py +0 -2
- mdbt/sql_sorter.py +24 -19
- {mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/METADATA +5 -5
- mdbt-0.5.2.dist-info/RECORD +19 -0
- {mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/WHEEL +1 -1
- mdbt/lightdash.py +0 -84
- mdbt-0.4.39.dist-info/RECORD +0 -20
- {mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/entry_points.txt +0 -0
- {mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/top_level.txt +0 -0
mdbt/build_dbt_docs_ai.py
CHANGED

@@ -46,19 +46,23 @@ class BuildDBTDocs(AiCore):
         system_instructions = Prompts().dbt_docs_lte_l2_prompt

         if sys_context:
-            system_instructions += f'\nContext about system docs are generated for: \n{sys_context}\n'
+            system_instructions += (
+                f"\nContext about system docs are generated for: \n{sys_context}\n"
+            )

         sample_data = self._get_sample_data_from_snowflake([model_name])

         system_instructions = system_instructions + sample_data[model_name]

         # Get existing column descriptions from neighboring YML files
-        existing_descriptions = self.get_existing_column_descriptions(original_file_path)
+        existing_descriptions = self.get_existing_column_descriptions(
+            original_file_path
+        )
         if existing_descriptions:
-            system_instructions += f'Here are some existing column descriptions from neighboring models. These may or may not be relevant to the current model. Assess, and use as a source if it seems to match the sample data: \n{existing_descriptions}'
+            system_instructions += f"Here are some existing column descriptions from neighboring models. These may or may not be relevant to the current model. Assess, and use as a source if it seems to match the sample data: \n{existing_descriptions}"

         # Might bring this back in the future.
-        extra_info = ''
+        extra_info = ""

         if mode == 1:
             # Build new documentation

@@ -87,12 +91,12 @@ class BuildDBTDocs(AiCore):
         output = assistant_responses[0]
         # Check for ``` at end of output (str) and remove
         # Remove trailing markdown code fences if present
-        lines = output.split('\n')
+        lines = output.split("\n")
         new_lines = []
         for line in lines:
-            if '```' not in line:
+            if "```" not in line:
                 new_lines.append(line)
-        output = '\n'.join(new_lines)
+        output = "\n".join(new_lines)
         if not is_new:
             clip_or_file = input(
                 f"1. to copy to clipboard\n2, to write to file ({yml_file_path}\n:"

@@ -111,7 +115,9 @@ class BuildDBTDocs(AiCore):
                     file.write(output)
                 if not self.is_file_committed(yml_file_path):
                     if not is_new:
-                        commit_file = input('Press 1 to add to git, any other key to byapss: ')
+                        commit_file = input(
+                            "Press 1 to add to git, any other key to byapss: "
+                        )
                     else:
                         commit_file = "1"

@@ -120,9 +126,9 @@ class BuildDBTDocs(AiCore):

     @staticmethod
     def backup_existing_yml_file(yml_file_path):
-        with open(yml_file_path, 'r') as file:
+        with open(yml_file_path, "r") as file:
             yml_content = file.read()
-        with open(yml_file_path + '.bak', 'w') as file:
+        with open(yml_file_path + ".bak", "w") as file:
             file.write(yml_content)

     def get_existing_column_descriptions(self, model_file_path: str) -> str:

@@ -138,14 +144,14 @@ class BuildDBTDocs(AiCore):
         in neighboring YML files.
         """
         folder_path = Path(model_file_path).parent
-        current_yml_path = model_file_path.replace('.sql', '.yml')
+        current_yml_path = model_file_path.replace(".sql", ".yml")
         column_descriptions = {}

         yaml = YAML()
         yaml.preserve_quotes = True

         # Find all .yml files in the same folder
-        yml_files = list(folder_path.glob('*.yml'))
+        yml_files = list(folder_path.glob("*.yml"))

         for yml_file in yml_files:
             # Skip the current model's yml file if it exists

@@ -153,60 +159,64 @@ class BuildDBTDocs(AiCore):
                 continue

             try:
-                with open(yml_file, 'r') as f:
+                with open(yml_file, "r") as f:
                     yml_content = yaml.load(f)

                 if not yml_content:
                     continue

                 # Extract column descriptions from models section
-                models = yml_content.get('models', [])
+                models = yml_content.get("models", [])
                 for model in models:
-                    columns = model.get('columns', [])
+                    columns = model.get("columns", [])
                     for column in columns:
-                        col_name = column.get('name')
-                        col_desc = column.get('description')
+                        col_name = column.get("name")
+                        col_desc = column.get("description")
                         if col_name and col_desc:
                             # Only store if not already seen, or prefer longer descriptions
-                            if col_name not in column_descriptions or \
-                                    len(str(col_desc)) > len(str(column_descriptions[col_name])):
+                            if col_name not in column_descriptions or len(
+                                str(col_desc)
+                            ) > len(str(column_descriptions[col_name])):
                                 column_descriptions[col_name] = str(col_desc).strip()

             except Exception as e:
-                print(f'Warning: Could not parse {yml_file}: {e}')
+                print(f"Warning: Could not parse {yml_file}: {e}")
                 continue

         if not column_descriptions:
-            return ''
+            return ""

         # Filter out ID columns to reduce noise (columns ending in _id or named id)
         non_id_descriptions = {
-            col_name: desc for col_name, desc in column_descriptions.items()
-            if not col_name.lower().endswith('_id') and col_name.lower() != 'id'
+            col_name: desc
+            for col_name, desc in column_descriptions.items()
+            if not col_name.lower().endswith("_id") and col_name.lower() != "id"
         }

         # Limit to first 300 unique columns to avoid oversized prompts
         max_columns = 300
         limited_descriptions = dict(
-                sorted(non_id_descriptions.items())[:max_columns]
+            sorted(non_id_descriptions.items())[:max_columns]
         )

         if not limited_descriptions:
-            return ''
+            return ""

         # Format the output
         lines = [
-            '\n--- Existing Column Descriptions from Neighboring Models ---',
-            'Use these existing descriptions for consistency when the same '
-            'or similar columns appear in the new model:\n',
+            "\n--- Existing Column Descriptions from Neighboring Models ---",
+            "Use these existing descriptions for consistency when the same "
+            "or similar columns appear in the new model:\n",
         ]
         for col_name, description in limited_descriptions.items():
             # Truncate very long descriptions for context
-            desc_preview = description[:200] + '...' if len(description) > 200 else description
-            lines.append(f' - {col_name}: {desc_preview}')
+            desc_preview = (
+                description[:200] + "..." if len(description) > 200 else description
+            )
+            lines.append(f" - {col_name}: {desc_preview}")

-        lines.append('--- End of Existing Column Descriptions ---\n')
-        return '\n'.join(lines)
+        lines.append("--- End of Existing Column Descriptions ---\n")
+        return "\n".join(lines)

     def build_user_msg_mode_1(self, _sql_file_path: str, extra_info: str) -> str:
         self.read_file(_sql_file_path)
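The logic of get_existing_column_descriptions is unchanged by this release; the edits above are quote and line-wrap normalization. For reference, a minimal standalone sketch of the harvesting rule the method applies: on a column-name collision the longer description wins, and id-like columns are filtered out. The sample data here is hypothetical; the real method reads neighboring .yml files.

    def merge_descriptions(pairs):
        merged = {}
        for name, desc in pairs:
            # On a name collision, keep the longer description.
            if name not in merged or len(str(desc)) > len(str(merged[name])):
                merged[name] = str(desc).strip()
        # Drop id-like columns (named "id" or ending in "_id") to reduce prompt noise.
        return {
            n: d
            for n, d in merged.items()
            if not n.lower().endswith("_id") and n.lower() != "id"
        }

    print(
        merge_descriptions(
            [
                ("order_id", "Surrogate key."),        # hypothetical sample rows
                ("status", "Order status."),
                ("status", "Order status (open, shipped, cancelled)."),
            ]
        )
    )
    # -> {'status': 'Order status (open, shipped, cancelled).'}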
mdbt/cmdline.py
CHANGED

@@ -5,7 +5,6 @@ import click
 from mdbt.build_dbt_docs_ai import BuildDBTDocs
 from mdbt.build_unit_test_data_ai import BuildUnitTestDataAI
 from mdbt.expectations_output_builder import ExpectationsOutputBuilder
-from mdbt.lightdash import Lightdash
 from mdbt.main import MDBT
 from mdbt.precommit_format import PrecommitFormat
 from mdbt.recce import Recce

@@ -243,7 +242,7 @@ def gbuild(ctx, main, full_refresh, threads):
     "--is_new",
     "-n",
     is_flag=True,
-    help="Passing this flag will bypass the questions such as 'is this a new model,' and 'add to git"
+    help="Passing this flag will bypass the questions such as 'is this a new model,' and 'add to git",
 )
 @click.pass_context
 def build_docs(ctx, select, sys_context, is_new):

@@ -251,6 +250,7 @@ def build_docs(ctx, select, sys_context, is_new):
     dbt_docs = BuildDBTDocs()
     dbt_docs.main(select, sys_context, is_new)

+
 @mdbt.command()
 @click.option(
     "--select",

@@ -266,53 +266,6 @@ def build_unit(ctx, select):
     build_unit_test_data.main(select)


-@mdbt.command()
-@click.option(
-    "--select",
-    "-s",
-    type=str,
-    help="Name of the model to start a lightdash preview for. If not provided, all models will be previewed.",
-)
-@click.option(
-    "--name",
-    "-n",
-    type=str,
-    help="Name of the lightdash preview. If no name given, the preview will take the name of the current branch.",
-)
-@click.option(
-    "--l43",
-    is_flag=True,
-    help="Include L3 and L4 models in the preview. Default is False.",
-)
-@click.pass_context
-def ld_preview(ctx, select, name, l43):
-    """Start a lightdash preview for a model."""
-    preview_name = name
-    Lightdash().lightdash_start_preview(ctx, select, preview_name, l43)
-
-
-@mdbt.command()
-@click.option("--select", "-s", type=str, help="Names of the model(s) to clean.")
-@click.option(
-    "--split-names", is_flag=True, help="Split names like isupdated into is_updated."
-)
-@click.option(
-    "--remove-airbyte",
-    is_flag=True,
-    help="Whether to remove Airbyte specific lines. Default is True.",
-)
-@click.option(
-    "--overwrite",
-    is_flag=True,
-    help="Will overwrite the files. If not set, files will be saved to a folder.",
-)
-@click.pass_context
-def clean_stg(select, split_names, remove_airbyte, overwrite):
-    """Designed to clean files in the L1_stg folders only"""
-    sql_model_cleaner = SQLModelCleaner()
-    sql_model_cleaner.main(select, split_names, remove_airbyte, overwrite)
-
-
 @mdbt.command()
 @click.option("--select", "-s", type=str, help="Name of model to sort YML columns for.")
 @click.option("--all-files", is_flag=True, help="Sort all YML files in the project.")

@@ -321,11 +274,13 @@ def sort_yaml(select, all_files, overwrite):
     sy = SortYAML()
     sy.main(select, all_files, overwrite)

+
 @mdbt.command()
 def sort_sql():
     c = ColumnSorter()
     c.main()

+
 @mdbt.command()
 @click.pass_context
 def pre_commit(ctx):
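The ld_preview and clean_stg commands are removed outright along with the Lightdash import; the surviving commands keep the same registration pattern. A minimal runnable sketch of that click wiring, with the command pared down to an illustrative stub (the real build_docs delegates to BuildDBTDocs):

    import click


    @click.group()
    @click.pass_context
    def mdbt(ctx):
        """mdbt command group (illustrative stub, not the real cmdline.py)."""


    @mdbt.command()
    @click.option("--select", "-s", type=str, help="Name of the model to document.")
    @click.option("--is_new", "-n", is_flag=True, help="Bypass interactive questions.")
    @click.pass_context
    def build_docs(ctx, select, is_new):
        # Stand-in body; the real command constructs BuildDBTDocs() and calls main().
        click.echo(f"Would build docs for: {select} (is_new={is_new})")


    if __name__ == "__main__":
        mdbt()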
mdbt/core.py
CHANGED

@@ -5,11 +5,12 @@ import subprocess
 import sys
 import typing as t

+import snowflake.connector as snow
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization
-import snowflake.connector as snow
 from dotenv import find_dotenv
 from dotenv import load_dotenv
+
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 load_dotenv(find_dotenv("../.env"))
 load_dotenv(find_dotenv(".env"))

@@ -29,12 +30,12 @@ class Core:
         self.dbt_execute_command_output = ""

     def _create_snowflake_connection(self):
-        rsa_key_str = os.environ.get('MAIN__RSA_KEY')
+        rsa_key_str = os.environ.get("MAIN__RSA_KEY")
         if not rsa_key_str:
-            raise ValueError('MAIN__RSA_KEY environment variable not set')
+            raise ValueError("MAIN__RSA_KEY environment variable not set")
         # The RSA key from the environment variable may be in single line or multi-line string
-        if '\\n' in rsa_key_str:
-            rsa_key_str = rsa_key_str.replace('\\n', '\n')
+        if "\\n" in rsa_key_str:
+            rsa_key_str = rsa_key_str.replace("\\n", "\n")
         rsa_key_bytes = rsa_key_str.encode()
         # Load private key object
         private_key = serialization.load_pem_private_key(

@@ -43,13 +44,13 @@ class Core:
             backend=default_backend(),
         )
         self._conn = snow.connect(
-            user=os.getenv('MAIN__USER'),
-            account=os.getenv('MAIN__ACCOUNT'),
-            database=os.getenv('MAIN__DATABASE'),
+            user=os.getenv("MAIN__USER"),
+            account=os.getenv("MAIN__ACCOUNT"),
+            database=os.getenv("MAIN__DATABASE"),
             private_key=private_key,
-            schema=os.getenv('MAIN__SCHEMA'),
-            warehouse=os.getenv('MAIN__WAREHOUSE'),
-            role='MDBT',
+            schema=os.getenv("MAIN__SCHEMA"),
+            warehouse=os.getenv("MAIN__WAREHOUSE"),
+            role="MDBT",
         )

         self._cur = self._conn.cursor()

@@ -123,7 +124,9 @@ class Core:
             raise Exception(f"Failure while running command: {' '.join(e.cmd)}")
             # sys.exit(e.returncode)

-    def get_models(self, select: str, all_files: bool = False) -> t.List[t.Dict[str, t.Any]]:
+    def get_models(
+        self, select: str, all_files: bool = False
+    ) -> t.List[t.Dict[str, t.Any]]:
         if not all_files:
             args = [
                 "--select",

@@ -138,4 +141,4 @@ class Core:
             ]
         ls_json = self.dbt_ls_to_json(args)

-        return ls_json
\ No newline at end of file
+        return ls_json
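Aside from import ordering and quoting, the connection logic is unchanged: MAIN__RSA_KEY may arrive as a single-line value in which newlines are the literal two characters \n, and the code normalizes them before PEM parsing. A short sketch of just that step, using a hypothetical placeholder value rather than a usable key:

    import os

    # Hypothetical placeholder value, not a usable key.
    os.environ["MAIN__RSA_KEY"] = (
        "-----BEGIN PRIVATE KEY-----\\nMIIEvg...\\n-----END PRIVATE KEY-----"
    )

    rsa_key_str = os.environ.get("MAIN__RSA_KEY")
    if not rsa_key_str:
        raise ValueError("MAIN__RSA_KEY environment variable not set")
    # A single-line value carries literal "\n" sequences; turn them into real newlines.
    if "\\n" in rsa_key_str:
        rsa_key_str = rsa_key_str.replace("\\n", "\n")

    print(rsa_key_str.splitlines())
    # ['-----BEGIN PRIVATE KEY-----', 'MIIEvg...', '-----END PRIVATE KEY-----']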
mdbt/prompts.py
CHANGED

@@ -5,7 +5,7 @@ class Prompts:
         return """
 You will help build DBT documentation YML files for a given SQL query. Sometimes you will be asked to generate a description from scratch, other times you will be asked to fill in missing columns that exist in the model, but not in the documentation.

-These models are built for Marki Microwave, a RF semicoductor company that designed and built RF and microwave components. The models are built to help the company understand their business better. The models are built in DBT and are used to generate reports and dashboards. For acronyms, or other terminology, consider in context of the RF semiconductor industry. 
+These models are built for Marki Microwave, a RF semicoductor company that designed and built RF and microwave components. The models are built to help the company understand their business better. The models are built in DBT and are used to generate reports and dashboards. For acronyms, or other terminology, consider in context of the RF semiconductor industry.

 Primary DBT Guidelines:

@@ -27,7 +27,7 @@ Primary DBT Guidelines:
 9. Reorder or order the column descriptions in the YML file in the same order they appear in the SQL query. If you are modifying an existing YML file, still re-order the elements, don't comment out the old element location and put a new element in.
 10. If modifying an existing YML, leave the value of materialized as is. Do not change it to `table` if it is `view` or vice versa.
 11. Use lowercase for all column names, metric, and dimension names. The sample data will come back with uppercase column names, but the YML file should have lowercase names.
- 
+
 12. If there is an primary key ID column as the first field, then add a
 ```
 data_tests:

@@ -73,7 +73,7 @@ models:

   - name: network_name
     description: "The name of the network associated with the order."
- 
+
   - name: medical_revenue
     description: "Total revenue from medical services."

@@ -90,7 +90,7 @@ This is a CSV data sample from the model:
         return """
 You will help build DBT documentation YML files for a given SQL query. Sometimes you will be asked to generate a description from scratch, other times you will be asked to fill in missing columns that exist in the model, but not in the documentation.

-These models are built for Marki Microwave, a RF semicoductor company that designed and built RF and microwave components. The models are built to help the company understand their business better. The models are built in DBT and are used to generate reports and dashboards. For acronyms, or other terminology, consider in context of the RF semiconductor industry. 
+These models are built for Marki Microwave, a RF semicoductor company that designed and built RF and microwave components. The models are built to help the company understand their business better. The models are built in DBT and are used to generate reports and dashboards. For acronyms, or other terminology, consider in context of the RF semiconductor industry.

 Primary DBT Guidelines:

mdbt/sort_yaml_fields.py
CHANGED
mdbt/sql_sorter.py
CHANGED

@@ -1,13 +1,14 @@
-
-
 #!/usr/bin/env python3
 import re
 import sys
-from typing import List, Tuple
+from typing import List
+from typing import Tuple

 import pyperclip
+
 from mdbt.core import Core

+
 class ColumnSorter(Core):
     """Sorts SQL select lines from the clipboard based on alias and data‐type rules."""

@@ -25,7 +26,9 @@ class ColumnSorter(Core):
         try:
             clipboard_content = pyperclip.paste()
         except pyperclip.PyperclipException:
-            sys.stderr.write('Error: could not access clipboard. Is pyperclip installed and supported?\n')
+            sys.stderr.write(
+                "Error: could not access clipboard. Is pyperclip installed and supported?\n"
+            )
             sys.exit(1)

         # 2) Split into individual lines

@@ -36,13 +39,15 @@ class ColumnSorter(Core):
         sorted_lines = sorter.sort_lines(lines)

         # 4) Join back together
-        result = '\n'.join(sorted_lines)
+        result = "\n".join(sorted_lines)

         # 5) Copy sorted result back to clipboard
         try:
             pyperclip.copy(result)
         except pyperclip.PyperclipException:
-            sys.stderr.write('Warning: could not write back to clipboard. Outputting to stdout instead.\n')
+            sys.stderr.write(
+                "Warning: could not write back to clipboard. Outputting to stdout instead.\n"
+            )
             print(result)
             sys.exit(0)

@@ -66,28 +71,28 @@ class ColumnSorter(Core):
         - alias: the alias (e.g. 'foo_id') or base column name if no AS.
         """
         # 1) Extract prefix (leading comma + whitespace), if present
-        m = re.match(r'^(\s*,\s*)(.*)$', line)
+        m = re.match(r"^(\s*,\s*)(.*)$", line)
         if m:
             prefix = m.group(1)
             rest = m.group(2).strip()
         else:
             # No leading comma, treat everything as rest
-            prefix = ''
+            prefix = ""
             rest = line.strip()

         # 2) Look for 'AS' (case-insensitive), split into left/right
         # Use regex to split on whitespace+as+whitespace, max once
         lower_rest = rest.lower()
-        if re.search(r'\s+as\s+', lower_rest):
-            parts = re.split(r'\s+as\s+', rest, maxsplit=1, flags=re.IGNORECASE)
+        if re.search(r"\s+as\s+", lower_rest):
+            parts = re.split(r"\s+as\s+", rest, maxsplit=1, flags=re.IGNORECASE)
             expression_part = parts[0].strip()
             alias = parts[1].strip()
         else:
             # No AS: take expression exactly as rest; derive alias from expression
             expression_part = rest
             # If there's a '::', drop the cast and use the part before it
-            if '::' in expression_part:
-                alias = expression_part.split('::', 1)[0].strip()
+            if "::" in expression_part:
+                alias = expression_part.split("::", 1)[0].strip()
             else:
                 # If nothing to split, alias is simply the whole rest
                 alias = expression_part

@@ -114,14 +119,14 @@ class ColumnSorter(Core):
         a = alias.lower()
         expr_lower = expression.lower()

-        if a.endswith('_id'):
+        if a.endswith("_id"):
             return 0
-        if a.endswith('_at'):
+        if a.endswith("_at"):
             return 1
-        if a.startswith('is_'):
+        if a.startswith("is_"):
             return 2
         # Check for VARIANT cast
-        if re.search(r'::\s*variant\b', expr_lower):
+        if re.search(r"::\s*variant\b", expr_lower):
             return 4
         # Otherwise, everything else is group 3
         return 3

@@ -153,13 +158,13 @@ class ColumnSorter(Core):
         # Determine if original expr already contained ' as alias_lower' (case-insensitive)
         # We can check if expr.lower().endswith(alias_lower) but that fails if casting was present.
         # Instead, if alias_lower != expr.split('::')[0].strip().lower(), we assume original used AS.
-        base_no_cast = expr.split('::', 1)[0].strip().lower()
+        base_no_cast = expr.split("::", 1)[0].strip().lower()
         if base_no_cast != alias_lower:
             # original must have had an explicit alias, so we add ' as alias'
-            line_text = f'{prefix}{expr} as {alias_lower}'
+            line_text = f"{prefix}{expr} as {alias_lower}"
         else:
             # no AS needed
-            line_text = f'{prefix}{expr}'
+            line_text = f"{prefix}{expr}"
         sorted_lines.append(line_text)

         return sorted_lines
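Only string quoting changes in the sorter; the grouping rule itself is untouched. A standalone sketch of that rule as the diff shows it: aliases ending in _id sort first, then _at timestamps, then is_ flags, everything else, and VARIANT casts last.

    import re


    def sort_group(alias: str, expression: str) -> int:
        # Mirrors ColumnSorter: ids first, then timestamps, booleans, the rest,
        # and VARIANT casts last.
        a = alias.lower()
        if a.endswith("_id"):
            return 0
        if a.endswith("_at"):
            return 1
        if a.startswith("is_"):
            return 2
        if re.search(r"::\s*variant\b", expression.lower()):
            return 4
        return 3


    cols = [
        ("raw_payload", "payload::variant"),
        ("is_active", "is_active"),
        ("updated_at", "updated_at"),
        ("order_id", "o.id"),
    ]
    print(sorted(cols, key=lambda c: sort_group(*c)))
    # [('order_id', 'o.id'), ('updated_at', 'updated_at'),
    #  ('is_active', 'is_active'), ('raw_payload', 'payload::variant')]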
{mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mdbt
-Version: 0.4.39
+Version: 0.5.2
 Summary: A CLI tool to manage dbt builds with state handling and manifest management
 Author: Craig Lathrop
 Author-email: info@markimicrowave.com

@@ -12,10 +12,10 @@ License-File: LICENSE
 Requires-Dist: click<9.0.0,>=8.0.0
 Requires-Dist: pyperclip<2.0.0,>=1.8.0
 Requires-Dist: snowflake-connector-python[pandas]<4.10.0,>=3.17.0
-Requires-Dist: python-dotenv<
-Requires-Dist: openai<
-Requires-Dist: sqlfluff
-Requires-Dist: sqlfluff-templater-dbt
+Requires-Dist: python-dotenv<2.2.0,>=1.0.0
+Requires-Dist: openai<5.0.0,>=2.6.0
+Requires-Dist: sqlfluff<4.2.0,>=3.5.0
+Requires-Dist: sqlfluff-templater-dbt<4.2.0,>=3.5.0
 Requires-Dist: wordninja==2.0.0
 Requires-Dist: ruamel.yaml<0.18.0
 Requires-Dist: recce<=0.44.3
mdbt-0.5.2.dist-info/RECORD
ADDED

@@ -0,0 +1,19 @@
+mdbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mdbt/ai_core.py,sha256=EyuEciA6EIRfZ2vj4YxCLkdfmkke7tbZWPjBb_JOuNw,3961
+mdbt/build_dbt_docs_ai.py,sha256=Ci7ilLR-YQDMLWo-K4AOJ_SKDe6BxINXJ5UA9NTGOR8,9440
+mdbt/build_unit_test_data_ai.py,sha256=SJlxEjwrH3zyXbZFLQFl3_BZYIM3WLdubA8eUEYRbQU,4290
+mdbt/cmdline.py,sha256=WTx1z-vgL40zUOsgF5Q-NZXPSprNovOnTZ-sYqhzwtc,9378
+mdbt/core.py,sha256=kWcMZhYCeSAey1YK7UsimzD4UZIY_dnX6V1W5XmZtMI,5105
+mdbt/expectations_output_builder.py,sha256=AXKEM-WO7FecYzfMLwzsOnQnVf7AiHBi_khyidE2lJs,3195
+mdbt/main.py,sha256=UO3r7zOXmVpjnAIz2eeZVeQMFmgXa698Gm42Wo2qhRU,16939
+mdbt/precommit_format.py,sha256=9HC10mh4QQIgaQSxdAwaCXbsiHT9cCrLdbc3PAQkotc,2845
+mdbt/prompts.py,sha256=sqDqbrxfF7eFm5CcoiEAkwsBEVmX_1qVgLMeVoqzqNY,13979
+mdbt/recce.py,sha256=P14CvWd7lRgTPUW7BVMLmcclSqby-_uSgpoI3r5VjTA,2327
+mdbt/sort_yaml_fields.py,sha256=rrPW-DqX52Dd1iFyKLluapbMl0jXiKxfuoq3L3M66vo,5055
+mdbt/sql_sorter.py,sha256=UVn7vFzomC4wGuzBBXLAP9SJtg40-O5fND_Fbcnzw44,6545
+mdbt-0.5.2.dist-info/licenses/LICENSE,sha256=DrJpgQEYhttwpwcE56BzrGZ1aEfR_tqfaxsI5NlsYOE,1072
+mdbt-0.5.2.dist-info/METADATA,sha256=TyHT9ATJRCqViiIuy4ZZ9f19KztKQk0czfHCljJ2-ME,933
+mdbt-0.5.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mdbt-0.5.2.dist-info/entry_points.txt,sha256=fVft1CYVP2MtZMMtsXN67S1T5RszfgKtAuaeoKLdCow,43
+mdbt-0.5.2.dist-info/top_level.txt,sha256=-PP7vAl9EgVjRTzBovElczsPNjOfja6kjZssNmv5vo0,5
+mdbt-0.5.2.dist-info/RECORD,,
mdbt/lightdash.py
DELETED

@@ -1,84 +0,0 @@
-import json
-import os
-import subprocess
-import sys
-
-from click.core import Context
-
-from mdbt.core import Core
-
-
-class Lightdash(Core):
-
-    def __init__(self, test_mode=False):
-        super().__init__(test_mode=test_mode)
-
-    def lightdash_start_preview(
-        self, ctx: Context, select: str, preview_name: str, l43: bool
-    ):
-        # Check to make sure the LIGHTDASH_PROJECT env variable is set
-        if not os.getenv("LIGHTDASH_PROJECT"):
-            print(
-                "LIGHTDASH_PROJECT environment variable not set. Set this key to the ID of the project you will "
-                "promote charts to."
-            )
-            sys.exit(1)
-        else:
-            print(f"Building for LIGHTDASH_PROJECT: {os.getenv('LIGHTDASH_PROJECT')}")
-
-        self._check_lightdash_for_updates()
-        if not preview_name:
-            # If no preview name, use the current name of the git branch
-            result = subprocess.run(
-                ["git", "branch", "--show-current"], stdout=subprocess.PIPE, text=True
-            )
-            preview_name = result.stdout.strip()
-
-        args = ["lightdash", "start-preview", "--name", preview_name]
-
-        if l43:
-            args = args + ["-s", "tag:l3 tag:l4"]
-
-        if select:
-            args = args + ["--select", select]
-
-        try:
-            print(f'Running command: {" ".join(args)}')
-            subprocess.run(args, check=True)
-        except subprocess.CalledProcessError as e:
-            self.handle_cmd_line_error(e)
-
-    @staticmethod
-    def _check_lightdash_for_updates():
-        api_str = 'curl -s "https://app.lightdash.cloud/api/v1/health"'
-
-        try:
-            result = subprocess.run(
-                api_str, shell=True, check=True, text=True, capture_output=True
-            )
-            # Convert to JSON
-            result_json = json.loads(result.stdout)
-        except subprocess.CalledProcessError as e:
-            print(f"Failure while running command: {api_str}")
-            print(e.stderr)
-            print(e.stdout)
-            sys.exit(e.returncode)
-
-        api_version = result_json["results"]["version"]
-
-        result = subprocess.run(
-            ["lightdash", "--version"], check=True, text=True, capture_output=True
-        )
-
-        current_version = result.stdout.strip()
-
-        if api_version != current_version:
-            print(
-                f"API version {api_version} does not match current version {current_version}. Upgrading."
-            )
-            args = ["npm", "install", "-g", f"@lightdash/cli@{api_version}"]
-            subprocess.run(args, check=True)
-        else:
-            print(
-                f"API version {api_version} matches current version {current_version}."
-            )
mdbt-0.4.39.dist-info/RECORD
DELETED

@@ -1,20 +0,0 @@
-mdbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mdbt/ai_core.py,sha256=EyuEciA6EIRfZ2vj4YxCLkdfmkke7tbZWPjBb_JOuNw,3961
-mdbt/build_dbt_docs_ai.py,sha256=Yx0AYdsl67WKUJmA_hPeEb8CKRoWrWAxbHqNcJjPiyM,9248
-mdbt/build_unit_test_data_ai.py,sha256=SJlxEjwrH3zyXbZFLQFl3_BZYIM3WLdubA8eUEYRbQU,4290
-mdbt/cmdline.py,sha256=meNATu3BzP_4Htt5VcoT923mlh9NsfK8og0JQgn9PCE,10822
-mdbt/core.py,sha256=XO6_KwMnVumtilj0OWZBcfPM7SwaUOba2lkw1apxx6w,5089
-mdbt/expectations_output_builder.py,sha256=AXKEM-WO7FecYzfMLwzsOnQnVf7AiHBi_khyidE2lJs,3195
-mdbt/lightdash.py,sha256=qJBJ-pc5mN8GBA1MZElRhtA8aldrX-AgvHtha4iOA-Y,2745
-mdbt/main.py,sha256=UO3r7zOXmVpjnAIz2eeZVeQMFmgXa698Gm42Wo2qhRU,16939
-mdbt/precommit_format.py,sha256=9HC10mh4QQIgaQSxdAwaCXbsiHT9cCrLdbc3PAQkotc,2845
-mdbt/prompts.py,sha256=2vCvh9hamgop92kGGaMKtap11F2MZiM7hHKjcwX4lhQ,13992
-mdbt/recce.py,sha256=P14CvWd7lRgTPUW7BVMLmcclSqby-_uSgpoI3r5VjTA,2327
-mdbt/sort_yaml_fields.py,sha256=QF-zwXgeZ4iuaV3CfLxz4w30EqC7y34uBuowO4DbYUA,5057
-mdbt/sql_sorter.py,sha256=8bd6svrtcXp7ePT2g4FTGLTW55qbsVjXgUmba7L-G-4,6467
-mdbt-0.4.39.dist-info/licenses/LICENSE,sha256=DrJpgQEYhttwpwcE56BzrGZ1aEfR_tqfaxsI5NlsYOE,1072
-mdbt-0.4.39.dist-info/METADATA,sha256=PYkAn3kBAqlmc6vV45pJh7qRxW-WvJoWUHYyDKnjOn0,920
-mdbt-0.4.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mdbt-0.4.39.dist-info/entry_points.txt,sha256=fVft1CYVP2MtZMMtsXN67S1T5RszfgKtAuaeoKLdCow,43
-mdbt-0.4.39.dist-info/top_level.txt,sha256=-PP7vAl9EgVjRTzBovElczsPNjOfja6kjZssNmv5vo0,5
-mdbt-0.4.39.dist-info/RECORD,,
{mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/entry_points.txt
File without changes

{mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/licenses/LICENSE
File without changes

{mdbt-0.4.39.dist-info → mdbt-0.5.2.dist-info}/top_level.txt
File without changes