mdbt 0.4.33__tar.gz → 0.4.34__tar.gz
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {mdbt-0.4.33 → mdbt-0.4.34}/PKG-INFO +1 -1
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/ai_core.py +1 -1
- mdbt-0.4.34/mdbt/build_dbt_docs_ai.py +145 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/build_unit_test_data_ai.py +1 -1
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/core.py +17 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/sort_yaml_fields.py +2 -18
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt.egg-info/PKG-INFO +1 -1
- {mdbt-0.4.33 → mdbt-0.4.34}/setup.py +1 -1
- mdbt-0.4.33/mdbt/build_dbt_docs_ai.py +0 -142
- {mdbt-0.4.33 → mdbt-0.4.34}/LICENSE +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/README.md +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/__init__.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/cmdline.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/expectations_output_builder.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/lightdash.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/main.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/precommit_format.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/prompts.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/recce.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt/sql_sorter.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt.egg-info/SOURCES.txt +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt.egg-info/dependency_links.txt +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt.egg-info/entry_points.txt +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt.egg-info/requires.txt +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/mdbt.egg-info/top_level.txt +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/setup.cfg +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/tests/test_main.py +0 -0
- {mdbt-0.4.33 → mdbt-0.4.34}/tests/test_sort_yaml_fields.py +0 -0
--- mdbt-0.4.33/mdbt/ai_core.py
+++ mdbt-0.4.34/mdbt/ai_core.py
@@ -16,7 +16,7 @@ from mdbt.prompts import Prompts
 
 class AiCore(Core):
 
-    def __init__(self, model: str = "gpt-
+    def __init__(self, model: str = "gpt-5", test_mode: bool = False):
         super().__init__(test_mode=test_mode)
         self.model = model
         # Make sure you have OPENAI_API_KEY set in your environment variables.
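The only functional change to `AiCore` is the constructor's default model, which moves to `"gpt-5"`; callers that pass `model` explicitly are unaffected. A minimal usage sketch, assuming `mdbt` is installed and `OPENAI_API_KEY` is exported (the explicit model string below is purely illustrative):

```python
from mdbt.ai_core import AiCore

# Picks up the new default model ("gpt-5"); requires OPENAI_API_KEY in the environment.
default_core = AiCore()

# Existing call sites can keep pinning a model explicitly; the value here is a placeholder.
pinned_core = AiCore(model="some-other-model", test_mode=True)
```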
--- /dev/null
+++ mdbt-0.4.34/mdbt/build_dbt_docs_ai.py
@@ -0,0 +1,145 @@
+import subprocess
+
+import pyperclip
+from dotenv import find_dotenv
+from dotenv import load_dotenv
+
+from mdbt.ai_core import AiCore
+from mdbt.prompts import Prompts
+
+load_dotenv(find_dotenv("../.env"))
+load_dotenv(find_dotenv(".env"))
+
+
+class BuildDBTDocs(AiCore):
+    """
+    # Make sure you have OPENAI_API_KEY set in your environment variables.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def main(self, select, sys_context, is_new=False):
+        if not is_new:
+            print(
+                """
+1) Build new DBT documentation.
+2) Check existing DBT documentation against model for missing definitions.
+                """
+            )
+            mode = int(input())
+        else:
+            mode = 1
+        print("Getting file.")
+        models = self.get_models(select)
+        if not models:
+            raise ValueError(f"No models found for select '{select}'")
+
+        for model in models:
+            original_file_path = model["original_file_path"]
+            model_name = model["name"]
+            if "l4" in original_file_path.lower() or "l3" in original_file_path.lower():
+                system_instructions = Prompts().dbt_docs_gte_l3_prompt
+            else:
+                system_instructions = Prompts().dbt_docs_lte_l2_prompt
+
+            if sys_context:
+                system_instructions += f"\nContext about system docs are generated for: \n{sys_context}\n"
+
+            sample_data = self._get_sample_data_from_snowflake([model_name])
+
+            system_instructions = system_instructions + sample_data[model_name]
+
+            # Might bring this back in the future.
+            extra_info = ""
+
+            if mode == 1:
+                # Build new documentation
+                user_input = self.build_user_msg_mode_1(original_file_path, extra_info)
+                yml_file_path = original_file_path.replace(".sql", ".yml")
+            elif mode == 2:
+                # Check existing documentation
+                yml_file_path = original_file_path[:-4] + ".yml"
+                user_input = self.build_user_msg_mode_2(
+                    original_file_path, yml_file_path, extra_info
+                )
+            else:
+                print(mode)
+                raise ValueError("Invalid mode")
+
+            messages = [
+                {"role": "user", "content": system_instructions + "\n" + user_input}
+            ]
+
+            assistant_responses = []
+            result = self.send_message(messages)
+            assistant_responses.append(result)
+
+            messages.append({"role": "assistant", "content": assistant_responses[0]})
+            print(assistant_responses[0])
+            output = assistant_responses[0]
+            # Check for ``` at end of output (str) and remove
+            # Remove trailing markdown code fences if present
+            lines = output.split('\n')
+            new_lines = []
+            for line in lines:
+                if '```' not in line:
+                    new_lines.append(line)
+            output = '\n'.join(new_lines)
+            if not is_new:
+                clip_or_file = input(
+                    f"1. to copy to clipboard\n2, to write to file ({yml_file_path}\n:"
+                )
+            else:
+                clip_or_file = "2"
+
+            if clip_or_file == "1":
+                print("Output copied to clipboard")
+                pyperclip.copy(output)
+            elif clip_or_file == "2":
+                if mode == 2:
+                    # Make a backup of the current YML file.
+                    self.backup_existing_yml_file(yml_file_path)
+                with open(yml_file_path, "w") as file:
+                    file.write(output)
+                if not self.is_file_committed(yml_file_path):
+                    if not is_new:
+                        commit_file = input("Press 1 to add to git, any other key to byapss: ")
+                    else:
+                        commit_file = "1"
+
+                    if commit_file == "1":
+                        subprocess.run(["git", "add", yml_file_path])
+
+    @staticmethod
+    def backup_existing_yml_file(yml_file_path):
+        with open(yml_file_path, "r") as file:
+            yml_content = file.read()
+        with open(yml_file_path + ".bak", "w") as file:
+            file.write(yml_content)
+
+    def build_user_msg_mode_1(self, _sql_file_path: str, extra_info: str) -> str:
+        self.read_file(_sql_file_path)
+        model_name = _sql_file_path.split("/")[-1].split(".")[0]
+        prompt_str = f"Build new DBT documentation for the following SQL query with model name {model_name}"
+        if len(extra_info):
+            prompt_str += f"\n{extra_info}"
+
+        return prompt_str
+
+    def build_user_msg_mode_2(
+        self, _sql_file_path: str, _yml_file_path: str, extra_info: str
+    ) -> str:
+        self.read_file(_sql_file_path)
+        yml = self.read_file(_yml_file_path)
+        model_name = _sql_file_path.split("/")[-1].split(".")[0]
+        prompt_str = f"Check for missing columns in the following DBT documentation for the following SQL query with model name {model_name}. Identify any columns in the DBT documentation that do not exist in the SQL and comment them out."
+        if len(extra_info):
+            prompt_str += f"\n {extra_info}"
+        prompt_str += f"\nYML File Contents:\n{yml}"
+
+        return prompt_str
+
+
+if __name__ == "__main__":
+    BuildDBTDocs().main("revenue_by_dvm")
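Compared with the 0.4.33 module (removed further below), `main` now takes a dbt `--select` expression and loops over every matching model instead of operating on a single model name. A rough usage sketch, assuming the package is installed with a dbt project available and `OPENAI_API_KEY` set; the selector and context strings are placeholders:

```python
from mdbt.build_dbt_docs_ai import BuildDBTDocs

# is_new=True skips the interactive prompts: mode 1 (build new docs) is used, the generated
# YAML is written next to each selected model's .sql file, and the file is git-added if it
# is not already committed.
BuildDBTDocs().main(
    select="tag:finance",                      # placeholder dbt selection expression
    sys_context="Describes the finance mart",  # placeholder free-text context appended to the prompt
    is_new=True,
)
```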
--- mdbt-0.4.33/mdbt/core.py
+++ mdbt-0.4.34/mdbt/core.py
@@ -122,3 +122,20 @@ class Core:
             print(e.stdout)
             raise Exception(f"Failure while running command: {' '.join(e.cmd)}")
             # sys.exit(e.returncode)
+
+    def get_models(self, select: str, all_files: bool = False) -> t.List[t.Dict[str, t.Any]]:
+        if not all_files:
+            args = [
+                "--select",
+                select,
+                "--exclude",
+                "resource_type:test resource_type:seed resource_type:snapshot resource_type:source",
+            ]
+        else:
+            args = [
+                "--exclude",
+                "resource_type:test resource_type:seed resource_type:snapshot resource_type:source",
+            ]
+        ls_json = self.dbt_ls_to_json(args)
+
+        return ls_json
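The new `Core.get_models` helper centralises the `dbt ls` selection logic that `SortYAML` previously carried inline (removed in the next hunks), excluding tests, seeds, snapshots, and sources. A small sketch of how it might be called, assuming `Core` is importable from `mdbt.core`; the selector is a placeholder:

```python
from mdbt.core import Core

core = Core(test_mode=False)  # test_mode is the only constructor argument visible in this diff
models = core.get_models("tag:finance")  # placeholder selector; pass all_files=True to list everything
for model in models:
    # Each entry appears to be a dbt node dict from `dbt ls`; "name" and
    # "original_file_path" are the keys used elsewhere in this release.
    print(model["name"], model["original_file_path"])
```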
--- mdbt-0.4.33/mdbt/sort_yaml_fields.py
+++ mdbt-0.4.34/mdbt/sort_yaml_fields.py
@@ -35,7 +35,7 @@ class SortYAML(AiCore):
         all_files: Optional[bool] = False,
         overwrite: Optional[bool] = False,
     ):
-        models = self.
+        models = self.get_models(select, all_files)
         if len(models) > 1 and not overwrite:
             raise ValueError(
                 "Multiple models found. Default copy to clipboard only works with one model. Use the --overwrite flag "
@@ -66,23 +66,7 @@ class SortYAML(AiCore):
         else:
             self.save_yml_to_clipboard(updated_schema)
 
-
-        mmw = MDBT()
-        if not all_files:
-            args = [
-                "--select",
-                select,
-                "--exclude",
-                "resource_type:test resource_type:seed resource_type:snapshot resource_type:source",
-            ]
-        else:
-            args = [
-                "--exclude",
-                "resource_type:test resource_type:seed resource_type:snapshot resource_type:source",
-            ]
-        ls_json = mmw.dbt_ls_to_json(args)
-
-        return ls_json
+
 
     @staticmethod
     def _get_schema_path_and_table(
--- mdbt-0.4.33/mdbt/build_dbt_docs_ai.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import subprocess
-
-import pyperclip
-from dotenv import find_dotenv
-from dotenv import load_dotenv
-
-from mdbt.ai_core import AiCore
-from mdbt.prompts import Prompts
-
-load_dotenv(find_dotenv("../.env"))
-load_dotenv(find_dotenv(".env"))
-
-
-class BuildDBTDocs(AiCore):
-    """
-    # Make sure you have OPENAI_API_KEY set in your environment variables.
-    """
-
-    def __init__(self):
-        super().__init__()
-
-    def main(self, model_name, sys_context, is_new=False):
-        if model_name.endswith(".sql"):
-            model_name = model_name[:-4]
-        if not is_new:
-            print(
-                """
-1) Build new DBT documentation.
-2) Check existing DBT documentation against model for missing definitions.
-                """
-            )
-            mode = int(input())
-        else:
-            mode = 1
-        print("Getting file.")
-        sql_file_path = self.get_file_path(model_name)
-
-        if "l4" in sql_file_path.lower() or "l3" in sql_file_path.lower():
-            system_instructions = Prompts().dbt_docs_gte_l3_prompt
-        else:
-            system_instructions = Prompts().dbt_docs_lte_l2_prompt
-
-        if sys_context:
-            system_instructions += f"\nContext about system docs are generated for: \n{sys_context}\n"
-
-        sample_data = self._get_sample_data_from_snowflake([model_name])
-
-        system_instructions = system_instructions + sample_data[model_name]
-
-        # Might bring this back in the future.
-        extra_info = ""
-
-        if mode == 1:
-            # Build new documentation
-            user_input = self.build_user_msg_mode_1(sql_file_path, extra_info)
-            yml_file_path = sql_file_path.replace(".sql", ".yml")
-        elif mode == 2:
-            # Check existing documentation
-            yml_file_path = sql_file_path[:-4] + ".yml"
-            user_input = self.build_user_msg_mode_2(
-                sql_file_path, yml_file_path, extra_info
-            )
-        else:
-            print(mode)
-            raise ValueError("Invalid mode")
-
-        messages = [
-            {"role": "user", "content": system_instructions + "\n" + user_input}
-        ]
-
-        assistant_responses = []
-        result = self.send_message(messages)
-        assistant_responses.append(result)
-
-        messages.append({"role": "assistant", "content": assistant_responses[0]})
-        print(assistant_responses[0])
-        output = assistant_responses[0]
-        # Check for ``` at end of output (str) and remove
-        # Remove trailing markdown code fences if present
-        lines = output.split('\n')
-        new_lines = []
-        for line in lines:
-            if '```' not in line:
-                new_lines.append(line)
-        output = '\n'.join(new_lines)
-        if not is_new:
-            clip_or_file = input(
-                f"1. to copy to clipboard\n2, to write to file ({yml_file_path}\n:"
-            )
-        else:
-            clip_or_file = "2"
-
-        if clip_or_file == "1":
-            print("Output copied to clipboard")
-            pyperclip.copy(output)
-        elif clip_or_file == "2":
-            if mode == 2:
-                # Make a backup of the current YML file.
-                self.backup_existing_yml_file(yml_file_path)
-            with open(yml_file_path, "w") as file:
-                file.write(output)
-            if not self.is_file_committed(yml_file_path):
-                if not is_new:
-                    commit_file = input("Press 1 to add to git, any other key to byapss: ")
-                else:
-                    commit_file = "1"
-
-                if commit_file == "1":
-                    subprocess.run(["git", "add", yml_file_path])
-
-    @staticmethod
-    def backup_existing_yml_file(yml_file_path):
-        with open(yml_file_path, "r") as file:
-            yml_content = file.read()
-        with open(yml_file_path + ".bak", "w") as file:
-            file.write(yml_content)
-
-    def build_user_msg_mode_1(self, _sql_file_path: str, extra_info: str) -> str:
-        self.read_file(_sql_file_path)
-        model_name = _sql_file_path.split("/")[-1].split(".")[0]
-        prompt_str = f"Build new DBT documentation for the following SQL query with model name {model_name}"
-        if len(extra_info):
-            prompt_str += f"\n{extra_info}"
-
-        return prompt_str
-
-    def build_user_msg_mode_2(
-        self, _sql_file_path: str, _yml_file_path: str, extra_info: str
-    ) -> str:
-        self.read_file(_sql_file_path)
-        yml = self.read_file(_yml_file_path)
-        model_name = _sql_file_path.split("/")[-1].split(".")[0]
-        prompt_str = f"Check for missing columns in the following DBT documentation for the following SQL query with model name {model_name}. Identify any columns in the DBT documentation that do not exist in the SQL and comment them out."
-        if len(extra_info):
-            prompt_str += f"\n {extra_info}"
-        prompt_str += f"\nYML File Contents:\n{yml}"
-
-        return prompt_str
-
-
-if __name__ == "__main__":
-    BuildDBTDocs().main("revenue_by_dvm")