satif-ai 0.2.10__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {satif_ai-0.2.10 → satif_ai-0.2.11}/PKG-INFO +1 -1
- {satif_ai-0.2.10 → satif_ai-0.2.11}/pyproject.toml +1 -1
- satif_ai-0.2.11/satif_ai/transformation_builders/syncpulse.py +308 -0
- satif_ai-0.2.10/satif_ai/transformation_builders/syncpulse.py +0 -277
- {satif_ai-0.2.10 → satif_ai-0.2.11}/LICENSE +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/README.md +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/__init__.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/adapters/__init__.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/adapters/tidy.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/standardize.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/standardizers/__init__.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/standardizers/ai.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/standardizers/ai_csv.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/standardizers/ai_xlsx.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/transform.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/transformation_builders/__init__.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/utils/__init__.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/utils/merge_sdif.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/utils/openai_mcp.py +0 -0
- {satif_ai-0.2.10 → satif_ai-0.2.11}/satif_ai/utils/zip.py +0 -0
@@ -0,0 +1,308 @@
|
|
1
|
+
import base64
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
from collections import defaultdict
|
5
|
+
from contextvars import ContextVar
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
8
|
+
|
9
|
+
from agents import Agent, Runner, function_tool
|
10
|
+
from agents.mcp.server import MCPServer
|
11
|
+
from mcp import ClientSession
|
12
|
+
from satif_core import AsyncTransformationBuilder
|
13
|
+
from satif_core.types import FilePath
|
14
|
+
from satif_sdk.code_executors.local_executor import LocalCodeExecutor
|
15
|
+
from satif_sdk.comparators import get_comparator
|
16
|
+
from satif_sdk.representers import get_representer
|
17
|
+
from satif_sdk.transformers import CodeTransformer
|
18
|
+
|
19
|
+
# Per-build state handed from `SyncpulseTransformationBuilder.build` to the
# `execute_transformation` tool. `build` sets all three before running the
# agent and resets them in its `finally` block; the tool only reads them.
# Every variable defaults to None, which the tool treats as "not initialised".

# Absolute path of the SDIF database the candidate code is executed against.
CONTEXT_INPUT_SDIF_PATH: ContextVar[Optional[Path]] = ContextVar(
    "CONTEXT_INPUT_SDIF_PATH",
    default=None,
)

# Maps each example output file (path used for local reading) to the
# agent-facing file name the generated output is expected to carry.
CONTEXT_OUTPUT_TARGET_FILES: ContextVar[
    Optional[Dict[Union[str, Path], str]]
] = ContextVar(
    "CONTEXT_OUTPUT_TARGET_FILES",
    default=None,
)

# When truthy, comparators are asked to check structure only.
CONTEXT_SCHEMA_ONLY: ContextVar[Optional[bool]] = ContextVar(
    "CONTEXT_SCHEMA_ONLY",
    default=None,
)
|
28
|
+
|
29
|
+
|
30
|
+
def _format_comparison_output(
    comparison_result: Dict[str, Any],
    schema_only_mode: Optional[bool],
    source_file_display_name: str,
    target_file_display_name: str,
) -> str:
    """
    Render one comparison result as a single agent-facing message.

    The message always starts with a prefix naming the source and target
    files. Normally the raw ``comparison_result`` mapping follows it. The one
    exception: in schema-only mode, when the result is flagged equivalent and
    both reported row counts are zero, a fixed warning replaces the raw
    result so the agent is told that two empty files matched.

    Args:
        comparison_result: Mapping returned by a comparator; the keys read
            here are ``are_equivalent`` and ``details.row_comparison``.
        schema_only_mode: Only the literal ``True`` enables the special case.
        source_file_display_name: Name shown for the generated file.
        target_file_display_name: Name shown for the example file.

    Returns:
        The formatted message.
    """
    header = f"Comparison for {source_file_display_name} [SOURCE] with {target_file_display_name} [TARGET]:"

    both_sides_empty = False
    if schema_only_mode is True and comparison_result.get("are_equivalent") is True:
        row_info = comparison_result.get("details", {}).get("row_comparison", {})
        reported_counts = (row_info.get("row_count1"), row_info.get("row_count2"))
        # Missing or non-numeric counts never trigger the special message.
        both_sides_empty = all(
            isinstance(count, (int, float)) and count == 0
            for count in reported_counts
        )

    if both_sides_empty:
        return f"{header} Files have the same headers but are both empty (no data rows). This should not happen. Please verify the instructions and try again."

    # Fall back to dumping the raw comparison mapping.
    return f"{header} {comparison_result}"
|
59
|
+
|
60
|
+
|
61
|
+
@function_tool
async def execute_transformation(code: str) -> str:
    """Executes the transformation code on the input and returns the
    comparison difference between the transformed output and the target output example.

    Args:
        code: The code to execute on the input.
    """
    # NOTE: the docstring above is left untouched on purpose; this function is
    # wrapped by `function_tool`, so its text may be surfaced to the agent.

    # Per-build state published by SyncpulseTransformationBuilder.build.
    sdif_path = CONTEXT_INPUT_SDIF_PATH.get()
    target_files = CONTEXT_OUTPUT_TARGET_FILES.get()
    schema_only = CONTEXT_SCHEMA_ONLY.get()

    if input_is_missing := (sdif_path is None or target_files is None):
        return "Error: Transformation context not initialized correctly via contextvars"

    # Run the candidate code against the input SDIF.
    transformer = CodeTransformer(
        function=code,
        code_executor=LocalCodeExecutor(disable_security_warning=True),
    )
    generated_output_path = transformer.export(sdif_path)

    compare_options = {"check_structure_only": True} if schema_only else {}

    # Single generated file: only meaningful when exactly one target exists.
    if not os.path.isdir(generated_output_path):
        if len(target_files) != 1:
            return "Error: Single output file generated but multiple target files expected"
        example_path = list(target_files.keys())[0]
        target_name = list(target_files.values())[0]
        # The comparator is chosen from the example file's extension.
        extension = str(example_path).split(".")[-1]
        outcome = get_comparator(extension).compare(
            generated_output_path, example_path, **compare_options
        )
        return _format_comparison_output(
            outcome, schema_only, str(generated_output_path), target_name
        )

    # Directory of generated files: compare each expected target by name.
    produced_names = os.listdir(generated_output_path)
    report_lines = []
    for example_path, target_name in target_files.items():
        if target_name not in produced_names:
            report_lines.append(
                f"Error: {target_name} not found in the generated output"
            )
            continue
        produced_path = os.path.join(generated_output_path, target_name)
        # Here the comparator is chosen from the agent-facing file name.
        extension = target_name.split(".")[-1]
        outcome = get_comparator(extension).compare(
            produced_path, example_path, **compare_options
        )
        report_lines.append(
            _format_comparison_output(
                outcome, schema_only, produced_path, target_name
            )
        )

    return "\n".join(report_lines)
|
138
|
+
|
139
|
+
|
140
|
+
class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
    """This class is used to build a transformation code that will be used to transform a SDIF database into a set of files following the format of the given output files.

    `build` collects the input schema and sample (and optionally those of an
    output SDIF) through the MCP session, asks the representers to describe
    every example output file, renders the ``create_transformation`` MCP
    prompt and runs an agent equipped with the ``execute_transformation``
    tool. The code block found in the agent's final answer is returned.
    """

    def __init__(
        self,
        mcp_server: MCPServer,
        mcp_session: ClientSession,
        llm_model: str = "o4-mini",
    ):
        """Store the collaborators used by `build`.

        Args:
            mcp_server: MCP server made available to the agent.
            mcp_session: Session used to read resources and fetch the prompt.
            llm_model: Model name passed to the agent.
        """
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model

    async def build(
        self,
        sdif: Path,
        output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
        output_sdif: Optional[Path] = None,
        instructions: str = "",
        schema_only: bool = False,
        representer_kwargs: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Ask the agent for transformation code and return it.

        Args:
            sdif: Path to the input SDIF database.
            output_target_files: Example output file(s). Either a single
                path, a list of paths, or a mapping from example path to the
                agent-facing file name.
            output_sdif: Optional SDIF whose schema and sample are added to
                the prompt. Failures reading it are printed and replaced by
                "N/A".
            instructions: Free-form instructions for the agent.
            schema_only: When true, comparisons check structure only and the
                output sample is reported as unavailable in the prompt.
            representer_kwargs: Extra keyword arguments forwarded to every
                representer.

        Returns:
            The transformation code extracted from the agent's final output.

        Raises:
            Exceptions raised while reading the input schema/sample, fetching
            the prompt or running the agent are not caught here.
        """
        resolved_input_sdif_path = Path(sdif).resolve()

        # OUTPUT_TARGET_FILES keys are absolute paths to original example files for local reading by representers/comparators.
        # Values are agent-facing filenames.
        resolved_output_target_files = self._resolve_output_target_files(
            output_target_files
        )

        # Publish the per-build state read by the `execute_transformation` tool.
        token_input_path = CONTEXT_INPUT_SDIF_PATH.set(resolved_input_sdif_path)
        token_output_files = CONTEXT_OUTPUT_TARGET_FILES.set(
            resolved_output_target_files
        )
        token_schema_only = CONTEXT_SCHEMA_ONLY.set(schema_only)

        try:
            # We must encode the path because special characters are not allowed in mcp read_resource()
            input_sdif_mcp_uri_path = base64.b64encode(
                str(resolved_input_sdif_path).encode()
            ).decode()
            # Encode the resolved path, exactly as for the input SDIF, so the
            # URI does not depend on the current working directory.
            output_sdif_mcp_uri_path = (
                base64.b64encode(str(Path(output_sdif).resolve()).encode()).decode()
                if output_sdif
                else None
            )

            input_schema = await self.mcp_session.read_resource(
                f"schema://{input_sdif_mcp_uri_path}"
            )
            input_sample = await self.mcp_session.read_resource(
                f"sample://{input_sdif_mcp_uri_path}"
            )

            output_schema_text = "N/A"
            output_sample_text = "N/A"
            if output_sdif_mcp_uri_path:
                output_schema_text = await self._read_output_resource_text(
                    "schema", output_sdif_mcp_uri_path
                )
                output_sample_text = await self._read_output_resource_text(
                    "sample", output_sdif_mcp_uri_path
                )

            # A plain dict keeps `str(...)` below free of the
            # "defaultdict(<class 'dict'>, ...)" wrapper in the prompt text.
            output_representation = self._represent_output_files(
                resolved_output_target_files, representer_kwargs
            )

            prompt = await self.mcp_session.get_prompt(
                "create_transformation",
                arguments={
                    # NOTE(review): this takes `.name` of the base64-encoded
                    # path, not of the SDIF file itself; kept as-is because
                    # the prompt's expectations are not visible here - confirm.
                    "input_file": Path(input_sdif_mcp_uri_path).name,
                    "input_schema": input_schema.contents[0].text
                    if input_schema.contents
                    else "Error reading input schema",
                    "input_sample": input_sample.contents[0].text
                    if input_sample.contents
                    else "Error reading input sample",
                    "output_files": str(list(resolved_output_target_files.values())),
                    "output_schema": output_schema_text,
                    "output_sample": output_sample_text
                    if not schema_only
                    else "Sample not available. File is empty (no data).",
                    "output_representation": str(output_representation),
                    "instructions": instructions
                    or "No instructions provided. Use the output example.",
                },
            )
            agent = Agent(
                name="Transformation Builder",
                mcp_servers=[self.mcp_server],
                tools=[execute_transformation],
                model=self.llm_model,
            )
            result = await Runner.run(agent, prompt.messages[0].content.text)
            return self.parse_code(result.final_output)
        finally:
            # Reset context variables after the task is done
            CONTEXT_INPUT_SDIF_PATH.reset(token_input_path)
            CONTEXT_OUTPUT_TARGET_FILES.reset(token_output_files)
            CONTEXT_SCHEMA_ONLY.reset(token_schema_only)

    @staticmethod
    def _resolve_output_target_files(
        output_target_files,
    ) -> Dict[Union[str, Path], str]:
        """Normalise the accepted input shapes into ``{example_path: agent_facing_name}``."""
        if isinstance(output_target_files, FilePath):
            single = Path(output_target_files)
            return {single.resolve(): single.name}

        if isinstance(output_target_files, list):
            return {
                Path(file_path).resolve(): Path(file_path).name
                for file_path in output_target_files
            }

        if isinstance(output_target_files, dict):
            resolved: Dict[Union[str, Path], str] = {}
            for key, agent_facing_name in output_target_files.items():
                # A string key is only promoted to a Path when it names an
                # existing file; other non-Path keys are kept untouched.
                if isinstance(key, str) and Path(key).exists():
                    key = Path(key)
                if isinstance(key, Path):
                    key = key.resolve()
                resolved[key] = agent_facing_name
            return resolved

        # Unsupported input type: no target files.
        return {}

    async def _read_output_resource_text(self, kind: str, encoded_path: str) -> str:
        """Read ``{kind}://{encoded_path}`` best-effort; return "N/A" when empty or failing."""
        try:
            content = await self.mcp_session.read_resource(f"{kind}://{encoded_path}")
            if content.contents:
                return content.contents[0].text
        except Exception as e:
            # Deliberately best-effort: the output SDIF is optional context.
            print(f"Warning: Could not read {kind} for output_sdif {encoded_path}: {e}")
        return "N/A"

    @staticmethod
    def _represent_output_files(
        target_files: Dict[Union[str, Path], str],
        representer_kwargs: Optional[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Describe every example output file, keyed by its agent-facing name.

        A file that cannot be represented maps to an error string instead of
        aborting the whole build.
        """
        representations: Dict[str, Any] = {}
        for file_key_abs_path, agent_facing_name in target_files.items():
            print(f"Representing {agent_facing_name} from {file_key_abs_path}")
            try:
                # Representer uses the absolute path (file_key_abs_path) to read the example file.
                representer = get_representer(file_key_abs_path)
                representation, used_params = representer.represent(
                    file_key_abs_path, **(representer_kwargs or {})
                )
                representations[agent_facing_name] = {
                    "representation": representation,
                    "used_params": used_params,
                }
            except Exception as e:
                print(
                    f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
                )
                representations[agent_facing_name] = f"Error representing file: {e}"
        return representations

    def parse_code(self, code) -> str:
        """Extract the first fenced code block from ``code``.

        An optional ``python``/``python3``/``py`` language tag (any case)
        directly after the opening fence is dropped. When no fenced block is
        present the whole text is returned. The result is stripped of
        surrounding whitespace in both cases.
        """
        match = re.search(
            r"```(?:(?:python3?|py)\b)?(.*?)```",
            code,
            re.DOTALL | re.IGNORECASE,
        )
        if match:
            return match.group(1).strip()
        # Handle case where no code block is found
        return code.strip()
|
@@ -1,277 +0,0 @@
|
|
1
|
-
import base64
|
2
|
-
import os
|
3
|
-
import re
|
4
|
-
from collections import defaultdict
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Union
|
7
|
-
|
8
|
-
from agents import Agent, Runner, function_tool
|
9
|
-
from agents.mcp.server import MCPServer
|
10
|
-
from mcp import ClientSession
|
11
|
-
from satif_core import AsyncTransformationBuilder
|
12
|
-
from satif_core.types import FilePath
|
13
|
-
from satif_sdk.code_executors.local_executor import LocalCodeExecutor
|
14
|
-
from satif_sdk.comparators import get_comparator
|
15
|
-
from satif_sdk.representers import get_representer
|
16
|
-
from satif_sdk.transformers import CodeTransformer
|
17
|
-
|
18
|
-
# Global variables for transformation.
# `SyncpulseTransformationBuilder.build` assigns these module-level names and
# the `execute_transformation` tool reads them; None means "not set yet".

# Resolved path of the SDIF database the candidate code runs against.
INPUT_SDIF_PATH: Optional[Path] = None

# Example output file (path used for local reading) -> agent-facing file name.
OUTPUT_TARGET_FILES: Optional[Dict[Union[str, Path], str]] = None

# When truthy, comparators are asked to check structure only.
SCHEMA_ONLY: Optional[bool] = None
|
22
|
-
|
23
|
-
|
24
|
-
def _format_comparison_output(
    comparison_result: Dict[str, Any],
    schema_only_mode: Optional[bool],
    source_file_display_name: str,
    target_file_display_name: str,
) -> str:
    """
    Turn one comparator result into a single agent-facing message.

    Every message begins with a prefix naming the source and target files,
    normally followed by the raw ``comparison_result`` mapping. When
    ``schema_only_mode`` is exactly ``True``, the result is flagged
    equivalent, and both reported row counts are numeric zeros, a fixed
    warning about two empty files is returned instead.

    Args:
        comparison_result: Mapping returned by a comparator; the keys read
            here are ``are_equivalent`` and ``details.row_comparison``.
        schema_only_mode: Only the literal ``True`` enables the special case.
        source_file_display_name: Name shown for the generated file.
        target_file_display_name: Name shown for the example file.

    Returns:
        The formatted message.
    """
    header = f"Comparison for {source_file_display_name} [SOURCE] with {target_file_display_name} [TARGET]:"

    matched_while_empty = False
    if schema_only_mode is True and comparison_result.get("are_equivalent") is True:
        row_info = comparison_result.get("details", {}).get("row_comparison", {})
        reported_counts = (row_info.get("row_count1"), row_info.get("row_count2"))
        # Absent or non-numeric counts keep the default formatting.
        matched_while_empty = all(
            isinstance(count, (int, float)) and count == 0
            for count in reported_counts
        )

    if matched_while_empty:
        return f"{header} Files have the same headers but are both empty (no data rows). This should not happen. Please verify the instructions and try again."

    # Default formatting: prefix followed by the raw mapping.
    return f"{header} {comparison_result}"
|
53
|
-
|
54
|
-
|
55
|
-
@function_tool
async def execute_transformation(code: str) -> str:
    """Executes the transformation code on the input and returns the
    comparison difference between the transformed output and the target output example.

    Args:
        code: The code to execute on the input.
    """
    # NOTE: the docstring above is left untouched on purpose; this function is
    # wrapped by `function_tool`, so its text may be surfaced to the agent.

    # The module-level globals are assigned by SyncpulseTransformationBuilder.build.
    if INPUT_SDIF_PATH is None or OUTPUT_TARGET_FILES is None:
        return "Error: Transformation context not initialized"

    # Run the candidate code against the input SDIF.
    code_transformer = CodeTransformer(
        function=code,
        code_executor=LocalCodeExecutor(disable_security_warning=True),
    )
    generated_output_path = code_transformer.export(INPUT_SDIF_PATH)

    comparisons = []
    comparator_kwargs = {}
    if SCHEMA_ONLY:
        comparator_kwargs["check_structure_only"] = True

    if os.path.isdir(generated_output_path):
        # If it's a directory, compare each file with its corresponding target
        generated_files = os.listdir(generated_output_path)

        for output_base_file, output_target_file_name in OUTPUT_TARGET_FILES.items():
            if output_target_file_name not in generated_files:
                comparisons.append(
                    f"Error: {output_target_file_name} not found in the generated output"
                )
                continue

            generated_file_path = os.path.join(
                generated_output_path, output_target_file_name
            )
            comparator = get_comparator(output_target_file_name.split(".")[-1])
            comparison = comparator.compare(
                generated_file_path, output_base_file, **comparator_kwargs
            )
            comparisons.append(
                _format_comparison_output(
                    comparison,
                    SCHEMA_ONLY,
                    generated_file_path,
                    output_target_file_name,
                )
            )
    elif len(OUTPUT_TARGET_FILES) == 1:
        # If it's a single file, ensure there's only one target and compare
        output_file = list(OUTPUT_TARGET_FILES.keys())[0]
        output_target_file_name = list(OUTPUT_TARGET_FILES.values())[0]
        # build() stores resolved Path objects as keys, and Path has no
        # .split(); convert to str before extracting the extension.
        comparator = get_comparator(str(output_file).split(".")[-1])
        comparison = comparator.compare(
            generated_output_path, output_file, **comparator_kwargs
        )
        comparisons.append(
            _format_comparison_output(
                comparison,
                SCHEMA_ONLY,
                str(generated_output_path),
                output_target_file_name,
            )
        )
    else:
        comparisons.append(
            "Error: Single output file generated but multiple target files expected"
        )

    return "\n".join(comparisons)
|
126
|
-
|
127
|
-
|
128
|
-
class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
    """This class is used to build a transformation code that will be used to transform a SDIF database into a set of files following the format of the given output files.

    `build` collects the input schema and sample (and optionally those of an
    output SDIF) through the MCP session, asks the representers to describe
    every example output file, renders the ``create_transformation`` MCP
    prompt and runs an agent equipped with the ``execute_transformation``
    tool. The code block found in the agent's final answer is returned.
    """

    def __init__(
        self,
        mcp_server: MCPServer,
        mcp_session: ClientSession,
        llm_model: str = "o4-mini",
    ):
        """Store the collaborators used by `build`.

        Args:
            mcp_server: MCP server made available to the agent.
            mcp_session: Session used to read resources and fetch the prompt.
            llm_model: Model name passed to the agent.
        """
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model

    async def build(
        self,
        sdif: Path,
        output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
        output_sdif: Optional[Path] = None,
        instructions: str = "",
        schema_only: bool = False,
        representer_kwargs: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Ask the agent for transformation code and return it.

        The module-level globals ``INPUT_SDIF_PATH``, ``OUTPUT_TARGET_FILES``
        and ``SCHEMA_ONLY`` are assigned here and stay set after the call;
        the ``execute_transformation`` tool reads them.

        Args:
            sdif: Path to the input SDIF database.
            output_target_files: Example output file(s). Either a single
                path, a list of paths, or a mapping from example path to the
                agent-facing file name.
            output_sdif: Optional SDIF whose schema and sample are added to
                the prompt. Failures reading it are printed and replaced by
                "N/A".
            instructions: Free-form instructions for the agent.
            schema_only: When true, comparisons check structure only and the
                output sample is reported as unavailable in the prompt.
            representer_kwargs: Extra keyword arguments forwarded to every
                representer.

        Returns:
            The transformation code extracted from the agent's final output.
        """
        global INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY

        INPUT_SDIF_PATH = Path(sdif).resolve()
        SCHEMA_ONLY = schema_only

        # We must encode the path because special characters are not allowed in mcp read_resource()
        # The resolved absolute paths are encoded so the URIs name the same
        # file as INPUT_SDIF_PATH regardless of the working directory.
        input_sdif_mcp_uri_path = base64.b64encode(
            str(INPUT_SDIF_PATH).encode()
        ).decode()
        output_sdif_mcp_uri_path = (
            base64.b64encode(str(Path(output_sdif).resolve()).encode()).decode()
            if output_sdif
            else None
        )

        input_schema = await self.mcp_session.read_resource(
            f"schema://{input_sdif_mcp_uri_path}"
        )
        input_sample = await self.mcp_session.read_resource(
            f"sample://{input_sdif_mcp_uri_path}"
        )

        output_schema_text = "N/A"
        output_sample_text = "N/A"
        if output_sdif_mcp_uri_path:
            output_schema_text = await self._read_output_resource_text(
                "schema", output_sdif_mcp_uri_path
            )
            output_sample_text = await self._read_output_resource_text(
                "sample", output_sdif_mcp_uri_path
            )

        # OUTPUT_TARGET_FILES keys are absolute paths to original example files for local reading by representers/comparators.
        # Values are agent-facing filenames.
        OUTPUT_TARGET_FILES = self._resolve_output_target_files(output_target_files)

        # A plain dict keeps `str(...)` below free of the
        # "defaultdict(<class 'dict'>, ...)" wrapper in the prompt text.
        output_representation = self._represent_output_files(
            OUTPUT_TARGET_FILES, representer_kwargs
        )

        prompt = await self.mcp_session.get_prompt(
            "create_transformation",
            arguments={
                # NOTE(review): this takes `.name` of the base64-encoded
                # path, not of the SDIF file itself; kept as-is because the
                # prompt's expectations are not visible here - confirm.
                "input_file": Path(input_sdif_mcp_uri_path).name,
                "input_schema": input_schema.contents[0].text
                if input_schema.contents
                else "Error reading input schema",
                "input_sample": input_sample.contents[0].text
                if input_sample.contents
                else "Error reading input sample",
                "output_files": str(list(OUTPUT_TARGET_FILES.values())),
                "output_schema": output_schema_text,
                "output_sample": output_sample_text
                if not SCHEMA_ONLY
                else "Sample not available. File is empty (no data).",
                "output_representation": str(output_representation),
                "instructions": instructions
                or "No instructions provided. Use the output example.",
            },
        )
        agent = Agent(
            name="Transformation Builder",
            mcp_servers=[self.mcp_server],
            tools=[execute_transformation],
            model=self.llm_model,
        )
        result = await Runner.run(agent, prompt.messages[0].content.text)
        return self.parse_code(result.final_output)

    @staticmethod
    def _resolve_output_target_files(
        output_target_files,
    ) -> Dict[Union[str, Path], str]:
        """Normalise the accepted input shapes into ``{example_path: agent_facing_name}``."""
        if isinstance(output_target_files, FilePath):
            single = Path(output_target_files)
            return {single.resolve(): single.name}

        if isinstance(output_target_files, list):
            return {
                Path(file_path).resolve(): Path(file_path).name
                for file_path in output_target_files
            }

        if isinstance(output_target_files, dict):
            # Path keys are made absolute; any other key is kept untouched.
            return {
                (key.resolve() if isinstance(key, Path) else key): agent_facing_name
                for key, agent_facing_name in output_target_files.items()
            }

        # Unsupported input type: no target files.
        return {}

    async def _read_output_resource_text(self, kind: str, encoded_path: str) -> str:
        """Read ``{kind}://{encoded_path}`` best-effort; return "N/A" when empty or failing."""
        try:
            content = await self.mcp_session.read_resource(f"{kind}://{encoded_path}")
            if content.contents:
                return content.contents[0].text
        except Exception as e:
            # Deliberately best-effort: the output SDIF is optional context.
            print(f"Warning: Could not read {kind} for output_sdif {encoded_path}: {e}")
        return "N/A"

    @staticmethod
    def _represent_output_files(
        target_files: Dict[Union[str, Path], str],
        representer_kwargs: Optional[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Describe every example output file, keyed by its agent-facing name.

        A file that cannot be represented maps to an error string instead of
        aborting the whole build.
        """
        representations: Dict[str, Any] = {}
        for file_key_abs_path, agent_facing_name in target_files.items():
            print(f"Representing {agent_facing_name} from {file_key_abs_path}")
            try:
                # Representer uses the absolute path (file_key_abs_path) to read the example file.
                representer = get_representer(file_key_abs_path)
                representation, used_params = representer.represent(
                    file_key_abs_path, **(representer_kwargs or {})
                )
                representations[agent_facing_name] = {
                    "representation": representation,
                    "used_params": used_params,
                }
            except Exception as e:
                print(
                    f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
                )
                representations[agent_facing_name] = f"Error representing file: {e}"
        return representations

    def parse_code(self, code) -> str:
        """Extract the first fenced code block from ``code``.

        An optional ``python``/``python3``/``py`` language tag (any case)
        directly after the opening fence is dropped. When no fenced block is
        present the whole text is returned. The result is stripped of
        surrounding whitespace in both cases.
        """
        match = re.search(
            r"```(?:(?:python3?|py)\b)?(.*?)```",
            code,
            re.DOTALL | re.IGNORECASE,
        )
        if match:
            return match.group(1).strip()
        # Handle case where no code block is found
        return code.strip()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|