satif-ai 0.2.10__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- satif_ai/transformation_builders/syncpulse.py +144 -113
- {satif_ai-0.2.10.dist-info → satif_ai-0.2.11.dist-info}/METADATA +1 -1
- {satif_ai-0.2.10.dist-info → satif_ai-0.2.11.dist-info}/RECORD +6 -6
- {satif_ai-0.2.10.dist-info → satif_ai-0.2.11.dist-info}/LICENSE +0 -0
- {satif_ai-0.2.10.dist-info → satif_ai-0.2.11.dist-info}/WHEEL +0 -0
- {satif_ai-0.2.10.dist-info → satif_ai-0.2.11.dist-info}/entry_points.txt +0 -0
@@ -2,6 +2,7 @@ import base64
|
|
2
2
|
import os
|
3
3
|
import re
|
4
4
|
from collections import defaultdict
|
5
|
+
from contextvars import ContextVar
|
5
6
|
from pathlib import Path
|
6
7
|
from typing import Any, Dict, List, Optional, Union
|
7
8
|
|
@@ -15,10 +16,15 @@ from satif_sdk.comparators import get_comparator
|
|
15
16
|
from satif_sdk.representers import get_representer
|
16
17
|
from satif_sdk.transformers import CodeTransformer
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
CONTEXT_INPUT_SDIF_PATH: ContextVar[Optional[Path]] = ContextVar(
|
20
|
+
"CONTEXT_INPUT_SDIF_PATH", default=None
|
21
|
+
)
|
22
|
+
CONTEXT_OUTPUT_TARGET_FILES: ContextVar[Optional[Dict[Union[str, Path], str]]] = (
|
23
|
+
ContextVar("CONTEXT_OUTPUT_TARGET_FILES", default=None)
|
24
|
+
)
|
25
|
+
CONTEXT_SCHEMA_ONLY: ContextVar[Optional[bool]] = ContextVar(
|
26
|
+
"CONTEXT_SCHEMA_ONLY", default=None
|
27
|
+
)
|
22
28
|
|
23
29
|
|
24
30
|
def _format_comparison_output(
|
@@ -60,18 +66,22 @@ async def execute_transformation(code: str) -> str:
|
|
60
66
|
Args:
|
61
67
|
code: The code to execute on the input.
|
62
68
|
"""
|
63
|
-
|
64
|
-
|
69
|
+
input_sdif_path = CONTEXT_INPUT_SDIF_PATH.get()
|
70
|
+
output_target_files_dict = CONTEXT_OUTPUT_TARGET_FILES.get()
|
71
|
+
schema_only_flag = CONTEXT_SCHEMA_ONLY.get()
|
72
|
+
|
73
|
+
if input_sdif_path is None or output_target_files_dict is None:
|
74
|
+
return "Error: Transformation context not initialized correctly via contextvars"
|
65
75
|
|
66
76
|
code_transformer = CodeTransformer(
|
67
77
|
function=code,
|
68
78
|
code_executor=LocalCodeExecutor(disable_security_warning=True),
|
69
79
|
)
|
70
|
-
generated_output_path = code_transformer.export(
|
80
|
+
generated_output_path = code_transformer.export(input_sdif_path)
|
71
81
|
|
72
82
|
comparisons = []
|
73
83
|
comparator_kwargs = {}
|
74
|
-
if
|
84
|
+
if schema_only_flag:
|
75
85
|
comparator_kwargs["check_structure_only"] = True
|
76
86
|
|
77
87
|
if os.path.isdir(generated_output_path):
|
@@ -81,7 +91,7 @@ async def execute_transformation(code: str) -> str:
|
|
81
91
|
for (
|
82
92
|
output_base_file,
|
83
93
|
output_target_file_name,
|
84
|
-
) in
|
94
|
+
) in output_target_files_dict.items():
|
85
95
|
if output_target_file_name in generated_files:
|
86
96
|
generated_file_path = os.path.join(
|
87
97
|
generated_output_path, output_target_file_name
|
@@ -92,7 +102,7 @@ async def execute_transformation(code: str) -> str:
|
|
92
102
|
)
|
93
103
|
formatted_message = _format_comparison_output(
|
94
104
|
comparison,
|
95
|
-
|
105
|
+
schema_only_flag,
|
96
106
|
generated_file_path,
|
97
107
|
output_target_file_name,
|
98
108
|
)
|
@@ -103,16 +113,18 @@ async def execute_transformation(code: str) -> str:
|
|
103
113
|
)
|
104
114
|
else:
|
105
115
|
# If it's a single file, ensure there's only one target and compare
|
106
|
-
if len(
|
107
|
-
output_file = list(
|
108
|
-
output_target_file_name = list(
|
109
|
-
comparator = get_comparator(
|
116
|
+
if len(output_target_files_dict) == 1:
|
117
|
+
output_file = list(output_target_files_dict.keys())[0]
|
118
|
+
output_target_file_name = list(output_target_files_dict.values())[0]
|
119
|
+
comparator = get_comparator(
|
120
|
+
str(output_file).split(".")[-1]
|
121
|
+
) # Ensure output_file is string for split
|
110
122
|
comparison = comparator.compare(
|
111
123
|
generated_output_path, output_file, **comparator_kwargs
|
112
124
|
)
|
113
125
|
formatted_message = _format_comparison_output(
|
114
126
|
comparison,
|
115
|
-
|
127
|
+
schema_only_flag,
|
116
128
|
str(generated_output_path),
|
117
129
|
output_target_file_name,
|
118
130
|
)
|
@@ -147,126 +159,145 @@ class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
|
|
147
159
|
schema_only: bool = False,
|
148
160
|
representer_kwargs: Optional[Dict[str, Any]] = None,
|
149
161
|
) -> str:
|
150
|
-
|
151
|
-
|
152
|
-
INPUT_SDIF_PATH = Path(sdif).resolve()
|
153
|
-
SCHEMA_ONLY = schema_only
|
154
|
-
# We must encode the path because special characters are not allowed in mcp read_resource()
|
155
|
-
input_sdif_mcp_uri_path = base64.b64encode(str(sdif).encode()).decode()
|
156
|
-
output_sdif_mcp_uri_path = (
|
157
|
-
base64.b64encode(str(output_sdif).encode()).decode()
|
158
|
-
if output_sdif
|
159
|
-
else None
|
160
|
-
)
|
161
|
-
|
162
|
-
input_schema = await self.mcp_session.read_resource(
|
163
|
-
f"schema://{input_sdif_mcp_uri_path}"
|
164
|
-
)
|
165
|
-
input_sample = await self.mcp_session.read_resource(
|
166
|
-
f"sample://{input_sdif_mcp_uri_path}"
|
167
|
-
)
|
168
|
-
|
169
|
-
output_schema_text = "N/A"
|
170
|
-
output_sample_text = "N/A"
|
171
|
-
if output_sdif_mcp_uri_path:
|
172
|
-
try:
|
173
|
-
output_schema_content = await self.mcp_session.read_resource(
|
174
|
-
f"schema://{output_sdif_mcp_uri_path}"
|
175
|
-
)
|
176
|
-
if output_schema_content.contents:
|
177
|
-
output_schema_text = output_schema_content.contents[0].text
|
178
|
-
except Exception as e:
|
179
|
-
print(
|
180
|
-
f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
|
181
|
-
)
|
182
|
-
|
183
|
-
try:
|
184
|
-
output_sample_content = await self.mcp_session.read_resource(
|
185
|
-
f"sample://{output_sdif_mcp_uri_path}"
|
186
|
-
)
|
187
|
-
if output_sample_content.contents:
|
188
|
-
output_sample_text = output_sample_content.contents[0].text
|
189
|
-
except Exception as e:
|
190
|
-
print(
|
191
|
-
f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
|
192
|
-
)
|
162
|
+
resolved_input_sdif_path = Path(sdif).resolve()
|
193
163
|
|
194
164
|
# OUTPUT_TARGET_FILES keys are absolute paths to original example files for local reading by representers/comparators.
|
195
165
|
# Values are agent-facing filenames.
|
166
|
+
resolved_output_target_files: Dict[Union[str, Path], str]
|
196
167
|
if isinstance(output_target_files, FilePath):
|
197
|
-
|
168
|
+
resolved_output_target_files = {
|
198
169
|
Path(output_target_files).resolve(): Path(output_target_files).name
|
199
170
|
}
|
200
171
|
elif isinstance(output_target_files, list):
|
201
|
-
|
172
|
+
resolved_output_target_files = {
|
202
173
|
Path(file_path).resolve(): Path(file_path).name
|
203
174
|
for file_path in output_target_files
|
204
175
|
}
|
205
176
|
elif isinstance(output_target_files, dict):
|
206
177
|
temp_map = {}
|
207
178
|
for k, v in output_target_files.items():
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
179
|
+
# Resolve Path keys to absolute paths
|
180
|
+
key_to_resolve = k
|
181
|
+
if (
|
182
|
+
isinstance(key_to_resolve, str) and Path(key_to_resolve).exists()
|
183
|
+
): # Check if string is a valid path
|
184
|
+
key_to_resolve = Path(key_to_resolve)
|
185
|
+
|
186
|
+
if isinstance(key_to_resolve, Path):
|
187
|
+
temp_map[key_to_resolve.resolve()] = v
|
188
|
+
else: # Keep non-Path keys as they are (e.g. if it's already a resolved string path from somewhere else)
|
189
|
+
temp_map[key_to_resolve] = v
|
190
|
+
resolved_output_target_files = temp_map
|
213
191
|
else:
|
214
|
-
|
192
|
+
resolved_output_target_files = {}
|
193
|
+
|
194
|
+
token_input_path = CONTEXT_INPUT_SDIF_PATH.set(resolved_input_sdif_path)
|
195
|
+
token_output_files = CONTEXT_OUTPUT_TARGET_FILES.set(
|
196
|
+
resolved_output_target_files
|
197
|
+
)
|
198
|
+
token_schema_only = CONTEXT_SCHEMA_ONLY.set(schema_only)
|
215
199
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
200
|
+
try:
|
201
|
+
# We must encode the path because special characters are not allowed in mcp read_resource()
|
202
|
+
input_sdif_mcp_uri_path = base64.b64encode(
|
203
|
+
str(resolved_input_sdif_path).encode()
|
204
|
+
).decode()
|
205
|
+
output_sdif_mcp_uri_path = (
|
206
|
+
base64.b64encode(str(output_sdif).encode()).decode()
|
207
|
+
if output_sdif
|
208
|
+
else None
|
209
|
+
)
|
210
|
+
|
211
|
+
input_schema = await self.mcp_session.read_resource(
|
212
|
+
f"schema://{input_sdif_mcp_uri_path}"
|
213
|
+
)
|
214
|
+
input_sample = await self.mcp_session.read_resource(
|
215
|
+
f"sample://{input_sdif_mcp_uri_path}"
|
216
|
+
)
|
217
|
+
|
218
|
+
output_schema_text = "N/A"
|
219
|
+
output_sample_text = "N/A"
|
220
|
+
if output_sdif_mcp_uri_path:
|
221
221
|
try:
|
222
|
-
|
223
|
-
|
224
|
-
representation, used_params = representer.represent(
|
225
|
-
file_key_abs_path, **(representer_kwargs or {})
|
222
|
+
output_schema_content = await self.mcp_session.read_resource(
|
223
|
+
f"schema://{output_sdif_mcp_uri_path}"
|
226
224
|
)
|
227
|
-
|
228
|
-
|
229
|
-
"used_params": used_params,
|
230
|
-
}
|
225
|
+
if output_schema_content.contents:
|
226
|
+
output_schema_text = output_schema_content.contents[0].text
|
231
227
|
except Exception as e:
|
232
228
|
print(
|
233
|
-
f"Warning: Could not
|
229
|
+
f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
|
230
|
+
)
|
231
|
+
|
232
|
+
try:
|
233
|
+
output_sample_content = await self.mcp_session.read_resource(
|
234
|
+
f"sample://{output_sdif_mcp_uri_path}"
|
234
235
|
)
|
235
|
-
|
236
|
-
|
236
|
+
if output_sample_content.contents:
|
237
|
+
output_sample_text = output_sample_content.contents[0].text
|
238
|
+
except Exception as e:
|
239
|
+
print(
|
240
|
+
f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
|
237
241
|
)
|
242
|
+
output_representation = defaultdict(dict)
|
243
|
+
if resolved_output_target_files:
|
244
|
+
for file_key_abs_path in list(resolved_output_target_files.keys()):
|
245
|
+
agent_facing_name = resolved_output_target_files[file_key_abs_path]
|
246
|
+
print(f"Representing {agent_facing_name} from {file_key_abs_path}")
|
247
|
+
try:
|
248
|
+
# Representer uses the absolute path (file_key_abs_path) to read the example file.
|
249
|
+
representer = get_representer(file_key_abs_path)
|
250
|
+
representation, used_params = representer.represent(
|
251
|
+
file_key_abs_path, **(representer_kwargs or {})
|
252
|
+
)
|
253
|
+
output_representation[agent_facing_name] = {
|
254
|
+
"representation": representation,
|
255
|
+
"used_params": used_params,
|
256
|
+
}
|
257
|
+
except Exception as e:
|
258
|
+
print(
|
259
|
+
f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
|
260
|
+
)
|
261
|
+
output_representation[agent_facing_name] = (
|
262
|
+
f"Error representing file: {e}"
|
263
|
+
)
|
238
264
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
265
|
+
prompt = await self.mcp_session.get_prompt(
|
266
|
+
"create_transformation",
|
267
|
+
arguments={
|
268
|
+
"input_file": Path(
|
269
|
+
input_sdif_mcp_uri_path # Use the original sdif path for display name logic if needed
|
270
|
+
).name,
|
271
|
+
"input_schema": input_schema.contents[0].text
|
272
|
+
if input_schema.contents
|
273
|
+
else "Error reading input schema",
|
274
|
+
"input_sample": input_sample.contents[0].text
|
275
|
+
if input_sample.contents
|
276
|
+
else "Error reading input sample",
|
277
|
+
"output_files": str(list(resolved_output_target_files.values())),
|
278
|
+
"output_schema": output_schema_text,
|
279
|
+
"output_sample": output_sample_text
|
280
|
+
if not schema_only
|
281
|
+
else "Sample not available. File is empty (no data).",
|
282
|
+
"output_representation": str(output_representation),
|
283
|
+
"instructions": instructions
|
284
|
+
or "No instructions provided. Use the output example.",
|
285
|
+
},
|
286
|
+
)
|
287
|
+
agent = Agent(
|
288
|
+
name="Transformation Builder",
|
289
|
+
mcp_servers=[self.mcp_server],
|
290
|
+
tools=[execute_transformation],
|
291
|
+
model=self.llm_model,
|
292
|
+
)
|
293
|
+
result = await Runner.run(agent, prompt.messages[0].content.text)
|
294
|
+
transformation_code = self.parse_code(result.final_output)
|
295
|
+
return transformation_code
|
296
|
+
finally:
|
297
|
+
# Reset context variables after the task is done
|
298
|
+
CONTEXT_INPUT_SDIF_PATH.reset(token_input_path)
|
299
|
+
CONTEXT_OUTPUT_TARGET_FILES.reset(token_output_files)
|
300
|
+
CONTEXT_SCHEMA_ONLY.reset(token_schema_only)
|
270
301
|
|
271
302
|
def parse_code(self, code) -> str:
|
272
303
|
match = re.search(r"```(?:python)?(.*?)```", code, re.DOTALL)
|
@@ -8,13 +8,13 @@ satif_ai/standardizers/ai_csv.py,sha256=LbCRaLleujQRgSRRyt9ujbED-PIGRq1J8zRnejGM
|
|
8
8
|
satif_ai/standardizers/ai_xlsx.py,sha256=558Bzfy8WGuk5mdnjMvvtakQXcU3rmwK3ykPjpXKwmQ,15863
|
9
9
|
satif_ai/transform.py,sha256=g5XNeVCIKUgDW3UIhf02MN9xkXnWF3EJXS0Eig_hfD8,7677
|
10
10
|
satif_ai/transformation_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
-
satif_ai/transformation_builders/syncpulse.py,sha256=
|
11
|
+
satif_ai/transformation_builders/syncpulse.py,sha256=C7utZ6QOlay8a0wR5Ai33G0G3UJPleTpGt1wkX9m6uE,13346
|
12
12
|
satif_ai/utils/__init__.py,sha256=F-usaCt_vX872mXvtukuZdNMPnkVqDb8RaDgox2uow4,212
|
13
13
|
satif_ai/utils/merge_sdif.py,sha256=y4C6pgkdyer0QugroFKUck4Eud4Ap-tJzM-eclMo3Rw,25629
|
14
14
|
satif_ai/utils/openai_mcp.py,sha256=duCQZXG0mBs9DOOFIUvzraJhxD2IDzegWO9iOiLfFwY,3938
|
15
15
|
satif_ai/utils/zip.py,sha256=G_GK8629Iw0TLFCQJfnqOscv7MoKF5zdzxvEAbL7Gss,5186
|
16
|
-
satif_ai-0.2.
|
17
|
-
satif_ai-0.2.
|
18
|
-
satif_ai-0.2.
|
19
|
-
satif_ai-0.2.
|
20
|
-
satif_ai-0.2.
|
16
|
+
satif_ai-0.2.11.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
|
17
|
+
satif_ai-0.2.11.dist-info/METADATA,sha256=pp-DKnezO8ViDzaBvjeKqFyECkEJ_A8zktyMJjpd5ig,719
|
18
|
+
satif_ai-0.2.11.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
19
|
+
satif_ai-0.2.11.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
|
20
|
+
satif_ai-0.2.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|