satif-ai 0.2.10__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import base64
2
2
  import os
3
3
  import re
4
4
  from collections import defaultdict
5
+ from contextvars import ContextVar
5
6
  from pathlib import Path
6
7
  from typing import Any, Dict, List, Optional, Union
7
8
 
@@ -15,10 +16,15 @@ from satif_sdk.comparators import get_comparator
15
16
  from satif_sdk.representers import get_representer
16
17
  from satif_sdk.transformers import CodeTransformer
17
18
 
18
- # Global variables for transformation
19
- INPUT_SDIF_PATH: Optional[Path] = None
20
- OUTPUT_TARGET_FILES: Optional[Dict[Union[str, Path], str]] = None
21
- SCHEMA_ONLY: Optional[bool] = None
19
+ CONTEXT_INPUT_SDIF_PATH: ContextVar[Optional[Path]] = ContextVar(
20
+ "CONTEXT_INPUT_SDIF_PATH", default=None
21
+ )
22
+ CONTEXT_OUTPUT_TARGET_FILES: ContextVar[Optional[Dict[Union[str, Path], str]]] = (
23
+ ContextVar("CONTEXT_OUTPUT_TARGET_FILES", default=None)
24
+ )
25
+ CONTEXT_SCHEMA_ONLY: ContextVar[Optional[bool]] = ContextVar(
26
+ "CONTEXT_SCHEMA_ONLY", default=None
27
+ )
22
28
 
23
29
 
24
30
  def _format_comparison_output(
@@ -60,18 +66,22 @@ async def execute_transformation(code: str) -> str:
60
66
  Args:
61
67
  code: The code to execute on the input.
62
68
  """
63
- if INPUT_SDIF_PATH is None or OUTPUT_TARGET_FILES is None:
64
- return "Error: Transformation context not initialized"
69
+ input_sdif_path = CONTEXT_INPUT_SDIF_PATH.get()
70
+ output_target_files_dict = CONTEXT_OUTPUT_TARGET_FILES.get()
71
+ schema_only_flag = CONTEXT_SCHEMA_ONLY.get()
72
+
73
+ if input_sdif_path is None or output_target_files_dict is None:
74
+ return "Error: Transformation context not initialized correctly via contextvars"
65
75
 
66
76
  code_transformer = CodeTransformer(
67
77
  function=code,
68
78
  code_executor=LocalCodeExecutor(disable_security_warning=True),
69
79
  )
70
- generated_output_path = code_transformer.export(INPUT_SDIF_PATH)
80
+ generated_output_path = code_transformer.export(input_sdif_path)
71
81
 
72
82
  comparisons = []
73
83
  comparator_kwargs = {}
74
- if SCHEMA_ONLY:
84
+ if schema_only_flag:
75
85
  comparator_kwargs["check_structure_only"] = True
76
86
 
77
87
  if os.path.isdir(generated_output_path):
@@ -81,7 +91,7 @@ async def execute_transformation(code: str) -> str:
81
91
  for (
82
92
  output_base_file,
83
93
  output_target_file_name,
84
- ) in OUTPUT_TARGET_FILES.items():
94
+ ) in output_target_files_dict.items():
85
95
  if output_target_file_name in generated_files:
86
96
  generated_file_path = os.path.join(
87
97
  generated_output_path, output_target_file_name
@@ -92,7 +102,7 @@ async def execute_transformation(code: str) -> str:
92
102
  )
93
103
  formatted_message = _format_comparison_output(
94
104
  comparison,
95
- SCHEMA_ONLY,
105
+ schema_only_flag,
96
106
  generated_file_path,
97
107
  output_target_file_name,
98
108
  )
@@ -103,16 +113,18 @@ async def execute_transformation(code: str) -> str:
103
113
  )
104
114
  else:
105
115
  # If it's a single file, ensure there's only one target and compare
106
- if len(OUTPUT_TARGET_FILES) == 1:
107
- output_file = list(OUTPUT_TARGET_FILES.keys())[0]
108
- output_target_file_name = list(OUTPUT_TARGET_FILES.values())[0]
109
- comparator = get_comparator(output_file.split(".")[-1])
116
+ if len(output_target_files_dict) == 1:
117
+ output_file = list(output_target_files_dict.keys())[0]
118
+ output_target_file_name = list(output_target_files_dict.values())[0]
119
+ comparator = get_comparator(
120
+ str(output_file).split(".")[-1]
121
+ ) # Ensure output_file is string for split
110
122
  comparison = comparator.compare(
111
123
  generated_output_path, output_file, **comparator_kwargs
112
124
  )
113
125
  formatted_message = _format_comparison_output(
114
126
  comparison,
115
- SCHEMA_ONLY,
127
+ schema_only_flag,
116
128
  str(generated_output_path),
117
129
  output_target_file_name,
118
130
  )
@@ -147,126 +159,145 @@ class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
147
159
  schema_only: bool = False,
148
160
  representer_kwargs: Optional[Dict[str, Any]] = None,
149
161
  ) -> str:
150
- global INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY
151
-
152
- INPUT_SDIF_PATH = Path(sdif).resolve()
153
- SCHEMA_ONLY = schema_only
154
- # We must encode the path because special characters are not allowed in mcp read_resource()
155
- input_sdif_mcp_uri_path = base64.b64encode(str(sdif).encode()).decode()
156
- output_sdif_mcp_uri_path = (
157
- base64.b64encode(str(output_sdif).encode()).decode()
158
- if output_sdif
159
- else None
160
- )
161
-
162
- input_schema = await self.mcp_session.read_resource(
163
- f"schema://{input_sdif_mcp_uri_path}"
164
- )
165
- input_sample = await self.mcp_session.read_resource(
166
- f"sample://{input_sdif_mcp_uri_path}"
167
- )
168
-
169
- output_schema_text = "N/A"
170
- output_sample_text = "N/A"
171
- if output_sdif_mcp_uri_path:
172
- try:
173
- output_schema_content = await self.mcp_session.read_resource(
174
- f"schema://{output_sdif_mcp_uri_path}"
175
- )
176
- if output_schema_content.contents:
177
- output_schema_text = output_schema_content.contents[0].text
178
- except Exception as e:
179
- print(
180
- f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
181
- )
182
-
183
- try:
184
- output_sample_content = await self.mcp_session.read_resource(
185
- f"sample://{output_sdif_mcp_uri_path}"
186
- )
187
- if output_sample_content.contents:
188
- output_sample_text = output_sample_content.contents[0].text
189
- except Exception as e:
190
- print(
191
- f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
192
- )
162
+ resolved_input_sdif_path = Path(sdif).resolve()
193
163
 
194
164
  # Keys of the output-target-files mapping are absolute paths to the original example files, read locally by representers/comparators.
195
165
  # Values are agent-facing filenames.
166
+ resolved_output_target_files: Dict[Union[str, Path], str]
196
167
  if isinstance(output_target_files, FilePath):
197
- OUTPUT_TARGET_FILES = {
168
+ resolved_output_target_files = {
198
169
  Path(output_target_files).resolve(): Path(output_target_files).name
199
170
  }
200
171
  elif isinstance(output_target_files, list):
201
- OUTPUT_TARGET_FILES = {
172
+ resolved_output_target_files = {
202
173
  Path(file_path).resolve(): Path(file_path).name
203
174
  for file_path in output_target_files
204
175
  }
205
176
  elif isinstance(output_target_files, dict):
206
177
  temp_map = {}
207
178
  for k, v in output_target_files.items():
208
- if isinstance(k, Path):
209
- temp_map[k.resolve()] = v
210
- else:
211
- temp_map[k] = v
212
- OUTPUT_TARGET_FILES = temp_map
179
+ # Resolve Path keys (and string keys that name an existing path) to absolute paths
180
+ key_to_resolve = k
181
+ if (
182
+ isinstance(key_to_resolve, str) and Path(key_to_resolve).exists()
183
+ ): # Check if string is a valid path
184
+ key_to_resolve = Path(key_to_resolve)
185
+
186
+ if isinstance(key_to_resolve, Path):
187
+ temp_map[key_to_resolve.resolve()] = v
188
+ else: # Keep non-Path keys as they are (e.g. if it's already a resolved string path from somewhere else)
189
+ temp_map[key_to_resolve] = v
190
+ resolved_output_target_files = temp_map
213
191
  else:
214
- OUTPUT_TARGET_FILES = {}
192
+ resolved_output_target_files = {}
193
+
194
+ token_input_path = CONTEXT_INPUT_SDIF_PATH.set(resolved_input_sdif_path)
195
+ token_output_files = CONTEXT_OUTPUT_TARGET_FILES.set(
196
+ resolved_output_target_files
197
+ )
198
+ token_schema_only = CONTEXT_SCHEMA_ONLY.set(schema_only)
215
199
 
216
- output_representation = defaultdict(dict)
217
- if OUTPUT_TARGET_FILES:
218
- for file_key_abs_path in list(OUTPUT_TARGET_FILES.keys()):
219
- agent_facing_name = OUTPUT_TARGET_FILES[file_key_abs_path]
220
- print(f"Representing {agent_facing_name} from {file_key_abs_path}")
200
+ try:
201
+ # We must encode the path because special characters are not allowed in mcp read_resource()
202
+ input_sdif_mcp_uri_path = base64.b64encode(
203
+ str(resolved_input_sdif_path).encode()
204
+ ).decode()
205
+ output_sdif_mcp_uri_path = (
206
+ base64.b64encode(str(output_sdif).encode()).decode()
207
+ if output_sdif
208
+ else None
209
+ )
210
+
211
+ input_schema = await self.mcp_session.read_resource(
212
+ f"schema://{input_sdif_mcp_uri_path}"
213
+ )
214
+ input_sample = await self.mcp_session.read_resource(
215
+ f"sample://{input_sdif_mcp_uri_path}"
216
+ )
217
+
218
+ output_schema_text = "N/A"
219
+ output_sample_text = "N/A"
220
+ if output_sdif_mcp_uri_path:
221
221
  try:
222
- # Representer uses the absolute path (file_key_abs_path) to read the example file.
223
- representer = get_representer(file_key_abs_path)
224
- representation, used_params = representer.represent(
225
- file_key_abs_path, **(representer_kwargs or {})
222
+ output_schema_content = await self.mcp_session.read_resource(
223
+ f"schema://{output_sdif_mcp_uri_path}"
226
224
  )
227
- output_representation[agent_facing_name] = {
228
- "representation": representation,
229
- "used_params": used_params,
230
- }
225
+ if output_schema_content.contents:
226
+ output_schema_text = output_schema_content.contents[0].text
231
227
  except Exception as e:
232
228
  print(
233
- f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
229
+ f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
230
+ )
231
+
232
+ try:
233
+ output_sample_content = await self.mcp_session.read_resource(
234
+ f"sample://{output_sdif_mcp_uri_path}"
234
235
  )
235
- output_representation[agent_facing_name] = (
236
- f"Error representing file: {e}"
236
+ if output_sample_content.contents:
237
+ output_sample_text = output_sample_content.contents[0].text
238
+ except Exception as e:
239
+ print(
240
+ f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
237
241
  )
242
+ output_representation = defaultdict(dict)
243
+ if resolved_output_target_files:
244
+ for file_key_abs_path in list(resolved_output_target_files.keys()):
245
+ agent_facing_name = resolved_output_target_files[file_key_abs_path]
246
+ print(f"Representing {agent_facing_name} from {file_key_abs_path}")
247
+ try:
248
+ # Representer uses the absolute path (file_key_abs_path) to read the example file.
249
+ representer = get_representer(file_key_abs_path)
250
+ representation, used_params = representer.represent(
251
+ file_key_abs_path, **(representer_kwargs or {})
252
+ )
253
+ output_representation[agent_facing_name] = {
254
+ "representation": representation,
255
+ "used_params": used_params,
256
+ }
257
+ except Exception as e:
258
+ print(
259
+ f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
260
+ )
261
+ output_representation[agent_facing_name] = (
262
+ f"Error representing file: {e}"
263
+ )
238
264
 
239
- prompt = await self.mcp_session.get_prompt(
240
- "create_transformation",
241
- arguments={
242
- "input_file": Path(
243
- input_sdif_mcp_uri_path
244
- ).name, # Display name for prompt (from relative path)
245
- "input_schema": input_schema.contents[0].text
246
- if input_schema.contents
247
- else "Error reading input schema",
248
- "input_sample": input_sample.contents[0].text
249
- if input_sample.contents
250
- else "Error reading input sample",
251
- "output_files": str(list(OUTPUT_TARGET_FILES.values())),
252
- "output_schema": output_schema_text,
253
- "output_sample": output_sample_text
254
- if not SCHEMA_ONLY
255
- else "Sample not available. File is empty (no data).",
256
- "output_representation": str(output_representation),
257
- "instructions": instructions
258
- or "No instructions provided. Use the output example.",
259
- },
260
- )
261
- agent = Agent(
262
- name="Transformation Builder",
263
- mcp_servers=[self.mcp_server],
264
- tools=[execute_transformation],
265
- model=self.llm_model,
266
- )
267
- result = await Runner.run(agent, prompt.messages[0].content.text)
268
- transformation_code = self.parse_code(result.final_output)
269
- return transformation_code
265
+ prompt = await self.mcp_session.get_prompt(
266
+ "create_transformation",
267
+ arguments={
268
+ "input_file": Path(
269
+ input_sdif_mcp_uri_path # NOTE(review): this is the base64-encoded resolved path, not the original sdif path, so .name is not a readable file name - confirm intended display name
270
+ ).name,
271
+ "input_schema": input_schema.contents[0].text
272
+ if input_schema.contents
273
+ else "Error reading input schema",
274
+ "input_sample": input_sample.contents[0].text
275
+ if input_sample.contents
276
+ else "Error reading input sample",
277
+ "output_files": str(list(resolved_output_target_files.values())),
278
+ "output_schema": output_schema_text,
279
+ "output_sample": output_sample_text
280
+ if not schema_only
281
+ else "Sample not available. File is empty (no data).",
282
+ "output_representation": str(output_representation),
283
+ "instructions": instructions
284
+ or "No instructions provided. Use the output example.",
285
+ },
286
+ )
287
+ agent = Agent(
288
+ name="Transformation Builder",
289
+ mcp_servers=[self.mcp_server],
290
+ tools=[execute_transformation],
291
+ model=self.llm_model,
292
+ )
293
+ result = await Runner.run(agent, prompt.messages[0].content.text)
294
+ transformation_code = self.parse_code(result.final_output)
295
+ return transformation_code
296
+ finally:
297
+ # Reset context variables after the task is done
298
+ CONTEXT_INPUT_SDIF_PATH.reset(token_input_path)
299
+ CONTEXT_OUTPUT_TARGET_FILES.reset(token_output_files)
300
+ CONTEXT_SCHEMA_ONLY.reset(token_schema_only)
270
301
 
271
302
  def parse_code(self, code) -> str:
272
303
  match = re.search(r"```(?:python)?(.*?)```", code, re.DOTALL)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: satif-ai
3
- Version: 0.2.10
3
+ Version: 0.2.11
4
4
  Summary: AI Agents for Satif
5
5
  License: MIT
6
6
  Author: Syncpulse
@@ -8,13 +8,13 @@ satif_ai/standardizers/ai_csv.py,sha256=LbCRaLleujQRgSRRyt9ujbED-PIGRq1J8zRnejGM
8
8
  satif_ai/standardizers/ai_xlsx.py,sha256=558Bzfy8WGuk5mdnjMvvtakQXcU3rmwK3ykPjpXKwmQ,15863
9
9
  satif_ai/transform.py,sha256=g5XNeVCIKUgDW3UIhf02MN9xkXnWF3EJXS0Eig_hfD8,7677
10
10
  satif_ai/transformation_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- satif_ai/transformation_builders/syncpulse.py,sha256=c59BZicNnqs3NDKpflBAPqw42pGb6nYB2Zps0ChGyaM,11368
11
+ satif_ai/transformation_builders/syncpulse.py,sha256=C7utZ6QOlay8a0wR5Ai33G0G3UJPleTpGt1wkX9m6uE,13346
12
12
  satif_ai/utils/__init__.py,sha256=F-usaCt_vX872mXvtukuZdNMPnkVqDb8RaDgox2uow4,212
13
13
  satif_ai/utils/merge_sdif.py,sha256=y4C6pgkdyer0QugroFKUck4Eud4Ap-tJzM-eclMo3Rw,25629
14
14
  satif_ai/utils/openai_mcp.py,sha256=duCQZXG0mBs9DOOFIUvzraJhxD2IDzegWO9iOiLfFwY,3938
15
15
  satif_ai/utils/zip.py,sha256=G_GK8629Iw0TLFCQJfnqOscv7MoKF5zdzxvEAbL7Gss,5186
16
- satif_ai-0.2.10.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
17
- satif_ai-0.2.10.dist-info/METADATA,sha256=O5QWv8YJFtB5AIniv0LRgmSgpEaRLVdlz8WHZAru1X8,719
18
- satif_ai-0.2.10.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
19
- satif_ai-0.2.10.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
20
- satif_ai-0.2.10.dist-info/RECORD,,
16
+ satif_ai-0.2.11.dist-info/LICENSE,sha256=kS8EN6yAaGZd7V5z6GKSn_x3ozcZltrfRky4vMPRCw8,1072
17
+ satif_ai-0.2.11.dist-info/METADATA,sha256=pp-DKnezO8ViDzaBvjeKqFyECkEJ_A8zktyMJjpd5ig,719
18
+ satif_ai-0.2.11.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
19
+ satif_ai-0.2.11.dist-info/entry_points.txt,sha256=Mz2SwYALjktap1bF-Q3EWBgiZVNT6QJCVsCs_fCV33Y,43
20
+ satif_ai-0.2.11.dist-info/RECORD,,