satif-ai 0.2.10__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: satif-ai
3
- Version: 0.2.10
3
+ Version: 0.2.11
4
4
  Summary: AI Agents for Satif
5
5
  License: MIT
6
6
  Author: Syncpulse
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "satif-ai"
3
- version = "0.2.10"
3
+ version = "0.2.11"
4
4
  description = "AI Agents for Satif"
5
5
  authors = [
6
6
  {name = "Syncpulse"}
@@ -0,0 +1,308 @@
1
+ import base64
2
+ import os
3
+ import re
4
+ from collections import defaultdict
5
+ from contextvars import ContextVar
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Union
8
+
9
+ from agents import Agent, Runner, function_tool
10
+ from agents.mcp.server import MCPServer
11
+ from mcp import ClientSession
12
+ from satif_core import AsyncTransformationBuilder
13
+ from satif_core.types import FilePath
14
+ from satif_sdk.code_executors.local_executor import LocalCodeExecutor
15
+ from satif_sdk.comparators import get_comparator
16
+ from satif_sdk.representers import get_representer
17
+ from satif_sdk.transformers import CodeTransformer
18
+
19
# Per-run transformation context.
#
# SyncpulseTransformationBuilder.build() sets these three variables before
# starting the agent and resets them in a ``finally`` block; the
# ``execute_transformation`` tool reads them back. Each one defaults to
# ``None``, which the tool treats as "context not initialized".

# Resolved path of the input SDIF file the generated code is executed on.
CONTEXT_INPUT_SDIF_PATH: ContextVar[Optional[Path]] = ContextVar(
    "CONTEXT_INPUT_SDIF_PATH",
    default=None,
)

# Maps each example target file (the key, used for local reading) to the
# file name shown to the agent (the value).
CONTEXT_OUTPUT_TARGET_FILES: ContextVar[
    Optional[Dict[Union[str, Path], str]]
] = ContextVar(
    "CONTEXT_OUTPUT_TARGET_FILES",
    default=None,
)

# When truthy, comparators are asked to check structure only.
CONTEXT_SCHEMA_ONLY: ContextVar[Optional[bool]] = ContextVar(
    "CONTEXT_SCHEMA_ONLY",
    default=None,
)
28
+
29
+
30
+ def _format_comparison_output(
31
+ comparison_result: Dict[str, Any],
32
+ schema_only_mode: Optional[bool],
33
+ source_file_display_name: str,
34
+ target_file_display_name: str,
35
+ ) -> str:
36
+ """
37
+ Formats the comparison result string, with special handling for schema_only mode
38
+ where files are equivalent due to being empty.
39
+ """
40
+ base_message_prefix = f"Comparison for {source_file_display_name} [SOURCE] with {target_file_display_name} [TARGET]:"
41
+
42
+ if schema_only_mode is True and comparison_result.get("are_equivalent") is True:
43
+ details = comparison_result.get("details", {})
44
+ row_comparison = details.get("row_comparison", {})
45
+
46
+ row_count1 = row_comparison.get("row_count1")
47
+ row_count2 = row_comparison.get("row_count2")
48
+
49
+ if (
50
+ isinstance(row_count1, (int, float))
51
+ and row_count1 == 0
52
+ and isinstance(row_count2, (int, float))
53
+ and row_count2 == 0
54
+ ):
55
+ return f"{base_message_prefix} Files have the same headers but are both empty (no data rows). This should not happen. Please verify the instructions and try again."
56
+
57
+ # Default formatting if the special condition isn't met
58
+ return f"{base_message_prefix} {comparison_result}"
59
+
60
+
61
@function_tool
async def execute_transformation(code: str) -> str:
    """Executes the transformation code on the input and returns the
    comparison difference between the transformed output and the target output example.

    Args:
        code: The code to execute on the input.
    """
    # The docstring above is kept verbatim: it is presumably what
    # ``function_tool`` exposes to the model as the tool description
    # (TODO confirm against the agents SDK version in use).
    #
    # Returns one formatted comparison (or error line) per target file, joined
    # with newlines, or a single "Error: ..." string when the context is
    # missing or unusable.
    input_sdif_path = CONTEXT_INPUT_SDIF_PATH.get()
    output_target_files_dict = CONTEXT_OUTPUT_TARGET_FILES.get()
    schema_only_flag = CONTEXT_SCHEMA_ONLY.get()

    if input_sdif_path is None or output_target_files_dict is None:
        return "Error: Transformation context not initialized correctly via contextvars"

    # NOTE(review): ``code`` is produced by the model and is executed locally
    # with the executor's security warning disabled. Confirm this only runs in
    # an environment where executing untrusted code is acceptable.
    code_transformer = CodeTransformer(
        function=code,
        code_executor=LocalCodeExecutor(disable_security_warning=True),
    )
    generated_output_path = code_transformer.export(input_sdif_path)

    # Without any target there is nothing to compare against. Report that
    # explicitly rather than returning an empty string (directory output) or
    # the misleading "multiple target files expected" (single-file output).
    if not output_target_files_dict:
        return "Error: No target output files were provided to compare the generated output against"

    comparator_kwargs = {"check_structure_only": True} if schema_only_flag else {}

    if not os.path.isdir(generated_output_path):
        # Single generated file: only meaningful with exactly one target.
        if len(output_target_files_dict) != 1:
            return "Error: Single output file generated but multiple target files expected"

        ((output_file, output_target_file_name),) = output_target_files_dict.items()
        # Keys may be Path objects, so stringify before taking the extension.
        comparator = get_comparator(str(output_file).split(".")[-1])
        comparison = comparator.compare(
            generated_output_path, output_file, **comparator_kwargs
        )
        return _format_comparison_output(
            comparison,
            schema_only_flag,
            str(generated_output_path),
            output_target_file_name,
        )

    # Directory output: compare each expected file name with its target.
    generated_files = set(os.listdir(generated_output_path))
    comparisons = []
    for output_base_file, output_target_file_name in output_target_files_dict.items():
        if output_target_file_name not in generated_files:
            comparisons.append(
                f"Error: {output_target_file_name} not found in the generated output"
            )
            continue

        generated_file_path = os.path.join(
            generated_output_path, output_target_file_name
        )
        comparator = get_comparator(output_target_file_name.split(".")[-1])
        comparison = comparator.compare(
            generated_file_path, output_base_file, **comparator_kwargs
        )
        comparisons.append(
            _format_comparison_output(
                comparison,
                schema_only_flag,
                generated_file_path,
                output_target_file_name,
            )
        )

    return "\n".join(comparisons)
138
+
139
+
140
class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
    """Build transformation code that turns an SDIF database into output files.

    The builder gathers schema/sample information about the input (and,
    optionally, an output SDIF) through an MCP session, describes the example
    target files, asks the ``create_transformation`` prompt for instructions
    and runs an agent that can call ``execute_transformation`` to test its
    code. The code extracted from the agent's final answer is returned.
    """

    def __init__(
        self,
        mcp_server: MCPServer,
        mcp_session: ClientSession,
        llm_model: str = "o4-mini",
    ):
        """Store the MCP server/session and the model name used by the agent.

        Args:
            mcp_server: MCP server handed to the agent.
            mcp_session: Session used to read resources and fetch the prompt.
            llm_model: Model identifier passed to the agent.
        """
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model

    async def build(
        self,
        sdif: Path,
        output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
        output_sdif: Optional[Path] = None,
        instructions: str = "",
        schema_only: bool = False,
        representer_kwargs: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Run the agent and return the transformation code it produced.

        Args:
            sdif: Path to the input SDIF file; resolved to an absolute path.
            output_target_files: Example target file(s): a single path, a
                list of paths, or a mapping of path to agent-facing name.
            output_sdif: Optional SDIF describing the expected output; when
                given, its schema and sample are added to the prompt.
            instructions: Free-text instructions for the agent.
            schema_only: When true, comparisons are structure-only and the
                output sample is replaced by a placeholder in the prompt.
            representer_kwargs: Extra keyword arguments for the representers.

        Returns:
            The code extracted from the agent's final output.
        """
        resolved_input_sdif_path = Path(sdif).resolve()
        resolved_output_target_files = self._resolve_output_target_files(
            output_target_files
        )

        token_input_path = CONTEXT_INPUT_SDIF_PATH.set(resolved_input_sdif_path)
        token_output_files = CONTEXT_OUTPUT_TARGET_FILES.set(
            resolved_output_target_files
        )
        token_schema_only = CONTEXT_SCHEMA_ONLY.set(schema_only)

        try:
            # We must encode the path because special characters are not allowed in mcp read_resource()
            input_sdif_mcp_uri_path = self._encode_mcp_path(resolved_input_sdif_path)
            # Resolve the output SDIF like the input so both URIs carry
            # absolute paths that do not depend on the working directory.
            output_sdif_mcp_uri_path = (
                self._encode_mcp_path(Path(output_sdif).resolve())
                if output_sdif
                else None
            )

            input_schema = await self.mcp_session.read_resource(
                f"schema://{input_sdif_mcp_uri_path}"
            )
            input_sample = await self.mcp_session.read_resource(
                f"sample://{input_sdif_mcp_uri_path}"
            )

            output_schema_text = "N/A"
            output_sample_text = "N/A"
            if output_sdif_mcp_uri_path:
                output_schema_text = await self._read_output_resource_text(
                    "schema", output_sdif_mcp_uri_path
                )
                output_sample_text = await self._read_output_resource_text(
                    "sample", output_sdif_mcp_uri_path
                )

            output_representation = self._represent_output_targets(
                resolved_output_target_files, representer_kwargs
            )

            prompt = await self.mcp_session.get_prompt(
                "create_transformation",
                arguments={
                    # NOTE(review): this is the base64 text, not a filesystem
                    # path, so ``.name`` only drops anything before a "/" in
                    # the encoded string. Left unchanged because the prompt
                    # may rely on the encoded value - confirm the intent.
                    "input_file": Path(input_sdif_mcp_uri_path).name,
                    "input_schema": input_schema.contents[0].text
                    if input_schema.contents
                    else "Error reading input schema",
                    "input_sample": input_sample.contents[0].text
                    if input_sample.contents
                    else "Error reading input sample",
                    "output_files": str(list(resolved_output_target_files.values())),
                    "output_schema": output_schema_text,
                    "output_sample": output_sample_text
                    if not schema_only
                    else "Sample not available. File is empty (no data).",
                    "output_representation": str(output_representation),
                    "instructions": instructions
                    or "No instructions provided. Use the output example.",
                },
            )
            agent = Agent(
                name="Transformation Builder",
                mcp_servers=[self.mcp_server],
                tools=[execute_transformation],
                model=self.llm_model,
            )
            result = await Runner.run(agent, prompt.messages[0].content.text)
            return self.parse_code(result.final_output)
        finally:
            # Reset context variables after the task is done
            CONTEXT_INPUT_SDIF_PATH.reset(token_input_path)
            CONTEXT_OUTPUT_TARGET_FILES.reset(token_output_files)
            CONTEXT_SCHEMA_ONLY.reset(token_schema_only)

    @staticmethod
    def _encode_mcp_path(path: Union[str, Path]) -> str:
        """Return the base64 text of ``path`` for use inside an MCP resource URI."""
        return base64.b64encode(str(path).encode()).decode()

    @staticmethod
    def _resolve_output_target_files(
        output_target_files: Any,
    ) -> Dict[Union[str, Path], str]:
        """Normalize the accepted target-file inputs into one mapping.

        Keys are the example files used for local reading by representers and
        comparators (absolute paths where they could be resolved); values are
        the agent-facing file names. Unsupported input types yield ``{}``.
        """
        # NOTE(review): assumes ``FilePath`` is usable with ``isinstance`` as
        # the original code did; its definition is not visible here.
        if isinstance(output_target_files, FilePath):
            target = Path(output_target_files)
            return {target.resolve(): target.name}

        if isinstance(output_target_files, list):
            return {
                Path(file_path).resolve(): Path(file_path).name
                for file_path in output_target_files
            }

        if isinstance(output_target_files, dict):
            resolved: Dict[Union[str, Path], str] = {}
            for key, agent_facing_name in output_target_files.items():
                # Strings naming an existing path are promoted to Path so they
                # get resolved; any other non-Path key is kept untouched.
                if isinstance(key, str) and Path(key).exists():
                    key = Path(key)
                if isinstance(key, Path):
                    key = key.resolve()
                resolved[key] = agent_facing_name
            return resolved

        return {}

    async def _read_output_resource_text(self, kind: str, encoded_path: str) -> str:
        """Best-effort read of the ``schema``/``sample`` resource of the output SDIF.

        Returns the text of the first content item, or ``"N/A"`` when the
        resource has no contents or reading it fails (a warning is printed).
        """
        try:
            content = await self.mcp_session.read_resource(f"{kind}://{encoded_path}")
            if content.contents:
                return content.contents[0].text
        except Exception as e:
            print(
                f"Warning: Could not read {kind} for output_sdif {encoded_path}: {e}"
            )
        return "N/A"

    @staticmethod
    def _represent_output_targets(
        target_files: Dict[Union[str, Path], str],
        representer_kwargs: Optional[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Describe every target example file, keyed by its agent-facing name.

        A failing representer does not abort the build: the entry for that
        file becomes an error string instead.
        """
        # A plain dict keeps the ``defaultdict(<class 'dict'>, ...)`` wrapper
        # out of the text that is later passed to the prompt via ``str()``.
        output_representation: Dict[str, Any] = {}
        for file_key_abs_path, agent_facing_name in target_files.items():
            print(f"Representing {agent_facing_name} from {file_key_abs_path}")
            try:
                # Representer uses the absolute path (file_key_abs_path) to read the example file.
                representer = get_representer(file_key_abs_path)
                representation, used_params = representer.represent(
                    file_key_abs_path, **(representer_kwargs or {})
                )
                output_representation[agent_facing_name] = {
                    "representation": representation,
                    "used_params": used_params,
                }
            except Exception as e:
                print(
                    f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
                )
                output_representation[agent_facing_name] = (
                    f"Error representing file: {e}"
                )
        return output_representation

    def parse_code(self, code) -> str:
        """Return the contents of the first fenced code block in ``code``.

        An optional ``python`` tag right after the opening fence is dropped.
        When no fenced block is found, the whole text is returned stripped.
        """
        match = re.search(r"```(?:python)?(.*?)```", code, re.DOTALL)
        if match:
            return match.group(1).strip()
        # Handle case where no code block is found
        return code.strip()
@@ -1,277 +0,0 @@
1
- import base64
2
- import os
3
- import re
4
- from collections import defaultdict
5
- from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Union
7
-
8
- from agents import Agent, Runner, function_tool
9
- from agents.mcp.server import MCPServer
10
- from mcp import ClientSession
11
- from satif_core import AsyncTransformationBuilder
12
- from satif_core.types import FilePath
13
- from satif_sdk.code_executors.local_executor import LocalCodeExecutor
14
- from satif_sdk.comparators import get_comparator
15
- from satif_sdk.representers import get_representer
16
- from satif_sdk.transformers import CodeTransformer
17
-
18
# Global variables for transformation.
#
# SyncpulseTransformationBuilder.build() assigns these and the
# ``execute_transformation`` tool reads them back; ``None`` means that no
# build has populated the value yet.

# Resolved path of the input SDIF file.
INPUT_SDIF_PATH: Optional[Path] = None

# Example target file (key) -> agent-facing file name (value).
OUTPUT_TARGET_FILES: Optional[Dict[Union[str, Path], str]] = None

# When truthy, comparators are asked to check structure only.
SCHEMA_ONLY: Optional[bool] = None
22
-
23
-
24
- def _format_comparison_output(
25
- comparison_result: Dict[str, Any],
26
- schema_only_mode: Optional[bool],
27
- source_file_display_name: str,
28
- target_file_display_name: str,
29
- ) -> str:
30
- """
31
- Formats the comparison result string, with special handling for schema_only mode
32
- where files are equivalent due to being empty.
33
- """
34
- base_message_prefix = f"Comparison for {source_file_display_name} [SOURCE] with {target_file_display_name} [TARGET]:"
35
-
36
- if schema_only_mode is True and comparison_result.get("are_equivalent") is True:
37
- details = comparison_result.get("details", {})
38
- row_comparison = details.get("row_comparison", {})
39
-
40
- row_count1 = row_comparison.get("row_count1")
41
- row_count2 = row_comparison.get("row_count2")
42
-
43
- if (
44
- isinstance(row_count1, (int, float))
45
- and row_count1 == 0
46
- and isinstance(row_count2, (int, float))
47
- and row_count2 == 0
48
- ):
49
- return f"{base_message_prefix} Files have the same headers but are both empty (no data rows). This should not happen. Please verify the instructions and try again."
50
-
51
- # Default formatting if the special condition isn't met
52
- return f"{base_message_prefix} {comparison_result}"
53
-
54
-
55
@function_tool
async def execute_transformation(code: str) -> str:
    """Executes the transformation code on the input and returns the
    comparison difference between the transformed output and the target output example.

    Args:
        code: The code to execute on the input.
    """
    # The docstring above is kept verbatim: it is presumably what
    # ``function_tool`` exposes to the model as the tool description
    # (TODO confirm against the agents SDK version in use).
    #
    # Reads the module-level INPUT_SDIF_PATH / OUTPUT_TARGET_FILES /
    # SCHEMA_ONLY set by the builder and returns one comparison (or error)
    # line per target file, joined with newlines.
    if INPUT_SDIF_PATH is None or OUTPUT_TARGET_FILES is None:
        return "Error: Transformation context not initialized"

    # NOTE(review): ``code`` is produced by the model and is executed locally
    # with the executor's security warning disabled. Confirm this only runs in
    # an environment where executing untrusted code is acceptable.
    code_transformer = CodeTransformer(
        function=code,
        code_executor=LocalCodeExecutor(disable_security_warning=True),
    )
    generated_output_path = code_transformer.export(INPUT_SDIF_PATH)

    comparisons = []
    comparator_kwargs = {}
    if SCHEMA_ONLY:
        comparator_kwargs["check_structure_only"] = True

    if os.path.isdir(generated_output_path):
        # If it's a directory, compare each file with its corresponding target
        generated_files = os.listdir(generated_output_path)

        for output_base_file, output_target_file_name in OUTPUT_TARGET_FILES.items():
            if output_target_file_name not in generated_files:
                comparisons.append(
                    f"Error: {output_target_file_name} not found in the generated output"
                )
                continue

            generated_file_path = os.path.join(
                generated_output_path, output_target_file_name
            )
            comparator = get_comparator(output_target_file_name.split(".")[-1])
            comparison = comparator.compare(
                generated_file_path, output_base_file, **comparator_kwargs
            )
            comparisons.append(
                _format_comparison_output(
                    comparison,
                    SCHEMA_ONLY,
                    generated_file_path,
                    output_target_file_name,
                )
            )
    elif len(OUTPUT_TARGET_FILES) == 1:
        # If it's a single file, ensure there's only one target and compare
        ((output_file, output_target_file_name),) = OUTPUT_TARGET_FILES.items()
        # The builder stores resolved Path objects as keys, and Path has no
        # ``split``; stringify before extracting the extension.
        comparator = get_comparator(str(output_file).split(".")[-1])
        comparison = comparator.compare(
            generated_output_path, output_file, **comparator_kwargs
        )
        comparisons.append(
            _format_comparison_output(
                comparison,
                SCHEMA_ONLY,
                str(generated_output_path),
                output_target_file_name,
            )
        )
    else:
        comparisons.append(
            "Error: Single output file generated but multiple target files expected"
        )

    return "\n".join(comparisons)
126
-
127
-
128
class SyncpulseTransformationBuilder(AsyncTransformationBuilder):
    """Build transformation code that turns an SDIF database into output files.

    The generated code is meant to produce a set of files following the
    format of the given example output files. Context for the
    ``execute_transformation`` tool is passed through the module-level
    ``INPUT_SDIF_PATH``, ``OUTPUT_TARGET_FILES`` and ``SCHEMA_ONLY`` globals.
    """

    def __init__(
        self,
        mcp_server: MCPServer,
        mcp_session: ClientSession,
        llm_model: str = "o4-mini",
    ):
        """Store the MCP server/session and the model name used by the agent."""
        self.mcp_server = mcp_server
        self.mcp_session = mcp_session
        self.llm_model = llm_model

    async def build(
        self,
        sdif: Path,
        output_target_files: Dict[FilePath, str] | List[FilePath] | FilePath,
        output_sdif: Optional[Path] = None,
        instructions: str = "",
        schema_only: bool = False,
        representer_kwargs: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Run the agent and return the transformation code it produced.

        Args:
            sdif: Path to the input SDIF file; resolved to an absolute path.
            output_target_files: Example target file(s): a single path, a
                list of paths, or a mapping of path to agent-facing name.
            output_sdif: Optional SDIF describing the expected output; when
                given, its schema and sample are added to the prompt.
            instructions: Free-text instructions for the agent.
            schema_only: When true, comparisons are structure-only and the
                output sample is replaced by a placeholder in the prompt.
            representer_kwargs: Extra keyword arguments for the representers.

        Returns:
            The code extracted from the agent's final output.
        """
        global INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY

        # Remember the current context so it can be restored afterwards; the
        # globals would otherwise stay set after this build, even on failure.
        previous_context = (INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY)
        try:
            INPUT_SDIF_PATH = Path(sdif).resolve()
            SCHEMA_ONLY = schema_only
            # We must encode the path because special characters are not allowed in mcp read_resource()
            # The resolved path is encoded so the MCP resources refer to the
            # same file the tool executes the code against.
            input_sdif_mcp_uri_path = base64.b64encode(
                str(INPUT_SDIF_PATH).encode()
            ).decode()
            output_sdif_mcp_uri_path = (
                base64.b64encode(str(output_sdif).encode()).decode()
                if output_sdif
                else None
            )

            input_schema = await self.mcp_session.read_resource(
                f"schema://{input_sdif_mcp_uri_path}"
            )
            input_sample = await self.mcp_session.read_resource(
                f"sample://{input_sdif_mcp_uri_path}"
            )

            # The output SDIF is optional and read best-effort: failures only
            # print a warning and leave the "N/A" placeholder in place.
            output_schema_text = "N/A"
            output_sample_text = "N/A"
            if output_sdif_mcp_uri_path:
                try:
                    output_schema_content = await self.mcp_session.read_resource(
                        f"schema://{output_sdif_mcp_uri_path}"
                    )
                    if output_schema_content.contents:
                        output_schema_text = output_schema_content.contents[0].text
                except Exception as e:
                    print(
                        f"Warning: Could not read schema for output_sdif {output_sdif_mcp_uri_path}: {e}"
                    )

                try:
                    output_sample_content = await self.mcp_session.read_resource(
                        f"sample://{output_sdif_mcp_uri_path}"
                    )
                    if output_sample_content.contents:
                        output_sample_text = output_sample_content.contents[0].text
                except Exception as e:
                    print(
                        f"Warning: Could not read sample for output_sdif {output_sdif_mcp_uri_path}: {e}"
                    )

            # OUTPUT_TARGET_FILES keys are absolute paths to original example files for local reading by representers/comparators.
            # Values are agent-facing filenames.
            # NOTE(review): assumes ``FilePath`` is usable with ``isinstance``
            # as the original code did; its definition is not visible here.
            if isinstance(output_target_files, FilePath):
                OUTPUT_TARGET_FILES = {
                    Path(output_target_files).resolve(): Path(output_target_files).name
                }
            elif isinstance(output_target_files, list):
                OUTPUT_TARGET_FILES = {
                    Path(file_path).resolve(): Path(file_path).name
                    for file_path in output_target_files
                }
            elif isinstance(output_target_files, dict):
                # Only Path keys are resolved; other keys are kept as given.
                OUTPUT_TARGET_FILES = {
                    (k.resolve() if isinstance(k, Path) else k): v
                    for k, v in output_target_files.items()
                }
            else:
                OUTPUT_TARGET_FILES = {}

            # A plain dict keeps the ``defaultdict(<class 'dict'>, ...)``
            # wrapper out of the text passed to the prompt via ``str()``.
            output_representation: Dict[str, Any] = {}
            for file_key_abs_path, agent_facing_name in OUTPUT_TARGET_FILES.items():
                print(f"Representing {agent_facing_name} from {file_key_abs_path}")
                try:
                    # Representer uses the absolute path (file_key_abs_path) to read the example file.
                    representer = get_representer(file_key_abs_path)
                    representation, used_params = representer.represent(
                        file_key_abs_path, **(representer_kwargs or {})
                    )
                    output_representation[agent_facing_name] = {
                        "representation": representation,
                        "used_params": used_params,
                    }
                except Exception as e:
                    print(
                        f"Warning: Could not get representation for {agent_facing_name} (path {file_key_abs_path}): {e}"
                    )
                    output_representation[agent_facing_name] = (
                        f"Error representing file: {e}"
                    )

            prompt = await self.mcp_session.get_prompt(
                "create_transformation",
                arguments={
                    # NOTE(review): this is the base64 text, not a filesystem
                    # path, so ``.name`` only drops anything before a "/" in
                    # the encoded string. Left unchanged because the prompt
                    # may rely on the encoded value - confirm the intent.
                    "input_file": Path(input_sdif_mcp_uri_path).name,
                    "input_schema": input_schema.contents[0].text
                    if input_schema.contents
                    else "Error reading input schema",
                    "input_sample": input_sample.contents[0].text
                    if input_sample.contents
                    else "Error reading input sample",
                    "output_files": str(list(OUTPUT_TARGET_FILES.values())),
                    "output_schema": output_schema_text,
                    "output_sample": output_sample_text
                    if not SCHEMA_ONLY
                    else "Sample not available. File is empty (no data).",
                    "output_representation": str(output_representation),
                    "instructions": instructions
                    or "No instructions provided. Use the output example.",
                },
            )
            agent = Agent(
                name="Transformation Builder",
                mcp_servers=[self.mcp_server],
                tools=[execute_transformation],
                model=self.llm_model,
            )
            result = await Runner.run(agent, prompt.messages[0].content.text)
            transformation_code = self.parse_code(result.final_output)
            return transformation_code
        finally:
            # Restore whatever context was active before this build started.
            INPUT_SDIF_PATH, OUTPUT_TARGET_FILES, SCHEMA_ONLY = previous_context

    def parse_code(self, code) -> str:
        """Return the contents of the first fenced code block in ``code``.

        An optional ``python`` tag right after the opening fence is dropped.
        When no fenced block is found, the whole text is returned stripped.
        """
        match = re.search(r"```(?:python)?(.*?)```", code, re.DOTALL)
        if match:
            return match.group(1).strip()
        # Handle case where no code block is found
        return code.strip()
File without changes
File without changes