flyteplugins-codegen 2.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ """Prompt building and constants for LLM code generation."""
2
+
3
+ from typing import Optional
4
+
5
+ # Language-specific file extensions
6
+ FILE_EXTENSIONS = {"python": ".py"}
7
+
8
+ # Package manager mapping
9
+ PACKAGE_MANAGER_MAP = {"python": "pip package names (excluding standard library)"}
10
+
11
+ # Test framework configurations
12
+ TEST_FRAMEWORKS = {
13
+ "python": {
14
+ "name": "pytest",
15
+ "packages": ["pytest"],
16
+ "system_packages": [],
17
+ "command": "python -m pytest",
18
+ }
19
+ }
20
+
21
+ # Default system prompt
22
+ DEFAULT_SYSTEM_PROMPT = """You are a coding assistant that generates high-quality code in {language}."""
23
+
24
+ # Structured output requirements
25
+ STRUCTURED_OUTPUT_REQUIREMENTS = """
26
+ IMPORTANT: You must structure your response with:
27
+ 1. description: Brief explanation of what the code does
28
+ 2. language: The programming language used
29
+ 3. code: Complete executable code including all import statements and dependencies at the top
30
+ 4. system_packages: List of system packages needed
31
+ (e.g., ["gcc", "build-essential", "curl"]). Leave empty if none needed.
32
+
33
+ EXECUTION ENVIRONMENT:
34
+ - /var/inputs and /var/outputs directories are PRE-CREATED by the runtime. NEVER delete, recreate, or modify them.
35
+ NEVER use shutil.rmtree, os.rmdir, os.remove on /var/inputs or /var/outputs.
36
+ NEVER call os.makedirs('/var/outputs') or os.makedirs('/var/inputs') — they already exist.
37
+ - /var/inputs is READ-ONLY. Never write to /var/inputs.
38
+ - Write each declared output as a SEPARATE FILE under /var/outputs/: open('/var/outputs/<name>', 'w').write(str(value))
39
+ - Always use the literal path '/var/outputs' — never make it configurable or store it in a variable.
40
+ - Output files MUST be written before the script exits. Do NOT just print() values — you MUST write them to files.
41
+
42
+ Ensure all code is complete, executable, and follows best practices for the chosen language."""
43
+
44
+
45
def build_enhanced_prompt(
    prompt: str,
    language: str,
    schema: Optional[str],
    constraints: Optional[list[str]],
    data_context: Optional[str],
    inputs: Optional[dict[str, type]],
    outputs: Optional[dict[str, type]],
) -> str:
    """Assemble the full LLM prompt from the user prompt plus optional context.

    The result is built as a sequence of sections separated by blank lines:
    the language header + user prompt, an optional schema block, a bulleted
    constraints section (always present — it at least contains the script
    requirement), and an optional data-context block.

    Args:
        prompt: User's prompt
        language: Programming language
        schema: Optional schema definition
        constraints: Optional list of constraints
        data_context: Optional extracted data context (stats, patterns, schemas)
        inputs: Optional input types
        outputs: Optional output types

    Returns:
        Enhanced prompt string
    """
    sections: list[str] = [f"Language: {language}\n\n{prompt}"]

    if schema:
        sections.append(f"Schema:\n```\n{schema}\n```")

    # The script requirement always comes first; user constraints follow it.
    requirement_parts = [
        "REQUIRED: Your code will be saved as solution.py and imported by tests via "
        "`from solution import ...`. Define ALL functions and classes at MODULE LEVEL "
        "(not inside if __name__ == '__main__'). "
        "Include an if __name__ == '__main__': block that parses command line arguments "
        "using argparse and calls your functions. "
    ]

    if inputs:
        # Describe one CLI flag per declared input.
        cli_specs = []
        for arg_name, arg_type in inputs.items():
            label = arg_type.__name__ if hasattr(arg_type, "__name__") else str(arg_type)
            if "File" in label:
                # File inputs arrive on the command line as plain string paths.
                cli_specs.append(f"--{arg_name} (str): path to {label.lower()}")
            else:
                cli_specs.append(f"--{arg_name} ({label})")
        requirement_parts.append(
            f"Accept these command line arguments: {', '.join(cli_specs)}. "
        )
        # Spell out File handling explicitly when any input is a File.
        if any("File" in str(t) for t in inputs.values()):
            requirement_parts.append(
                "File arguments are string paths - use them directly with open() or other file operations."
            )
    elif data_context:
        requirement_parts.append(
            "Accept appropriate command line arguments to process the data samples."
        )
    else:
        requirement_parts.append(
            "Include appropriate command line arguments if needed."
        )

    bullet_items = ["".join(requirement_parts)]

    # Output requirement derived from the declared outputs.
    if outputs:
        output_lines = []
        for out_name, out_type in outputs.items():
            label = out_type.__name__ if hasattr(out_type, "__name__") else str(out_type)
            if "File" in label:
                output_lines.append(
                    f"- {out_name}: write the output file directly to /var/outputs/{out_name}"
                )
            else:
                output_lines.append(
                    f"- {out_name} ({label}): write the value to /var/outputs/{out_name}"
                )
        bullet_items.append(
            "OUTPUT REQUIREMENTS — you MUST write each output as a file under /var/outputs/:\n"
            + "\n".join(output_lines)
            + "\nUse this exact pattern for each output:\n"
            "with open('/var/outputs/<name>', 'w') as f:\n"
            "    f.write(str(value))\n"
            "/var/outputs/ already exists. NEVER delete, recreate, or modify the directory itself. Only write files into it.\n"
            "Outputs MUST be written before the script exits — do NOT just print() values."
        )

    if constraints:
        bullet_items.extend(constraints)

    sections.append("Constraints:\n" + "\n".join(f"- {item}" for item in bullet_items))

    if data_context:
        sections.append(f"Data context:\n```\n{data_context}\n```")

    return "\n\n".join(sections)
@@ -0,0 +1,441 @@
1
+ Metadata-Version: 2.4
2
+ Name: flyteplugins-codegen
3
+ Version: 2.0.6
4
+ Summary: LLM-powered code generation and evaluation plugin for Flyte
5
+ Author-email: Samhita Alla <samhita@union.ai>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: flyte
9
+ Requires-Dist: litellm
10
+ Requires-Dist: pandas
11
+ Requires-Dist: pandera[io]
12
+ Provides-Extra: agent
13
+ Requires-Dist: claude-agent-sdk; extra == "agent"
14
+
15
+ # Code Generation and Evaluation Plugin
16
+
17
+ Generate code from natural language prompts and validate it by running tests in an isolated sandbox. Works with any model that supports structured outputs (GPT-4, Claude, Gemini, etc. via LiteLLM) or directly with the Agent SDK (Claude-only).
18
+
19
+ > **Note:** Only Python is supported today.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install flyteplugins-codegen
25
+
26
+ # For Agent SDK mode (Claude-only)
27
+ pip install flyteplugins-codegen[agent]
28
+ ```
29
+
30
+ ## Quick start
31
+
32
+ ```python
33
+ import flyte
34
+ from flyte.io import File
35
+ from flyte.sandbox import sandbox_environment
36
+ from flyteplugins.codegen import AutoCoderAgent
37
+
38
+ agent = AutoCoderAgent(model="gpt-4.1", name="summarize-sales", resources=flyte.Resources(cpu=1, memory="1Gi"))
39
+
40
+ env = flyte.TaskEnvironment(
41
+ name="my-env",
42
+ secrets=[flyte.Secret(key="openai_key", as_env_var="OPENAI_API_KEY")],
43
+ image=flyte.Image.from_debian_base().with_pip_packages(
44
+ "flyteplugins-codegen",
45
+ ),
46
+ depends_on=[sandbox_environment], # Required
47
+ )
48
+
49
+ @env.task
50
+ async def process_data(csv_file: File) -> tuple[float, int, int]:
51
+ result = await agent.generate.aio(
52
+ prompt="Read the CSV and compute total_revenue, total_units and row_count.",
53
+ samples={"sales": csv_file},
54
+ outputs={"total_revenue": float, "total_units": int, "row_count": int},
55
+ )
56
+ return await result.run.aio()
57
+ ```
58
+
59
+ ## Two approaches
60
+
61
+ ### 1. LiteLLM (default)
62
+
63
+ Uses structured-output LLM calls to generate code, detect packages, build sandbox images, run tests, diagnose failures and iterate. Works with any model that supports structured outputs (GPT-4, Claude, Gemini, etc. via LiteLLM).
64
+
65
+ ```python
66
+ agent = AutoCoderAgent(
67
+ name="my-task",
68
+ model="gpt-4.1", # Any LiteLLM-compatible model
69
+ max_iterations=10, # Generate-test-fix iterations
70
+ )
71
+
72
+ result = await agent.generate.aio(
73
+ prompt="...",
74
+ samples={"input": my_file},
75
+ outputs={"result": str},
76
+ )
77
+ ```
78
+
79
+ **How it works:**
80
+
81
+ ```
82
+ prompt + samples
83
+ |
84
+ v
85
+ [generate_plan] --> CodePlan
86
+ |
87
+ v
88
+ [generate_code] --> CodeSolution (dependencies + code)
89
+ |
90
+ v
91
+ [detect_packages] --> pip/system packages
92
+ |
93
+ v
94
+ [build_image] --> Sandbox image with deps
95
+ |
96
+ +-- skip_tests=True? --> return result (no tests)
97
+ |
98
+ v
99
+ [generate_tests] --> pytest suite
100
+ |
101
+ v
102
+ [execute_tests] --> pass? return result
103
+ | |
104
+ | fail
105
+ v |
106
+ [diagnose_error] --> logic/environment/test_error
107
+ |
108
+ +-- logic error ---------> regenerate code with patch instructions
109
+ +-- environment error ---> add packages, rebuild image
110
+ +-- test error ----------> fix test expectations
111
+ |
112
+ v
113
+ (repeat up to max_iterations)
114
+ ```
115
+
116
+ ### 2. Agent SDK
117
+
118
+ Uses the Claude Agent SDK to autonomously generate, test and fix code. The agent has access to `Bash`, `Read`, `Write` and `Edit` tools and iterates on its own. Test execution is intercepted and run in an isolated `Sandbox`.
119
+
120
+ ```python
121
+ agent = AutoCoderAgent(
122
+ name="my-task",
123
+ model="claude-sonnet-4-5-20250929",
124
+ backend="claude", # Requires ANTHROPIC_API_KEY as a Flyte secret
125
+ )
126
+
127
+ result = await agent.generate.aio(
128
+ prompt="...",
129
+ samples={"input": my_file},
130
+ outputs={"result": str},
131
+ )
132
+ ```
133
+
134
+ **Key differences from LiteLLM:**
135
+
136
+ - Agent runs autonomously (no structured retry loop)
137
+ - Requires `ANTHROPIC_API_KEY` as a Flyte secret
138
+ - Claude-only (not model agnostic)
139
+ - Traces agent tool calls, reasoning and test results in the Flyte UI
140
+ - Test commands are intercepted via hooks and run in isolated sandbox environments
141
+
142
+ ## API reference
143
+
144
+ ### `AutoCoderAgent`
145
+
146
+ Create an agent instance with configuration, then call `generate()` per task.
147
+
148
+ ```python
149
+ agent = AutoCoderAgent(name="my-agent", model="gpt-4.1")
150
+
151
+ # Sync
152
+ result = agent.generate(prompt="...")
153
+
154
+ # Async
155
+ result = await agent.generate.aio(prompt="...")
156
+ ```
157
+
158
+ **Constructor parameters (agent-level config):**
159
+
160
+ | Parameter | Type | Default | Description |
161
+ | --------------------- | ----------------- | -------------- | ------------------------------------------------------------- |
162
+ | `name` | `str` | `"auto-coder"` | Unique name for tracking and image naming |
163
+ | `model` | `str` | `"gpt-4.1"` | LiteLLM model identifier |
164
+ | `system_prompt` | `str` | `None` | Custom system prompt override |
165
+ | `api_key` | `str` | `None` | Env var name for LLM API key |
166
+ | `api_base` | `str` | `None` | Custom API base URL |
167
+ | `litellm_params` | `dict` | `None` | Extra LiteLLM params (temperature, max_tokens, etc.) |
168
+ | `base_packages` | `list[str]` | `None` | Always-install pip packages |
169
+ | `resources` | `flyte.Resources` | `None` | Resources for sandbox execution (default: cpu=1, 1Gi) |
170
+ | `image_config` | `ImageConfig` | `None` | Registry, registry_secret, python_version |
171
+ | `max_iterations` | `int` | `10` | Max generate-test-fix iterations (LiteLLM mode) |
172
+ | `max_sample_rows` | `int` | `100` | Rows to sample from data for context |
173
+ | `skip_tests` | `bool` | `False` | Skip test generation and execution (LiteLLM mode only) |
174
+ | `network_access` | `bool` | `False` | Allow generated code to access the network inside the sandbox |
175
+ | `sandbox_retries` | `int` | `0` | Flyte task-level retries for each sandbox execution |
176
+ | `timeout` | `int` | `None` | Timeout in seconds for sandboxes |
177
+ | `env_vars` | `dict[str, str]` | `None` | Environment variables to pass to sandboxes |
178
+ | `secrets` | `list` | `None` | `flyte.Secret` objects to make available to sandboxes |
179
+ | `cache` | `str` | `"auto"` | CacheRequest for sandboxes: `"auto"`, `"override"`, or `"disable"` |
180
+ | `backend` | `str` | `"litellm"` | Execution backend: `"litellm"` or `"claude"` |
181
+ | `agent_max_turns` | `int` | `50` | Max turns when `backend="claude"` |
182
+
183
+ **`generate()` parameters (per-call):**
184
+
185
+ | Parameter | Type | Default | Description |
186
+ | ------------- | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------- |
187
+ | `prompt` | `str` | required | Natural-language task description |
188
+ | `schema` | `str` | `None` | Free-form context about data formats, structures, or schemas. Included verbatim in the LLM prompt. |
189
+ | `constraints` | `list[str]` | `None` | Natural-language constraints (e.g., `"quantity must be positive"`) |
190
+ | `samples` | `dict[str, File \| pd.DataFrame]` | `None` | Sample data. Sampled for LLM context, converted to File inputs for the sandbox. Used as defaults at runtime. |
191
+ | `inputs` | `dict[str, type]` | `None` | Non-sample CLI argument types (e.g., `{"threshold": float}`). Sample entries are auto-added as File inputs. |
192
+ | `outputs` | `dict[str, type]` | `None` | Output types. Supported: `str, int, float, bool, datetime, timedelta, File`. |
193
+
194
+ ### `CodeGenEvalResult`
195
+
196
+ Returned by `agent.generate()`. Key fields:
197
+
198
+ ```python
199
+ result.success # bool — did tests pass?
200
+ result.solution # CodeSolution — generated code
201
+ result.tests # str — generated test code
202
+ result.output # str — test output
203
+ result.exit_code # int — test exit code
204
+ result.error # str | None — error message if failed
205
+ result.attempts # int — number of iterations used
206
+ result.image # str — built sandbox image with all deps
207
+ result.detected_packages # list[str] — pip packages detected
208
+ result.detected_system_packages # list[str] — apt packages detected
209
+ result.generated_schemas # dict[str, str] | None — Pandera schemas as code
210
+ result.data_context # str | None — extracted data context
211
+ result.original_samples # dict[str, File] | None — sample data as Files
212
+ ```
213
+
214
+ #### `result.as_task()`
215
+
216
+ Create a reusable sandbox from the generated code:
217
+
218
+ ```python
219
+ task = result.as_task(name="run-on-data")
220
+
221
+ # Call with your declared inputs — returns a tuple of outputs
222
+ total_revenue, total_units, transaction_count = task(sales_csv=my_file)
223
+
224
+ # If samples were provided, they are injected as defaults — override as needed
225
+ total_revenue, total_units, transaction_count = task(threshold=0.5) # samples used for data inputs
226
+
227
+ # With sandbox options
228
+ task = result.as_task(
229
+ name="run-on-data",
230
+ retries=3,
231
+ timeout=600,
232
+ env_vars={"API_URL": "https://..."},
233
+ )
234
+ ```
235
+
236
+ The task runs the generated script in the built sandbox image. Inputs are passed as `--name value` CLI arguments. Outputs are read from `/var/outputs/{name}` files.
237
+
238
+ #### `result.run()`
239
+
240
+ One-shot execution using sample data as defaults:
241
+
242
+ ```python
243
+ # Sync
244
+ total_revenue, total_units, transaction_count = result.run()
245
+
246
+ # Async
247
+ total_revenue, total_units, transaction_count = await result.run.aio()
248
+
249
+ # Override specific inputs
250
+ total_revenue, total_units, transaction_count = result.run(threshold=0.5)
251
+ ```
252
+
253
+ ## Data handling
254
+
255
+ When you pass `samples`, the plugin automatically:
256
+
257
+ 1. **Converts DataFrames to CSVs** and uploads as `File` objects
258
+ 2. **Infers Pandera schemas** — conservative type + nullability checks inferred from the sample data (no value constraints)
259
+ 3. **Applies natural-language constraints** — if `constraints` are provided, each one is parsed by the LLM into a Pandera check (e.g., `"quantity must be positive"` → `pa.Check.gt(0)`) and added to the schema
260
+ 4. **Extracts comprehensive context** — column stats, distributions, patterns, sample rows
261
+ 5. **Includes everything in the prompt** — the serialized schemas and data context are injected into the LLM prompt so the generated code is aware of exact column types, nullability and validation rules
262
+
263
+ Pandera is used purely for **prompt enrichment**, not runtime validation. The generated code itself doesn't import Pandera — it just benefits from the LLM knowing the precise data structure. The schemas are also stored on `result.generated_schemas` for inspection.
264
+
265
+ ```python
266
+ result = await agent.generate.aio(
267
+ prompt="Clean and validate the data, remove duplicates",
268
+ samples={"orders": orders_df, "products": products_file},
269
+ constraints=["quantity must be positive", "price between 0 and 10000"],
270
+ outputs={"cleaned_orders": File},
271
+ )
272
+
273
+ # Access generated schemas
274
+ print(result.generated_schemas) # {"orders": "DataFrameSchema(...)", "products": "..."}
275
+ ```
276
+
277
+ ## Configuration
278
+
279
+ ### Image configuration
280
+
281
+ ```python
282
+ agent = AutoCoderAgent(
283
+ model="gpt-4.1",
284
+ name="my-task",
285
+ image_config=ImageConfig(
286
+ registry="my-registry.io",
287
+ registry_secret="registry-creds",
288
+ python_version=(3, 12),
289
+ ),
290
+ )
291
+ ```
292
+
293
+ ### LiteLLM configuration
294
+
295
+ ```python
296
+ agent = AutoCoderAgent(
297
+ name="my-task",
298
+ model="anthropic/claude-sonnet-4-20250514",
299
+ api_key="ANTHROPIC_API_KEY", # env var name
300
+ litellm_params={
301
+ "temperature": 0.3,
302
+ "max_tokens": 4000,
303
+ },
304
+ )
305
+ ```
306
+
307
+ ### Skipping tests
308
+
309
+ Set `skip_tests=True` to skip test generation and execution. The agent will still generate code, detect packages, and build the sandbox image, but won't generate or run tests. This is useful when you trust the LLM output or want faster turnaround.
310
+
311
+ ```python
312
+ agent = AutoCoderAgent(
313
+ name="my-task",
314
+ model="gpt-4.1",
315
+ skip_tests=True, # No test generation or execution
316
+ )
317
+
318
+ result = await agent.generate.aio(
319
+ prompt="Parse JSON logs and extract error counts",
320
+ samples={"logs": log_file},
321
+ outputs={"error_count": int},
322
+ )
323
+
324
+ # result.as_task() and result.run() still work
325
+ error_count = await result.run.aio()
326
+ ```
327
+
328
+ > **Note:** `skip_tests` only applies to LiteLLM mode. In Agent SDK mode, the agent autonomously decides when to test.
329
+
330
+ ### Environment setup
331
+
332
+ `sandbox_environment` must be listed as a dependency of your TaskEnvironment:
333
+
334
+ ```python
335
+ from flyte.sandbox import sandbox_environment
336
+
337
+ env = flyte.TaskEnvironment(
338
+ name="my-env",
339
+ image=flyte.Image.auto(),
340
+ depends_on=[sandbox_environment], # Required
341
+ )
342
+ ```
343
+
344
+ This allows dynamically-created sandboxes to be registered with Flyte.
345
+
346
+ > **Tip:** Use one `AutoCoderAgent` per task. Each `generate()` call builds its own sandbox image and manages its own package/image state. Running multiple agents in the same task can cause resource contention and makes failures harder to diagnose.
347
+
348
+ ## Module Structure
349
+
350
+ ```
351
+ codegen/
352
+ ├── __init__.py # Public API: AutoCoderAgent, CodeGenEvalResult, types
353
+ ├── auto_coder_agent.py # AutoCoderAgent — config + generate() orchestrator
354
+ ├── core/
355
+ │ └── types.py # Pydantic models: CodeGenEvalResult, CodeSolution, CodePlan, etc.
356
+ ├── data/
357
+ │ ├── extraction.py # Extract context from DataFrames/Files (stats, patterns, samples)
358
+ │ └── schema.py # Pandera schema inference, constraint parsing via LLM
359
+ ├── execution/
360
+ │ ├── agent.py # Claude Agent SDK path with hooks and sandbox test interception
361
+ │ ├── docker.py # Image building (create_image_spec, incremental builds)
362
+ │ └── testing.py # Test execution in sandboxes
363
+ ├── generation/
364
+ │ ├── llm.py # LLM calls: plan, code, tests, diagnosis, fixes, verification
365
+ │ └── prompts.py # Prompt templates and constants
366
+ ```
367
+
368
+ ### Data flow
369
+
370
+ ```
371
+ User calls agent.generate(prompt, samples, outputs, ...)
372
+
373
+ ├─ Data Processing (both paths)
374
+ │ ├─ Convert DataFrames → CSV Files
375
+ │ ├─ Infer Pandera schemas
376
+ │ ├─ Apply user constraints (LLM-parsed)
377
+ │ └─ Extract data context (stats, patterns, samples)
378
+
379
+ ├─ LiteLLM Path (default) ├─ Agent SDK Path (backend="claude")
380
+ │ ├─ generate_plan() │ ├─ Build prompt with all context
381
+ │ ├─ generate_code() │ ├─ Launch Claude agent with hooks:
382
+ │ ├─ detect_packages() │ │ ├─ PreToolUse: trace + classify commands
383
+ │ ├─ build_image() │ │ │ ├─ pytest → run in sandbox
384
+ │ ├─ execute_tests() │ │ │ ├─ safe (ls, cat, ...) → allow
385
+ │ ├─ diagnose_error() (if failed) │ │ │ └─ denied (apt, pip, curl, ...) → block
386
+ │ ├─ fix code/tests/env │ │ ├─ PostToolUseFailure: trace errors
387
+ │ └─ repeat until pass or max_iterations │ │ └─ Stop: trace summary
388
+ │ │ ├─ Agent writes solution.py, tests.py, packages.txt
389
+ │ │ ├─ pytest intercepted → sandbox execution
390
+ │ │ └─ Agent iterates until tests pass
391
+
392
+ └─ Return CodeGenEvalResult
393
+ ├─ .solution (code)
394
+ ├─ .image (sandbox image with deps)
395
+ ├─ .as_task() → reusable sandbox
396
+ └─ .run() → execute on sample data
397
+ ```
398
+
399
+ ## Error handling
400
+
401
+ The LiteLLM path classifies test failures into three types:
402
+
403
+ | Type | Meaning | Action |
404
+ | ------------- | -------------------------- | ------------------------------------------------ |
405
+ | `logic` | Bug in generated code | Regenerate code with specific patch instructions |
406
+ | `environment` | Missing package/dependency | Add package, rebuild image |
407
+ | `test_error` | Bug in generated test | Fix test expectations |
408
+
409
+ If the same error persists after fixes, the plugin reclassifies it (logic <-> test_error) to try the other approach.
410
+
411
+ ## Observability
412
+
413
+ ### LiteLLM path
414
+
415
+ - Logs every iteration with attempt count, error type, and package changes
416
+ - Tracks total input/output tokens across all LLM calls
417
+ - Results include full conversation history for debugging
418
+
419
+ ### Agent SDK path
420
+
421
+ - Traces each tool call (name + input detail) via `PreToolUse` hook
422
+ - Traces tool failures via `PostToolUseFailure` hook
423
+ - Traces a summary when the agent finishes (total tool calls, tool distribution, final image/packages)
424
+ - Classifies Bash commands as safe, denied, or pytest (intercepted for sandbox execution)
425
+ - All traces appear in the Flyte UI under the task
426
+
427
+ ## Examples
428
+
429
+ See the `examples/` directory:
430
+
431
+ - **`example_csv_processing.py`** — Process CSVs with different schemas using LiteLLM. Shows batch processing with multiple CSV formats.
432
+ - **`example_csv_processing_sync.py`** — Synchronous version of CSV processing. Shows `agent.generate()` and `result.run()` without async.
433
+ - **`example_csv_processing_agent.py`** — CSV processing using Agent SDK with `backend="claude"`.
434
+ - **`example_dataframe_analysis.py`** — DataFrame analysis with constraints, `base_packages`, and `as_task()` for reusable execution.
435
+ - **`example_dataframe_analysis_agent.py`** — Same DataFrame analysis using Agent SDK.
436
+ - **`example_prompt_only.py`** — Log file analysis with `schema`, `constraints`, `samples`, and explicit `inputs`/`outputs`.
437
+ - **`example_prompt_only_agent.py`** — Same log analysis using Agent SDK.
438
+ - **`example_multi_input.py`** — Multi-input data join with primitives (`float`, `bool`).
439
+ - **`example_multi_input_agent.py`** — Same multi-input join using Agent SDK.
440
+ - **`example_durable_execution.py`** — Durable execution with injected failures, retries, and caching (LLM approach).
441
+ - **`example_durable_execution_agent.py`** — Same durable execution using Agent SDK.
@@ -0,0 +1,17 @@
1
+ flyteplugins/codegen/__init__.py,sha256=jumoM0Po0Tx1KqatcdNi_Mk1MYCLcccnyjn6d1zurLQ,366
2
+ flyteplugins/codegen/auto_coder_agent.py,sha256=fVM2-ZZijrPe7SwDZJ_kU8niIb5qepVhs-qwWvpLHm8,45820
3
+ flyteplugins/codegen/core/__init__.py,sha256=YjAN0PpvkhERFmlkEq78O2q92bmgyGAVttYXeDH_hyc,355
4
+ flyteplugins/codegen/core/types.py,sha256=YbJG1we7gLxtherSvTqg9mSjSfd4FNs017iUVOrdMBs,12276
5
+ flyteplugins/codegen/data/__init__.py,sha256=UDkKuvyBEZmsPaTR6yEexcQjLutMRxuV5-pWflGc_sI,670
6
+ flyteplugins/codegen/data/extraction.py,sha256=Q0ytA1pp4qXCB9nNdjEtEVn9yEcwdkRlDL6df1XtBSg,10029
7
+ flyteplugins/codegen/data/schema.py,sha256=ib2J6mdgzZtfxyKY4233aeYNPZi8KzUOCJVc15DazRo,9101
8
+ flyteplugins/codegen/execution/__init__.py,sha256=3i4bP_xNp8ZgsolRI7ccUtkIW-h-1zCXFrjWfltVu3U,192
9
+ flyteplugins/codegen/execution/agent.py,sha256=EBTAio0cjg9I9kHW3oHPnn6wjrq5E43MWHTvh3jf7nQ,27184
10
+ flyteplugins/codegen/execution/docker.py,sha256=4M2SzrsAcN_LmN_cFfAjT8K5hJQxiL1fA3QWK1jtnrA,7166
11
+ flyteplugins/codegen/generation/__init__.py,sha256=GkLiXfJeVQmLlHf4R08qzgUa5wYG97KseGjRtLCFZhU,1065
12
+ flyteplugins/codegen/generation/llm.py,sha256=CNFuC3YY-ZK8xjCHGJag34J6JtFbTNy_eDbxOoI1TEM,44237
13
+ flyteplugins/codegen/generation/prompts.py,sha256=EbH3bHNa8WlavHk2cf7i497J265rZ4ZPjfRdy97JJ7M,5774
14
+ flyteplugins_codegen-2.0.6.dist-info/METADATA,sha256=aVP2zMlCktjPFbGlUhCwR8WWCtPwxYDIewClWKoKBsg,19197
15
+ flyteplugins_codegen-2.0.6.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ flyteplugins_codegen-2.0.6.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
17
+ flyteplugins_codegen-2.0.6.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ flyteplugins