flyteplugins-codegen 2.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flyteplugins/codegen/__init__.py +18 -0
- flyteplugins/codegen/auto_coder_agent.py +1088 -0
- flyteplugins/codegen/core/__init__.py +19 -0
- flyteplugins/codegen/core/types.py +337 -0
- flyteplugins/codegen/data/__init__.py +27 -0
- flyteplugins/codegen/data/extraction.py +281 -0
- flyteplugins/codegen/data/schema.py +270 -0
- flyteplugins/codegen/execution/__init__.py +7 -0
- flyteplugins/codegen/execution/agent.py +671 -0
- flyteplugins/codegen/execution/docker.py +206 -0
- flyteplugins/codegen/generation/__init__.py +41 -0
- flyteplugins/codegen/generation/llm.py +1269 -0
- flyteplugins/codegen/generation/prompts.py +136 -0
- flyteplugins_codegen-2.0.6.dist-info/METADATA +441 -0
- flyteplugins_codegen-2.0.6.dist-info/RECORD +17 -0
- flyteplugins_codegen-2.0.6.dist-info/WHEEL +5 -0
- flyteplugins_codegen-2.0.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Prompt building and constants for LLM code generation."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
# Language-specific file extensions
# Keys are language identifiers, values include the leading dot.
# Only Python is supported today (see package README).
FILE_EXTENSIONS = {"python": ".py"}

# Package manager mapping
# Maps a language to the phrase injected into LLM prompts describing which
# package names the model should list for that language.
PACKAGE_MANAGER_MAP = {"python": "pip package names (excluding standard library)"}

# Test framework configurations
# Per-language test-runner metadata: framework display name, pip packages to
# install, extra system (apt) packages, and the command that runs the suite.
TEST_FRAMEWORKS = {
    "python": {
        "name": "pytest",
        "packages": ["pytest"],
        "system_packages": [],
        "command": "python -m pytest",
    }
}

# Default system prompt
# NOTE: contains a {language} placeholder — callers are expected to format it
# with the target language before use.
DEFAULT_SYSTEM_PROMPT = """You are a coding assistant that generates high-quality code in {language}."""

# Structured output requirements
# Appended to generation prompts so the LLM (a) emits the structured fields the
# plugin parses (description / language / code / system_packages) and (b)
# respects the sandbox filesystem contract: /var/inputs is read-only,
# /var/outputs is pre-created and receives one file per declared output.
STRUCTURED_OUTPUT_REQUIREMENTS = """
IMPORTANT: You must structure your response with:
1. description: Brief explanation of what the code does
2. language: The programming language used
3. code: Complete executable code including all import statements and dependencies at the top
4. system_packages: List of system packages needed
   (e.g., ["gcc", "build-essential", "curl"]). Leave empty if none needed.

EXECUTION ENVIRONMENT:
- /var/inputs and /var/outputs directories are PRE-CREATED by the runtime. NEVER delete, recreate, or modify them.
  NEVER use shutil.rmtree, os.rmdir, os.remove on /var/inputs or /var/outputs.
  NEVER call os.makedirs('/var/outputs') or os.makedirs('/var/inputs') — they already exist.
- /var/inputs is READ-ONLY. Never write to /var/inputs.
- Write each declared output as a SEPARATE FILE under /var/outputs/: open('/var/outputs/<name>', 'w').write(str(value))
- Always use the literal path '/var/outputs' — never make it configurable or store it in a variable.
- Output files MUST be written before the script exits. Do NOT just print() values — you MUST write them to files.

Ensure all code is complete, executable, and follows best practices for the chosen language."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def build_enhanced_prompt(
    prompt: str,
    language: str,
    schema: Optional[str],
    constraints: Optional[list[str]],
    data_context: Optional[str],
    inputs: Optional[dict[str, type]],
    outputs: Optional[dict[str, type]],
) -> str:
    """Build enhanced prompt with language, schema, constraints, data context, inputs, and outputs.

    Args:
        prompt: User's prompt
        language: Programming language
        schema: Optional schema definition
        constraints: Optional list of constraints
        data_context: Optional extracted data context (stats, patterns, schemas)
        inputs: Optional input types
        outputs: Optional output types

    Returns:
        Enhanced prompt string
    """

    def _label(tp: type) -> str:
        # Prefer the class name; fall back to str() for typing constructs.
        return tp.__name__ if hasattr(tp, "__name__") else str(tp)

    pieces = [f"Language: {language}\n\n{prompt}"]

    if schema:
        pieces.append(f"\n\nSchema:\n```\n{schema}\n```")

    # The script requirement always comes first, ahead of user constraints.
    script_constraint = (
        "REQUIRED: Your code will be saved as solution.py and imported by tests via "
        "`from solution import ...`. Define ALL functions and classes at MODULE LEVEL "
        "(not inside if __name__ == '__main__'). "
        "Include an if __name__ == '__main__': block that parses command line arguments "
        "using argparse and calls your functions. "
    )

    # Describe the CLI surface the generated script must expose.
    if inputs:
        arg_specs = []
        for arg_name, arg_type in inputs.items():
            label = _label(arg_type)
            if "File" in label:
                # File-typed inputs are delivered as plain string paths.
                arg_specs.append(f"--{arg_name} (str): path to {label.lower()}")
            else:
                arg_specs.append(f"--{arg_name} ({label})")
        script_constraint += f"Accept these command line arguments: {', '.join(arg_specs)}. "

        # Make File handling explicit so the model does not wrap paths in objects.
        if any("File" in str(tp) for tp in inputs.values()):
            script_constraint += (
                "File arguments are string paths - use them directly with open() or other file operations."
            )
    elif data_context:
        script_constraint += "Accept appropriate command line arguments to process the data samples."
    else:
        script_constraint += "Include appropriate command line arguments if needed."

    all_constraints = [script_constraint]

    # Spell out the one-file-per-output contract for every declared output.
    if outputs:
        requirement_lines = []
        for out_name, out_type in outputs.items():
            label = _label(out_type)
            if "File" in label:
                requirement_lines.append(f"- {out_name}: write the output file directly to /var/outputs/{out_name}")
            else:
                requirement_lines.append(f"- {out_name} ({label}): write the value to /var/outputs/{out_name}")
        output_list = "\n".join(requirement_lines)
        all_constraints.append(
            f"""OUTPUT REQUIREMENTS — you MUST write each output as a file under /var/outputs/:
{output_list}
Use this exact pattern for each output:
with open('/var/outputs/<name>', 'w') as f:
    f.write(str(value))
/var/outputs/ already exists. NEVER delete, recreate, or modify the directory itself. Only write files into it.
Outputs MUST be written before the script exits — do NOT just print() values."""
        )

    if constraints:
        all_constraints.extend(constraints)

    pieces.append("\n\nConstraints:\n" + "\n".join(f"- {c}" for c in all_constraints))

    if data_context:
        pieces.append(f"\n\nData context:\n```\n{data_context}\n```")

    return "".join(pieces)
|
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flyteplugins-codegen
|
|
3
|
+
Version: 2.0.6
|
|
4
|
+
Summary: LLM-powered code generation and evaluation plugin for Flyte
|
|
5
|
+
Author-email: Samhita Alla <samhita@union.ai>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: flyte
|
|
9
|
+
Requires-Dist: litellm
|
|
10
|
+
Requires-Dist: pandas
|
|
11
|
+
Requires-Dist: pandera[io]
|
|
12
|
+
Provides-Extra: agent
|
|
13
|
+
Requires-Dist: claude-agent-sdk; extra == "agent"
|
|
14
|
+
|
|
15
|
+
# Code Generation and Evaluation Plugin
|
|
16
|
+
|
|
17
|
+
Generate code from natural language prompts and validate it by running tests in an isolated sandbox. Works with any model that supports structured outputs (GPT-4, Claude, Gemini, etc. via LiteLLM) or directly with the Agent SDK (Claude-only).
|
|
18
|
+
|
|
19
|
+
> **Note:** Only Python is supported today.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install flyteplugins-codegen
|
|
25
|
+
|
|
26
|
+
# For Agent SDK mode (Claude-only)
|
|
27
|
+
pip install flyteplugins-codegen[agent]
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import flyte
|
|
34
|
+
from flyte.io import File
|
|
35
|
+
from flyte.sandbox import sandbox_environment
|
|
36
|
+
from flyteplugins.codegen import AutoCoderAgent
|
|
37
|
+
|
|
38
|
+
agent = AutoCoderAgent(model="gpt-4.1", name="summarize-sales", resources=flyte.Resources(cpu=1, memory="1Gi"))
|
|
39
|
+
|
|
40
|
+
env = flyte.TaskEnvironment(
|
|
41
|
+
name="my-env",
|
|
42
|
+
secrets=[flyte.Secret(key="openai_key", as_env_var="OPENAI_API_KEY")],
|
|
43
|
+
image=flyte.Image.from_debian_base().with_pip_packages(
|
|
44
|
+
"flyteplugins-codegen",
|
|
45
|
+
),
|
|
46
|
+
depends_on=[sandbox_environment], # Required
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
@env.task
|
|
50
|
+
async def process_data(csv_file: File) -> tuple[float, int, int]:
|
|
51
|
+
result = await agent.generate.aio(
|
|
52
|
+
prompt="Read the CSV and compute total_revenue, total_units and row_count.",
|
|
53
|
+
samples={"sales": csv_file},
|
|
54
|
+
outputs={"total_revenue": float, "total_units": int, "row_count": int},
|
|
55
|
+
)
|
|
56
|
+
return await result.run.aio()
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Two approaches
|
|
60
|
+
|
|
61
|
+
### 1. LiteLLM (default)
|
|
62
|
+
|
|
63
|
+
Uses structured-output LLM calls to generate code, detect packages, build sandbox images, run tests, diagnose failures and iterate. Works with any model that supports structured outputs (GPT-4, Claude, Gemini, etc. via LiteLLM).
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
agent = AutoCoderAgent(
|
|
67
|
+
name="my-task",
|
|
68
|
+
model="gpt-4.1", # Any LiteLLM-compatible model
|
|
69
|
+
max_iterations=10, # Generate-test-fix iterations
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
result = await agent.generate.aio(
|
|
73
|
+
prompt="...",
|
|
74
|
+
samples={"input": my_file},
|
|
75
|
+
outputs={"result": str},
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**How it works:**
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
prompt + samples
|
|
83
|
+
|
|
|
84
|
+
v
|
|
85
|
+
[generate_plan] --> CodePlan
|
|
86
|
+
|
|
|
87
|
+
v
|
|
88
|
+
[generate_code] --> CodeSolution (dependencies + code)
|
|
89
|
+
|
|
|
90
|
+
v
|
|
91
|
+
[detect_packages] --> pip/system packages
|
|
92
|
+
|
|
|
93
|
+
v
|
|
94
|
+
[build_image] --> Sandbox image with deps
|
|
95
|
+
|
|
|
96
|
+
+-- skip_tests=True? --> return result (no tests)
|
|
97
|
+
|
|
|
98
|
+
v
|
|
99
|
+
[generate_tests] --> pytest suite
|
|
100
|
+
|
|
|
101
|
+
v
|
|
102
|
+
[execute_tests] --> pass? return result
|
|
103
|
+
| |
|
|
104
|
+
| fail
|
|
105
|
+
v |
|
|
106
|
+
[diagnose_error] --> logic/environment/test_error
|
|
107
|
+
|
|
|
108
|
+
+-- logic error ---------> regenerate code with patch instructions
|
|
109
|
+
+-- environment error ---> add packages, rebuild image
|
|
110
|
+
+-- test error ----------> fix test expectations
|
|
111
|
+
|
|
|
112
|
+
v
|
|
113
|
+
(repeat up to max_iterations)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 2. Agent SDK
|
|
117
|
+
|
|
118
|
+
Uses the Claude Agent SDK to autonomously generate, test and fix code. The agent has access to `Bash`, `Read`, `Write` and `Edit` tools and iterates on its own. Test execution is intercepted and run in an isolated `Sandbox`.
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
agent = AutoCoderAgent(
|
|
122
|
+
name="my-task",
|
|
123
|
+
model="claude-sonnet-4-5-20250929",
|
|
124
|
+
backend="claude", # Requires ANTHROPIC_API_KEY as a Flyte secret
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
result = await agent.generate.aio(
|
|
128
|
+
prompt="...",
|
|
129
|
+
samples={"input": my_file},
|
|
130
|
+
outputs={"result": str},
|
|
131
|
+
)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**Key differences from LiteLLM:**
|
|
135
|
+
|
|
136
|
+
- Agent runs autonomously (no structured retry loop)
|
|
137
|
+
- Requires `ANTHROPIC_API_KEY` as a Flyte secret
|
|
138
|
+
- Claude-only (not model agnostic)
|
|
139
|
+
- Traces agent tool calls, reasoning and test results in the Flyte UI
|
|
140
|
+
- Test commands are intercepted via hooks and run in isolated sandbox environments
|
|
141
|
+
|
|
142
|
+
## API reference
|
|
143
|
+
|
|
144
|
+
### `AutoCoderAgent`
|
|
145
|
+
|
|
146
|
+
Create an agent instance with configuration, then call `generate()` per task.
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
agent = AutoCoderAgent(name="my-agent", model="gpt-4.1")
|
|
150
|
+
|
|
151
|
+
# Sync
|
|
152
|
+
result = agent.generate(prompt="...")
|
|
153
|
+
|
|
154
|
+
# Async
|
|
155
|
+
result = await agent.generate.aio(prompt="...")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**Constructor parameters (agent-level config):**
|
|
159
|
+
|
|
160
|
+
| Parameter | Type | Default | Description |
|
|
161
|
+
| --------------------- | ----------------- | -------------- | ------------------------------------------------------------- |
|
|
162
|
+
| `name` | `str` | `"auto-coder"` | Unique name for tracking and image naming |
|
|
163
|
+
| `model` | `str` | `"gpt-4.1"` | LiteLLM model identifier |
|
|
164
|
+
| `system_prompt` | `str` | `None` | Custom system prompt override |
|
|
165
|
+
| `api_key` | `str` | `None` | Env var name for LLM API key |
|
|
166
|
+
| `api_base` | `str` | `None` | Custom API base URL |
|
|
167
|
+
| `litellm_params` | `dict` | `None` | Extra LiteLLM params (temperature, max_tokens, etc.) |
|
|
168
|
+
| `base_packages` | `list[str]` | `None` | Always-install pip packages |
|
|
169
|
+
| `resources` | `flyte.Resources` | `None` | Resources for sandbox execution (default: cpu=1, 1Gi) |
|
|
170
|
+
| `image_config` | `ImageConfig` | `None` | Registry, registry_secret, python_version |
|
|
171
|
+
| `max_iterations` | `int` | `10` | Max generate-test-fix iterations (LiteLLM mode) |
|
|
172
|
+
| `max_sample_rows` | `int` | `100` | Rows to sample from data for context |
|
|
173
|
+
| `skip_tests` | `bool` | `False` | Skip test generation and execution (LiteLLM mode only) |
|
|
174
|
+
| `network_access` | `bool` | `False` | Allow generated code to access the network inside the sandbox |
|
|
175
|
+
| `sandbox_retries` | `int` | `0` | Flyte task-level retries for each sandbox execution |
|
|
176
|
+
| `timeout` | `int` | `None` | Timeout in seconds for sandboxes |
|
|
177
|
+
| `env_vars` | `dict[str, str]` | `None` | Environment variables to pass to sandboxes |
|
|
178
|
+
| `secrets` | `list` | `None` | `flyte.Secret` objects to make available to sandboxes |
|
|
179
|
+
| `cache` | `str` | `"auto"` | CacheRequest for sandboxes: `"auto"`, `"override"`, or `"disable"` |
|
|
180
|
+
| `backend` | `str` | `"litellm"` | Execution backend: `"litellm"` or `"claude"` |
|
|
181
|
+
| `agent_max_turns` | `int` | `50` | Max turns when `backend="claude"` |
|
|
182
|
+
|
|
183
|
+
**`generate()` parameters (per-call):**
|
|
184
|
+
|
|
185
|
+
| Parameter | Type | Default | Description |
|
|
186
|
+
| ------------- | --------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------- |
|
|
187
|
+
| `prompt` | `str` | required | Natural-language task description |
|
|
188
|
+
| `schema` | `str` | `None` | Free-form context about data formats, structures, or schemas. Included verbatim in the LLM prompt. |
|
|
189
|
+
| `constraints` | `list[str]` | `None` | Natural-language constraints (e.g., `"quantity must be positive"`) |
|
|
190
|
+
| `samples` | `dict[str, File \| pd.DataFrame]` | `None` | Sample data. Sampled for LLM context, converted to File inputs for the sandbox. Used as defaults at runtime. |
|
|
191
|
+
| `inputs` | `dict[str, type]` | `None` | Non-sample CLI argument types (e.g., `{"threshold": float}`). Sample entries are auto-added as File inputs. |
|
|
192
|
+
| `outputs` | `dict[str, type]` | `None` | Output types. Supported: `str, int, float, bool, datetime, timedelta, File`. |
|
|
193
|
+
|
|
194
|
+
### `CodeGenEvalResult`
|
|
195
|
+
|
|
196
|
+
Returned by `agent.generate()`. Key fields:
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
result.success # bool — did tests pass?
|
|
200
|
+
result.solution # CodeSolution — generated code
|
|
201
|
+
result.tests # str — generated test code
|
|
202
|
+
result.output # str — test output
|
|
203
|
+
result.exit_code # int — test exit code
|
|
204
|
+
result.error # str | None — error message if failed
|
|
205
|
+
result.attempts # int — number of iterations used
|
|
206
|
+
result.image # str — built sandbox image with all deps
|
|
207
|
+
result.detected_packages # list[str] — pip packages detected
|
|
208
|
+
result.detected_system_packages # list[str] — apt packages detected
|
|
209
|
+
result.generated_schemas # dict[str, str] | None — Pandera schemas as code
|
|
210
|
+
result.data_context # str | None — extracted data context
|
|
211
|
+
result.original_samples # dict[str, File] | None — sample data as Files
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
#### `result.as_task()`
|
|
215
|
+
|
|
216
|
+
Create a reusable sandbox from the generated code:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
task = result.as_task(name="run-on-data")
|
|
220
|
+
|
|
221
|
+
# Call with your declared inputs — returns a tuple of outputs
|
|
222
|
+
total_revenue, total_units, transaction_count = task(sales_csv=my_file)
|
|
223
|
+
|
|
224
|
+
# If samples were provided, they are injected as defaults — override as needed
|
|
225
|
+
total_revenue, total_units, transaction_count = task(threshold=0.5) # samples used for data inputs
|
|
226
|
+
|
|
227
|
+
# With sandbox options
|
|
228
|
+
task = result.as_task(
|
|
229
|
+
name="run-on-data",
|
|
230
|
+
retries=3,
|
|
231
|
+
timeout=600,
|
|
232
|
+
env_vars={"API_URL": "https://..."},
|
|
233
|
+
)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
The task runs the generated script in the built sandbox image. Inputs are passed as `--name value` CLI arguments. Outputs are read from `/var/outputs/{name}` files.
|
|
237
|
+
|
|
238
|
+
#### `result.run()`
|
|
239
|
+
|
|
240
|
+
One-shot execution using sample data as defaults:
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
# Sync
|
|
244
|
+
total_revenue, total_units, transaction_count = result.run()
|
|
245
|
+
|
|
246
|
+
# Async
|
|
247
|
+
total_revenue, total_units, transaction_count = await result.run.aio()
|
|
248
|
+
|
|
249
|
+
# Override specific inputs
|
|
250
|
+
total_revenue, total_units, transaction_count = result.run(threshold=0.5)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Data handling
|
|
254
|
+
|
|
255
|
+
When you pass `samples`, the plugin automatically:
|
|
256
|
+
|
|
257
|
+
1. **Converts DataFrames to CSVs** and uploads as `File` objects
|
|
258
|
+
2. **Infers Pandera schemas** — conservative type + nullability checks inferred from the sample data (no value constraints)
|
|
259
|
+
3. **Applies natural-language constraints** — if `constraints` are provided, each one is parsed by the LLM into a Pandera check (e.g., `"quantity must be positive"` → `pa.Check.gt(0)`) and added to the schema
|
|
260
|
+
4. **Extracts comprehensive context** — column stats, distributions, patterns, sample rows
|
|
261
|
+
5. **Includes everything in the prompt** — the serialized schemas and data context are injected into the LLM prompt so the generated code is aware of exact column types, nullability and validation rules
|
|
262
|
+
|
|
263
|
+
Pandera is used purely for **prompt enrichment**, not runtime validation. The generated code itself doesn't import Pandera — it just benefits from the LLM knowing the precise data structure. The schemas are also stored on `result.generated_schemas` for inspection.
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
result = await agent.generate.aio(
|
|
267
|
+
prompt="Clean and validate the data, remove duplicates",
|
|
268
|
+
samples={"orders": orders_df, "products": products_file},
|
|
269
|
+
constraints=["quantity must be positive", "price between 0 and 10000"],
|
|
270
|
+
outputs={"cleaned_orders": File},
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
# Access generated schemas
|
|
274
|
+
print(result.generated_schemas) # {"orders": "DataFrameSchema(...)", "products": "..."}
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## Configuration
|
|
278
|
+
|
|
279
|
+
### Image configuration
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
agent = AutoCoderAgent(
|
|
283
|
+
model="gpt-4.1",
|
|
284
|
+
name="my-task",
|
|
285
|
+
image_config=ImageConfig(
|
|
286
|
+
registry="my-registry.io",
|
|
287
|
+
registry_secret="registry-creds",
|
|
288
|
+
python_version=(3, 12),
|
|
289
|
+
),
|
|
290
|
+
)
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### LiteLLM configuration
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
agent = AutoCoderAgent(
|
|
297
|
+
name="my-task",
|
|
298
|
+
model="anthropic/claude-sonnet-4-20250514",
|
|
299
|
+
api_key="ANTHROPIC_API_KEY", # env var name
|
|
300
|
+
litellm_params={
|
|
301
|
+
"temperature": 0.3,
|
|
302
|
+
"max_tokens": 4000,
|
|
303
|
+
},
|
|
304
|
+
)
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
### Skipping tests
|
|
308
|
+
|
|
309
|
+
Set `skip_tests=True` to skip test generation and execution. The agent will still generate code, detect packages, and build the sandbox image, but won't generate or run tests. This is useful when you trust the LLM output or want faster turnaround.
|
|
310
|
+
|
|
311
|
+
```python
|
|
312
|
+
agent = AutoCoderAgent(
|
|
313
|
+
name="my-task",
|
|
314
|
+
model="gpt-4.1",
|
|
315
|
+
skip_tests=True, # No test generation or execution
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
result = await agent.generate.aio(
|
|
319
|
+
prompt="Parse JSON logs and extract error counts",
|
|
320
|
+
samples={"logs": log_file},
|
|
321
|
+
outputs={"error_count": int},
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
# result.as_task() and result.run() still work
|
|
325
|
+
error_count = await result.run.aio()
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
> **Note:** `skip_tests` only applies to LiteLLM mode. In Agent SDK mode, the agent autonomously decides when to test.
|
|
329
|
+
|
|
330
|
+
### Environment setup
|
|
331
|
+
|
|
332
|
+
`sandbox_environment` must be listed as a dependency of your TaskEnvironment:
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
from flyte.sandbox import sandbox_environment
|
|
336
|
+
|
|
337
|
+
env = flyte.TaskEnvironment(
|
|
338
|
+
name="my-env",
|
|
339
|
+
image=flyte.Image.auto(),
|
|
340
|
+
depends_on=[sandbox_environment], # Required
|
|
341
|
+
)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
This allows dynamically-created sandboxes to be registered with Flyte.
|
|
345
|
+
|
|
346
|
+
> **Tip:** Use one `AutoCoderAgent` per task. Each `generate()` call builds its own sandbox image and manages its own package/image state. Running multiple agents in the same task can cause resource contention and makes failures harder to diagnose.
|
|
347
|
+
|
|
348
|
+
## Module Structure
|
|
349
|
+
|
|
350
|
+
```
|
|
351
|
+
codegen/
|
|
352
|
+
├── __init__.py # Public API: AutoCoderAgent, CodeGenEvalResult, types
|
|
353
|
+
├── auto_coder_agent.py # AutoCoderAgent — config + generate() orchestrator
|
|
354
|
+
├── core/
|
|
355
|
+
│ └── types.py # Pydantic models: CodeGenEvalResult, CodeSolution, CodePlan, etc.
|
|
356
|
+
├── data/
|
|
357
|
+
│ ├── extraction.py # Extract context from DataFrames/Files (stats, patterns, samples)
|
|
358
|
+
│ └── schema.py # Pandera schema inference, constraint parsing via LLM
|
|
359
|
+
├── execution/
|
|
360
|
+
│ ├── agent.py # Claude Agent SDK path with hooks and sandbox test interception
|
|
361
|
+
│ ├── docker.py # Image building (create_image_spec, incremental builds)
|
|
362
|
+
│   └── testing.py           # Test execution in sandboxes (NOTE: not present in this wheel's RECORD — verify)
|
|
363
|
+
├── generation/
|
|
364
|
+
│ ├── llm.py # LLM calls: plan, code, tests, diagnosis, fixes, verification
|
|
365
|
+
│ └── prompts.py # Prompt templates and constants
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
### Data flow
|
|
369
|
+
|
|
370
|
+
```
|
|
371
|
+
User calls agent.generate(prompt, samples, outputs, ...)
|
|
372
|
+
│
|
|
373
|
+
├─ Data Processing (both paths)
|
|
374
|
+
│ ├─ Convert DataFrames → CSV Files
|
|
375
|
+
│ ├─ Infer Pandera schemas
|
|
376
|
+
│ ├─ Apply user constraints (LLM-parsed)
|
|
377
|
+
│ └─ Extract data context (stats, patterns, samples)
|
|
378
|
+
│
|
|
379
|
+
├─ LiteLLM Path (default) ├─ Agent SDK Path (backend="claude")
|
|
380
|
+
│ ├─ generate_plan() │ ├─ Build prompt with all context
|
|
381
|
+
│ ├─ generate_code() │ ├─ Launch Claude agent with hooks:
|
|
382
|
+
│ ├─ detect_packages() │ │ ├─ PreToolUse: trace + classify commands
|
|
383
|
+
│ ├─ build_image() │ │ │ ├─ pytest → run in sandbox
|
|
384
|
+
│ ├─ execute_tests() │ │ │ ├─ safe (ls, cat, ...) → allow
|
|
385
|
+
│ ├─ diagnose_error() (if failed) │ │ │ └─ denied (apt, pip, curl, ...) → block
|
|
386
|
+
│ ├─ fix code/tests/env │ │ ├─ PostToolUseFailure: trace errors
|
|
387
|
+
│ └─ repeat until pass or max_iterations │ │ └─ Stop: trace summary
|
|
388
|
+
│ │ ├─ Agent writes solution.py, tests.py, packages.txt
|
|
389
|
+
│ │ ├─ pytest intercepted → sandbox execution
|
|
390
|
+
│ │ └─ Agent iterates until tests pass
|
|
391
|
+
│
|
|
392
|
+
└─ Return CodeGenEvalResult
|
|
393
|
+
├─ .solution (code)
|
|
394
|
+
├─ .image (sandbox image with deps)
|
|
395
|
+
├─ .as_task() → reusable sandbox
|
|
396
|
+
└─ .run() → execute on sample data
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
## Error handling
|
|
400
|
+
|
|
401
|
+
The LiteLLM path classifies test failures into three types:
|
|
402
|
+
|
|
403
|
+
| Type | Meaning | Action |
|
|
404
|
+
| ------------- | -------------------------- | ------------------------------------------------ |
|
|
405
|
+
| `logic` | Bug in generated code | Regenerate code with specific patch instructions |
|
|
406
|
+
| `environment` | Missing package/dependency | Add package, rebuild image |
|
|
407
|
+
| `test_error` | Bug in generated test | Fix test expectations |
|
|
408
|
+
|
|
409
|
+
If the same error persists after fixes, the plugin reclassifies it (logic <-> test_error) to try the other approach.
|
|
410
|
+
|
|
411
|
+
## Observability
|
|
412
|
+
|
|
413
|
+
### LiteLLM path
|
|
414
|
+
|
|
415
|
+
- Logs every iteration with attempt count, error type, and package changes
|
|
416
|
+
- Tracks total input/output tokens across all LLM calls
|
|
417
|
+
- Results include full conversation history for debugging
|
|
418
|
+
|
|
419
|
+
### Agent SDK path
|
|
420
|
+
|
|
421
|
+
- Traces each tool call (name + input detail) via `PreToolUse` hook
|
|
422
|
+
- Traces tool failures via `PostToolUseFailure` hook
|
|
423
|
+
- Traces a summary when the agent finishes (total tool calls, tool distribution, final image/packages)
|
|
424
|
+
- Classifies Bash commands as safe, denied, or pytest (intercepted for sandbox execution)
|
|
425
|
+
- All traces appear in the Flyte UI under the task
|
|
426
|
+
|
|
427
|
+
## Examples
|
|
428
|
+
|
|
429
|
+
See the `examples/` directory:
|
|
430
|
+
|
|
431
|
+
- **`example_csv_processing.py`** — Process CSVs with different schemas using LiteLLM. Shows batch processing with multiple CSV formats.
|
|
432
|
+
- **`example_csv_processing_sync.py`** — Synchronous version of CSV processing. Shows `agent.generate()` and `result.run()` without async.
|
|
433
|
+
- **`example_csv_processing_agent.py`** — CSV processing using Agent SDK with `backend="claude"`.
|
|
434
|
+
- **`example_dataframe_analysis.py`** — DataFrame analysis with constraints, `base_packages`, and `as_task()` for reusable execution.
|
|
435
|
+
- **`example_dataframe_analysis_agent.py`** — Same DataFrame analysis using Agent SDK.
|
|
436
|
+
- **`example_prompt_only.py`** — Log file analysis with `schema`, `constraints`, `samples`, and explicit `inputs`/`outputs`.
|
|
437
|
+
- **`example_prompt_only_agent.py`** — Same log analysis using Agent SDK.
|
|
438
|
+
- **`example_multi_input.py`** — Multi-input data join with primitives (`float`, `bool`).
|
|
439
|
+
- **`example_multi_input_agent.py`** — Same multi-input join using Agent SDK.
|
|
440
|
+
- **`example_durable_execution.py`** — Durable execution with injected failures, retries, and caching (LLM approach).
|
|
441
|
+
- **`example_durable_execution_agent.py`** — Same durable execution using Agent SDK.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
flyteplugins/codegen/__init__.py,sha256=jumoM0Po0Tx1KqatcdNi_Mk1MYCLcccnyjn6d1zurLQ,366
|
|
2
|
+
flyteplugins/codegen/auto_coder_agent.py,sha256=fVM2-ZZijrPe7SwDZJ_kU8niIb5qepVhs-qwWvpLHm8,45820
|
|
3
|
+
flyteplugins/codegen/core/__init__.py,sha256=YjAN0PpvkhERFmlkEq78O2q92bmgyGAVttYXeDH_hyc,355
|
|
4
|
+
flyteplugins/codegen/core/types.py,sha256=YbJG1we7gLxtherSvTqg9mSjSfd4FNs017iUVOrdMBs,12276
|
|
5
|
+
flyteplugins/codegen/data/__init__.py,sha256=UDkKuvyBEZmsPaTR6yEexcQjLutMRxuV5-pWflGc_sI,670
|
|
6
|
+
flyteplugins/codegen/data/extraction.py,sha256=Q0ytA1pp4qXCB9nNdjEtEVn9yEcwdkRlDL6df1XtBSg,10029
|
|
7
|
+
flyteplugins/codegen/data/schema.py,sha256=ib2J6mdgzZtfxyKY4233aeYNPZi8KzUOCJVc15DazRo,9101
|
|
8
|
+
flyteplugins/codegen/execution/__init__.py,sha256=3i4bP_xNp8ZgsolRI7ccUtkIW-h-1zCXFrjWfltVu3U,192
|
|
9
|
+
flyteplugins/codegen/execution/agent.py,sha256=EBTAio0cjg9I9kHW3oHPnn6wjrq5E43MWHTvh3jf7nQ,27184
|
|
10
|
+
flyteplugins/codegen/execution/docker.py,sha256=4M2SzrsAcN_LmN_cFfAjT8K5hJQxiL1fA3QWK1jtnrA,7166
|
|
11
|
+
flyteplugins/codegen/generation/__init__.py,sha256=GkLiXfJeVQmLlHf4R08qzgUa5wYG97KseGjRtLCFZhU,1065
|
|
12
|
+
flyteplugins/codegen/generation/llm.py,sha256=CNFuC3YY-ZK8xjCHGJag34J6JtFbTNy_eDbxOoI1TEM,44237
|
|
13
|
+
flyteplugins/codegen/generation/prompts.py,sha256=EbH3bHNa8WlavHk2cf7i497J265rZ4ZPjfRdy97JJ7M,5774
|
|
14
|
+
flyteplugins_codegen-2.0.6.dist-info/METADATA,sha256=aVP2zMlCktjPFbGlUhCwR8WWCtPwxYDIewClWKoKBsg,19197
|
|
15
|
+
flyteplugins_codegen-2.0.6.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
16
|
+
flyteplugins_codegen-2.0.6.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
|
|
17
|
+
flyteplugins_codegen-2.0.6.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flyteplugins
|