ibm-watsonx-orchestrate-evaluation-framework 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.8.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info}/METADATA +103 -109
- ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info/RECORD +96 -0
- wxo_agentic_evaluation/analytics/tools/main.py +1 -18
- wxo_agentic_evaluation/analyze_run.py +358 -97
- wxo_agentic_evaluation/arg_configs.py +28 -1
- wxo_agentic_evaluation/description_quality_checker.py +149 -0
- wxo_agentic_evaluation/evaluation_package.py +58 -17
- wxo_agentic_evaluation/inference_backend.py +32 -17
- wxo_agentic_evaluation/llm_user.py +2 -1
- wxo_agentic_evaluation/metrics/metrics.py +22 -1
- wxo_agentic_evaluation/prompt/bad_tool_descriptions_prompt.jinja2 +178 -0
- wxo_agentic_evaluation/prompt/llama_user_prompt.jinja2 +9 -1
- wxo_agentic_evaluation/prompt/off_policy_attack_generation_prompt.jinja2 +34 -0
- wxo_agentic_evaluation/prompt/on_policy_attack_generation_prompt.jinja2 +46 -0
- wxo_agentic_evaluation/prompt/template_render.py +34 -3
- wxo_agentic_evaluation/quick_eval.py +342 -0
- wxo_agentic_evaluation/red_teaming/attack_evaluator.py +113 -0
- wxo_agentic_evaluation/red_teaming/attack_generator.py +286 -0
- wxo_agentic_evaluation/red_teaming/attack_list.py +96 -0
- wxo_agentic_evaluation/red_teaming/attack_runner.py +128 -0
- wxo_agentic_evaluation/referenceless_eval/__init__.py +3 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/consts.py +28 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py +27 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general.py +49 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection.py +31 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py +237 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py +101 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py +263 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py +455 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py +156 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/transformation_prompts.py +509 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +547 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/__init__.py +3 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/field.py +258 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/metric.py +333 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py +188 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py +409 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/utils.py +42 -0
- wxo_agentic_evaluation/referenceless_eval/prompt/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/prompt/runner.py +145 -0
- wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +116 -0
- wxo_agentic_evaluation/service_instance.py +2 -2
- wxo_agentic_evaluation/service_provider/watsonx_provider.py +118 -4
- wxo_agentic_evaluation/tool_planner.py +3 -1
- wxo_agentic_evaluation/type.py +33 -2
- wxo_agentic_evaluation/utils/__init__.py +0 -1
- wxo_agentic_evaluation/utils/open_ai_tool_extractor.py +157 -0
- wxo_agentic_evaluation/utils/rich_utils.py +174 -0
- wxo_agentic_evaluation/utils/rouge_score.py +23 -0
- wxo_agentic_evaluation/utils/utils.py +167 -5
- ibm_watsonx_orchestrate_evaluation_framework-1.0.8.dist-info/RECORD +0 -56
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.8.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.8.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
import json
from copy import deepcopy
from typing import Any, Dict, List
|
|
3
|
+
|
|
4
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
5
|
+
# 1) extract_units
|
|
6
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# JSON Schema describing the extraction result for ONE tool parameter.
# All four fields are required and typed as string-or-null; the descriptions
# double as instructions for the model that fills the schema in.
SINGLE_PARAM_SCHEMA: Dict[str, Any] = {
    "type": "object",
    "properties": {
        "user_units_or_format": {
            "type": ["string", "null"],
            "description": (
                "The canonical unit or format attached to `user_value`, normalized to lowercase, singular form, "
                "and using standard abbreviations. "
                "If this unit/format is composed of multiple units or formats write them all in a comma-separated list (e.g., 'second, millisecond', 'byte, kilobyte'). "
                "If none, return an empty string ''.\n"
                "Examples:\n"
                " - Time: 'second', 'millisecond', 'hour', 'day'\n"
                " - Data: 'byte', 'kilobyte', 'megabyte', 'gigabyte'\n"
                " - Temperature: 'celsius', 'fahrenheit', 'kelvin'\n"
                " - Length: 'meter', 'centimeter', 'inch', 'foot, inch'\n"
                " - Volume: 'liter', 'milliliter'\n"
                " - Weight: 'kilogram', 'gram', 'pound'\n"
                " - Currency: 'usd', 'eur', 'jpy'\n"
                " - Date formats: 'yyyy-mm-dd', 'month day, year', 'iso8601'\n"
                " - Identifiers: 'uuid', 'hex'\n"
                "If the user_value has no unit/format, return ''."
            ),
        },
        "user_value": {
            "type": ["string", "null"],
            "description": (
                "The full and exact value provided by the user or from the conversation history (e.g. previous messages) for this parameter, always as a raw string (but still should be the value with these units/format: user_units_or_format).\n"
                "Collect the full value (if needed from multiple parts in the conversation) and return it as-is, with the full value, which is the value with these units/format: user_units_or_format.\n"
                "Examples:\n"
                " - Time quantities: '30' (seconds), '2' (milliseconds), '1.5' (hours)\n"
                " - Data sizes: '1000' (MB), '2' (GB), '512' (bytes)\n"
                " - Temperatures: '25' (°C), '77' (°F)\n"
                " - Dates: 'December 1st, 2024' (month day, year), '2024-06-20' (yyyy-mm-dd)\n"
                " - Numbers: '0.75' (decimal), '42' (integer)\n"
                " - Identifiers: '550e8400-e29b-41d4-a716-446655440000' (UUID)\n"
                "If the user did not mention this parameter, return `null`."
            ),
        },
        "spec_units_or_format": {
            "type": ["string", "null"],
            "description": (
                "The canonical unit or format defined or implied by the parameter's JSON Schema, "
                "normalized to lowercase and singular form, using the same conventions as `user_units_or_format`. "
                "If this unit/format is composed of multiple units or formats write them all in a comma-separated list (e.g., 'second, millisecond', 'byte, kilobyte'). "
                "Examples: 'second', 'byte', 'yyyy-mm-dd', 'uuid'.\n"
                "If the spec and user_value use the same unit/format, return exactly that same canonical string for both. "
                "If the schema specifies no unit/format, return ''."
            ),
        },
        "transformation_summary": {
            "type": ["string", "null"],
            "description": (
                "A brief summary of the transformation needed to convert `user_value` to the `spec_units_or_format`. "
                # Trailing newline keeps this sentence from fusing with "Examples:".
                "This should be a human-readable description of the conversion logic, not the actual code.\n"
                "Examples:\n"
                " - 'Convert seconds to milliseconds by multiplying by 1000 - e.g., 30 seconds multiplied by 1000 to be in milliseconds.'\n"
                # 2048 / 1024 yields kilobytes, so the example names that unit.
                " - 'Convert bytes to kilobytes by dividing by 1024 - e.g., 2048 bytes divided by 1024 to be in kilobytes.'\n"
                " - 'Convert Celsius to Kelvin by adding 273.15 - e.g., 25°C is added 273.15 to be in Kelvin.'\n"
                " - 'Convert date string from 'month day, year' to 'yyyy-mm-dd' format - e.g., 'December 1st, 2024' converted to 'yyyy-mm-dd' format.'\n"
                " - 'Convert feet and inches to centimeters by multiplying feet by 30.48 and inches by 2.54 - e.g., 5 feet 10 inches (5*30.48 + 10*2.54) to be in centimeters.'\n"
                "If no transformation is needed (i.e., `user_units_or_format` matches `spec_units_or_format`), return ''."
            ),
        },
    },
    "required": [
        "user_value",
        "user_units_or_format",
        "spec_units_or_format",
        "transformation_summary",
    ],
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def build_multi_extract_units_schema(params: List[str]) -> Dict[str, Any]:
    """
    Construct a JSON Schema whose top-level properties are the parameter names.

    Each parameter maps to its own deep copy of SINGLE_PARAM_SCHEMA, so the
    returned schema can be mutated without touching the shared template.

    Args:
        params: Parameter names to include. Any iterable of strings is
            accepted; repeated names are kept once, in first-seen order.

    Returns:
        An object schema with one property per parameter, all of them required.
    """
    # JSON Schema requires the entries of `required` to be unique;
    # dict.fromkeys drops repeats while preserving order.
    names = list(dict.fromkeys(params))
    return {
        "type": "object",
        "properties": {name: deepcopy(SINGLE_PARAM_SCHEMA) for name in names},
        "required": names,
    }
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# -------------------------------------------------------------------
|
|
98
|
+
# 2) System prompt template for multi-parameter unit/format extraction
|
|
99
|
+
# -------------------------------------------------------------------
|
|
100
|
+
# We include a `{schema}` placeholder, which will be replaced at runtime
|
|
101
|
+
# with a JSON-dumped version of the schema built for the current params.
|
|
102
|
+
# System prompt for multi-parameter unit/format extraction.
# `{schema}` is the only format placeholder; the response-shape sentence names
# all four fields that SINGLE_PARAM_SCHEMA marks as required.
MULTI_EXTRACT_UNITS_SYSTEM: str = """\
You are an expert in natural language understanding and API specifications.
Given:
1. A user context (natural-language instructions).
2. A JSON Schema snippet that describes **all** parameters the tool expects.
3. A list of all parameter names.

Your task:
For each parameter name, identify:
- The "user_units_or_format" explicitly or implicitly attached to that value.
(If none, return an empty string `""`.)
- The raw "user_value" mentioned in the user context or conversation history, as a string.
- The "spec_units_or_format" defined or implied by the JSON Schema (type/description).
(If none, return an empty string `""`.)
- A brief "transformation_summary" describing how to convert `user_value` to `spec_units_or_format`.

Respond with exactly one JSON object whose keys are the parameter names,
and whose values are objects with "user_value", "user_units_or_format", "spec_units_or_format", and "transformation_summary".
The JSON must match this schema exactly:

{schema}
"""
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# -------------------------------------------------------------------
|
|
127
|
+
# 3) User prompt template for multi-parameter unit extraction
|
|
128
|
+
# -------------------------------------------------------------------
|
|
129
|
+
# Use Python .format(...) placeholders for:
|
|
130
|
+
# context = The conversation/context string
|
|
131
|
+
# full_spec = json.dumps(...) of the combined JSON Schema snippet for all params
|
|
132
|
+
# parameter_names = Comma-separated list of parameter names
|
|
133
|
+
# User prompt for multi-parameter unit extraction: three few-shot examples
# followed by the live task. Placeholders are `{context}`, `{full_spec}` and
# `{parameter_names}`; every literal brace is doubled for str.format.
# The assistant "content" in each example is a JSON document nested inside a
# JSON string, so its quotes are written as `\\"` here (rendered as `\"`) to
# keep every example context parseable as JSON.
MULTI_EXTRACT_UNITS_USER: str = """\

Examples (multi-parameter):

1) Context: [{{"role":"user", "content":"Change the interval to 30 seconds and set threshold to 0.75."}},
{{"role":"assistant", "content":"{{\\"id\\":\\"tool_call_1\\",\\"type\\":\\"function\\",\\"function\\":{{\\"name\\":\\"set_interval_and_threshold\\",\\"arguments\\":{{\\"interval\\":30,\\"threshold\\":0.75}}}}}}"}}]
Full Spec:
{{
"name": "set_interval_and_threshold",
"description": "Set the interval and threshold for monitoring.",
"parameters": {{
"type": "object",
"properties": {{
"interval": {{
"type": "integer",
"description": "Interval duration in seconds"
}},
"threshold": {{
"type": "number",
"description": "Threshold limit (0.0 to 1.0)"
}}
}},
"required": ["interval", "threshold"]
}}
}}
Parameter names: "interval, threshold"
-> {{
"interval": {{
"user_units_or_format":"second",
"user_value":"30",
"spec_units_or_format":"second",
"transformation_summary":""
}},
"threshold": {{
"user_units_or_format":"",
"user_value":"0.75",
"spec_units_or_format":"",
"transformation_summary":""
}}
}}

2) Context: [{{"role":"user", "content":"Download up to 2 GB of data and retry 5 times."}},
{{"role":"assistant", "content":"{{\\"id\\":\\"tool_call_2\\",\\"type\\":\\"function\\",\\"function\\":{{\\"name\\":\\"download_data\\",\\"arguments\\":{{\\"size\\":\\"2147483648\\",\\"retries\\":5}}}}}}"}}]
Full Spec:
{{
"name": "download_data",
"description": "Download data with specified size and retry count.",
"parameters": {{
"type": "object",
"properties": {{
"size": {{
"type": "string",
"description": "Size limit in bytes"
}},
"retries": {{
"type": "integer",
"description": "Maximum retry count"
}}
}},
"required": ["size", "retries"]
}}
}}
Parameter names: "size, retries"
-> {{
"size": {{
"user_units_or_format":"gigabyte",
"user_value":"2",
"spec_units_or_format":"byte",
"transformation_summary":"Convert gigabytes to bytes by multiplying by 1024^3 - e.g., 2 GB multiplied by 1024^3 to be in bytes."
}},
"retries": {{
"user_units_or_format":"",
"user_value":"5",
"spec_units_or_format":"",
"transformation_summary":""
}}
}}

3) Context: [{{"role":"user", "content":"Set backup_date to December 1st, 2024 and limit to 100MB."}},
{{"role":"assistant", "content":"{{\\"id\\":\\"tool_call_3\\",\\"type\\":\\"function\\",\\"function\\":{{\\"name\\":\\"set_backup_parameters\\",\\"arguments\\":{{\\"backup_date\\":\\"2024-12-01\\",\\"limit\\":\\"104857600\\"}}}}}}"}}]
Full Spec:
{{
"name": "set_backup_parameters",
"description": "Set parameters for backup operation.",
"parameters": {{
"type": "object",
"properties": {{
"backup_date": {{
"type": "string",
"format": "date",
"description": "Date of backup in YYYY-MM-DD"
}},
"limit": {{
"type": "string",
"description": "File size cap (in bytes)"
}}
}},
"required": ["backup_date", "limit"]
}}
}}
Parameter names: "backup_date, limit"
-> {{
"backup_date": {{
"user_units_or_format":"month day, year",
"user_value":"December 1st, 2024",
"spec_units_or_format":"yyyy-mm-dd",
"transformation_summary":"Convert 'month day, year' format to 'yyyy-mm-dd' - e.g., 'December 1st, 2024' converted to 'yyyy-mm-dd' format."
}},
"limit": {{
"user_units_or_format":"megabyte",
"user_value":"100",
"spec_units_or_format":"byte",
"transformation_summary":"Convert megabytes to bytes by multiplying by 1024^2 - e.g., 100 MB multiplied by 1024^2 to be in bytes."
}}
}}

Context:
{context}

Full Spec (JSON Schema snippet for all parameters):
{full_spec}

Parameter names: {parameter_names}

Please return exactly one JSON object matching the schema defined in the system prompt.
"""
|
|
260
|
+
|
|
261
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
262
|
+
# 2) generate_transformation_code
|
|
263
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
# System prompt for code generation
|
|
266
|
+
# System prompt for transformation-code generation.
# This text has no format placeholders and contains single literal braces
# (the JSON response sketch at the end), so it is meant to be sent verbatim.
# NOTE(review): the prompt cites `pint` and `dateutil` as "standard Python
# libraries" but later forbids external dependencies; confirm which packages
# the execution environment actually provides before tightening the wording.
GENERATE_CODE_SYSTEM: str = """\
You are an expert Python engineer. Generate a self-contained Python module that converts between arbitrary units or formats. Your code must define exactly two functions:

1. transformation_code(input_value: str) -> <transformed_type>
- **Purpose**: Convert a string in OLD_UNITS into its equivalent in TRANSFORMED_UNITS.
- **Behavior**:
- Parse the numeric or textual content from `input_value` (e.g. “10 ms”, “December 1st, 2011”).
- Attach the OLD_UNITS and perform a conversion to TRANSFORMED_UNITS using standard Python libraries (e.g. `pint`, `datetime`/`dateutil`, or built-ins).
- Return the result as the specified `<transformed_type>` (e.g. `int`, `float`, `str`, `list[float]`, etc.).
- **Error Handling**: If parsing or conversion is unsupported, raise a `ValueError` with a clear message.

2. convert_example_str_transformed_to_transformed_type(transformed_value: str) -> <transformed_type>
- **Purpose**: Parse a raw string in the example transformed format into the same `<transformed_type>`.
- **Behavior**:
- Strip any non-numeric or formatting characters as needed.
- Return the parsed value.
- **Error Handling**: If parsing fails, raise a `ValueError`.

You will be provided with the following information:
- TRANSFORMATION SUMMARY: A brief description of the transformation logic, e.g., "Convert a foot and inch value to centimeters by multiplying by 30.48 the foot value and by 2.54 the inch value, e.g., '5 feet 10 inches' calculated by 5*30.48 + 10*2.54 to be in centimeters."
- OLD UNITS: The units or format of the input value (e.g., "millisecond", "celsius", "yyyy-mm-dd").
- EXAMPLE FORMAT OF OLD VALUE: An example string in the OLD UNITS (e.g., "1000" (milliseconds), "25" (celsius), "December 1st, 2011" (month day, year)).
- TRANSFORMED UNITS: The units or format of the transformed value (e.g., "second", "kelvin", "unix timestamp").
- EXAMPLE FORMAT OF TRANSFORMED VALUE: An example string (may not be fully representative - therefore you should only take in account the units when implementing the transformation logic) in the TRANSFORMED UNITS (e.g., "10 s", "[298.15]", "1322697600").
- TRANSFORMED TYPE: The type of the transformed value (e.g., `int`, `float`, `str`, `list[float]`).

Your response must be a valid Python script that defines the two functions above, with no additional text or formatting.
The script should be self-contained and runnable in a standard Python environment without any external dependencies (except for standard libraries).
If the transformation is not supported or possible with the standard Python libraries, return an empty string in the generated_code field.
Respond with ONLY a JSON object matching this schema (no Markdown fences, no extra text):
{
"generated_code": "<full python script>"
}"""
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Reference solution for few-shot example 1 (date string -> unix timestamp),
# written with conventional 4-space indentation.
generated_code_example1 = (
    "from datetime import datetime, timezone\n"
    "import dateutil.parser\n\n"
    "def transformation_code(input_value: str) -> int:\n"
    '    """\n'
    "    Convert a date string with the format 'month day, year' to a unix timestamp.\n\n"
    "    Args:\n"
    "        input_value (str): The date string to convert.\n\n"
    "    Returns:\n"
    "        int: The unix timestamp representing the date.\n\n"
    "    Example:\n"
    "        >>> transformation_code('December 1st, 2011')\n"
    "        estimated output: 1322697600\n"
    '    """\n\n'
    "    # Parse the date string, dateutil.parser automatically handles 'st', 'nd', 'rd', 'th'\n"
    "    dt = dateutil.parser.parse(input_value)\n\n"
    "    # Ensure the datetime is treated as UTC\n"
    "    dt = dt.replace(tzinfo=timezone.utc) if dt.tzinfo is None else dt.astimezone(timezone.utc)\n\n"
    "    # Convert to Unix timestamp\n"
    "    return int(dt.timestamp())\n\n"
    "def convert_example_str_transformed_to_transformed_type(transformed_value: str) -> int:\n"
    '    """\n'
    "    Convert a string representation of a unix timestamp to an integer.\n\n"
    "    Args:\n"
    "        transformed_value (str): The string representation of the unix timestamp.\n\n"
    "    Returns:\n"
    "        int: The unix timestamp as an integer.\n\n"
    "    Example:\n"
    "        >>> convert_example_str_transformed_to_transformed_type('1322697600')\n"
    "        1322697600\n"
    '    """\n\n'
    "    # Strip any whitespace and convert to integer\n"
    "    transformed_value = transformed_value.strip()\n"
    "    return int(transformed_value)\n"
)

# Few-shot example 1 as shown to the model. Braces are doubled because this
# text is later embedded in a str.format template. The code is serialized with
# json.dumps so the RESPONSE is a valid JSON object (newlines and the
# docstring quotes are escaped); any braces in the code are doubled as well.
transformation_eval_example1 = (
    """
### Example 1:

TRANSFORMATION SUMMARY: Convert a date string with the format 'month day, year' to a unix timestamp, e.g., 'December 1st, 2011' converted to a unix timestamp.
OLD UNITS: month day, year
EXAMPLE FORMAT OF OLD VALUE: 'December 1st, 2011'
TRANSFORMED UNITS: unix timestamp
EXAMPLE FORMAT OF TRANSFORMED VALUE: '1322697600'
TRANSFORMED TYPE: int

RESPONSE:
{{"""
    + '"generated_code": '
    + json.dumps(generated_code_example1).replace("{", "{{").replace("}", "}}")
    + """}}"""
)
|
|
355
|
+
|
|
356
|
+
# Reference solution for few-shot example 2 (milliseconds -> seconds),
# written with conventional 4-space indentation.
generated_code_example2 = (
    "def transformation_code(input_value: str) -> float:\n"
    '    """\n'
    "    Convert a string in milliseconds to seconds.\n\n"
    "    Args:\n"
    "        input_value (str): The input value in milliseconds.\n\n"
    "    Returns:\n"
    "        float: The converted value in seconds.\n\n"
    "    Example:\n"
    "        >>> transformation_code('1000')\n"
    "        estimated output: 1.0\n"
    '    """\n\n'
    "    return float(input_value.strip()) / 1000\n\n"
    "def convert_example_str_transformed_to_transformed_type(transformed_value: str) -> float:\n"
    '    """\n'
    "    Convert a string representation of seconds to a float.\n\n"
    "    Args:\n"
    "        transformed_value (str): The string representation of the value in seconds.\n\n"
    "    Returns:\n"
    "        float: The converted value in seconds.\n\n"
    "    Example:\n"
    "        >>> convert_example_str_transformed_to_transformed_type('10')\n"
    "        10.0\n"
    '    """\n\n'
    "    return float(transformed_value.strip())\n"
)

# Few-shot example 2 as shown to the model. Braces are doubled because this
# text is later embedded in a str.format template. The code is serialized with
# json.dumps so the RESPONSE is a valid JSON object; any braces in the code
# are doubled as well.
transformation_eval_example2 = (
    """
### Example 2:

TRANSFORMATION SUMMARY: Convert a string in milliseconds to seconds, e.g., '1000' (milliseconds) divided by 1000 to be in seconds.
OLD UNITS: millisecond
EXAMPLE FORMAT OF OLD VALUE: '1000'
TRANSFORMED UNITS: second
EXAMPLE FORMAT OF TRANSFORMED VALUE: '10'
TRANSFORMED TYPE: float

RESPONSE:
{{"""
    + '"generated_code": '
    + json.dumps(generated_code_example2).replace("{", "{{").replace("}", "}}")
    + """}}"""
)
|
|
401
|
+
|
|
402
|
+
# Reference solution for few-shot example 3 (Celsius -> Kelvin, returned as a
# one-element list), written with conventional 4-space indentation.
generated_code_example3 = (
    "def transformation_code(input_value: str) -> list[float]:\n"
    '    """\n'
    "    Convert a temperature string in Celsius to Kelvin.\n\n"
    "    Args:\n"
    "        input_value (str): The temperature in Celsius.\n\n"
    "    Returns:\n"
    "        list[float]: The converted temperature in Kelvin as a list.\n\n"
    "    Example:\n"
    "        >>> transformation_code('25')\n"
    "        estimated output: [298.15]\n"
    '    """\n\n'
    "    # Convert Celsius to Kelvin (K = C + 273.15)\n"
    "    kelvin_value = float(input_value) + 273.15\n"
    "    # Return as a list with one element\n"
    "    return [kelvin_value]\n\n"
    "def convert_example_str_transformed_to_transformed_type(transformed_value: str) -> list[float]:\n"
    '    """\n'
    "    Convert a string representation of a temperature in Kelvin to a list of floats.\n\n"
    "    Args:\n"
    "        transformed_value (str): The temperature in Kelvin as a string.\n\n"
    "    Returns:\n"
    "        list[float]: The converted temperature in Kelvin as a list.\n\n"
    "    Example:\n"
    "        >>> convert_example_str_transformed_to_transformed_type('[35]')\n"
    "        [35.0]\n"
    '    """\n\n'
    "    # Remove the brackets and convert to float\n"
    "    transformed_value = transformed_value.strip()[1:-1]\n"
    "    # Return as a list with one element\n"
    "    return [float(transformed_value)]\n"
)

# Few-shot example 3 as shown to the model. Braces are doubled because this
# text is later embedded in a str.format template. The code is serialized with
# json.dumps so the RESPONSE is a valid JSON object; any braces in the code
# are doubled as well. TRANSFORMED TYPE matches the `list[float]` signature
# used by the reference solution.
transformation_eval_example3 = (
    """
### Example 3:

TRANSFORMATION SUMMARY: Convert a temperature string in Celsius to Kelvin, e.g., '25' (Celsius) added 273.15 to be in Kelvin.
OLD UNITS: celsius
EXAMPLE FORMAT OF OLD VALUE: '25'
TRANSFORMED UNITS: kelvin
EXAMPLE FORMAT OF TRANSFORMED VALUE: '[35]'
TRANSFORMED TYPE: list[float]

RESPONSE:
{{"""
    + '"generated_code": '
    + json.dumps(generated_code_example3).replace("{", "{{").replace("}", "}}")
    + """}}"""
)
|
|
453
|
+
|
|
454
|
+
# Few-shot example for an unsupported transformation: the expected RESPONSE
# carries an empty "generated_code". Braces are doubled because this text is
# later embedded in a format-style template.
transformation_eval_example4 = "\n".join(
    [
        "",
        "### Unsupported Transformation Example:",
        "",
        "TRANSFORMATION SUMMARY:",
        "OLD UNITS: unit1",
        "EXAMPLE FORMAT OF OLD VALUE: ABC",
        "TRANSFORMED UNITS: unit2",
        "EXAMPLE FORMAT OF TRANSFORMED VALUE: DEF",
        "TRANSFORMED TYPE: str",
        "",
        "RESPONSE:",
        '{{"generated_code": ""}}',
    ]
)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
# User prompt template for code generation.
# The four few-shot examples are spliced in when this module is imported; the
# format-style placeholders that remain to be filled are:
# transformation_summary, old_units, old_value, transformed_units, transformed_value, transformed_type
GENERATE_CODE_USER: str = "".join(
    [
        "Few-shot examples for how to convert:\n\n",
        transformation_eval_example1,
        "\n\n",
        transformation_eval_example2,
        "\n\n",
        transformation_eval_example3,
        "\n\n",
        transformation_eval_example4,
        "\n\n",
        "\nTASK:\n\n",
        "TRANSFORMATION SUMMARY: {transformation_summary}\n",
        "OLD UNITS: {old_units}\n",
        "EXAMPLE FORMAT OF OLD VALUE: {old_value}\n",
        "TRANSFORMED UNITS: {transformed_units}\n",
        "EXAMPLE FORMAT OF TRANSFORMED VALUE: {transformed_value}\n",
        "TRANSFORMED TYPE: {transformed_type}\n",
        "\nRESPONSE:\n",
    ]
)
|
|
498
|
+
|
|
499
|
+
# JSON Schema dict for validation
|
|
500
|
+
# Schema for the code-generation reply: an object with a single required
# string field, "generated_code".
GENERATE_CODE_SCHEMA: Dict[str, Any] = dict(
    type="object",
    properties=dict(
        generated_code=dict(
            type="string",
            description="The generated Python code for the transformation. Should be a valid Python script without any Markdown formatting.",
        ),
    ),
    required=["generated_code"],
)
|