cpee 2.1.71 → 2.1.73

@@ -0,0 +1,635 @@
1
+ <?xml version="1.0"?>
2
+ <testset xmlns="http://cpee.org/ns/properties/2.0">
3
+ <executionhandler>ruby</executionhandler>
4
+ <dataelements>
5
+ <best_pipeline/>
6
+ <res>[]</res>
7
+ <current_performance>1.0</current_performance>
8
+ <results>[{"user":"manglej6","raw":[{"name":"ok","value":"ok"}],"data":{"ok":"ok"}},{"user":"weissh9","raw":[{"name":"ok","value":"ok"}],"data":{"ok":"ok"}}]</results>
9
+ <current_pipeline>```python
10
+ import pandas as pd
11
+ from sklearn.model_selection import train_test_split
12
+ from sklearn.pipeline import Pipeline
13
+ from sklearn.preprocessing import StandardScaler
14
+ from sklearn.ensemble import RandomForestClassifier
15
+ from sklearn.metrics import f1_score
16
+
17
+ # Load the dataset
18
+ DATASET_PATH = 'path_to_your_dataset.csv' # Replace with your actual dataset path
19
+ data = pd.read_csv(DATASET_PATH)
20
+
21
+ # Data Preprocessing
22
+ # Remove 'Id' column
23
+ data = data.drop(columns=['Id'])
24
+
25
+ # Separate features and target
26
+ X = data.iloc[:, :-1] # Features: all columns except the last one
27
+ y = data.iloc[:, -1] # Target: the last column
28
+
29
+ # Split into train and test sets
30
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
31
+
32
+ # Model Selection with a pipeline
33
+ pipeline = Pipeline([
34
+ ('scaler', StandardScaler()), # Feature Engineering: Scaling
35
+ ('classifier', RandomForestClassifier(random_state=42)) # Model Selection: Random Forest Classifier
36
+ ])
37
+
38
+ # Train the model
39
+ pipeline.fit(X_train, y_train)
40
+
41
+ # Predict on the test set
42
+ y_predict = pipeline.predict(X_test)
43
+
44
+ # Model Evaluation
45
+ performance = f1_score(y_test, y_predict, average='macro')
46
+ ```</current_pipeline>
47
+ <dec_DP>1</dec_DP>
48
+ <dec_FE>0</dec_FE>
49
+ <final_pipeline>```python
50
+ import pandas as pd
51
+ from sklearn.model_selection import train_test_split
52
+ from sklearn.pipeline import Pipeline
53
+ from sklearn.preprocessing import StandardScaler
54
+ from sklearn.ensemble import RandomForestClassifier
55
+ from sklearn.metrics import f1_score
56
+
57
+ # Load the dataset
58
+ DATASET_PATH = 'path_to_your_dataset.csv' # Replace with your actual dataset path
59
+ data = pd.read_csv(DATASET_PATH)
60
+
61
+ # Data Preprocessing
62
+ # Remove 'Id' column
63
+ data = data.drop(columns=['Id'])
64
+
65
+ # Separate features and target
66
+ X = data.iloc[:, :-1] # Features: all columns except the last one
67
+ y = data.iloc[:, -1] # Target: the last column
68
+
69
+ # Split into train and test sets
70
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
71
+
72
+ # Model Selection with a pipeline
73
+ pipeline = Pipeline([
74
+ ('scaler', StandardScaler()), # Feature Engineering: Scaling
75
+ ('classifier', RandomForestClassifier(random_state=42)) # Model Selection: Random Forest Classifier
76
+ ])
77
+
78
+ # Train the model
79
+ pipeline.fit(X_train, y_train)
80
+
81
+ # Predict on the test set
82
+ y_predict = pipeline.predict(X_test)
83
+
84
+ # Model Evaluation
85
+ performance = f1_score(y_test, y_predict, average='macro')
86
+ ```</final_pipeline>
87
+ <output>{"user":"yg","raw":[{"name":"textual_des","value":"-Overview: This is a 3-class, 4-feature tabular classification task aimed at identifying the species of an iris plant based on input data.\r\n-Dataset Description: The dataset is a CSV table with six columns. The first column, 'Id', contains the unique identifiers for each iris (e.g., 1). The second to fifth columns represent different properties of the iris. The sixth column indicates the species name, such as 'Iris-setosa'.\r\n-Evaluation Metric: The performance will be evaluated using the Macro-F1 score."},{"name":"dataset_name","value":"Iris.csv"}],"data":{"textual_des":"-Overview: This is a 3-class, 4-feature tabular classification task aimed at identifying the species of an iris plant based on input data.\r\n-Dataset Description: The dataset is a CSV table with six columns. The first column, 'Id', contains the unique identifiers for each iris (e.g., 1). The second to fifth columns represent different properties of the iris. The sixth column indicates the species name, such as 'Iris-setosa'.\r\n-Evaluation Metric: The performance will be evaluated using the Macro-F1 score.","dataset_name":"Iris.csv"}}</output>
88
+ <best_performance>0</best_performance>
89
+ <final_performance>1.0</final_performance>
90
+ <textual_des>-Overview: This is a 3-class, 4-feature tabular classification task aimed at identifying the species of an iris plant based on input data.
91
+ -Dataset Description: The dataset is a CSV table with six columns. The first column, 'Id', contains the unique identifiers for each iris (e.g., 1). The second to fifth columns represent different properties of the iris. The sixth column indicates the species name, such as 'Iris-setosa'.
92
+ -Evaluation Metric: The performance will be evaluated using the Macro-F1 score.</textual_des>
93
+ <dataset_name>Iris.csv</dataset_name>
94
+ <round>1</round>
95
+ </dataelements>
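+ <!-- The data elements above capture one finished run against Iris.csv: the generated
+      RandomForest pipeline scored a Macro-F1 of 1.0, so final_pipeline and
+      final_performance mirror current_pipeline and current_performance. -->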
96
+ <endpoints>
97
+ <user>https-post://cpee.org/services/timeout-user.php</user>
98
+ <auto>https-post://cpee.org/services/timeout-auto.php</auto>
99
+ <subprocess>https-post://cpee.org/flow/start/url/</subprocess>
100
+ <timeout>https-post://cpee.org/services/timeout.php</timeout>
101
+ <send>https-post://cpee.org/ing/correlators/message/send/</send>
102
+ <receive>https-get://cpee.org/ing/correlators/message/receive/</receive>
103
+ <worklist>https-post://cpee.org/worklist/server/</worklist>
104
+ <llm_api>https://lehre.bpm.in.tum.de/ports/17777/llm</llm_api>
105
+ <evaluate_api>https://lehre.bpm.in.tum.de/ports/17778/evaluate</evaluate_api>
106
+ </endpoints>
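+ <!-- llm_api and evaluate_api are the two custom services used below: llm_api is POSTed a
+      single "prompt" argument and its reply is read as result['llm']['output'] in the
+      finalize handlers; evaluate_api is POSTed pipeline_code, dataset_path and round and
+      its reply is read as result['evaluation']['performance']. -->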
107
+ <attributes>
108
+ <info>ML-pipe-multi</info>
109
+ <modeltype>CPEE</modeltype>
110
+ <theme>extended</theme>
111
+ <organisation1>http://cpee.org/~demo/orgviz/organisation_informatik.xml</organisation1>
112
+ <creator>Christine Ashcreek</creator>
113
+ <author>Christine Ashcreek</author>
114
+ <design_stage>development</design_stage>
115
+ <design_dir>Templates.dir/</design_dir>
116
+ </attributes>
117
+ <description>
118
+ <description xmlns="http://cpee.org/ns/description/1.0">
119
+ <call id="a1" endpoint="worklist">
120
+ <parameters>
121
+ <label>Input Textual Task Desc. and Real Dataset</label>
122
+ <arguments>
123
+ <orgmodel>https://lehre.bpm.in.tum.de/~yanggu/inputdata.xml</orgmodel>
124
+ <form>https://lehre.bpm.in.tum.de/~yanggu/inputdata.html</form>
125
+ <role>Engineer</role>
126
+ <priority>2</priority>
127
+ <handling rngui-nonfunctional="true">single</handling>
128
+ <restrictions/>
129
+ <data/>
130
+ </arguments>
131
+ </parameters>
132
+ <code>
133
+ <prepare/>
134
+ <finalize output="result">data.output = result
135
+ data.textual_des = result['data']['textual_des']
136
+ data.dataset_name = result['data']['dataset_name']
137
+ #data.dataset = result['data']['dataset']</finalize>
138
+ <update output="result"/>
139
+ <rescue output="result"/>
140
+ </code>
141
+ <annotations>
142
+ <_generic/>
143
+ <_timing>
144
+ <_timing_weight/>
145
+ <_timing_avg/>
146
+ <explanations/>
147
+ </_timing>
148
+ <_shifting>
149
+ <_shifting_type>Duration</_shifting_type>
150
+ </_shifting>
151
+ <_context_data_analysis>
152
+ <probes/>
153
+ <ips/>
154
+ </_context_data_analysis>
155
+ <report>
156
+ <url/>
157
+ </report>
158
+ <_notes>
159
+ <_notes_general/>
160
+ </_notes>
161
+ </annotations>
162
+ <documentation>
163
+ <input/>
164
+ <output/>
165
+ <implementation>
166
+ <description/>
167
+ </implementation>
168
+ <code>
169
+ <description/>
170
+ </code>
171
+ </documentation>
172
+ </call>
173
+ <manipulate id="a10" label="Initialize #round, the current pipeline and its performance">data.current_pipeline = ""
174
+ data.current_performance = 0
175
+ data.round = 0</manipulate>
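+ <!-- Refinement loop: each iteration first promotes the current pipeline to best_pipeline
+      and increments the round counter, then rebuilds the pipeline component by component
+      via llm_api and re-evaluates it. The post_test condition repeats only while the new
+      pipeline beats the recorded best and has not yet reached a perfect score of 1. -->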
176
+ <loop mode="post_test" condition="data.current_performance &gt; data.best_performance &amp;&amp; data.current_performance != 1">
177
+ <_probability>
178
+ <_probability_min/>
179
+ <_probability_max/>
180
+ <_probability_avg/>
181
+ </_probability>
182
+ <manipulate id="a12" label="Record #round, the best pipeline and its performance">data.best_pipeline = data.current_pipeline
183
+ data.best_performance = data.current_performance
184
+ data.round = data.round + 1</manipulate>
185
+ <call id="a4" endpoint="llm_api">
186
+ <parameters>
187
+ <label>Make Dec. of Data Preprocessing by LLM</label>
188
+ <method>:post</method>
189
+ <arguments>
190
+ <prompt>!data.prompt1</prompt>
191
+ </arguments>
192
+ </parameters>
193
+ <code>
194
+ <prepare>if data.round == 1
195
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a complete ML pipeline in Python for a given ML task. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory.\n\nFor this subtask, decide whether the pipeline requires a 'Data Preprocessing' component based on the following textual task description. If the answer is Yes, return only '1'. If the answer is No, return only '0'. \nImportant: Always return only '1' or '0'."
196
+ data.prompt1 = data.temp + "\n# Textual Task Description: \n" + data.textual_des
197
+ else
198
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a new complete ML pipeline in Python for a given ML task, aiming to achieve performance that surpasses the previous pipeline. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, carefully think and reason to decide whether the pipeline requires a 'Data Preprocessing' component based on the following textual task description and previous ML pipeline with its performance. If the answer is Yes, return only '1'. If the answer is No, return only '0'. \nImportant: Always return only '1' or '0'."
199
+ data.prompt1 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Previous Pipeline Code: \n" + data.best_pipeline + "\n# Previous Pipeline Performance: \n" + data.best_performance.to_s
200
+ end</prepare>
201
+ <finalize output="result">data.dec_DP = result['llm']['output']</finalize>
202
+ <update output="result"/>
203
+ <rescue output="result"/>
204
+ </code>
205
+ <annotations>
206
+ <_generic/>
207
+ <_timing>
208
+ <_timing_weight/>
209
+ <_timing_avg/>
210
+ <explanations/>
211
+ </_timing>
212
+ <_shifting>
213
+ <_shifting_type>Duration</_shifting_type>
214
+ </_shifting>
215
+ <_context_data_analysis>
216
+ <probes/>
217
+ <ips/>
218
+ </_context_data_analysis>
219
+ <report>
220
+ <url/>
221
+ </report>
222
+ <_notes>
223
+ <_notes_general/>
224
+ </_notes>
225
+ </annotations>
226
+ <documentation>
227
+ <input/>
228
+ <output/>
229
+ <implementation>
230
+ <description/>
231
+ </implementation>
232
+ <code>
233
+ <description/>
234
+ </code>
235
+ </documentation>
236
+ </call>
237
+ <choose mode="exclusive">
238
+ <alternative condition="data.dec_DP == '1'">
239
+ <_probability>
240
+ <_probability_min/>
241
+ <_probability_max/>
242
+ <_probability_avg/>
243
+ </_probability>
244
+ <call id="a5" endpoint="llm_api">
245
+ <parameters>
246
+ <label>Generate Code of DP by LLM</label>
247
+ <method>:post</method>
248
+ <arguments>
249
+ <prompt>!data.prompt2</prompt>
250
+ </arguments>
251
+ </parameters>
252
+ <code>
253
+ <prepare>if data.round == 1
254
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a complete ML pipeline in Python for a given ML task. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, generate the pipeline code up to the 'Data Preprocessing' component based on the following textual task description. \nImportant: Your output should only contain the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. Use {pd.read_csv(DATASET_PATH)} to load the dataset. Import necessary python packages."
255
+ data.prompt2 = data.temp + "\n# Textual Task Description: \n" + data.textual_des
256
+ else
257
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a new complete ML pipeline in Python for a given ML task, aiming to achieve performance that surpasses the previous pipeline. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, carefully think and reason through the following textual task description and previous ML pipeline with its performance. Generate the pipeline code up to the 'Data Preprocessing' component, aiming to achieve improved performance results. \nImportant: Your output should only contain the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. Use {pd.read_csv(DATASET_PATH)} to load the dataset. Import necessary python packages."
258
+ data.prompt2 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Previous Pipeline Code: \n" + data.best_pipeline + "\n# Previous Pipeline Performance: \n" + data.best_performance.to_s
259
+ end</prepare>
260
+ <finalize output="result">data.current_pipeline = result['llm']['output']</finalize>
261
+ <update output="result"/>
262
+ <rescue output="result"/>
263
+ </code>
264
+ <annotations>
265
+ <_generic/>
266
+ <_timing>
267
+ <_timing_weight/>
268
+ <_timing_avg/>
269
+ <explanations/>
270
+ </_timing>
271
+ <_shifting>
272
+ <_shifting_type>Duration</_shifting_type>
273
+ </_shifting>
274
+ <_context_data_analysis>
275
+ <probes/>
276
+ <ips/>
277
+ </_context_data_analysis>
278
+ <report>
279
+ <url/>
280
+ </report>
281
+ <_notes>
282
+ <_notes_general/>
283
+ </_notes>
284
+ </annotations>
285
+ <documentation>
286
+ <input/>
287
+ <output/>
288
+ <implementation>
289
+ <description/>
290
+ </implementation>
291
+ <code>
292
+ <description/>
293
+ </code>
294
+ </documentation>
295
+ </call>
296
+ </alternative>
297
+ <otherwise/>
298
+ </choose>
299
+ <call id="a6" endpoint="llm_api">
300
+ <parameters>
301
+ <label>Make Dec. of Feature Engineering by LLM</label>
302
+ <method>:post</method>
303
+ <arguments>
304
+ <prompt>!data.prompt3</prompt>
305
+ </arguments>
306
+ </parameters>
307
+ <code>
308
+ <prepare>if data.round == 1
309
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a complete ML pipeline in Python for a given ML task. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, decide whether the pipeline requires a 'Feature Engineering' component based on the following textual task description and current pipeline code. If the answer is Yes, return only '1'. If the answer is No, return only '0'. \nImportant: Always return only '1' or '0'."
310
+ data.prompt3 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Current Pipeline Code: \n" + data.current_pipeline
311
+ else
312
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a new complete ML pipeline in Python for a given ML task, aiming to achieve performance that surpasses the previous pipeline. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, carefully think and reason to decide whether the pipeline requires a 'Feature Engineering' component based on the following textual task description, previous ML pipeline with its performance, and current pipeline code. If the answer is Yes, return only '1'. If the answer is No, return only '0'. \nImportant: Always return only '1' or '0'."
313
+ data.prompt3 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Previous Pipeline Code: \n" + data.best_pipeline + "\n# Previous Pipeline Performance: \n" + data.best_performance.to_s + "\n# Current Pipeline Code: \n" + data.current_pipeline
314
+ end</prepare>
315
+ <finalize output="result">data.dec_FE = result['llm']['output']</finalize>
316
+ <update output="result"/>
317
+ <rescue output="result"/>
318
+ </code>
319
+ <annotations>
320
+ <_generic/>
321
+ <_timing>
322
+ <_timing_weight/>
323
+ <_timing_avg/>
324
+ <explanations/>
325
+ </_timing>
326
+ <_shifting>
327
+ <_shifting_type>Duration</_shifting_type>
328
+ </_shifting>
329
+ <_context_data_analysis>
330
+ <probes/>
331
+ <ips/>
332
+ </_context_data_analysis>
333
+ <report>
334
+ <url/>
335
+ </report>
336
+ <_notes>
337
+ <_notes_general/>
338
+ </_notes>
339
+ </annotations>
340
+ <documentation>
341
+ <input/>
342
+ <output/>
343
+ <implementation>
344
+ <description/>
345
+ </implementation>
346
+ <code>
347
+ <description/>
348
+ </code>
349
+ </documentation>
350
+ </call>
351
+ <choose mode="exclusive">
352
+ <alternative condition="data.dec_FE == '1'">
353
+ <_probability>
354
+ <_probability_min/>
355
+ <_probability_max/>
356
+ <_probability_avg/>
357
+ </_probability>
358
+ <call id="a7" endpoint="llm_api">
359
+ <parameters>
360
+ <label>Generate Code of FE by LLM</label>
361
+ <method>:post</method>
362
+ <arguments>
363
+ <prompt>!data.prompt4</prompt>
364
+ </arguments>
365
+ </parameters>
366
+ <code>
367
+ <prepare>if data.round == 1
368
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a complete ML pipeline in Python for a given ML task. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, generate the pipeline code up to the 'Feature Engineering' component based on the following textual task description and current pipeline code. \nImportant: Your output should only contain the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. Ensure that the current pipeline code remains unchanged and necessary python packages are imported."
369
+ data.prompt4 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Current Pipeline Code: \n" + data.current_pipeline
370
+ else
371
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a new complete ML pipeline in Python for a given ML task, aiming to achieve performance that surpasses the previous pipeline. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, carefully think and reason through the following textual task description, previous ML pipeline with its performance, and current pipeline code. Generate the pipeline code up to the 'Feature Engineering' component, aiming to achieve improved performance results. \nImportant: Your output should only contain the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. Ensure that the current pipeline code remains unchanged and necessary python packages are imported."
372
+ data.prompt4 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Previous Pipeline Code: \n" + data.best_pipeline + "\n# Previous Pipeline Performance: \n" + data.best_performance.to_s + "\n# Current Pipeline Code: \n" + data.current_pipeline
373
+ end</prepare>
374
+ <finalize output="result">data.current_pipeline = result['llm']['output']</finalize>
375
+ <update output="result"/>
376
+ <rescue output="result"/>
377
+ </code>
378
+ <annotations>
379
+ <_generic/>
380
+ <_timing>
381
+ <_timing_weight/>
382
+ <_timing_avg/>
383
+ <explanations/>
384
+ </_timing>
385
+ <_shifting>
386
+ <_shifting_type>Duration</_shifting_type>
387
+ </_shifting>
388
+ <_context_data_analysis>
389
+ <probes/>
390
+ <ips/>
391
+ </_context_data_analysis>
392
+ <report>
393
+ <url/>
394
+ </report>
395
+ <_notes>
396
+ <_notes_general/>
397
+ </_notes>
398
+ </annotations>
399
+ <documentation>
400
+ <input/>
401
+ <output/>
402
+ <implementation>
403
+ <description/>
404
+ </implementation>
405
+ <code>
406
+ <description/>
407
+ </code>
408
+ </documentation>
409
+ </call>
410
+ </alternative>
411
+ <otherwise/>
412
+ </choose>
413
+ <call id="a8" endpoint="llm_api">
414
+ <parameters>
415
+ <label>Generate Code of Model Selection by LLM</label>
416
+ <method>:post</method>
417
+ <arguments>
418
+ <prompt>!data.prompt5</prompt>
419
+ </arguments>
420
+ </parameters>
421
+ <code>
422
+ <prepare>if data.round == 1
423
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a complete ML pipeline in Python for a given ML task. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, generate the pipeline code up to the 'Model Selection' component based on the following textual task description and current pipeline code. \nImportant: Your output should only contain the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. Ensure that only one best model is directly defined without hyperparameter tuning algorithm in the code, the current pipeline code remains unchanged, and necessary python packages are imported."
424
+ data.prompt5 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Current Pipeline Code: \n" + data.current_pipeline
425
+ else
426
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a new complete ML pipeline in Python for a given ML task, aiming to achieve performance that surpasses the previous pipeline. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, carefully think and reason through the following textual task description, previous ML pipeline with its performance, and current pipeline code. Generate the pipeline code up to the 'Model Selection' component, aiming to achieve improved performance results. \nImportant: Your output should only contain the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. Ensure that only one best model is defined without hyperparameter tuning algorithm in the code, the current pipeline code remains unchanged, and necessary python packages are imported."
427
+ data.prompt5 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Previous Pipeline Code: \n" + data.best_pipeline + "\n# Previous Pipeline Performance: \n" + data.best_performance.to_s + "\n# Current Pipeline Code: \n" + data.current_pipeline
428
+ end</prepare>
429
+ <finalize output="result">data.current_pipeline = result['llm']['output']</finalize>
430
+ <update output="result"/>
431
+ <rescue output="result"/>
432
+ </code>
433
+ <annotations>
434
+ <_generic/>
435
+ <_timing>
436
+ <_timing_weight/>
437
+ <_timing_avg/>
438
+ <explanations/>
439
+ </_timing>
440
+ <_shifting>
441
+ <_shifting_type>Duration</_shifting_type>
442
+ </_shifting>
443
+ <_context_data_analysis>
444
+ <probes/>
445
+ <ips/>
446
+ </_context_data_analysis>
447
+ <report>
448
+ <url/>
449
+ </report>
450
+ <_notes>
451
+ <_notes_general/>
452
+ </_notes>
453
+ </annotations>
454
+ <documentation>
455
+ <input/>
456
+ <output/>
457
+ <implementation>
458
+ <description/>
459
+ </implementation>
460
+ <code>
461
+ <description/>
462
+ </code>
463
+ </documentation>
464
+ </call>
465
+ <call id="a9" endpoint="llm_api">
466
+ <parameters>
467
+ <label>Generate Code of Model Evaluation by LLM</label>
468
+ <method>:post</method>
469
+ <arguments>
470
+ <prompt>!data.prompt6</prompt>
471
+ </arguments>
472
+ </parameters>
473
+ <code>
474
+ <prepare>if data.round == 1
475
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a complete ML pipeline in Python for a given ML task. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, generate the complete ML pipeline code up to the 'Model Evaluation' component based on the following textual task description and current pipeline code. \nImportant: (1) Your output should only include the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. (2) Use {X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)} to split the data. Train the model on (X_train, y_train) and use the trained model to predict {y_predict} for {X_test}. Then, for the classification task, calculate the macro F1 score on the test dataset as the final evaluation metric using {performance = sklearn.metrics.f1_score(y_test, y_predict, average='macro')}. For the regression task, calculate the R2 score on the test dataset as the final evaluation metric using {performance = sklearn.metrics.r2_score(y_test, y_predict)}. (3) Must use 'performance' variable as the final evaluation metric and Don't include any other performance evaluation code. (4) Ensure that the current pipeline code remains unchanged and necessary python packages are imported."
476
+ data.prompt6 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Current Pipeline Code: \n" + data.current_pipeline
477
+ else
478
+ data.temp = "You are an expert machine learning algorithm engineer, and your final task is to create a new complete ML pipeline in Python for a given ML task, aiming to achieve performance that surpasses the previous pipeline. A complete ML pipeline typically includes up to four components: Data Preprocessing, Feature Engineering, Model Selection, and Model Evaluation. The last two components are mandatory. \n\nFor this subtask, carefully think and reason through the following textual task description, previous ML pipeline with its performance, and current pipeline code. Generate the pipeline code up to the 'Model Evaluation' component, aiming to achieve improved performance results. \nImportant: (1) Your output should only include the generated pipeline code, which must be a directly executable Python file without any manual adjustments or any errors. (2) Use {X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)} to split the data. Train the model on (X_train, y_train) and use the trained model to predict {y_predict} for {X_test}. Then, for the classification task, calculate the macro F1 score on the test dataset as the final evaluation metric using {performance = sklearn.metrics.f1_score(y_test, y_predict, average='macro')}. For the regression task, calculate the R2 score on the test dataset as the final evaluation metric using {performance = sklearn.metrics.r2_score(y_test, y_predict)}. (3) Must use 'performance' variable as the final evaluation metric and Don't include any other performance evaluation code. (4) Ensure that the current pipeline code remains unchanged and necessary python packages are imported."
479
+ data.prompt6 = data.temp + "\n# Textual Task Description: \n" + data.textual_des + "\n# Previous Pipeline Code: \n" + data.best_pipeline + "\n# Previous Pipeline Performance: \n" + data.best_performance.to_s + "\n# Current Pipeline Code: \n" + data.current_pipeline
480
+ end</prepare>
481
+ <finalize output="result">data.current_pipeline = result['llm']['output']</finalize>
482
+ <update output="result"/>
483
+ <rescue output="result"/>
484
+ </code>
485
+ <annotations>
486
+ <_generic/>
487
+ <_timing>
488
+ <_timing_weight/>
489
+ <_timing_avg/>
490
+ <explanations/>
491
+ </_timing>
492
+ <_shifting>
493
+ <_shifting_type>Duration</_shifting_type>
494
+ </_shifting>
495
+ <_context_data_analysis>
496
+ <probes/>
497
+ <ips/>
498
+ </_context_data_analysis>
499
+ <report>
500
+ <url/>
501
+ </report>
502
+ <_notes>
503
+ <_notes_general/>
504
+ </_notes>
505
+ </annotations>
506
+ <documentation>
507
+ <input/>
508
+ <output/>
509
+ <implementation>
510
+ <description/>
511
+ </implementation>
512
+ <code>
513
+ <description/>
514
+ </code>
515
+ </documentation>
516
+ </call>
517
+ <call id="a3" endpoint="evaluate_api">
518
+ <parameters>
519
+ <label>Evaluate Whole ML Pipeline</label>
520
+ <method>:post</method>
521
+ <arguments>
522
+ <pipeline_code>!data.current_pipeline</pipeline_code>
523
+ <dataset_path>!data.dataset_path</dataset_path>
524
+ <round>!data.round</round>
525
+ </arguments>
526
+ </parameters>
527
+ <code>
528
+ <prepare>data.dataset_path = "/home/yanggu/public_html/Execute/" + data.dataset_name</prepare>
529
+ <finalize output="result">data.current_performance = result['evaluation']['performance']</finalize>
530
+ <update output="result"/>
531
+ <rescue output="result"/>
532
+ </code>
533
+ <annotations>
534
+ <_generic/>
535
+ <_timing>
536
+ <_timing_weight/>
537
+ <_timing_avg/>
538
+ <explanations/>
539
+ </_timing>
540
+ <_shifting>
541
+ <_shifting_type>Duration</_shifting_type>
542
+ </_shifting>
543
+ <_context_data_analysis>
544
+ <probes/>
545
+ <ips/>
546
+ </_context_data_analysis>
547
+ <report>
548
+ <url/>
549
+ </report>
550
+ <_notes>
551
+ <_notes_general/>
552
+ </_notes>
553
+ </annotations>
554
+ <documentation>
555
+ <input/>
556
+ <output/>
557
+ <implementation>
558
+ <description/>
559
+ </implementation>
560
+ <code>
561
+ <description/>
562
+ </code>
563
+ </documentation>
564
+ </call>
565
+ </loop>
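+ <!-- If the last evaluation reached a perfect score, the loop exits before a12 can record
+      it as best, so a2 below returns the current pipeline; otherwise the best recorded
+      pipeline and its performance are returned. -->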
566
+ <manipulate id="a2" label="Return Final Pipeline and Performance">if data.current_performance == 1
567
+ data.final_pipeline = data.current_pipeline
568
+ data.final_performance = data.current_performance
569
+ else
570
+ data.final_pipeline = data.best_pipeline
571
+ data.final_performance = data.best_performance
572
+ end</manipulate>
573
+ <call id="a13" endpoint="worklist">
574
+ <parameters>
575
+ <label>Output Final Performance and Dataset Name</label>
576
+ <arguments>
577
+ <orgmodel>https://lehre.bpm.in.tum.de/~yanggu/inputdata.xml</orgmodel>
578
+ <form>https://lehre.bpm.in.tum.de/~yanggu/outputdata.html</form>
579
+ <role>Engineer</role>
580
+ <priority>2</priority>
581
+ <handling rngui-nonfunctional="true">single</handling>
582
+ <restrictions/>
583
+ <data>
584
+ <performance>!data.final_performance</performance>
585
+ <dataset_name>!data.dataset_name</dataset_name>
586
+ </data>
587
+ </arguments>
588
+ </parameters>
589
+ <code>
590
+ <prepare/>
591
+ <finalize output="result"/>
592
+ <update output="result"/>
593
+ <rescue output="result"/>
594
+ </code>
595
+ <annotations>
596
+ <_generic/>
597
+ <_timing>
598
+ <_timing_weight/>
599
+ <_timing_avg/>
600
+ <explanations/>
601
+ </_timing>
602
+ <_shifting>
603
+ <_shifting_type>Duration</_shifting_type>
604
+ </_shifting>
605
+ <_context_data_analysis>
606
+ <probes/>
607
+ <ips/>
608
+ </_context_data_analysis>
609
+ <report>
610
+ <url/>
611
+ </report>
612
+ <_notes>
613
+ <_notes_general/>
614
+ </_notes>
615
+ </annotations>
616
+ <documentation>
617
+ <input/>
618
+ <output/>
619
+ <implementation>
620
+ <description/>
621
+ </implementation>
622
+ <code>
623
+ <description/>
624
+ </code>
625
+ </documentation>
626
+ </call>
627
+ <stop id="a11"/>
628
+ </description>
629
+ </description>
630
+ <transformation>
631
+ <description type="copy"/>
632
+ <dataelements type="none"/>
633
+ <endpoints type="none"/>
634
+ </transformation>
635
+ </testset>
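
For reference, below is a minimal sketch of an evaluate_api-style service that satisfies the contract this model relies on: the generated pipeline code loads data via `pd.read_csv(DATASET_PATH)` and leaves its score in a variable named `performance`, while the caller POSTs `pipeline_code`, `dataset_path` and `round` and reads back `result['evaluation']['performance']`. This is not the actual service behind https://lehre.bpm.in.tum.de/ports/17778/evaluate; the Flask app, the `/evaluate` route, form-encoded arguments, code-fence stripping, and exec-based execution are all assumptions made for illustration.

```python
# Hypothetical sketch only: Flask, the /evaluate route, form-encoded arguments, the
# code-fence stripping and the exec-based execution are assumptions for illustration.
import re

from flask import Flask, jsonify, request

app = Flask(__name__)


def run_pipeline(pipeline_code: str, dataset_path: str) -> float:
    """Execute generated pipeline code and return the value of its `performance` variable."""
    # Strip a ```python ... ``` fence if the LLM wrapped its answer in one, as seen in
    # <current_pipeline> above.
    match = re.search(r"```(?:python)?\s*(.*?)```", pipeline_code, re.DOTALL)
    code = match.group(1) if match else pipeline_code
    # Point the generated code at the real dataset instead of its placeholder path.
    code = re.sub(r"DATASET_PATH\s*=\s*['\"][^'\"]*['\"]",
                  f"DATASET_PATH = {dataset_path!r}", code)
    namespace: dict = {}
    exec(code, namespace)  # trusted input assumed; no sandboxing in this sketch
    return float(namespace["performance"])


@app.route("/evaluate", methods=["POST"])
def evaluate():
    pipeline_code = request.form["pipeline_code"]
    dataset_path = request.form["dataset_path"]
    try:
        performance = run_pipeline(pipeline_code, dataset_path)
    except Exception:
        performance = 0.0  # a pipeline that fails to run scores 0, so the loop terminates
    # The CPEE finalize handler reads result['evaluation']['performance'].
    return jsonify({"evaluation": {"performance": performance}})


if __name__ == "__main__":
    app.run(port=17778)
```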