ilovetools 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ilovetools/__init__.py +42 -0
- ilovetools/ai/__init__.py +13 -0
- ilovetools/ai/embeddings.py +270 -0
- ilovetools/ai/inference.py +5 -0
- ilovetools/ai/llm_helpers.py +141 -0
- ilovetools/audio/__init__.py +5 -0
- ilovetools/automation/__init__.py +5 -0
- ilovetools/conversion/__init__.py +5 -0
- ilovetools/data/__init__.py +27 -0
- ilovetools/data/feature_engineering.py +497 -0
- ilovetools/data/preprocessing.py +234 -0
- ilovetools/database/__init__.py +5 -0
- ilovetools/datetime/__init__.py +5 -0
- ilovetools/files/__init__.py +5 -0
- ilovetools/image/__init__.py +5 -0
- ilovetools/ml/__init__.py +603 -0
- ilovetools/ml/clustering.py +1107 -0
- ilovetools/ml/cross_validation.py +612 -0
- ilovetools/ml/dimensionality.py +1001 -0
- ilovetools/ml/ensemble.py +872 -0
- ilovetools/ml/feature_selection.py +971 -0
- ilovetools/ml/imbalanced.py +797 -0
- ilovetools/ml/interpretation.py +915 -0
- ilovetools/ml/metrics.py +601 -0
- ilovetools/ml/pipeline.py +711 -0
- ilovetools/ml/timeseries.py +984 -0
- ilovetools/ml/tuning.py +781 -0
- ilovetools/security/__init__.py +5 -0
- ilovetools/text/__init__.py +5 -0
- ilovetools/utils/__init__.py +5 -0
- ilovetools/validation/__init__.py +5 -0
- ilovetools/web/__init__.py +5 -0
- ilovetools-0.2.3.dist-info/METADATA +143 -0
- ilovetools-0.2.3.dist-info/RECORD +38 -0
- ilovetools-0.2.3.dist-info/WHEEL +5 -0
- ilovetools-0.2.3.dist-info/licenses/LICENSE +21 -0
- ilovetools-0.2.3.dist-info/top_level.txt +2 -0
- tests/__init__.py +3 -0
|
@@ -0,0 +1,711 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ML Pipeline utilities for workflow automation
|
|
3
|
+
Each function has TWO names: full descriptive name + abbreviated alias
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Dict, Any, Callable, Optional, Tuple
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
# Public names exported by this module. Each pipeline helper is listed twice:
# once under its full descriptive name and once under the shorter alias that
# is bound to the same function object further down in the module.
__all__ = [
    # Full names
    'create_pipeline',
    'add_pipeline_step',
    'execute_pipeline',
    'validate_pipeline',
    'serialize_pipeline',
    'deserialize_pipeline',
    'pipeline_transform',
    'pipeline_fit_transform',
    'get_pipeline_params',
    'set_pipeline_params',
    'clone_pipeline',
    'pipeline_summary',
    # Abbreviated aliases
    'create_pipe',
    'add_step',
    'execute_pipe',
    'validate_pipe',
    'serialize_pipe',
    'deserialize_pipe',
    'pipe_transform',
    'pipe_fit_transform',
    'get_params',
    'set_params',
    'clone_pipe',
    'pipe_summary',
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def create_pipeline(steps: Optional[List[Tuple[str, Callable]]] = None) -> Dict[str, Any]:
    """
    Create a new ML pipeline.

    Alias: create_pipe()

    Initial steps are normalized into step dicts with the keys 'name',
    'function' and 'params', so the returned pipeline holds steps in one
    uniform shape. The caller's list is never stored directly; the
    pipeline owns a fresh list.

    Args:
        steps: Optional initial steps. Each entry may be a (name, function)
            pair, a (name, function, params) triple, or a step dict
            (shallow-copied; a missing 'params' key defaults to {}).

    Returns:
        dict: Pipeline object with the keys 'steps', 'fitted', 'params'
        and 'metadata' ('created' flag and 'n_steps' count).

    Raises:
        ValueError: If an entry is neither a step dict nor a sequence of
            length 2 or 3.

    Examples:
        >>> from ilovetools.ml import create_pipe  # Short alias

        >>> pipeline = create_pipe()
        >>> print(pipeline['steps'])
        []

        >>> def scale(X):
        ...     return [[x / 10 for x in row] for row in X]
        >>>
        >>> pipeline = create_pipe([('scaler', scale)])
        >>> print(pipeline['steps'][0]['name'])
        scaler

        >>> from ilovetools.ml import create_pipeline  # Full name
        >>> pipeline = create_pipeline()

    Notes:
        - Foundation for ML workflows
        - Add steps incrementally
        - Execute in sequence
        - Reusable and modular
    """
    normalized = []

    for entry in (steps if steps is not None else []):
        if isinstance(entry, dict):
            # Copy so later edits to the pipeline do not leak into the
            # caller's dict.
            step = dict(entry)
            step.setdefault('params', {})
        elif isinstance(entry, (tuple, list)) and len(entry) in (2, 3):
            step_params = entry[2] if len(entry) == 3 and entry[2] is not None else {}
            step = {
                'name': entry[0],
                'function': entry[1],
                'params': step_params,
            }
        else:
            raise ValueError(
                "Each pipeline step must be a (name, function) pair, a "
                "(name, function, params) triple or a step dict, "
                f"got {entry!r}"
            )
        normalized.append(step)

    return {
        'steps': normalized,
        'fitted': False,
        'params': {},
        'metadata': {
            'created': True,
            'n_steps': len(normalized),
        }
    }


# Create alias
create_pipe = create_pipeline
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def add_pipeline_step(
    pipeline: Dict[str, Any],
    name: str,
    function: Callable,
    params: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Add a step to the pipeline.

    Alias: add_step()

    The pipeline is modified in place and also returned. Missing 'steps'
    or 'metadata' entries are created on demand, so a hand-built pipeline
    dict can be extended as well.

    Args:
        pipeline: Pipeline object
        name: Step name
        function: Step function
        params: Optional step parameters (stored as given, {} when None)

    Returns:
        dict: The same pipeline object, with the step appended and
        metadata['n_steps'] refreshed.

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step  # Short aliases

        >>> pipeline = create_pipe()
        >>>
        >>> def scale(X):
        ...     return [[x / 10 for x in row] for row in X]
        >>>
        >>> pipeline = add_step(pipeline, 'scaler', scale)
        >>> print(len(pipeline['steps']))
        1

        >>> # Add with parameters
        >>> def encode(X, mapping=None):
        ...     return X
        >>>
        >>> pipeline = add_step(pipeline, 'encoder', encode, {'mapping': {'A': 0, 'B': 1}})
        >>> print(len(pipeline['steps']))
        2

        >>> from ilovetools.ml import add_pipeline_step  # Full name
        >>> pipeline = add_pipeline_step(pipeline, 'step3', lambda x: x)

    Notes:
        - Add steps in order
        - Each step has name and function
        - Optional parameters per step
        - Build complex workflows
    """
    step = {
        'name': name,
        'function': function,
        'params': params if params is not None else {}
    }

    # setdefault keeps existing containers and creates them only when the
    # pipeline dict was built without them.
    steps = pipeline.setdefault('steps', [])
    steps.append(step)
    pipeline.setdefault('metadata', {})['n_steps'] = len(steps)

    return pipeline


# Create alias
add_step = add_pipeline_step
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def execute_pipeline(
    pipeline: Dict[str, Any],
    X: List[List[float]],
    y: Optional[List] = None,
    fit: bool = False
) -> Any:
    """
    Execute pipeline on data.

    Alias: execute_pipe()

    Steps run in order; the output of one step is the input of the next.
    A step's params are passed as keyword arguments when the step function's
    signature accepts them; otherwise the function is called with the data
    only. Exceptions raised inside a step always propagate. A TypeError
    from the step body is no longer mistaken for "does not accept params",
    so a failing step is never silently re-run without its params.

    Args:
        pipeline: Pipeline object
        X: Input data
        y: Optional target values. Accepted for API symmetry; it is not
            forwarded to the step functions.
        fit: Whether to mark the pipeline as fitted after all steps ran

    Returns:
        Transformed data (whatever the last step returns, or X itself for
        an empty pipeline)

    Raises:
        TypeError: If a step has no callable function, e.g. a deserialized
            pipeline whose functions were not re-attached.

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, execute_pipe  # Short aliases

        >>> pipeline = create_pipe()
        >>>
        >>> def scale(X):
        ...     return [[x / 10 for x in row] for row in X]
        >>>
        >>> def add_one(X):
        ...     return [[x + 1 for x in row] for row in X]
        >>>
        >>> pipeline = add_step(pipeline, 'scaler', scale)
        >>> pipeline = add_step(pipeline, 'adder', add_one)
        >>>
        >>> X = [[10, 20], [30, 40]]
        >>> result = execute_pipe(pipeline, X)
        >>> print(result)
        [[2.0, 3.0], [4.0, 5.0]]

        >>> from ilovetools.ml import execute_pipeline  # Full name
        >>> result = execute_pipeline(pipeline, X)

    Notes:
        - Executes steps in sequence
        - Each step transforms data
        - Output of step N → input of step N+1
        - Fit mode for training
    """
    # Local import: the module's top-level import block is left untouched.
    import inspect

    result = X

    for step in pipeline['steps']:
        function = step['function']
        params = step['params']

        if not callable(function):
            raise TypeError(
                f"Step '{step.get('name', '<unnamed>')}' has no callable "
                "function; attach one before executing the pipeline"
            )

        if not params:
            result = function(result)
            continue

        try:
            signature = inspect.signature(function)
        except (TypeError, ValueError):
            # Some builtins/extension callables expose no signature.
            signature = None

        if signature is None:
            # Cannot inspect: keep the historical try-then-fallback call.
            try:
                result = function(result, **params)
            except TypeError:
                result = function(result)
            continue

        # Decide up front whether the params fit the signature, so the step
        # body runs exactly once and its own errors are not swallowed.
        try:
            signature.bind(result, **params)
        except TypeError:
            result = function(result)
        else:
            result = function(result, **params)

    if fit:
        pipeline['fitted'] = True

    return result


# Create alias
execute_pipe = execute_pipeline
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def validate_pipeline(pipeline: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate pipeline structure and steps.

    Alias: validate_pipe()

    Problems are reported in the result instead of being raised: a missing
    'steps' entry counts as an empty pipeline, and steps that are not dicts
    with a 'name' key are listed as errors.

    Args:
        pipeline: Pipeline object

    Returns:
        dict: Validation results with the keys 'valid' (True when there are
        no errors), 'errors', 'warnings' and 'n_steps'.

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, validate_pipe  # Short aliases

        >>> pipeline = create_pipe()
        >>> pipeline = add_step(pipeline, 'scaler', lambda x: x)
        >>>
        >>> validation = validate_pipe(pipeline)
        >>> print(validation['valid'])
        True
        >>> print(validation['n_steps'])
        1

        >>> from ilovetools.ml import validate_pipeline  # Full name
        >>> validation = validate_pipeline(pipeline)

    Notes:
        - Check pipeline structure
        - Verify all steps are callable
        - Ensure no duplicate names
        - Validate before execution
    """
    errors = []
    warnings = []

    steps = pipeline.get('steps') or []

    # Check if pipeline has steps
    if not steps:
        warnings.append("Pipeline has no steps")

    # Separate well-formed steps from malformed ones so the checks below
    # can index them safely.
    well_formed = []
    for index, step in enumerate(steps):
        if isinstance(step, dict) and 'name' in step:
            well_formed.append(step)
        else:
            errors.append(
                f"Step at index {index} must be a dict with a 'name' key"
            )

    # Check for duplicate step names
    step_names = [step['name'] for step in well_formed]
    try:
        has_duplicates = len(step_names) != len(set(step_names))
    except TypeError:
        # Unhashable names cannot go into a set; compare pairwise instead.
        has_duplicates = any(
            name in step_names[:position]
            for position, name in enumerate(step_names)
        )
    if has_duplicates:
        errors.append("Duplicate step names found")

    # Check if all steps are callable
    for step in well_formed:
        if not callable(step.get('function')):
            errors.append(f"Step '{step['name']}' function is not callable")

    return {
        'valid': len(errors) == 0,
        'errors': errors,
        'warnings': warnings,
        'n_steps': len(steps),
    }


# Create alias
validate_pipe = validate_pipeline
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def serialize_pipeline(pipeline: Dict[str, Any], include_functions: bool = False) -> str:
    """
    Serialize pipeline to JSON string.

    Alias: serialize_pipe()

    Only step names and params are written, plus the pipeline's 'fitted',
    'params' and 'metadata' entries. Function objects are never serialized;
    with include_functions=True a 'function_name' label is added per step.

    Args:
        pipeline: Pipeline object
        include_functions: Whether to record each step function's name.
            The name is the function's __name__, the class name for
            callables without __name__ (e.g. functools.partial objects),
            or null when the step has no function.

    Returns:
        str: JSON string (indented by 2 spaces)

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, serialize_pipe  # Short aliases

        >>> pipeline = create_pipe()
        >>> pipeline = add_step(pipeline, 'scaler', lambda x: x)
        >>>
        >>> serialized = serialize_pipe(pipeline)
        >>> print(type(serialized))
        <class 'str'>

        >>> from ilovetools.ml import serialize_pipeline  # Full name
        >>> serialized = serialize_pipeline(pipeline)

    Notes:
        - Save pipeline to file
        - Version control
        - Share with team
        - Functions not serialized by default
    """
    # Create serializable version
    serializable = {
        'steps': [],
        'fitted': pipeline.get('fitted', False),
        'params': pipeline.get('params', {}),
        'metadata': pipeline.get('metadata', {}),
    }

    for step in pipeline['steps']:
        step_data = {
            'name': step['name'],
            'params': step['params'],
        }

        if include_functions:
            # Informational label only; it cannot be used to restore the
            # function on deserialization.
            function = step.get('function')
            if function is None:
                step_data['function_name'] = None
            else:
                step_data['function_name'] = getattr(
                    function, '__name__', type(function).__name__
                )

        serializable['steps'].append(step_data)

    return json.dumps(serializable, indent=2)


# Create alias
serialize_pipe = serialize_pipeline
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def deserialize_pipeline(json_string: str) -> Dict[str, Any]:
    """
    Rebuild a pipeline dict from its JSON representation.

    Alias: deserialize_pipe()

    Args:
        json_string: JSON text describing a pipeline

    Returns:
        dict: Pipeline object whose steps carry 'name' and 'params' from the
        JSON and 'function' set to None. Missing top-level entries default
        to fitted=False, params={} and metadata={}.

    Examples:
        >>> from ilovetools.ml import deserialize_pipe  # Short alias

        >>> json_str = '{"steps": [], "fitted": false, "params": {}}'
        >>> pipeline = deserialize_pipe(json_str)
        >>> print(pipeline['fitted'])
        False

        >>> from ilovetools.ml import deserialize_pipeline  # Full name
        >>> pipeline = deserialize_pipeline(json_str)

    Notes:
        - Load pipeline from file
        - Restore structure
        - Functions must be re-added
        - Useful for configuration
    """
    parsed = json.loads(json_string)

    restored = {
        'steps': [],
        'fitted': parsed.get('fitted', False),
        'params': parsed.get('params', {}),
        'metadata': parsed.get('metadata', {}),
    }

    # JSON carries no callables: every restored step gets a None function
    # that the caller has to replace manually.
    restored['steps'].extend(
        {
            'name': entry['name'],
            'function': None,
            'params': entry.get('params', {}),
        }
        for entry in parsed.get('steps', [])
    )

    return restored


# Create alias
deserialize_pipe = deserialize_pipeline
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def pipeline_transform(
    pipeline: Dict[str, Any],
    X: List[List[float]]
) -> List[List[float]]:
    """
    Run an already fitted pipeline over new data.

    Alias: pipe_transform()

    Args:
        pipeline: Fitted pipeline object (its 'fitted' entry must be truthy)
        X: Input data

    Returns:
        list: Result of execute_pipeline(pipeline, X, fit=False)

    Raises:
        ValueError: If the pipeline is not marked as fitted.

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, pipe_transform  # Short aliases

        >>> pipeline = create_pipe()
        >>>
        >>> def scale(X):
        ...     return [[x / 10 for x in row] for row in X]
        >>>
        >>> pipeline = add_step(pipeline, 'scaler', scale)
        >>> pipeline['fitted'] = True
        >>>
        >>> X = [[10, 20], [30, 40]]
        >>> result = pipe_transform(pipeline, X)
        >>> print(result)
        [[1.0, 2.0], [3.0, 4.0]]

        >>> from ilovetools.ml import pipeline_transform  # Full name
        >>> result = pipeline_transform(pipeline, X)

    Notes:
        - Use after fitting
        - Transform new data
        - Same transformations as training
        - No fitting during transform
    """
    is_fitted = pipeline.get('fitted')

    if is_fitted:
        # fit=False leaves the pipeline's fitted flag untouched.
        return execute_pipeline(pipeline, X, fit=False)

    raise ValueError("Pipeline must be fitted before transform")


# Create alias
pipe_transform = pipeline_transform
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def pipeline_fit_transform(
    pipeline: Dict[str, Any],
    X: List[List[float]],
    y: Optional[List] = None
) -> List[List[float]]:
    """
    Run the pipeline in fit mode and return the transformed data.

    Alias: pipe_fit_transform()

    Args:
        pipeline: Pipeline object
        X: Input data
        y: Optional target values, handed on to execute_pipeline

    Returns:
        list: Result of execute_pipeline(pipeline, X, y=y, fit=True)

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, pipe_fit_transform  # Short aliases

        >>> pipeline = create_pipe()
        >>>
        >>> def scale(X):
        ...     return [[x / 10 for x in row] for row in X]
        >>>
        >>> pipeline = add_step(pipeline, 'scaler', scale)
        >>>
        >>> X = [[10, 20], [30, 40]]
        >>> result = pipe_fit_transform(pipeline, X)
        >>> print(result)
        [[1.0, 2.0], [3.0, 4.0]]
        >>> print(pipeline['fitted'])
        True

        >>> from ilovetools.ml import pipeline_fit_transform  # Full name
        >>> result = pipeline_fit_transform(pipeline, X)

    Notes:
        - Fit and transform in one call
        - Use for training data
        - Pipeline becomes fitted
        - Convenient for workflows
    """
    # Thin wrapper: execution and the fitted flag are handled by
    # execute_pipeline.
    transformed = execute_pipeline(pipeline, X, y=y, fit=True)
    return transformed


# Create alias
pipe_fit_transform = pipeline_fit_transform
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def get_pipeline_params(pipeline: Dict[str, Any]) -> Dict[str, Any]:
    """
    Collect the parameters of every step into one flat dict.

    Alias: get_params()

    Args:
        pipeline: Pipeline object

    Returns:
        dict: Mapping of "<step name>__<param name>" to the param value.
        When two steps produce the same key, the later step wins.

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, get_params  # Short aliases

        >>> pipeline = create_pipe()
        >>> pipeline = add_step(pipeline, 'scaler', lambda x: x, {'factor': 10})
        >>>
        >>> params = get_params(pipeline)
        >>> print(params)
        {'scaler__factor': 10}

        >>> from ilovetools.ml import get_pipeline_params  # Full name
        >>> params = get_pipeline_params(pipeline)

    Notes:
        - Get all step parameters
        - Nested parameter names
        - Useful for inspection
        - Format: step__param
    """
    flattened = {}

    for step in pipeline['steps']:
        prefix = step['name']
        flattened.update({
            f"{prefix}__{option}": value
            for option, value in step['params'].items()
        })

    return flattened


# Create alias
get_params = get_pipeline_params
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def set_pipeline_params(
    pipeline: Dict[str, Any],
    params: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Set pipeline parameters.

    Alias: set_params()

    Each key is split at its first double underscore into step name and
    parameter name, and the first step with that name is updated. If no
    step has that name, the key is matched against "<step name>__" prefixes
    instead, so steps whose own name contains a double underscore can be
    addressed too. Keys without a double underscore, and keys that match no
    step, are ignored.

    Args:
        pipeline: Pipeline object (modified in place)
        params: Parameters to set (format: step__param)

    Returns:
        dict: The same pipeline object

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, set_params  # Short aliases

        >>> pipeline = create_pipe()
        >>> pipeline = add_step(pipeline, 'scaler', lambda x: x, {'factor': 10})
        >>>
        >>> pipeline = set_params(pipeline, {'scaler__factor': 20})
        >>> print(pipeline['steps'][0]['params']['factor'])
        20

        >>> from ilovetools.ml import set_pipeline_params  # Full name
        >>> pipeline = set_pipeline_params(pipeline, {'scaler__factor': 30})

    Notes:
        - Update step parameters
        - Use double underscore notation
        - Useful for tuning
        - Format: step__param
    """
    for param_key, param_value in params.items():
        if '__' not in param_key:
            continue

        steps = pipeline['steps']
        step_name, param_name = param_key.split('__', 1)

        # Primary rule: exact match on the text before the first '__'.
        target = next(
            (step for step in steps if step['name'] == step_name),
            None
        )

        if target is None:
            # Fallback: a step name that itself contains '__' can only be
            # found by prefix.
            for step in steps:
                prefix = f"{step['name']}__"
                if param_key.startswith(prefix):
                    target = step
                    param_name = param_key[len(prefix):]
                    break

        if target is not None:
            target['params'][param_name] = param_value

    return pipeline


# Create alias
set_params = set_pipeline_params
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def clone_pipeline(pipeline: Dict[str, Any]) -> Dict[str, Any]:
    """
    Create a deep copy of pipeline.

    Alias: clone_pipe()

    Pipeline-level 'params' and 'metadata' and every step's 'params' are
    deep-copied, so nested containers (for example a mapping stored as a
    step parameter) are independent of the original. Step functions are
    shared by reference.

    Args:
        pipeline: Pipeline object

    Returns:
        dict: Cloned pipeline with the keys 'steps', 'fitted', 'params'
        and 'metadata'.

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, clone_pipe  # Short aliases

        >>> pipeline = create_pipe()
        >>> pipeline = add_step(pipeline, 'scaler', lambda x: x)
        >>>
        >>> cloned = clone_pipe(pipeline)
        >>> print(len(cloned['steps']))
        1
        >>> print(cloned is pipeline)
        False

        >>> from ilovetools.ml import clone_pipeline  # Full name
        >>> cloned = clone_pipeline(pipeline)

    Notes:
        - Create independent copy
        - Modify without affecting original
        - Useful for experiments
        - Functions are shared (not deep copied)
    """
    # Local import: the module's top-level import block is left untouched.
    from copy import deepcopy

    return {
        'steps': [
            {
                'name': step['name'],
                'function': step['function'],  # Shared reference
                'params': deepcopy(step['params']),
            }
            for step in pipeline['steps']
        ],
        'fitted': pipeline.get('fitted', False),
        'params': deepcopy(pipeline.get('params', {})),
        'metadata': deepcopy(pipeline.get('metadata', {})),
    }


# Create alias
clone_pipe = clone_pipeline
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
def pipeline_summary(pipeline: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build a short overview of a pipeline.

    Alias: pipe_summary()

    Args:
        pipeline: Pipeline object

    Returns:
        dict: Summary with the keys 'n_steps' (number of steps),
        'step_names' (names in step order), 'fitted' (False when the
        pipeline has no such entry), 'total_params' (parameter count summed
        over all steps) and 'has_metadata' (whether a 'metadata' key exists).

    Examples:
        >>> from ilovetools.ml import create_pipe, add_step, pipe_summary  # Short aliases

        >>> pipeline = create_pipe()
        >>> pipeline = add_step(pipeline, 'scaler', lambda x: x)
        >>> pipeline = add_step(pipeline, 'encoder', lambda x: x)
        >>>
        >>> summary = pipe_summary(pipeline)
        >>> print(summary['n_steps'])
        2
        >>> print(summary['step_names'])
        ['scaler', 'encoder']

        >>> from ilovetools.ml import pipeline_summary  # Full name
        >>> summary = pipeline_summary(pipeline)

    Notes:
        - Quick overview
        - Step count and names
        - Fitted status
        - Parameter count
    """
    steps = pipeline['steps']

    names = []
    for step in steps:
        names.append(step['name'])

    param_total = 0
    for step in steps:
        param_total += len(step['params'])

    summary = {
        'n_steps': len(steps),
        'step_names': names,
        'fitted': pipeline.get('fitted', False),
        'total_params': param_total,
        'has_metadata': 'metadata' in pipeline,
    }
    return summary


# Create alias
pipe_summary = pipeline_summary
|