collie-mlops 0.1.0b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of collie-mlops might be problematic.

Files changed (53)
  1. collie_mlops-0.1.0b0/LICENSE +21 -0
  2. collie_mlops-0.1.0b0/MANIFEST.in +15 -0
  3. collie_mlops-0.1.0b0/PKG-INFO +217 -0
  4. collie_mlops-0.1.0b0/README.md +172 -0
  5. collie_mlops-0.1.0b0/collie/__init__.py +69 -0
  6. collie_mlops-0.1.0b0/collie/_common/__init__.py +0 -0
  7. collie_mlops-0.1.0b0/collie/_common/decorator.py +53 -0
  8. collie_mlops-0.1.0b0/collie/_common/exceptions.py +104 -0
  9. collie_mlops-0.1.0b0/collie/_common/mlflow_model_io/__init__.py +0 -0
  10. collie_mlops-0.1.0b0/collie/_common/mlflow_model_io/base_flavor_handler.py +26 -0
  11. collie_mlops-0.1.0b0/collie/_common/mlflow_model_io/flavor_registry.py +72 -0
  12. collie_mlops-0.1.0b0/collie/_common/mlflow_model_io/model_flavors.py +259 -0
  13. collie_mlops-0.1.0b0/collie/_common/mlflow_model_io/model_io.py +65 -0
  14. collie_mlops-0.1.0b0/collie/_common/utils.py +13 -0
  15. collie_mlops-0.1.0b0/collie/contracts/__init__.py +0 -0
  16. collie_mlops-0.1.0b0/collie/contracts/event.py +79 -0
  17. collie_mlops-0.1.0b0/collie/contracts/mlflow.py +444 -0
  18. collie_mlops-0.1.0b0/collie/contracts/orchestrator.py +79 -0
  19. collie_mlops-0.1.0b0/collie/core/__init__.py +41 -0
  20. collie_mlops-0.1.0b0/collie/core/enums/__init__.py +0 -0
  21. collie_mlops-0.1.0b0/collie/core/enums/components.py +26 -0
  22. collie_mlops-0.1.0b0/collie/core/enums/ml_models.py +20 -0
  23. collie_mlops-0.1.0b0/collie/core/evaluator/__init__.py +0 -0
  24. collie_mlops-0.1.0b0/collie/core/evaluator/evaluator.py +147 -0
  25. collie_mlops-0.1.0b0/collie/core/models.py +125 -0
  26. collie_mlops-0.1.0b0/collie/core/orchestrator/__init__.py +0 -0
  27. collie_mlops-0.1.0b0/collie/core/orchestrator/orchestrator.py +47 -0
  28. collie_mlops-0.1.0b0/collie/core/pusher/__init__.py +0 -0
  29. collie_mlops-0.1.0b0/collie/core/pusher/pusher.py +98 -0
  30. collie_mlops-0.1.0b0/collie/core/trainer/__init__.py +0 -0
  31. collie_mlops-0.1.0b0/collie/core/trainer/trainer.py +78 -0
  32. collie_mlops-0.1.0b0/collie/core/transform/__init__.py +0 -0
  33. collie_mlops-0.1.0b0/collie/core/transform/transform.py +87 -0
  34. collie_mlops-0.1.0b0/collie/core/tuner/__init__.py +0 -0
  35. collie_mlops-0.1.0b0/collie/core/tuner/tuner.py +84 -0
  36. collie_mlops-0.1.0b0/collie/helper/__init__.py +0 -0
  37. collie_mlops-0.1.0b0/collie/helper/pytorch/__init__.py +0 -0
  38. collie_mlops-0.1.0b0/collie/helper/pytorch/callback/__init__.py +0 -0
  39. collie_mlops-0.1.0b0/collie/helper/pytorch/callback/callback.py +155 -0
  40. collie_mlops-0.1.0b0/collie/helper/pytorch/callback/earlystop.py +54 -0
  41. collie_mlops-0.1.0b0/collie/helper/pytorch/callback/model_checkpoint.py +100 -0
  42. collie_mlops-0.1.0b0/collie/helper/pytorch/model/__init__.py +0 -0
  43. collie_mlops-0.1.0b0/collie/helper/pytorch/model/loader.py +55 -0
  44. collie_mlops-0.1.0b0/collie/helper/pytorch/trainer.py +304 -0
  45. collie_mlops-0.1.0b0/collie_mlops.egg-info/PKG-INFO +217 -0
  46. collie_mlops-0.1.0b0/collie_mlops.egg-info/SOURCES.txt +52 -0
  47. collie_mlops-0.1.0b0/collie_mlops.egg-info/dependency_links.txt +1 -0
  48. collie_mlops-0.1.0b0/collie_mlops.egg-info/not-zip-safe +1 -0
  49. collie_mlops-0.1.0b0/collie_mlops.egg-info/requires.txt +11 -0
  50. collie_mlops-0.1.0b0/collie_mlops.egg-info/top_level.txt +1 -0
  51. collie_mlops-0.1.0b0/pyproject.toml +192 -0
  52. collie_mlops-0.1.0b0/requirements.txt +13 -0
  53. collie_mlops-0.1.0b0/setup.cfg +158 -0
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 ChingHuanChiu
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,15 @@
+ include README.md
+ include LICENSE
+ include requirements.txt
+ exclude pytest.ini
+
+ recursive-include collie *.py
+ recursive-include collie py.typed
+
+ recursive-exclude tests *
+ recursive-exclude example *
+ recursive-exclude deploy *
+ recursive-exclude __pycache__ *
+ recursive-exclude *.pyc
+ recursive-exclude *.pyo
+ recursive-exclude .DS_Store
@@ -0,0 +1,217 @@
+ Metadata-Version: 2.4
+ Name: collie-mlops
+ Version: 0.1.0b0
+ Summary: A Lightweight MLOps Framework for Machine Learning Workflows
+ Home-page: https://github.com/ChingHuanChiu/collie
+ Author: ChingHuanChiu
+ Author-email: ChingHuanChiu <stevenchiou8@gmail.com>
+ Maintainer-email: ChingHuanChiu <stevenchiou8@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/ChingHuanChiu/collie
+ Project-URL: Documentation, https://github.com/ChingHuanChiu/collie/blob/main/README.md
+ Project-URL: Repository, https://github.com/ChingHuanChiu/collie
+ Project-URL: Bug Tracker, https://github.com/ChingHuanChiu/collie/issues
+ Project-URL: Changelog, https://github.com/ChingHuanChiu/collie/blob/main/CHANGELOG.md
+ Keywords: mlops,machine-learning,mlflow,pipeline,orchestration,deep-learning,experiment-tracking
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: mlflow>=2.0.0
+ Requires-Dist: pydantic>=2.0.0
+ Requires-Dist: pandas>=1.3.0
+ Requires-Dist: numpy<2.0.0,>=1.20.0
+ Requires-Dist: scikit-learn>=1.0.0
+ Requires-Dist: xgboost>=1.5.0
+ Requires-Dist: torch>=1.9.0
+ Requires-Dist: pytorch-lightning>=2.0.0
+ Requires-Dist: lightgbm>=3.0.0
+ Requires-Dist: transformers>=4.0.0
+ Requires-Dist: sentence-transformers>=2.0.0
+ Dynamic: license-file
+
+ # Collie 🐕
+
+ [![PyPI version](https://badge.fury.io/py/collie-mlops.svg)](https://badge.fury.io/py/collie-mlops)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Documentation](https://img.shields.io/badge/docs-sphinx-blue.svg)](docs/_build/html/index.html)
+ [![codecov](https://codecov.io/gh/ChingHuanChiu/collie/branch/main/graph/badge.svg)](https://codecov.io/gh/ChingHuanChiu/collie)
+
+ A Lightweight MLOps Framework for Machine Learning Workflows
+
+
+ ## Overview
+
+ Collie is a modern MLOps framework that streamlines machine learning workflows through a component-based architecture integrated with MLflow. It enables data scientists and ML engineers to build, deploy, and manage ML pipelines through modular components, each handling a different stage of the ML lifecycle.
+
+ ## Features
+
+ - **Component-Based Architecture**: Modular design with specialized components for each ML workflow stage
+ - **MLflow Integration**: Built-in experiment tracking, model registration, and deployment capabilities
+ - **Pipeline Orchestration**: Seamless workflow management with an event-driven architecture
+ - **Model Management**: Automated model versioning, staging, and promotion
+ - **Framework Agnostic**: Supports multiple ML frameworks (PyTorch, scikit-learn, XGBoost, LightGBM, Transformers)
+
+ ## Architecture
+
+ Collie follows an event-driven architecture with the following core components:
+
+ - **Transformer**: Data preprocessing and feature engineering
+ - **Tuner**: Hyperparameter optimization
+ - **Trainer**: Model training and validation
+ - **Evaluator**: Model evaluation and comparison
+ - **Pusher**: Model deployment and registration
+ - **Orchestrator**: Workflow coordination and execution
+
+ ## Quick Start
+
+ ### Installation
+
+ ```bash
+ pip install collie-mlops
+ ```
+
+ This installs Collie together with all supported ML frameworks, including:
+ - scikit-learn
+ - PyTorch
+ - XGBoost
+ - LightGBM
+ - Transformers (with Sentence Transformers)
+
+ ### Prerequisites
+
+ - Python >= 3.10
+ - MLflow tracking server (can be local or remote)
+
+
+ ## Components
+
+ ### Transformer
+ Handles data preprocessing, feature engineering, and data validation.
+
+ ```python
+ class CustomTransformer(Transformer):
+     def handle(self, event) -> Event:
+         # Process your data
+         processed_data = self.preprocess(raw_data)
+         return Event(payload=TransformerPayload(train_data=processed_data))
+ ```
+
+ ### Tuner
+ Performs hyperparameter optimization using various strategies.
+
+ ```python
+ class CustomTuner(Tuner):
+     def handle(self, event) -> Event:
+         # Optimize hyperparameters
+         best_params = self.optimize(search_space)
+         return Event(payload=TunerPayload(hyperparameters=best_params))
+ ```
+
+ ### Trainer
+ Trains machine learning models with automatic experiment tracking.
+
+ ```python
+ class CustomTrainer(Trainer):
+     def handle(self, event) -> Event:
+         # Train your model
+         model = self.train(data, hyperparameters)
+         return Event(payload=TrainerPayload(model=model))
+ ```
+
+ ### Evaluator
+ Evaluates model performance and decides on deployment.
+
+ ```python
+ class CustomEvaluator(Evaluator):
+     def handle(self, event) -> Event:
+         # Evaluate model performance
+         metrics = self.evaluate(model, test_data)
+         is_better = self.compare_with_production(metrics)
+         return Event(payload=EvaluatorPayload(
+             metrics=metrics,
+             is_better_than_production=is_better
+         ))
+ ```
+
+ ### Pusher
+ Handles model deployment and registration.
+
+ ```python
+ class CustomPusher(Pusher):
+     def handle(self, event) -> Event:
+         # Deploy model to production
+         model_uri = self.deploy(model)
+         return Event(payload=PusherPayload(model_uri=model_uri))
+ ```
+
+ ## Configuration
+
+ ### MLflow Setup
+
+ Start an MLflow tracking server:
+
+ ```bash
+ mlflow server \
+     --backend-store-uri sqlite:///mlflow.db \
+     --default-artifact-root ./mlruns \
+     --host 0.0.0.0 \
+     --port 5000
+ ```
+
+ ## Supported Frameworks
+
+ Collie currently supports the following ML frameworks through its model flavor system:
+
+ - **PyTorch**
+ - **scikit-learn**
+ - **XGBoost**
+ - **LightGBM**
+ - **Transformers**
+
+
+ ## Documentation
+
+ [Getting started guide](https://collie-mlops.readthedocs.io/en/latest/getting_started.html)
+
+ ## Roadmap
+
+ - [ ] TensorFlow/Keras support
+ - [ ] Model monitoring and drift detection
+ - [ ] Integration with Airflow/Kubeflow
+ - [ ] Integrate an LLM training/fine-tuning framework
+ - [ ] Reduce heavy import and installation overhead
+
+ ## License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+ ## Citation
+
+ If you use Collie in your research, please cite:
+
+ ```bibtex
+ @software{collie2025,
+   author = {ChingHuanChiu},
+   title = {Collie: A Lightweight MLOps Framework},
+   year = {2025},
+   url = {https://github.com/ChingHuanChiu/collie}
+ }
+ ```
+
+ ---
+
@@ -0,0 +1,172 @@
+ # Collie 🐕
+
+ [![PyPI version](https://badge.fury.io/py/collie-mlops.svg)](https://badge.fury.io/py/collie-mlops)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Documentation](https://img.shields.io/badge/docs-sphinx-blue.svg)](docs/_build/html/index.html)
+ [![codecov](https://codecov.io/gh/ChingHuanChiu/collie/branch/main/graph/badge.svg)](https://codecov.io/gh/ChingHuanChiu/collie)
+
+ A Lightweight MLOps Framework for Machine Learning Workflows
+
+
+ ## Overview
+
+ Collie is a modern MLOps framework that streamlines machine learning workflows through a component-based architecture integrated with MLflow. It enables data scientists and ML engineers to build, deploy, and manage ML pipelines through modular components, each handling a different stage of the ML lifecycle.
+
+ ## Features
+
+ - **Component-Based Architecture**: Modular design with specialized components for each ML workflow stage
+ - **MLflow Integration**: Built-in experiment tracking, model registration, and deployment capabilities
+ - **Pipeline Orchestration**: Seamless workflow management with an event-driven architecture
+ - **Model Management**: Automated model versioning, staging, and promotion
+ - **Framework Agnostic**: Supports multiple ML frameworks (PyTorch, scikit-learn, XGBoost, LightGBM, Transformers)
+
+ ## Architecture
+
+ Collie follows an event-driven architecture with the following core components:
+
+ - **Transformer**: Data preprocessing and feature engineering
+ - **Tuner**: Hyperparameter optimization
+ - **Trainer**: Model training and validation
+ - **Evaluator**: Model evaluation and comparison
+ - **Pusher**: Model deployment and registration
+ - **Orchestrator**: Workflow coordination and execution
+
+ ## Quick Start
+
+ ### Installation
+
+ ```bash
+ pip install collie-mlops
+ ```
+
+ This installs Collie together with all supported ML frameworks, including:
+ - scikit-learn
+ - PyTorch
+ - XGBoost
+ - LightGBM
+ - Transformers (with Sentence Transformers)
+
+ ### Prerequisites
+
+ - Python >= 3.10
+ - MLflow tracking server (can be local or remote)
+
+
+ ## Components
+
+ ### Transformer
+ Handles data preprocessing, feature engineering, and data validation.
+
+ ```python
+ class CustomTransformer(Transformer):
+     def handle(self, event) -> Event:
+         # Process your data
+         processed_data = self.preprocess(raw_data)
+         return Event(payload=TransformerPayload(train_data=processed_data))
+ ```
+
+ ### Tuner
+ Performs hyperparameter optimization using various strategies.
+
+ ```python
+ class CustomTuner(Tuner):
+     def handle(self, event) -> Event:
+         # Optimize hyperparameters
+         best_params = self.optimize(search_space)
+         return Event(payload=TunerPayload(hyperparameters=best_params))
+ ```
+
+ ### Trainer
+ Trains machine learning models with automatic experiment tracking.
+
+ ```python
+ class CustomTrainer(Trainer):
+     def handle(self, event) -> Event:
+         # Train your model
+         model = self.train(data, hyperparameters)
+         return Event(payload=TrainerPayload(model=model))
+ ```
+
+ ### Evaluator
+ Evaluates model performance and decides on deployment.
+
+ ```python
+ class CustomEvaluator(Evaluator):
+     def handle(self, event) -> Event:
+         # Evaluate model performance
+         metrics = self.evaluate(model, test_data)
+         is_better = self.compare_with_production(metrics)
+         return Event(payload=EvaluatorPayload(
+             metrics=metrics,
+             is_better_than_production=is_better
+         ))
+ ```
+
+ ### Pusher
+ Handles model deployment and registration.
+
+ ```python
+ class CustomPusher(Pusher):
+     def handle(self, event) -> Event:
+         # Deploy model to production
+         model_uri = self.deploy(model)
+         return Event(payload=PusherPayload(model_uri=model_uri))
+ ```
+
+ ## Configuration
+
+ ### MLflow Setup
+
+ Start an MLflow tracking server:
+
+ ```bash
+ mlflow server \
+     --backend-store-uri sqlite:///mlflow.db \
+     --default-artifact-root ./mlruns \
+     --host 0.0.0.0 \
+     --port 5000
+ ```
+
+ ## Supported Frameworks
+
+ Collie currently supports the following ML frameworks through its model flavor system:
+
+ - **PyTorch**
+ - **scikit-learn**
+ - **XGBoost**
+ - **LightGBM**
+ - **Transformers**
+
+
+ ## Documentation
+
+ [Getting started guide](https://collie-mlops.readthedocs.io/en/latest/getting_started.html)
+
+ ## Roadmap
+
+ - [ ] TensorFlow/Keras support
+ - [ ] Model monitoring and drift detection
+ - [ ] Integration with Airflow/Kubeflow
+ - [ ] Integrate an LLM training/fine-tuning framework
+ - [ ] Reduce heavy import and installation overhead
+
+ ## License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+ ## Citation
+
+ If you use Collie in your research, please cite:
+
+ ```bibtex
+ @software{collie2025,
+   author = {ChingHuanChiu},
+   title = {Collie: A Lightweight MLOps Framework},
+   year = {2025},
+   url = {https://github.com/ChingHuanChiu/collie}
+ }
+ ```
+
+ ---
+
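The README shows each component in isolation; the package docstring in collie/__init__.py (next file) indicates they are wired together through an Orchestrator. The sketch below combines the two, assuming the constructor arguments from that docstring (components, tracking_uri, registered_model_name) and the handle/Event contract from the snippets above; the payload field access and the toy logic inside the components are illustrative assumptions, not the library's API.

```python
# Hypothetical end-to-end wiring; a sketch, not the package's documented usage.
from collie import (
    Orchestrator, Transformer, Trainer, Event,
    TransformerPayload, TrainerPayload,
)


class MyTransformer(Transformer):
    def handle(self, event) -> Event:
        # Toy "preprocessing": a list of (feature, label) pairs.
        train_data = [(x, 2 * x) for x in range(10)]
        return Event(payload=TransformerPayload(train_data=train_data))


class MyTrainer(Trainer):
    def handle(self, event) -> Event:
        # Assumes the previous payload is reachable as event.payload.train_data.
        data = event.payload.train_data
        # Toy "training": estimate a single slope parameter.
        slope = sum(y for _, y in data) / max(sum(x for x, _ in data), 1)
        return Event(payload=TrainerPayload(model={"slope": slope}))


# Requires a reachable MLflow tracking server (see the MLflow Setup section).
orchestrator = Orchestrator(
    components=[MyTransformer(), MyTrainer()],
    tracking_uri="http://localhost:5000",
    registered_model_name="my_model",
)
orchestrator.run()
```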
@@ -0,0 +1,69 @@
+ """
+ Collie - A Lightweight MLOps Framework for Machine Learning Workflows
+
+ Collie provides a modular, event-driven architecture for building ML pipelines
+ with deep MLflow integration.
+
+ Quick Start:
+     >>> from collie import Transformer, Trainer, Orchestrator
+     >>> # Define your components
+     >>> orchestrator = Orchestrator(
+     ...     components=[MyTransformer(), MyTrainer()],
+     ...     tracking_uri="http://localhost:5000",
+     ...     registered_model_name="my_model"
+     ... )
+     >>> orchestrator.run()
+
+ For more examples, see: https://github.com/ChingHuanChiu/collie
+ """
+
+ __author__ = "ChingHuanChiu"
+ __email__ = "stevenchiou8@gmail.com"
+ __version__ = "0.1.0b0"
+
+ # Import all main components for easy access
+ from .contracts.event import Event, EventType, PipelineContext
+ from .core.transform.transform import Transformer
+ from .core.trainer.trainer import Trainer
+ from .core.tuner.tuner import Tuner
+ from .core.evaluator.evaluator import Evaluator
+ from .core.pusher.pusher import Pusher
+ from .core.orchestrator.orchestrator import Orchestrator
+
+ # Import data models
+ from .core.models import (
+     TransformerPayload,
+     TrainerPayload,
+     TunerPayload,
+     EvaluatorPayload,
+     PusherPayload,
+ )
+
+ # Import enums for configuration
+ from .core.enums.ml_models import ModelFlavor, MLflowModelStage
+
+ __all__ = [
+     # Core components - the main classes users interact with
+     "Transformer",
+     "Trainer",
+     "Tuner",
+     "Evaluator",
+     "Pusher",
+     "Orchestrator",
+
+     # Event system - for building custom workflows
+     "Event",
+     "EventType",
+     "PipelineContext",
+
+     # Payload models - for type-safe data passing
+     "TransformerPayload",
+     "TrainerPayload",
+     "TunerPayload",
+     "EvaluatorPayload",
+     "PusherPayload",
+
+     # Configuration enums
+     "ModelFlavor",
+     "MLflowModelStage",
+ ]
File without changes
@@ -0,0 +1,53 @@
+ from typing import Tuple, List
+ from functools import wraps
+
+
+ def type_checker(
+     typing: Tuple[type],
+     error_msg: str
+ ):
+     """
+     A decorator that checks the type of the output of a function.
+
+     Args:
+         typing (Tuple[type]): A tuple of types to check against.
+         error_msg (str): The error message to be raised if the type does not match.
+
+     Raises:
+         TypeError: If the type of the output of the function does not match with given types.
+     """
+
+     def closure(func):
+         @wraps(func)
+         def wrapper(*arg, **kwarg):
+             result = func(*arg, **kwarg)
+             if not isinstance(result, typing):
+                 raise TypeError(error_msg)
+             return result
+         return wrapper
+     return closure
+
+
+ def dict_key_checker(keys: List[str]):
+     """
+     A decorator that checks the keys of the output of a function.
+
+     Args:
+         keys (List[str]): A list of keys to check against.
+
+     Raises:
+         TypeError: If the output of the function is not a dictionary.
+         KeyError: If the output of the function does not contain all the keys in the list.
+     """
+     def closure(func):
+         @wraps(func)
+         def wrapper(*arg, **kwarg):
+             result = func(*arg, **kwarg)
+             if not isinstance(result, dict):
+                 raise TypeError("The output must be a dictionary.")
+             all_keys_exist = all(key in result for key in keys)
+             if not all_keys_exist:
+                 raise KeyError(f"The following keys must all exist in the output: {keys}. Output: {result}")
+             return result
+         return wrapper
+     return closure
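For context, a minimal sketch (not part of the package) of how these output-validating decorators might be applied; the decorated function and its return value are hypothetical.

```python
# Hypothetical usage of collie's internal decorators; the function below is
# illustrative and not part of the package.
from collie._common.decorator import type_checker, dict_key_checker


@type_checker(typing=(dict,), error_msg="predict_config must return a dict")
@dict_key_checker(keys=["model_uri", "stage"])
def predict_config() -> dict:
    # Both checks run on the return value: it must be a dict (type_checker)
    # and it must contain every required key (dict_key_checker).
    return {"model_uri": "models:/my_model/1", "stage": "Production"}


config = predict_config()  # passes both checks
# Dropping "stage" from the return value would raise KeyError;
# returning a list instead of a dict would raise TypeError.
```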
@@ -0,0 +1,104 @@
+
+ class CollieBaseException(Exception):
+     """Base exception for all Collie framework errors."""
+
+     def __init__(self, message: str, component: str = None, details: dict = None):
+         self.message = message
+         self.component = component or self.__class__.__name__.replace('Error', '')
+         self.details = details or {}
+
+         detailed_message = f"[{self.component}] {message}"
+         if self.details:
+             detailed_message += f" Details: {self.details}"
+
+         super().__init__(detailed_message)
+
+
+ class MLflowConfigurationError(CollieBaseException):
+     """Raised when MLflow configuration is invalid."""
+
+     def __init__(self, message: str, config_param: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if config_param:
+             details['config_parameter'] = config_param
+         super().__init__(message, component="MLflow Config", details=details)
+
+
+ class MLflowOperationError(CollieBaseException):
+     """Raised when MLflow operations fail."""
+
+     def __init__(self, message: str, operation: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if operation:
+             details['operation'] = operation
+         super().__init__(message, component="MLflow Operation", details=details)
+
+
+ class OrchestratorError(CollieBaseException):
+     """Raised for errors in the orchestrator process."""
+
+     def __init__(self, message: str, pipeline_stage: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if pipeline_stage:
+             details['pipeline_stage'] = pipeline_stage
+         super().__init__(message, component="Orchestrator", details=details)
+
+
+ class TransformerError(CollieBaseException):
+     """Raised when data transformation fails."""
+
+     def __init__(self, message: str, data_type: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if data_type:
+             details['data_type'] = data_type
+         super().__init__(message, component="Transformer", details=details)
+
+
+ class TrainerError(CollieBaseException):
+     """Raised when model training fails."""
+
+     def __init__(self, message: str, model_type: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if model_type:
+             details['model_type'] = model_type
+         super().__init__(message, component="Trainer", details=details)
+
+
+ class TunerError(CollieBaseException):
+     """Raised when hyperparameter tuning fails."""
+
+     def __init__(self, message: str, tuning_method: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if tuning_method:
+             details['tuning_method'] = tuning_method
+         super().__init__(message, component="Tuner", details=details)
+
+
+ class EvaluatorError(CollieBaseException):
+     """Raised when model evaluation fails."""
+
+     def __init__(self, message: str, metric: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if metric:
+             details['metric'] = metric
+         super().__init__(message, component="Evaluator", details=details)
+
+
+ class PusherError(CollieBaseException):
+     """Raised when model pushing/deployment fails."""
+
+     def __init__(self, message: str, deployment_target: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if deployment_target:
+             details['deployment_target'] = deployment_target
+         super().__init__(message, component="Pusher", details=details)
+
+
+ class ModelFlavorError(CollieBaseException):
+     """Raised when model flavor operations fail."""
+
+     def __init__(self, message: str, flavor: str = None, **kwargs):
+         details = kwargs.get('details', {})
+         if flavor:
+             details['flavor'] = flavor
+         super().__init__(message, component="Model Flavor", details=details)
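A brief sketch (not from the package) showing how this hierarchy composes error messages; the failure scenario is invented for illustration.

```python
# Hypothetical use of collie's exception hierarchy; the failure itself is invented.
from collie._common.exceptions import CollieBaseException, TrainerError

try:
    raise TrainerError(
        "Loss became NaN during epoch 3",
        model_type="xgboost",
        details={"epoch": 3, "learning_rate": 0.3},
    )
except CollieBaseException as exc:
    # The base class prefixes the component name and appends the details dict, e.g.:
    # [Trainer] Loss became NaN during epoch 3 Details: {'epoch': 3, 'learning_rate': 0.3, 'model_type': 'xgboost'}
    print(exc)
    print(exc.component, exc.details)
```

Catching CollieBaseException lets callers handle any component failure uniformly while still inspecting the structured `details` dict.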
@@ -0,0 +1,26 @@
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+
+ class FlavorHandler(ABC):
+
+     @abstractmethod
+     def can_handle(self, model: Any) -> bool:
+         raise NotImplementedError
+
+     @abstractmethod
+     def flavor(self):
+         raise NotImplementedError
+
+     @abstractmethod
+     def log_model(
+         self,
+         model: Any,
+         name: str,
+         **kwargs: Any
+     ) -> None:
+         raise NotImplementedError
+
+     @abstractmethod
+     def load_model(self, model_uri: str) -> Any:
+         raise NotImplementedError
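To make the abstract interface concrete, here is a hedged sketch of what a handler for one flavor could look like. It is not the package's implementation (that lives in model_flavors.py, which is not shown in this hunk), and the flavor() return value is an assumption.

```python
# Hypothetical FlavorHandler for scikit-learn models; collie ships its own
# handlers in collie/_common/mlflow_model_io/model_flavors.py, which may differ.
from typing import Any

import mlflow.sklearn
from sklearn.base import BaseEstimator

from collie._common.mlflow_model_io.base_flavor_handler import FlavorHandler


class SklearnFlavorHandler(FlavorHandler):

    def can_handle(self, model: Any) -> bool:
        # Any scikit-learn estimator, fitted or not.
        return isinstance(model, BaseEstimator)

    def flavor(self):
        # Assumed return value; the real registry may expect a ModelFlavor enum member.
        return "sklearn"

    def log_model(self, model: Any, name: str, **kwargs: Any) -> None:
        # Delegate to MLflow's built-in scikit-learn flavor.
        mlflow.sklearn.log_model(model, name, **kwargs)

    def load_model(self, model_uri: str) -> Any:
        return mlflow.sklearn.load_model(model_uri)
```

A registry such as flavor_registry.py would presumably iterate over registered handlers and dispatch to the first one whose can_handle() returns True for the model being logged or loaded.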