expops 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expops-0.1.3.dist-info/METADATA +826 -0
- expops-0.1.3.dist-info/RECORD +86 -0
- expops-0.1.3.dist-info/WHEEL +5 -0
- expops-0.1.3.dist-info/entry_points.txt +3 -0
- expops-0.1.3.dist-info/licenses/LICENSE +674 -0
- expops-0.1.3.dist-info/top_level.txt +1 -0
- mlops/__init__.py +0 -0
- mlops/__main__.py +11 -0
- mlops/_version.py +34 -0
- mlops/adapters/__init__.py +12 -0
- mlops/adapters/base.py +86 -0
- mlops/adapters/config_schema.py +89 -0
- mlops/adapters/custom/__init__.py +3 -0
- mlops/adapters/custom/custom_adapter.py +447 -0
- mlops/adapters/plugin_manager.py +113 -0
- mlops/adapters/sklearn/__init__.py +3 -0
- mlops/adapters/sklearn/adapter.py +94 -0
- mlops/cluster/__init__.py +3 -0
- mlops/cluster/controller.py +496 -0
- mlops/cluster/process_runner.py +91 -0
- mlops/cluster/providers.py +258 -0
- mlops/core/__init__.py +95 -0
- mlops/core/custom_model_base.py +38 -0
- mlops/core/dask_networkx_executor.py +1265 -0
- mlops/core/executor_worker.py +1239 -0
- mlops/core/experiment_tracker.py +81 -0
- mlops/core/graph_types.py +64 -0
- mlops/core/networkx_parser.py +135 -0
- mlops/core/payload_spill.py +278 -0
- mlops/core/pipeline_utils.py +162 -0
- mlops/core/process_hashing.py +216 -0
- mlops/core/step_state_manager.py +1298 -0
- mlops/core/step_system.py +956 -0
- mlops/core/workspace.py +99 -0
- mlops/environment/__init__.py +10 -0
- mlops/environment/base.py +43 -0
- mlops/environment/conda_manager.py +307 -0
- mlops/environment/factory.py +70 -0
- mlops/environment/pyenv_manager.py +146 -0
- mlops/environment/setup_env.py +31 -0
- mlops/environment/system_manager.py +66 -0
- mlops/environment/utils.py +105 -0
- mlops/environment/venv_manager.py +134 -0
- mlops/main.py +527 -0
- mlops/managers/project_manager.py +400 -0
- mlops/managers/reproducibility_manager.py +575 -0
- mlops/platform.py +996 -0
- mlops/reporting/__init__.py +16 -0
- mlops/reporting/context.py +187 -0
- mlops/reporting/entrypoint.py +292 -0
- mlops/reporting/kv_utils.py +77 -0
- mlops/reporting/registry.py +50 -0
- mlops/runtime/__init__.py +9 -0
- mlops/runtime/context.py +34 -0
- mlops/runtime/env_export.py +113 -0
- mlops/storage/__init__.py +12 -0
- mlops/storage/adapters/__init__.py +9 -0
- mlops/storage/adapters/gcp_kv_store.py +778 -0
- mlops/storage/adapters/gcs_object_store.py +96 -0
- mlops/storage/adapters/memory_store.py +240 -0
- mlops/storage/adapters/redis_store.py +438 -0
- mlops/storage/factory.py +199 -0
- mlops/storage/interfaces/__init__.py +6 -0
- mlops/storage/interfaces/kv_store.py +118 -0
- mlops/storage/path_utils.py +38 -0
- mlops/templates/premier-league/charts/plot_metrics.js +70 -0
- mlops/templates/premier-league/charts/plot_metrics.py +145 -0
- mlops/templates/premier-league/charts/requirements.txt +6 -0
- mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
- mlops/templates/premier-league/configs/project_config.yaml +207 -0
- mlops/templates/premier-league/data/England CSV.csv +12154 -0
- mlops/templates/premier-league/models/premier_league_model.py +638 -0
- mlops/templates/premier-league/requirements.txt +8 -0
- mlops/templates/sklearn-basic/README.md +22 -0
- mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
- mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
- mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
- mlops/templates/sklearn-basic/data/train.csv +14 -0
- mlops/templates/sklearn-basic/models/model.py +62 -0
- mlops/templates/sklearn-basic/requirements.txt +10 -0
- mlops/web/__init__.py +3 -0
- mlops/web/server.py +585 -0
- mlops/web/ui/index.html +52 -0
- mlops/web/ui/mlops-charts.js +357 -0
- mlops/web/ui/script.js +1244 -0
- mlops/web/ui/styles.css +248 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class KeyValueEventStore(Protocol):
    """Structural interface for the platform's key/value store and event channel.

    Every method here is a stub; concrete backends provide the behaviour.
    The per-method notes are derived from names and signatures only, so
    treat them as assumptions to confirm against a real implementation.
    """

    # Cache indices (strict-hash match)
    def set_step_cache_record(
        self,
        process_name: str,
        step_name: str,
        input_hash: str,
        config_hash: str,
        function_hash: str | None,
        record: dict[str, Any],
        ttl_seconds: int | None = None,
    ) -> None:
        """Store ``record`` for a step, keyed by its input/config/function hashes.

        ``ttl_seconds`` is presumably an optional expiry - confirm per backend.
        """

    def get_step_cache_path(
        self,
        process_name: str,
        step_name: str,
        input_hash: str | None,
        config_hash: str | None,
        function_hash: str | None,
    ) -> str | None:
        """Return a path string for the matching step cache entry, or ``None``."""

    def get_step_cache_record(
        self,
        process_name: str,
        step_name: str,
        input_hash: str | None,
        config_hash: str | None,
        function_hash: str | None,
    ) -> dict[str, Any] | None:
        """Return the stored record for the matching step cache entry, or ``None``."""

    def set_process_cache_record(
        self,
        process_name: str,
        input_hash: str,
        config_hash: str,
        function_hash: str | None,
        record: dict[str, Any],
        ttl_seconds: int | None = None,
    ) -> None:
        """Store ``record`` for a whole process, keyed by its hashes.

        ``ttl_seconds`` is presumably an optional expiry - confirm per backend.
        """

    def get_process_cache_path(
        self,
        process_name: str,
        input_hash: str | None,
        config_hash: str | None,
        function_hash: str | None,
    ) -> str | None:
        """Return a path string for the matching process cache entry, or ``None``."""

    def get_process_cache_record(
        self,
        process_name: str,
        input_hash: str | None,
        config_hash: str | None,
        function_hash: str | None,
    ) -> dict[str, Any] | None:
        """Return the stored record for the matching process cache entry, or ``None``."""

    # Optional: batched cache lookups (implement when backend supports efficient multi-get)
    def get_process_cache_paths_batch(
        self,
        lookups: list[tuple[str, str | None, str | None, str | None]],
    ) -> dict[str, str | None]:
        """Resolve several process cache paths in one call.

        Each tuple presumably mirrors the arguments of
        ``get_process_cache_path``; the meaning of the result keys is not
        visible here - TODO confirm against an implementation.
        """

    # Run lifecycle + metrics
    def mark_pipeline_started(self, run_id: str) -> None:
        """Record that the pipeline run ``run_id`` has started."""

    def mark_pipeline_completed(self, run_id: str, success: bool) -> None:
        """Record that run ``run_id`` finished, with its ``success`` flag."""

    def get_run_status(self, run_id: str) -> str | None:
        """Return the status string for ``run_id``, or ``None``."""

    # Events
    def publish_event(self, event: dict[str, Any]) -> None:
        """Publish ``event`` (a plain dict; its schema is not defined here)."""

    # Per-run step bookkeeping (for resume/get_step_results)
    def record_run_step(self, run_id: str, process_name: str, step_name: str, record: dict[str, Any]) -> None:
        """Attach ``record`` to the given process/step of run ``run_id``."""

    def list_run_steps(self, run_id: str) -> dict[str, dict[str, Any]]:
        """Return the step records of ``run_id`` as a dict of dicts (key format unspecified here)."""

    # Stats
    def increment_stat(self, run_id: str, name: str, amount: int = 1) -> None:
        """Add ``amount`` (default 1) to the stat ``name`` of run ``run_id``."""

    def get_pipeline_stats(self, run_id: str) -> dict[str, Any]:
        """Return the stats of run ``run_id`` as a dict."""

    # Charts/artifacts index per run (optional but recommended)
    def record_run_chart_artifacts(self, run_id: str, chart_name: str, artifacts: list[dict[str, Any]]) -> None:
        """Index ``artifacts`` under ``chart_name`` for run ``run_id``."""

    def list_run_charts(self, run_id: str) -> dict[str, Any]:
        """Return the chart index of run ``run_id`` as a dict."""

    def copy_run_chart_artifacts(self, from_run_id: str, to_run_id: str, chart_name: str) -> bool:
        """Copy one chart's artifact index between runs.

        The boolean presumably reports whether anything was copied - confirm.
        """

    # Run listing for UI (optional)
    def list_runs(self, limit: int = 100) -> list[str]:
        """Return up to ``limit`` run ids (ordering is backend-defined - confirm)."""

    # Probe metrics (keyed by probe_path)
    def save_probe_metrics_by_path(self, run_id: str, probe_path: str, metrics: dict[str, Any]) -> None:
        """Store ``metrics`` for ``probe_path`` within run ``run_id``."""

    def get_probe_metrics_by_path(self, run_id: str, probe_path: str) -> dict[str, Any]:
        """Return the metrics stored for ``probe_path`` within run ``run_id``."""
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# -------------------- Object storage protocol --------------------
|
|
103
|
+
class ObjectStore(Protocol):
    """Structural interface for binary/object storage backends (e.g., GCS/S3).

    Objects are addressed by opaque URIs such as
    ``gs://bucket/prefix/key.pkl``. All methods are stubs; error behaviour
    (for example on a missing object) is defined by the implementation.
    """

    def put_bytes(self, uri: str, data: bytes, content_type: str | None = None) -> None:
        """Write ``data`` to ``uri``, optionally tagged with ``content_type``."""

    def put_file(self, uri: str, file_path: str, content_type: str | None = None) -> None:
        """Upload the local file at ``file_path`` to ``uri``."""

    def get_bytes(self, uri: str) -> bytes:
        """Return the bytes stored at ``uri``."""

    def exists(self, uri: str) -> bool:
        """Report whether an object exists at ``uri``."""

    def build_uri(self, *parts: str) -> str:
        """Combine ``parts`` into a URI for this store (joining rules are backend-defined)."""
|
|
118
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def encode_probe_path(probe_path: str) -> str:
    """Turn a logical probe path into a Firestore/Redis-safe identifier.

    The path is stringified, UTF-8 encoded and URL-safe base64 encoded. The
    trailing ``=`` padding is dropped and a ``p_`` prefix is added so the
    identifier is never purely numeric.
    """
    token = base64.urlsafe_b64encode(str(probe_path).encode("utf-8"))
    return "p_" + token.decode("ascii").rstrip("=")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def decode_probe_path(encoded_id: str) -> str:
    """Decode an encoded probe_path identifier back to the logical path.

    Args:
        encoded_id: Identifier in the ``p_<urlsafe-base64-without-padding>``
            form. The ``p_`` prefix is optional and removed when present.

    Returns:
        The UTF-8 decoded logical probe path.

    Raises:
        ValueError: If the payload contains characters outside the URL-safe
            base64 alphabet, has an impossible length, or does not decode
            to valid UTF-8.
    """
    payload = str(encoded_id)
    if payload.startswith("p_"):
        payload = payload[2:]
    # Restore the base64 padding that was stripped when the id was built.
    padded = payload + "=" * (-len(payload) % 4)
    try:
        # validate=True makes stray characters an error; the lenient decoder
        # silently drops them, turning a corrupted id into a different path.
        raw = base64.b64decode(padded, altchars=b"-_", validate=True)
        return raw.decode("utf-8")
    except ValueError as e:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
        raise ValueError(f"Invalid encoded probe path id: {encoded_id}") from e
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
"encode_probe_path",
|
|
35
|
+
"decode_probe_path",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { chart } from '/mlops-charts.js';
|
|
2
|
+
|
|
3
|
+
// Dynamic chart: overlays the training loss of every configured NN probe (e.g. NN A vs NN B) on one chart
|
|
4
|
+
chart('nn_losses', (probePaths, ctx, listener) => {
  // Replace whatever the container held with a fresh canvas for Chart.js.
  const canvas = document.createElement('canvas');
  ctx.containerElement.innerHTML = '';
  ctx.containerElement.appendChild(canvas);

  // Line colours; reused cyclically when there are more probes than colours.
  const colors = [
    'rgb(75, 192, 192)',
    'rgb(255, 99, 132)',
    'rgb(54, 162, 235)'
  ];

  // 'rgb(r, g, b)' -> 'rgba(r, g, b, alpha)'. A hex alpha suffix such as '33'
  // is only valid on '#rrggbb' colours; appended to 'rgb(...)' it yields an
  // invalid CSS colour, so the translucent variant is built explicitly.
  const withAlpha = (rgb, alpha) =>
    rgb.replace('rgb(', 'rgba(').replace(')', `, ${alpha})`);

  // One dataset per probe key, in key order. The subscription callback below
  // relies on datasets[i] corresponding to keys[i].
  const keys = Object.keys(probePaths);
  const chartData = {
    labels: [],
    datasets: keys.map((k, idx) => {
      const color = colors[idx % colors.length];
      return {
        label: k.replace(/_/g, ' ').toUpperCase(),
        data: [],
        borderColor: color,
        backgroundColor: withAlpha(color, 0.2),
        tension: 0.1,
        fill: false
      };
    })
  };

  const chartInstance = new Chart(canvas, {
    type: 'line',
    data: chartData,
    options: {
      responsive: true,
      maintainAspectRatio: true,
      scales: {
        x: { title: { display: true, text: 'Epoch' } },
        y: { title: { display: true, text: 'Loss' }, beginAtZero: false }
      },
      plugins: {
        title: { display: true, text: 'NN Training Loss (A/B)' },
        legend: { display: true }
      },
      animation: { duration: 200 }
    }
  });
  ctx.setChartInstance(chartInstance);

  // On every metrics update, rebuild each dataset from its probe's
  // `train_loss` metric and relabel the x axis 1..N for the longest series.
  listener.subscribeAll(probePaths, (allMetrics) => {
    let maxLength = 0;

    chartData.datasets.forEach((dataset, idx) => {
      const metrics = allMetrics[keys[idx]] || {};
      // ctx.toSeries presumably turns the stored metric into an ordered
      // array of points - confirm in mlops-charts.js.
      const lossSeries = ctx.toSeries(metrics.train_loss || {});
      dataset.data = lossSeries;
      maxLength = Math.max(maxLength, lossSeries.length);
    });

    chartData.labels = Array.from({ length: maxLength }, (_, i) => i + 1);

    chartInstance.update();
  });
});
|
|
69
|
+
|
|
70
|
+
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from typing import Dict, Any
|
|
2
|
+
|
|
3
|
+
import matplotlib
|
|
4
|
+
matplotlib.use('Agg')
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from mlops.reporting import chart, ChartContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@chart()
def pca_scree(metrics: Dict[str, Any], ctx: ChartContext) -> None:
    """
    Render a PCA scree plot and save it as ``pca_scree.png``.

    Reads two entries from ``metrics['feat']``:
    - pca_explained_variance_ratio: list[float], drawn as bars
    - pca_cumulative_variance: list[float], drawn as a line on a twin axis

    Nothing is rendered when the explained-variance entry is missing, empty
    or not a list/tuple. The cumulative line is skipped unless it is a
    list/tuple of the same length as the ratios.
    """
    feat = metrics.get('feat', {})
    ratios = feat.get('pca_explained_variance_ratio', [])
    cumulative = metrics.get('feat', {}).get('pca_cumulative_variance', [])

    if not (isinstance(ratios, (list, tuple)) and len(ratios) > 0):
        return

    # Components are numbered from 1 on the x axis.
    components = np.arange(1, len(ratios) + 1)

    fig, bar_axis = plt.subplots(figsize=(10, 4))
    bar_axis.bar(components, ratios, color='steelblue', alpha=0.7, label='Explained Variance Ratio')
    bar_axis.set_xlabel('Principal Component')
    bar_axis.set_ylabel('Explained Variance Ratio')
    bar_axis.grid(True, axis='y', alpha=0.3)

    # The twin axis is always created, even if no cumulative line is drawn.
    line_axis = bar_axis.twinx()
    has_matching_cumulative = isinstance(cumulative, (list, tuple)) and len(cumulative) == len(ratios)
    if has_matching_cumulative:
        line_axis.plot(components, cumulative, color='coral', marker='o', label='Cumulative Variance')
        line_axis.set_ylabel('Cumulative Variance')

    fig.tight_layout()
    ctx.savefig('pca_scree.png', dpi=150, fig=fig)
    plt.close(fig)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@chart()
def goals_distribution(metrics: Dict[str, Any], ctx: ChartContext) -> None:
    """
    Render side-by-side histograms of home and away goals and save them as
    ``goals_distribution.png``.

    Reads two entries from ``metrics['feat']``:
    - goals_hist_home: dict[str, int]
    - goals_hist_away: dict[str, int]

    Nothing is rendered when both histograms are missing or empty.
    """
    feat = metrics.get('feat', {})
    home_hist = feat.get('goals_hist_home', {}) or {}
    away_hist = feat.get('goals_hist_away', {}) or {}

    if not (home_hist or away_hist):
        return

    # Union of the goal counts seen on either side, in numeric order.
    goal_values = sorted({int(k) for k in home_hist} | {int(k) for k in away_hist})

    def counts_for(hist):
        # Histogram keys are looked up by their string form; absent -> 0.
        return [int(hist.get(str(goals), 0)) for goals in goal_values]

    home_counts = counts_for(home_hist)
    away_counts = counts_for(away_hist)

    positions = np.arange(len(goal_values))
    bar_width = 0.4

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.bar(positions - bar_width/2, home_counts, bar_width, label='Home Goals', color='slateblue')
    ax.bar(positions + bar_width/2, away_counts, bar_width, label='Away Goals', color='seagreen')
    ax.set_xticks(positions, [str(goals) for goals in goal_values])
    ax.set_xlabel('Goals')
    ax.set_ylabel('Count')
    ax.set_title('Goals Distribution (Home vs Away)')
    ax.legend()
    ax.grid(True, axis='y', alpha=0.3)

    fig.tight_layout()
    ctx.savefig('goals_distribution.png', dpi=150, fig=fig)
    plt.close(fig)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@chart()
def test_metrics_comparison(metrics: Dict[str, Any], ctx: ChartContext) -> None:
    """
    Render grouped bars comparing accuracy, precision and F1 across models
    and save them as ``test_metrics_comparison.png``.

    Looks up the keys ``linear``, ``nn_best``, ``xgb_best`` and ``ensemble``
    in ``metrics``, each expected to hold ``test_accuracy``,
    ``test_precision`` and ``test_f1``. A model with none of the three is
    left out; a model with only some of them gets 0.0 for the missing ones.
    Nothing is rendered when no model has any value.
    """

    def get_value(data):
        # A non-empty dict is treated as a series: entries are ordered by
        # their integer key (non-digit keys sort as 0) and the last wins.
        if isinstance(data, dict) and data:
            def step_of(item):
                step = item[0]
                return int(step) if str(step).isdigit() else 0

            ordered = sorted(data.items(), key=step_of)
            return float(ordered[-1][1])
        # A bare number is used as-is.
        if isinstance(data, (int, float)):
            return float(data)
        return None

    groups = {
        'Linear': metrics.get('linear', {}),
        'NN (Best)': metrics.get('nn_best', {}),
        'XGB (Best)': metrics.get('xgb_best', {}),
        'Ensemble': metrics.get('ensemble', {}),
    }

    labels, accs, precs, f1s = [], [], [], []
    for label, model_metrics in groups.items():
        acc = get_value(model_metrics.get('test_accuracy'))
        prec = get_value(model_metrics.get('test_precision'))
        f1 = get_value(model_metrics.get('test_f1'))
        if acc is None and prec is None and f1 is None:
            continue
        labels.append(label)
        accs.append(0.0 if acc is None else acc)
        precs.append(0.0 if prec is None else prec)
        f1s.append(0.0 if f1 is None else f1)

    if not labels:
        return

    positions = np.arange(len(labels))
    bar_width = 0.25
    fig, ax = plt.subplots(figsize=(12, 4))
    acc_bars = ax.bar(positions - bar_width, accs, bar_width, label='Accuracy', color='steelblue')
    prec_bars = ax.bar(positions, precs, bar_width, label='Precision (macro)', color='mediumseagreen')
    f1_bars = ax.bar(positions + bar_width, f1s, bar_width, label='F1 (macro)', color='coral')

    ax.set_ylabel('Score')
    ax.set_xticks(positions, labels)
    ax.set_ylim(0.0, 1.0)
    ax.grid(True, axis='y', alpha=0.3)
    ax.legend(loc='upper left')

    # Write each bar's value just above its top edge.
    for bar in (*acc_bars, *prec_bars, *f1_bars):
        value = bar.get_height()
        ax.annotate(f'{value:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, value),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8)

    fig.tight_layout()
    ctx.savefig('test_metrics_comparison.png', dpi=150, fig=fig)
    plt.close(fig)
|
|
144
|
+
|
|
145
|
+
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
metadata:
|
|
2
|
+
name: "premier-league"
|
|
3
|
+
description: "Premier League 3-class classification with generic FE, NN A/B, XGB A/B, logistic baseline, and ensemble"
|
|
4
|
+
version: "1.0.0"
|
|
5
|
+
|
|
6
|
+
environment:
|
|
7
|
+
venv:
|
|
8
|
+
name: "premier-league-env"
|
|
9
|
+
requirements_file: "projects/premier-league/requirements.txt"
|
|
10
|
+
reporting:
|
|
11
|
+
name: "premier-league-env-reporting"
|
|
12
|
+
requirements_file: "projects/premier-league/charts/requirements.txt"
|
|
13
|
+
|
|
14
|
+
reproducibility:
|
|
15
|
+
random_seed: 43
|
|
16
|
+
|
|
17
|
+
model:
|
|
18
|
+
framework: "custom"
|
|
19
|
+
language: "python"
|
|
20
|
+
name: "premier_league_model"
|
|
21
|
+
version: "1.0.0"
|
|
22
|
+
parameters:
|
|
23
|
+
custom_script_path: "projects/premier-league/models/premier_league_model.py"
|
|
24
|
+
cache:
|
|
25
|
+
backend:
|
|
26
|
+
type: gcp
|
|
27
|
+
gcp_project: mlops-platform-470017
|
|
28
|
+
credentials_json: keys/firestore.json
|
|
29
|
+
object_store:
|
|
30
|
+
type: gcs
|
|
31
|
+
bucket: mlops-platform123
|
|
32
|
+
prefix: projects/premier-league/cache/steps
|
|
33
|
+
executor:
|
|
34
|
+
n_workers: 4
|
|
35
|
+
hyperparameters:
|
|
36
|
+
test_size: 0.2
|
|
37
|
+
pca_components: 21
|
|
38
|
+
|
|
39
|
+
pipeline:
|
|
40
|
+
process_adjlist: |
|
|
41
|
+
feature_engineering_generic preprocess_linear_nn
|
|
42
|
+
feature_engineering_generic preprocess_xgb
|
|
43
|
+
feature_engineering_generic pca_scree
|
|
44
|
+
feature_engineering_generic goals_distribution
|
|
45
|
+
preprocess_linear_nn linear_training
|
|
46
|
+
preprocess_linear_nn nn_training_a
|
|
47
|
+
preprocess_linear_nn nn_training_b
|
|
48
|
+
preprocess_xgb xgb_training_a
|
|
49
|
+
preprocess_xgb xgb_training_b
|
|
50
|
+
linear_training linear_inference
|
|
51
|
+
nn_training_a nn_inference_a
|
|
52
|
+
nn_training_b nn_inference_b
|
|
53
|
+
xgb_training_a xgb_inference_a
|
|
54
|
+
xgb_training_b xgb_inference_b
|
|
55
|
+
nn_inference_a nn_best_selection
|
|
56
|
+
nn_inference_b nn_best_selection
|
|
57
|
+
xgb_inference_a xgb_best_selection
|
|
58
|
+
xgb_inference_b xgb_best_selection
|
|
59
|
+
nn_best_selection nn_best_inference
|
|
60
|
+
xgb_best_selection xgb_best_inference
|
|
61
|
+
linear_training ensemble_inference
|
|
62
|
+
nn_best_selection ensemble_inference
|
|
63
|
+
xgb_best_selection ensemble_inference
|
|
64
|
+
linear_inference test_metrics_comparison
|
|
65
|
+
nn_best_inference test_metrics_comparison
|
|
66
|
+
xgb_best_inference test_metrics_comparison
|
|
67
|
+
ensemble_inference test_metrics_comparison
|
|
68
|
+
|
|
69
|
+
processes:
|
|
70
|
+
- name: "feature_engineering_generic"
|
|
71
|
+
description: "Generic FE: parse dates, derive labels (H/D/A), stratified indices"
|
|
72
|
+
code_function: "define_feature_engineering_generic_process"
|
|
73
|
+
|
|
74
|
+
- name: "preprocess_linear_nn"
|
|
75
|
+
description: "Preprocess for linear/nn: OHE + StandardScaler"
|
|
76
|
+
code_function: "define_preprocess_linear_nn_process"
|
|
77
|
+
|
|
78
|
+
- name: "preprocess_xgb"
|
|
79
|
+
description: "Preprocess for xgb: OHE only"
|
|
80
|
+
code_function: "define_preprocess_xgb_process"
|
|
81
|
+
|
|
82
|
+
- name: "linear_training"
|
|
83
|
+
description: "Train a multinomial Logistic Regression classifier"
|
|
84
|
+
code_function: "define_linear_training_process"
|
|
85
|
+
|
|
86
|
+
- name: "nn_training_a"
|
|
87
|
+
description: "Train NN classifier branch A"
|
|
88
|
+
code_function: "define_nn_training_process"
|
|
89
|
+
hyperparameters:
|
|
90
|
+
nn_params:
|
|
91
|
+
hidden_layers: [128, 64]
|
|
92
|
+
learning_rate: 0.001
|
|
93
|
+
epochs: 50
|
|
94
|
+
|
|
95
|
+
- name: "nn_training_b"
|
|
96
|
+
description: "Train NN classifier branch B"
|
|
97
|
+
code_function: "define_nn_training_process"
|
|
98
|
+
hyperparameters:
|
|
99
|
+
nn_params:
|
|
100
|
+
hidden_layers: [256, 128, 64]
|
|
101
|
+
learning_rate: 0.0008
|
|
102
|
+
epochs: 50
|
|
103
|
+
|
|
104
|
+
- name: "xgb_training_a"
|
|
105
|
+
description: "Train an XGBoost classifier (branch A)"
|
|
106
|
+
code_function: "define_xgb_training_process"
|
|
107
|
+
hyperparameters:
|
|
108
|
+
xgb_params:
|
|
109
|
+
n_estimators: 350
|
|
110
|
+
max_depth: 4
|
|
111
|
+
learning_rate: 0.1
|
|
112
|
+
subsample: 0.8
|
|
113
|
+
colsample_bytree: 0.9
|
|
114
|
+
|
|
115
|
+
- name: "xgb_training_b"
|
|
116
|
+
description: "Train an XGBoost classifier (branch B) with different hyperparameters"
|
|
117
|
+
code_function: "define_xgb_training_process"
|
|
118
|
+
hyperparameters:
|
|
119
|
+
xgb_params:
|
|
120
|
+
n_estimators: 500
|
|
121
|
+
max_depth: 8
|
|
122
|
+
learning_rate: 0.05
|
|
123
|
+
subsample: 0.8
|
|
124
|
+
colsample_bytree: 0.8
|
|
125
|
+
|
|
126
|
+
- name: "nn_inference_a"
|
|
127
|
+
description: "Run test inference for NN branch A and compute classification metrics"
|
|
128
|
+
code_function: "define_nn_inference_process"
|
|
129
|
+
hyperparameters:
|
|
130
|
+
train_key: nn_training_a
|
|
131
|
+
|
|
132
|
+
- name: "nn_inference_b"
|
|
133
|
+
description: "Run test inference for NN branch B and compute classification metrics"
|
|
134
|
+
code_function: "define_nn_inference_process"
|
|
135
|
+
hyperparameters:
|
|
136
|
+
train_key: nn_training_b
|
|
137
|
+
|
|
138
|
+
- name: "xgb_inference_a"
|
|
139
|
+
description: "Run test inference for XGBoost branch A and compute classification metrics"
|
|
140
|
+
code_function: "define_xgb_inference_process"
|
|
141
|
+
hyperparameters:
|
|
142
|
+
train_key: xgb_training_a
|
|
143
|
+
|
|
144
|
+
- name: "xgb_inference_b"
|
|
145
|
+
description: "Run test inference for XGBoost branch B and compute classification metrics"
|
|
146
|
+
code_function: "define_xgb_inference_process"
|
|
147
|
+
hyperparameters:
|
|
148
|
+
train_key: xgb_training_b
|
|
149
|
+
|
|
150
|
+
- name: "linear_inference"
|
|
151
|
+
description: "Run test inference for Logistic Regression and compute classification metrics"
|
|
152
|
+
code_function: "define_linear_inference_process"
|
|
153
|
+
|
|
154
|
+
- name: "nn_best_selection"
|
|
155
|
+
description: "Select best NN by F1 from A/B"
|
|
156
|
+
code_function: "define_select_best_nn_process"
|
|
157
|
+
|
|
158
|
+
- name: "xgb_best_selection"
|
|
159
|
+
description: "Select best XGB by F1 from A/B"
|
|
160
|
+
code_function: "define_select_best_xgb_process"
|
|
161
|
+
|
|
162
|
+
- name: "nn_best_inference"
|
|
163
|
+
description: "Stable best NN inference for charts"
|
|
164
|
+
code_function: "define_nn_best_inference_process"
|
|
165
|
+
|
|
166
|
+
- name: "xgb_best_inference"
|
|
167
|
+
description: "Stable best XGB inference for charts"
|
|
168
|
+
code_function: "define_xgb_best_inference_process"
|
|
169
|
+
|
|
170
|
+
- name: "ensemble_inference"
|
|
171
|
+
description: "Weighted soft-vote ensemble across Logistic, best NN, best XGB"
|
|
172
|
+
code_function: "define_ensemble_inference_process"
|
|
173
|
+
|
|
174
|
+
- name: "pca_scree"
|
|
175
|
+
type: chart
|
|
176
|
+
description: "Render PCA scree plot"
|
|
177
|
+
|
|
178
|
+
- name: "goals_distribution"
|
|
179
|
+
type: chart
|
|
180
|
+
description: "Render goals distribution plot"
|
|
181
|
+
|
|
182
|
+
- name: "test_metrics_comparison"
|
|
183
|
+
type: chart
|
|
184
|
+
description: "Compare classification metrics across models and ensemble"
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
reporting:
|
|
188
|
+
static_entrypoint: "projects/premier-league/charts/plot_metrics.py"
|
|
189
|
+
dynamic_entrypoint: "projects/premier-league/charts/plot_metrics.js"
|
|
190
|
+
charts:
|
|
191
|
+
- name: "pca_scree"
|
|
192
|
+
probe_paths:
|
|
193
|
+
feat: "feature_engineering_generic/feature_analysis"
|
|
194
|
+
- name: "goals_distribution"
|
|
195
|
+
probe_paths:
|
|
196
|
+
feat: "feature_engineering_generic/derive_labels_and_indices"
|
|
197
|
+
- name: "test_metrics_comparison"
|
|
198
|
+
probe_paths:
|
|
199
|
+
linear: "linear_inference/test_inference_classification"
|
|
200
|
+
nn_best: "nn_best_inference/test_inference_classification"
|
|
201
|
+
xgb_best: "xgb_best_inference/test_inference_classification"
|
|
202
|
+
ensemble: "ensemble_inference"
|
|
203
|
+
- name: "nn_losses"
|
|
204
|
+
type: dynamic
|
|
205
|
+
probe_paths:
|
|
206
|
+
nn_a: "nn_training_a/train_and_evaluate_nn_classifier"
|
|
207
|
+
nn_b: "nn_training_b/train_and_evaluate_nn_classifier"
|