expops-0.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. expops-0.1.3.dist-info/METADATA +826 -0
  2. expops-0.1.3.dist-info/RECORD +86 -0
  3. expops-0.1.3.dist-info/WHEEL +5 -0
  4. expops-0.1.3.dist-info/entry_points.txt +3 -0
  5. expops-0.1.3.dist-info/licenses/LICENSE +674 -0
  6. expops-0.1.3.dist-info/top_level.txt +1 -0
  7. mlops/__init__.py +0 -0
  8. mlops/__main__.py +11 -0
  9. mlops/_version.py +34 -0
  10. mlops/adapters/__init__.py +12 -0
  11. mlops/adapters/base.py +86 -0
  12. mlops/adapters/config_schema.py +89 -0
  13. mlops/adapters/custom/__init__.py +3 -0
  14. mlops/adapters/custom/custom_adapter.py +447 -0
  15. mlops/adapters/plugin_manager.py +113 -0
  16. mlops/adapters/sklearn/__init__.py +3 -0
  17. mlops/adapters/sklearn/adapter.py +94 -0
  18. mlops/cluster/__init__.py +3 -0
  19. mlops/cluster/controller.py +496 -0
  20. mlops/cluster/process_runner.py +91 -0
  21. mlops/cluster/providers.py +258 -0
  22. mlops/core/__init__.py +95 -0
  23. mlops/core/custom_model_base.py +38 -0
  24. mlops/core/dask_networkx_executor.py +1265 -0
  25. mlops/core/executor_worker.py +1239 -0
  26. mlops/core/experiment_tracker.py +81 -0
  27. mlops/core/graph_types.py +64 -0
  28. mlops/core/networkx_parser.py +135 -0
  29. mlops/core/payload_spill.py +278 -0
  30. mlops/core/pipeline_utils.py +162 -0
  31. mlops/core/process_hashing.py +216 -0
  32. mlops/core/step_state_manager.py +1298 -0
  33. mlops/core/step_system.py +956 -0
  34. mlops/core/workspace.py +99 -0
  35. mlops/environment/__init__.py +10 -0
  36. mlops/environment/base.py +43 -0
  37. mlops/environment/conda_manager.py +307 -0
  38. mlops/environment/factory.py +70 -0
  39. mlops/environment/pyenv_manager.py +146 -0
  40. mlops/environment/setup_env.py +31 -0
  41. mlops/environment/system_manager.py +66 -0
  42. mlops/environment/utils.py +105 -0
  43. mlops/environment/venv_manager.py +134 -0
  44. mlops/main.py +527 -0
  45. mlops/managers/project_manager.py +400 -0
  46. mlops/managers/reproducibility_manager.py +575 -0
  47. mlops/platform.py +996 -0
  48. mlops/reporting/__init__.py +16 -0
  49. mlops/reporting/context.py +187 -0
  50. mlops/reporting/entrypoint.py +292 -0
  51. mlops/reporting/kv_utils.py +77 -0
  52. mlops/reporting/registry.py +50 -0
  53. mlops/runtime/__init__.py +9 -0
  54. mlops/runtime/context.py +34 -0
  55. mlops/runtime/env_export.py +113 -0
  56. mlops/storage/__init__.py +12 -0
  57. mlops/storage/adapters/__init__.py +9 -0
  58. mlops/storage/adapters/gcp_kv_store.py +778 -0
  59. mlops/storage/adapters/gcs_object_store.py +96 -0
  60. mlops/storage/adapters/memory_store.py +240 -0
  61. mlops/storage/adapters/redis_store.py +438 -0
  62. mlops/storage/factory.py +199 -0
  63. mlops/storage/interfaces/__init__.py +6 -0
  64. mlops/storage/interfaces/kv_store.py +118 -0
  65. mlops/storage/path_utils.py +38 -0
  66. mlops/templates/premier-league/charts/plot_metrics.js +70 -0
  67. mlops/templates/premier-league/charts/plot_metrics.py +145 -0
  68. mlops/templates/premier-league/charts/requirements.txt +6 -0
  69. mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
  70. mlops/templates/premier-league/configs/project_config.yaml +207 -0
  71. mlops/templates/premier-league/data/England CSV.csv +12154 -0
  72. mlops/templates/premier-league/models/premier_league_model.py +638 -0
  73. mlops/templates/premier-league/requirements.txt +8 -0
  74. mlops/templates/sklearn-basic/README.md +22 -0
  75. mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
  76. mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
  77. mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
  78. mlops/templates/sklearn-basic/data/train.csv +14 -0
  79. mlops/templates/sklearn-basic/models/model.py +62 -0
  80. mlops/templates/sklearn-basic/requirements.txt +10 -0
  81. mlops/web/__init__.py +3 -0
  82. mlops/web/server.py +585 -0
  83. mlops/web/ui/index.html +52 -0
  84. mlops/web/ui/mlops-charts.js +357 -0
  85. mlops/web/ui/script.js +1244 -0
  86. mlops/web/ui/styles.css +248 -0
@@ -0,0 +1,118 @@
+ from __future__ import annotations
+
+ from typing import Any, Optional, Protocol
+
+
+ class KeyValueEventStore(Protocol):
+     """Key/value store + events interface used by the platform."""
+
+     # Cache indices (strict-hash match)
+     def set_step_cache_record(
+         self,
+         process_name: str,
+         step_name: str,
+         input_hash: str,
+         config_hash: str,
+         function_hash: str | None,
+         record: dict[str, Any],
+         ttl_seconds: int | None = None,
+     ) -> None: ...
+
+     def get_step_cache_path(
+         self,
+         process_name: str,
+         step_name: str,
+         input_hash: str | None,
+         config_hash: str | None,
+         function_hash: str | None,
+     ) -> str | None: ...
+
+     def get_step_cache_record(
+         self,
+         process_name: str,
+         step_name: str,
+         input_hash: str | None,
+         config_hash: str | None,
+         function_hash: str | None,
+     ) -> dict[str, Any] | None: ...
+
+     def set_process_cache_record(
+         self,
+         process_name: str,
+         input_hash: str,
+         config_hash: str,
+         function_hash: str | None,
+         record: dict[str, Any],
+         ttl_seconds: int | None = None,
+     ) -> None: ...
+
+     def get_process_cache_path(
+         self,
+         process_name: str,
+         input_hash: str | None,
+         config_hash: str | None,
+         function_hash: str | None,
+     ) -> str | None: ...
+
+     def get_process_cache_record(
+         self,
+         process_name: str,
+         input_hash: str | None,
+         config_hash: str | None,
+         function_hash: str | None,
+     ) -> dict[str, Any] | None: ...
+
+     # Optional: batched cache lookups (implement when backend supports efficient multi-get)
+     def get_process_cache_paths_batch(
+         self,
+         lookups: list[tuple[str, str | None, str | None, str | None]],
+     ) -> dict[str, str | None]: ...
+
+     # Run lifecycle + metrics
+     def mark_pipeline_started(self, run_id: str) -> None: ...
+     def mark_pipeline_completed(self, run_id: str, success: bool) -> None: ...
+     def get_run_status(self, run_id: str) -> str | None: ...
+
+     # Events
+     def publish_event(self, event: dict[str, Any]) -> None: ...
+
+     # Per-run step bookkeeping (for resume/get_step_results)
+     def record_run_step(self, run_id: str, process_name: str, step_name: str, record: dict[str, Any]) -> None: ...
+     def list_run_steps(self, run_id: str) -> dict[str, dict[str, Any]]: ...
+
+     # Stats
+     def increment_stat(self, run_id: str, name: str, amount: int = 1) -> None: ...
+     def get_pipeline_stats(self, run_id: str) -> dict[str, Any]: ...
+
+     # Charts/artifacts index per run (optional but recommended)
+     def record_run_chart_artifacts(self, run_id: str, chart_name: str, artifacts: list[dict[str, Any]]) -> None: ...
+     def list_run_charts(self, run_id: str) -> dict[str, Any]: ...
+     def copy_run_chart_artifacts(self, from_run_id: str, to_run_id: str, chart_name: str) -> bool: ...
+
+     # Run listing for UI (optional)
+     def list_runs(self, limit: int = 100) -> list[str]: ...
+
+     # Probe metrics (keyed by probe_path)
+     def save_probe_metrics_by_path(self, run_id: str, probe_path: str, metrics: dict[str, Any]) -> None: ...
+     def get_probe_metrics_by_path(self, run_id: str, probe_path: str) -> dict[str, Any]: ...
+
+
+
+
+ # -------------------- Object storage protocol --------------------
+ class ObjectStore(Protocol):
+     """Abstraction for binary/object storage backends (e.g., GCS/S3).
+
+     Implementations operate on opaque URIs (e.g., gs://bucket/prefix/key.pkl).
+     """
+
+     def put_bytes(self, uri: str, data: bytes, content_type: str | None = None) -> None: ...
+
+     def put_file(self, uri: str, file_path: str, content_type: str | None = None) -> None: ...
+
+     def get_bytes(self, uri: str) -> bytes: ...
+
+     def exists(self, uri: str) -> bool: ...
+
+     def build_uri(self, *parts: str) -> str: ...
+
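The 118-line hunk above matches mlops/storage/interfaces/kv_store.py in the file list and defines the two typing.Protocol contracts that the storage adapters implement. Because these are Protocol classes, an adapter satisfies them structurally rather than by inheritance. Below is a minimal sketch (not part of the wheel) of an in-memory object that covers only the run-lifecycle, events, stats, and probe-metrics subset of KeyValueEventStore; the package's real adapters are mlops/storage/adapters/memory_store.py, redis_store.py, and gcp_kv_store.py, which are not reproduced here.

# Minimal sketch, not part of the package: an in-memory object that structurally
# satisfies a subset of KeyValueEventStore (Protocol classes need no inheritance).
# Method names and signatures are taken from the protocol above; everything else
# is illustrative.
from __future__ import annotations

from collections import defaultdict
from typing import Any


class InMemoryRunStore:
    def __init__(self) -> None:
        self._status: dict[str, str] = {}
        self._stats: dict[str, dict[str, int]] = defaultdict(dict)
        self._probe_metrics: dict[tuple[str, str], dict[str, Any]] = {}
        self._events: list[dict[str, Any]] = []

    # Run lifecycle
    def mark_pipeline_started(self, run_id: str) -> None:
        self._status[run_id] = "running"

    def mark_pipeline_completed(self, run_id: str, success: bool) -> None:
        self._status[run_id] = "completed" if success else "failed"

    def get_run_status(self, run_id: str) -> str | None:
        return self._status.get(run_id)

    # Events
    def publish_event(self, event: dict[str, Any]) -> None:
        self._events.append(event)

    # Stats
    def increment_stat(self, run_id: str, name: str, amount: int = 1) -> None:
        self._stats[run_id][name] = self._stats[run_id].get(name, 0) + amount

    def get_pipeline_stats(self, run_id: str) -> dict[str, Any]:
        return dict(self._stats.get(run_id, {}))

    # Probe metrics (keyed by probe_path)
    def save_probe_metrics_by_path(self, run_id: str, probe_path: str, metrics: dict[str, Any]) -> None:
        self._probe_metrics[(run_id, probe_path)] = dict(metrics)

    def get_probe_metrics_by_path(self, run_id: str, probe_path: str) -> dict[str, Any]:
        return self._probe_metrics.get((run_id, probe_path), {})


# Tiny usage walk-through of the subset implemented above.
store = InMemoryRunStore()
store.mark_pipeline_started("run-001")
store.increment_stat("run-001", "steps_completed")
store.save_probe_metrics_by_path("run-001", "nn_training_a/train_and_evaluate_nn_classifier",
                                 {"train_loss": {"1": 0.9, "2": 0.7}})
store.mark_pipeline_completed("run-001", success=True)
print(store.get_run_status("run-001"), store.get_pipeline_stats("run-001"))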
@@ -0,0 +1,38 @@
+ from __future__ import annotations
+
+ import base64
+
+
+ def encode_probe_path(probe_path: str) -> str:
+     """Encode a logical probe_path into a Firestore/Redis-safe identifier.
+
+     Uses URL-safe base64 without padding and a small prefix to avoid pure-numeric IDs.
+     """
+     raw = str(probe_path).encode("utf-8")
+     enc = base64.urlsafe_b64encode(raw).decode("ascii").rstrip("=")
+     return f"p_{enc}"
+
+
+ def decode_probe_path(encoded_id: str) -> str:
+     """Decode an encoded probe_path identifier back to the logical path.
+
+     Raises ValueError if the identifier cannot be decoded.
+     """
+     payload = str(encoded_id)
+     if payload.startswith("p_"):
+         payload = payload[2:]
+     # Restore base64 padding
+     pad = "=" * (-len(payload) % 4)
+     try:
+         raw = base64.urlsafe_b64decode(payload + pad)
+         return raw.decode("utf-8")
+     except Exception as e:
+         raise ValueError(f"Invalid encoded probe path id: {encoded_id}") from e
+
+
+ __all__ = [
+     "encode_probe_path",
+     "decode_probe_path",
+ ]
+
+
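This 38-line hunk corresponds to mlops/storage/path_utils.py in the file list. A short round-trip sketch, assuming the wheel is installed (the module path is inferred from the file list; the function names come from the hunk itself):

from mlops.storage.path_utils import decode_probe_path, encode_probe_path

probe_path = "nn_training_a/train_and_evaluate_nn_classifier"
encoded = encode_probe_path(probe_path)

# The identifier is "p_" plus URL-safe base64 of the path with "=" padding
# stripped, so it contains no "/" or "=" and is safe to use as a Firestore
# document id or a Redis key segment.
assert encoded.startswith("p_") and "=" not in encoded and "/" not in encoded
assert decode_probe_path(encoded) == probe_path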
@@ -0,0 +1,70 @@
+ import { chart } from '/mlops-charts.js';
+
+ // Dynamic chart: NN A vs NN B vs NN C losses on same chart
+ chart('nn_losses', (probePaths, ctx, listener) => {
+   const canvas = document.createElement('canvas');
+   ctx.containerElement.innerHTML = '';
+   ctx.containerElement.appendChild(canvas);
+
+   const colors = [
+     'rgb(75, 192, 192)',
+     'rgb(255, 99, 132)',
+     'rgb(54, 162, 235)'
+   ];
+
+   const chartData = {
+     labels: [],
+     datasets: []
+   };
+
+   let colorIndex = 0;
+   const keys = Object.keys(probePaths);
+   keys.forEach((k) => {
+     chartData.datasets.push({
+       label: k.replace(/_/g, ' ').toUpperCase(),
+       data: [],
+       borderColor: colors[colorIndex % colors.length],
+       backgroundColor: colors[colorIndex % colors.length] + '33',
+       tension: 0.1,
+       fill: false
+     });
+     colorIndex++;
+   });
+
+   const chartInstance = new Chart(canvas, {
+     type: 'line',
+     data: chartData,
+     options: {
+       responsive: true,
+       maintainAspectRatio: true,
+       scales: {
+         x: { title: { display: true, text: 'Epoch' } },
+         y: { title: { display: true, text: 'Loss' }, beginAtZero: false }
+       },
+       plugins: {
+         title: { display: true, text: 'NN Training Loss (A/B)' },
+         legend: { display: true }
+       },
+       animation: { duration: 200 }
+     }
+   });
+   ctx.setChartInstance(chartInstance);
+
+   listener.subscribeAll(probePaths, (allMetrics) => {
+     let maxLength = 0;
+
+     chartData.datasets.forEach((dataset, idx) => {
+       const probeKey = keys[idx];
+       const metrics = allMetrics[probeKey] || {};
+       const lossSeries = ctx.toSeries(metrics.train_loss || {});
+       dataset.data = lossSeries;
+       maxLength = Math.max(maxLength, lossSeries.length);
+     });
+
+     chartData.labels = Array.from({ length: maxLength }, (_, i) => i + 1);
+
+     chartInstance.update();
+   });
+ });
+
+
@@ -0,0 +1,145 @@
+ from typing import Dict, Any
+
+ import matplotlib
+ matplotlib.use('Agg')
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ from mlops.reporting import chart, ChartContext
+
+
+ @chart()
+ def pca_scree(metrics: Dict[str, Any], ctx: ChartContext) -> None:
+     """
+     Static chart showing PCA explained variance ratio and cumulative variance.
+     Expects metrics from feature_engineering/encode_and_pca step containing:
+     - pca_explained_variance_ratio: list[float]
+     - pca_cumulative_variance: list[float]
+     """
+
+     evr = metrics.get('feat', {}).get('pca_explained_variance_ratio', [])
+     cum = metrics.get('feat', {}).get('pca_cumulative_variance', [])
+
+     if not isinstance(evr, (list, tuple)) or len(evr) == 0:
+         return
+
+     xs = np.arange(1, len(evr) + 1)
+     fig, ax1 = plt.subplots(figsize=(10, 4))
+     ax1.bar(xs, evr, color='steelblue', alpha=0.7, label='Explained Variance Ratio')
+     ax1.set_xlabel('Principal Component')
+     ax1.set_ylabel('Explained Variance Ratio')
+     ax1.grid(True, axis='y', alpha=0.3)
+
+     ax2 = ax1.twinx()
+     if isinstance(cum, (list, tuple)) and len(cum) == len(evr):
+         ax2.plot(xs, cum, color='coral', marker='o', label='Cumulative Variance')
+         ax2.set_ylabel('Cumulative Variance')
+
+     fig.tight_layout()
+     ctx.savefig('pca_scree.png', dpi=150, fig=fig)
+     plt.close(fig)
+
+
+ @chart()
+ def goals_distribution(metrics: Dict[str, Any], ctx: ChartContext) -> None:
+     """
+     Static chart showing histograms of home and away goals.
+     Expects metrics from feature_engineering/encode_and_pca step containing:
+     - goals_hist_home: dict[str, int]
+     - goals_hist_away: dict[str, int]
+     """
+
+     feat_metrics = metrics.get('feat', {})
+     g_home = feat_metrics.get('goals_hist_home', {}) or {}
+     g_away = feat_metrics.get('goals_hist_away', {}) or {}
+
+     if not g_home and not g_away:
+         return
+
+     keys = sorted(set([int(k) for k in g_home.keys()] + [int(k) for k in g_away.keys()]))
+     vals_home = [int(g_home.get(str(k), 0)) for k in keys]
+     vals_away = [int(g_away.get(str(k), 0)) for k in keys]
+
+     x = np.arange(len(keys))
+     width = 0.4
+     fig, ax = plt.subplots(figsize=(10, 4))
+     ax.bar(x - width/2, vals_home, width, label='Home Goals', color='slateblue')
+     ax.bar(x + width/2, vals_away, width, label='Away Goals', color='seagreen')
+     ax.set_xticks(x, [str(k) for k in keys])
+     ax.set_xlabel('Goals')
+     ax.set_ylabel('Count')
+     ax.set_title('Goals Distribution (Home vs Away)')
+     ax.legend()
+     ax.grid(True, axis='y', alpha=0.3)
+     fig.tight_layout()
+     ctx.savefig('goals_distribution.png', dpi=150, fig=fig)
+     plt.close(fig)
+
+
+ @chart()
+ def test_metrics_comparison(metrics: Dict[str, Any], ctx: ChartContext) -> None:
+     """
+     Static chart comparing classification metrics across baseline/best models and ensemble.
+     Expected keys: linear, nn_best, xgb_best, ensemble; each with test_accuracy, test_precision, test_f1.
+     """
+
+     def get_value(data):
+         if isinstance(data, dict) and data:
+             items = sorted(data.items(), key=lambda x: int(x[0]) if str(x[0]).isdigit() else 0)
+             return float(items[-1][1]) if items else None
+         if isinstance(data, (int, float)):
+             return float(data)
+         return None
+
+     groups = {
+         'Linear': metrics.get('linear', {}),
+         'NN (Best)': metrics.get('nn_best', {}),
+         'XGB (Best)': metrics.get('xgb_best', {}),
+         'Ensemble': metrics.get('ensemble', {}),
+     }
+
+     labels = []
+     accs = []
+     precs = []
+     f1s = []
+     for label, m in groups.items():
+         acc = get_value(m.get('test_accuracy'))
+         prec = get_value(m.get('test_precision'))
+         f1 = get_value(m.get('test_f1'))
+         if all(v is None for v in (acc, prec, f1)):
+             continue
+         labels.append(label)
+         accs.append(acc if acc is not None else 0.0)
+         precs.append(prec if prec is not None else 0.0)
+         f1s.append(f1 if f1 is not None else 0.0)
+
+     if not labels:
+         return
+
+     x = np.arange(len(labels))
+     width = 0.25
+     fig, ax = plt.subplots(figsize=(12, 4))
+     r1 = ax.bar(x - width, accs, width, label='Accuracy', color='steelblue')
+     r2 = ax.bar(x, precs, width, label='Precision (macro)', color='mediumseagreen')
+     r3 = ax.bar(x + width, f1s, width, label='F1 (macro)', color='coral')
+
+     ax.set_ylabel('Score')
+     ax.set_xticks(x, labels)
+     ax.set_ylim(0.0, 1.0)
+     ax.grid(True, axis='y', alpha=0.3)
+     ax.legend(loc='upper left')
+
+     # Annotate bars
+     for rect in list(r1) + list(r2) + list(r3):
+         height = rect.get_height()
+         ax.annotate(f'{height:.3f}',
+                     xy=(rect.get_x() + rect.get_width() / 2, height),
+                     xytext=(0, 3),
+                     textcoords="offset points",
+                     ha='center', va='bottom', fontsize=8)
+
+     fig.tight_layout()
+     ctx.savefig('test_metrics_comparison.png', dpi=150, fig=fig)
+     plt.close(fig)
+
+
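One detail worth calling out in test_metrics_comparison above: its local get_value helper accepts either a scalar metric or a dict keyed by step index and keeps only the latest entry. A standalone restatement of that selection rule (reimplemented here because the helper is nested inside the chart function and not importable on its own):

def latest_value(data):
    # Mirrors get_value in test_metrics_comparison: dicts keyed by step index
    # yield the most recent value, plain numbers pass through, anything else
    # (missing metric, wrong type) yields None.
    if isinstance(data, dict) and data:
        items = sorted(data.items(), key=lambda x: int(x[0]) if str(x[0]).isdigit() else 0)
        return float(items[-1][1])
    if isinstance(data, (int, float)):
        return float(data)
    return None


assert latest_value({"0": 0.41, "1": 0.47, "2": 0.52}) == 0.52  # history -> latest step
assert latest_value(0.61) == 0.61                               # scalar passes through
assert latest_value(None) is None                               # absent metric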
@@ -0,0 +1,6 @@
+ matplotlib>=3.8.0
+ numpy>=1.24.0
+
+
+
+
@@ -0,0 +1,13 @@
+ provider: slurm
+
+ num_workers: 7
+
+ options:
+   worker_cores: 1
+   worker_memory: "3GB"
+   worker_processes: 1
+   queue: null
+   walltime: "01:00:00"
+   scheduler_address: ""
+
+
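The cluster config above selects the slurm provider with 7 workers; the mapping from these options onto an actual cluster object lives in mlops/cluster/providers.py, which is not shown in this diff. As a hedged illustration only, here is how fields like worker_cores and walltime would typically translate into a dask_jobqueue.SLURMCluster; the field-to-argument correspondence is an assumption, not the package's code.

# Hedged sketch: one plausible mapping from cluster_config.yaml onto
# dask_jobqueue.SLURMCluster. The package's real provider logic is in
# mlops/cluster/providers.py and may differ.
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

options = {
    "worker_cores": 1,
    "worker_memory": "3GB",
    "worker_processes": 1,
    "queue": None,
    "walltime": "01:00:00",
}

cluster = SLURMCluster(
    cores=options["worker_cores"],          # CPU cores per SLURM job
    memory=options["worker_memory"],        # memory per SLURM job
    processes=options["worker_processes"],  # Dask worker processes per job
    queue=options["queue"],                 # None -> scheduler's default partition
    walltime=options["walltime"],
)
cluster.scale(jobs=7)   # num_workers: 7
client = Client(cluster)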
@@ -0,0 +1,207 @@
+ metadata:
+   name: "premier-league"
+   description: "Premier League 3-class classification with generic FE, NN A/B, XGB A/B, logistic baseline, and ensemble"
+   version: "1.0.0"
+
+ environment:
+   venv:
+     name: "premier-league-env"
+     requirements_file: "projects/premier-league/requirements.txt"
+   reporting:
+     name: "premier-league-env-reporting"
+     requirements_file: "projects/premier-league/charts/requirements.txt"
+
+ reproducibility:
+   random_seed: 43
+
+ model:
+   framework: "custom"
+   language: "python"
+   name: "premier_league_model"
+   version: "1.0.0"
+   parameters:
+     custom_script_path: "projects/premier-league/models/premier_league_model.py"
+   cache:
+     backend:
+       type: gcp
+       gcp_project: mlops-platform-470017
+       credentials_json: keys/firestore.json
+     object_store:
+       type: gcs
+       bucket: mlops-platform123
+       prefix: projects/premier-league/cache/steps
+   executor:
+     n_workers: 4
+   hyperparameters:
+     test_size: 0.2
+     pca_components: 21
+
+ pipeline:
+   process_adjlist: |
+     feature_engineering_generic preprocess_linear_nn
+     feature_engineering_generic preprocess_xgb
+     feature_engineering_generic pca_scree
+     feature_engineering_generic goals_distribution
+     preprocess_linear_nn linear_training
+     preprocess_linear_nn nn_training_a
+     preprocess_linear_nn nn_training_b
+     preprocess_xgb xgb_training_a
+     preprocess_xgb xgb_training_b
+     linear_training linear_inference
+     nn_training_a nn_inference_a
+     nn_training_b nn_inference_b
+     xgb_training_a xgb_inference_a
+     xgb_training_b xgb_inference_b
+     nn_inference_a nn_best_selection
+     nn_inference_b nn_best_selection
+     xgb_inference_a xgb_best_selection
+     xgb_inference_b xgb_best_selection
+     nn_best_selection nn_best_inference
+     xgb_best_selection xgb_best_inference
+     linear_training ensemble_inference
+     nn_best_selection ensemble_inference
+     xgb_best_selection ensemble_inference
+     linear_inference test_metrics_comparison
+     nn_best_inference test_metrics_comparison
+     xgb_best_inference test_metrics_comparison
+     ensemble_inference test_metrics_comparison
+
+   processes:
+     - name: "feature_engineering_generic"
+       description: "Generic FE: parse dates, derive labels (H/D/A), stratified indices"
+       code_function: "define_feature_engineering_generic_process"
+
+     - name: "preprocess_linear_nn"
+       description: "Preprocess for linear/nn: OHE + StandardScaler"
+       code_function: "define_preprocess_linear_nn_process"
+
+     - name: "preprocess_xgb"
+       description: "Preprocess for xgb: OHE only"
+       code_function: "define_preprocess_xgb_process"
+
+     - name: "linear_training"
+       description: "Train a multinomial Logistic Regression classifier"
+       code_function: "define_linear_training_process"
+
+     - name: "nn_training_a"
+       description: "Train NN classifier branch A"
+       code_function: "define_nn_training_process"
+       hyperparameters:
+         nn_params:
+           hidden_layers: [128, 64]
+           learning_rate: 0.001
+           epochs: 50
+
+     - name: "nn_training_b"
+       description: "Train NN classifier branch B"
+       code_function: "define_nn_training_process"
+       hyperparameters:
+         nn_params:
+           hidden_layers: [256, 128, 64]
+           learning_rate: 0.0008
+           epochs: 50
+
+     - name: "xgb_training_a"
+       description: "Train an XGBoost classifier (branch A)"
+       code_function: "define_xgb_training_process"
+       hyperparameters:
+         xgb_params:
+           n_estimators: 350
+           max_depth: 4
+           learning_rate: 0.1
+           subsample: 0.8
+           colsample_bytree: 0.9
+
+     - name: "xgb_training_b"
+       description: "Train an XGBoost classifier (branch B) with different hyperparameters"
+       code_function: "define_xgb_training_process"
+       hyperparameters:
+         xgb_params:
+           n_estimators: 500
+           max_depth: 8
+           learning_rate: 0.05
+           subsample: 0.8
+           colsample_bytree: 0.8
+
+     - name: "nn_inference_a"
+       description: "Run test inference for NN branch A and compute classification metrics"
+       code_function: "define_nn_inference_process"
+       hyperparameters:
+         train_key: nn_training_a
+
+     - name: "nn_inference_b"
+       description: "Run test inference for NN branch B and compute classification metrics"
+       code_function: "define_nn_inference_process"
+       hyperparameters:
+         train_key: nn_training_b
+
+     - name: "xgb_inference_a"
+       description: "Run test inference for XGBoost branch A and compute classification metrics"
+       code_function: "define_xgb_inference_process"
+       hyperparameters:
+         train_key: xgb_training_a
+
+     - name: "xgb_inference_b"
+       description: "Run test inference for XGBoost branch B and compute classification metrics"
+       code_function: "define_xgb_inference_process"
+       hyperparameters:
+         train_key: xgb_training_b
+
+     - name: "linear_inference"
+       description: "Run test inference for Logistic Regression and compute classification metrics"
+       code_function: "define_linear_inference_process"
+
+     - name: "nn_best_selection"
+       description: "Select best NN by F1 from A/B"
+       code_function: "define_select_best_nn_process"
+
+     - name: "xgb_best_selection"
+       description: "Select best XGB by F1 from A/B"
+       code_function: "define_select_best_xgb_process"
+
+     - name: "nn_best_inference"
+       description: "Stable best NN inference for charts"
+       code_function: "define_nn_best_inference_process"
+
+     - name: "xgb_best_inference"
+       description: "Stable best XGB inference for charts"
+       code_function: "define_xgb_best_inference_process"
+
+     - name: "ensemble_inference"
+       description: "Weighted soft-vote ensemble across Logistic, best NN, best XGB"
+       code_function: "define_ensemble_inference_process"
+
+     - name: "pca_scree"
+       type: chart
+       description: "Render PCA scree plot"
+
+     - name: "goals_distribution"
+       type: chart
+       description: "Render goals distribution plot"
+
+     - name: "test_metrics_comparison"
+       type: chart
+       description: "Compare classification metrics across models and ensemble"
+
+
+ reporting:
+   static_entrypoint: "projects/premier-league/charts/plot_metrics.py"
+   dynamic_entrypoint: "projects/premier-league/charts/plot_metrics.js"
+   charts:
+     - name: "pca_scree"
+       probe_paths:
+         feat: "feature_engineering_generic/feature_analysis"
+     - name: "goals_distribution"
+       probe_paths:
+         feat: "feature_engineering_generic/derive_labels_and_indices"
+     - name: "test_metrics_comparison"
+       probe_paths:
+         linear: "linear_inference/test_inference_classification"
+         nn_best: "nn_best_inference/test_inference_classification"
+         xgb_best: "xgb_best_inference/test_inference_classification"
+         ensemble: "ensemble_inference"
+     - name: "nn_losses"
+       type: dynamic
+       probe_paths:
+         nn_a: "nn_training_a/train_and_evaluate_nn_classifier"
+         nn_b: "nn_training_b/train_and_evaluate_nn_classifier"
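The process_adjlist block in this config is a plain whitespace adjacency list, one "parent child" edge per line, which the platform turns into a process DAG (the wheel ships mlops/core/networkx_parser.py and a Dask/NetworkX executor for this; neither is shown in this diff). A hedged, self-contained sketch of what that parse yields, using networkx directly on the edges quoted above rather than the package's own parser:

# Hedged sketch: build the premier-league process DAG with networkx and print
# one valid execution order. The edges are copied verbatim from process_adjlist
# above; the package's real parsing lives in mlops/core/networkx_parser.py.
import networkx as nx

PROCESS_ADJLIST = """
feature_engineering_generic preprocess_linear_nn
feature_engineering_generic preprocess_xgb
feature_engineering_generic pca_scree
feature_engineering_generic goals_distribution
preprocess_linear_nn linear_training
preprocess_linear_nn nn_training_a
preprocess_linear_nn nn_training_b
preprocess_xgb xgb_training_a
preprocess_xgb xgb_training_b
linear_training linear_inference
nn_training_a nn_inference_a
nn_training_b nn_inference_b
xgb_training_a xgb_inference_a
xgb_training_b xgb_inference_b
nn_inference_a nn_best_selection
nn_inference_b nn_best_selection
xgb_inference_a xgb_best_selection
xgb_inference_b xgb_best_selection
nn_best_selection nn_best_inference
xgb_best_selection xgb_best_inference
linear_training ensemble_inference
nn_best_selection ensemble_inference
xgb_best_selection ensemble_inference
linear_inference test_metrics_comparison
nn_best_inference test_metrics_comparison
xgb_best_inference test_metrics_comparison
ensemble_inference test_metrics_comparison
"""

# Each non-empty line is "source target", which matches networkx's
# adjacency-list format with a single successor per line.
graph = nx.parse_adjlist(
    (line for line in PROCESS_ADJLIST.splitlines() if line.strip()),
    create_using=nx.DiGraph,
)

assert nx.is_directed_acyclic_graph(graph)
print(graph.number_of_nodes(), graph.number_of_edges())  # 21 nodes, 27 edges
print(list(nx.topological_sort(graph)))                  # one valid execution order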