ezyml 2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ezyml/__init__.py +9 -0
- ezyml/cli.py +102 -0
- ezyml/compiler/__init__.py +1 -0
- ezyml/compiler/compile.py +137 -0
- ezyml/core.py +1005 -0
- ezyml/deploy/__init__.py +5 -0
- ezyml/deploy/docker.py +21 -0
- ezyml/deploy/fastapi.py +30 -0
- ezyml/deploy/k8s.py +125 -0
- ezyml/deploy/openapi.py +19 -0
- ezyml/deploy/streamlit.py +205 -0
- ezyml/devx/__init___.py +1 -0
- ezyml/devx/doctor.py +7 -0
- ezyml/devx/init.py +6 -0
- ezyml/eda/__init__.py +0 -0
- ezyml/eda/auto_eda.py +22 -0
- ezyml/evaluation/__init__.py +0 -0
- ezyml/evaluation/evaluator.py +43 -0
- ezyml/evaluation/metrics.py +25 -0
- ezyml/evaluation/plots.py +23 -0
- ezyml/explain/__init__.py +0 -0
- ezyml/explain/learner.py +12 -0
- ezyml/monitoring/__init__.py +0 -0
- ezyml/monitoring/drift.py +9 -0
- ezyml/monitoring/fingerprint.py +8 -0
- ezyml/pipeline/__init__.py +0 -0
- ezyml/pipeline/loader.py +84 -0
- ezyml/pipeline/visualize.py +9 -0
- ezyml/training/__init__.py +0 -0
- ezyml/training/tuner.py +6 -0
- ezyml-2.dist-info/METADATA +341 -0
- ezyml-2.dist-info/RECORD +36 -0
- ezyml-2.dist-info/WHEEL +5 -0
- ezyml-2.dist-info/entry_points.txt +2 -0
- ezyml-2.dist-info/licenses/LICENSE +21 -0
- ezyml-2.dist-info/top_level.txt +1 -0
ezyml/deploy/__init__.py
ADDED
ezyml/deploy/docker.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
def generate_dockerfile(output_path="Dockerfile"):
    """Write a minimal Dockerfile for serving a FastAPI app with uvicorn.

    The file starts with an empty line and separates every instruction
    with one blank line.

    Args:
        output_path: Destination file; overwritten if it already exists.

    Returns:
        ``output_path``, unchanged.
    """
    instructions = (
        "FROM python:3.10-slim",
        "WORKDIR /app",
        "COPY . /app",
        "RUN pip install --no-cache-dir fastapi uvicorn scikit-learn numpy",
        "EXPOSE 8000",
        'CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]',
    )
    # Leading newline plus blank-line separators reproduce the template layout.
    contents = "\n" + "\n\n".join(instructions) + "\n"

    with open(output_path, "w") as handle:
        handle.write(contents)

    return output_path
|
ezyml/deploy/fastapi.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
def generate_fastapi_app(model_path, schema, output_path="app.py"):
    """Generate a FastAPI inference server as a standalone Python file.

    The generated app unpickles the model at ``model_path`` on startup and
    exposes ``POST /predict``. That endpoint builds a single-row array from
    the payload, in the order of ``schema``'s keys, and returns
    ``{"prediction": int(...)}``. On any exception it returns
    ``{"error": "<message>"}`` instead.

    Assumes an sklearn-compatible model (an object with ``predict``).

    Args:
        model_path: Path of the pickled model as seen by the generated app.
        schema: Mapping whose keys are the feature names, in input order.
        output_path: Destination file for the generated source.

    Returns:
        ``output_path``, unchanged.
    """
    features = list(schema.keys())
    # repr() yields a correctly escaped Python literal, so backslashes
    # (Windows paths) or quotes in the path cannot corrupt the generated code.
    model_literal = repr(str(model_path))

    # NOTE(security): the generated app calls pickle.load, which executes
    # arbitrary code from the file; only point it at trusted model files.
    code = f'''
from fastapi import FastAPI
import pickle
import numpy as np

app = FastAPI()

with open({model_literal}, "rb") as f:
    model = pickle.load(f)

FEATURES = {features!r}

@app.post("/predict")
def predict(payload: dict):
    try:
        X = np.array([[payload[f] for f in FEATURES]])
        pred = model.predict(X)[0]
        return {{"prediction": int(pred)}}
    except Exception as e:
        return {{"error": str(e)}}
'''
    # Explicit UTF-8 so non-ASCII feature names survive on every platform.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(code)

    return output_path
|
ezyml/deploy/k8s.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
|
|
3
|
+
def generate_k8s_manifests(
    app_name,
    image,
    port=8000,
    replicas=1,
    namespace="default",
    output_prefix="k8s",
    with_ingress=False,
    ingress_host=None
):
    """Write Kubernetes YAML manifests for an ezyml model service.

    Always writes ``<prefix>_deployment.yaml``, ``<prefix>_service.yaml`` and
    a combined multi-document ``<prefix>.yaml``. With ``with_ingress=True`` an
    Ingress is appended to the combined file and also written to
    ``<prefix>_ingress.yaml``.

    Args:
        app_name: Used for the Deployment name, the container name and the
            ``app`` label; the Service is ``<app_name>-svc`` and the Ingress
            ``<app_name>-ingress``.
        image: Container image reference.
        port: Container port; the Service maps port 80 to it.
        replicas: Deployment replica count.
        namespace: Namespace set on every manifest.
        output_prefix: Path prefix for the written files.
        with_ingress: Whether to generate an Ingress.
        ingress_host: Host for the Ingress rule; required with ``with_ingress``.

    Returns:
        Dict with keys ``deployment``, ``service``, ``combined`` and
        ``ingress`` mapping to the written paths (``ingress`` is ``None``
        when no Ingress was requested).

    Raises:
        ValueError: If ``with_ingress`` is true but ``ingress_host`` is falsy.
            Raised before any file is written.
    """
    if with_ingress and not ingress_host:
        raise ValueError("ingress_host must be provided if with_ingress=True")

    service_name = f"{app_name}-svc"

    # Each label mapping is a distinct dict on purpose: reusing one object in
    # several places would make PyYAML emit anchors/aliases in the output.
    container = {
        "name": app_name,
        "image": image,
        "ports": [{"containerPort": port}],
        "resources": {
            "requests": {"cpu": "250m", "memory": "256Mi"},
            "limits": {"cpu": "500m", "memory": "512Mi"},
        },
    }
    deployment = {
        "apiVersion": "apps/v1",
        "kind": "Deployment",
        "metadata": {"name": app_name, "namespace": namespace},
        "spec": {
            "replicas": replicas,
            "selector": {"matchLabels": {"app": app_name}},
            "template": {
                "metadata": {"labels": {"app": app_name}},
                "spec": {"containers": [container]},
            },
        },
    }

    service = {
        "apiVersion": "v1",
        "kind": "Service",
        "metadata": {"name": service_name, "namespace": namespace},
        "spec": {
            "selector": {"app": app_name},
            "ports": [{"protocol": "TCP", "port": 80, "targetPort": port}],
            "type": "ClusterIP",
        },
    }

    documents = [deployment, service]

    ingress = None
    if with_ingress:
        route = {
            "path": "/",
            "pathType": "Prefix",
            "backend": {
                "service": {"name": service_name, "port": {"number": 80}}
            },
        }
        ingress = {
            "apiVersion": "networking.k8s.io/v1",
            "kind": "Ingress",
            "metadata": {"name": f"{app_name}-ingress", "namespace": namespace},
            "spec": {
                "rules": [{"host": ingress_host, "http": {"paths": [route]}}]
            },
        }
        documents.append(ingress)

    written = {
        "deployment": f"{output_prefix}_deployment.yaml",
        "service": f"{output_prefix}_service.yaml",
        "combined": f"{output_prefix}.yaml",
        "ingress": f"{output_prefix}_ingress.yaml" if with_ingress else None
    }

    # Individual files first, then the combined file, then the ingress file.
    for key, manifest in (("deployment", deployment), ("service", service)):
        with open(written[key], "w") as fh:
            yaml.safe_dump(manifest, fh)

    with open(written["combined"], "w") as fh:
        yaml.safe_dump_all(documents, fh)

    if with_ingress:
        with open(written["ingress"], "w") as fh:
            yaml.safe_dump(ingress, fh)

    return written
|
ezyml/deploy/openapi.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
def generate_openapi_spec(schema, output_path="openapi.json"):
    """Write a minimal OpenAPI-style input schema as JSON.

    Every key of ``schema`` becomes a required property of type ``number``.

    Args:
        schema: Mapping whose keys are the input field names.
        output_path: Destination JSON file (written with 2-space indent).

    Returns:
        ``output_path``, unchanged.
    """
    properties = {}
    for field in schema.keys():
        properties[field] = {"type": "number"}

    document = {
        "title": "ezyml-model-api",
        "type": "object",
        "properties": properties,
        "required": list(schema.keys()),
    }

    with open(output_path, "w") as handle:
        json.dump(document, handle, indent=2)

    return output_path
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
def generate_streamlit_app(model_path, schema, output_path="demo_app.py"):
    """Generate a Streamlit demo app for a pickled model.

    The generated app has a sidebar with model status, a "Single Prediction"
    tab with one numeric input per key of ``schema``, and a "Batch
    Processing" tab that runs ``model.predict`` on an uploaded CSV and offers
    the result as a download. The injected CSS sets no colours, so the app
    follows the active Light or Dark theme.

    Args:
        model_path: Path of the pickled model as seen by the generated app.
        schema: Mapping whose keys are the feature names.
        output_path: Destination file for the generated source (UTF-8).

    Returns:
        ``output_path``, unchanged.
    """
    schema_keys = list(schema.keys())
    # repr() yields a correctly escaped Python literal, so backslashes
    # (Windows paths) or quotes in the path cannot corrupt the generated code.
    model_literal = repr(str(model_path))

    # NOTE(security): the generated app calls pickle.load, which executes
    # arbitrary code from the file; only point it at trusted model files.
    code = f'''
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import time

# --------------------------------------------------
# 1. PAGE CONFIGURATION
# --------------------------------------------------
st.set_page_config(
    page_title="ezyml Enterprise Demo",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="expanded"
)

# --------------------------------------------------
# 2. CUSTOM CSS STYLING (THEME SAFE)
# --------------------------------------------------
# We removed the forced background colors to ensure text remains visible
# regardless of whether the user is in Light or Dark mode.
st.markdown("""
<style>
    /* Style the tabs to look pill-shaped but keep default colors */
    .stTabs [data-baseweb="tab-list"] {{
        gap: 8px;
    }}
    .stTabs [data-baseweb="tab"] {{
        height: 50px;
        white-space: pre-wrap;
        border-radius: 4px;
        padding-top: 10px;
        padding-bottom: 10px;
    }}

    /* Custom button styling - stands out in both modes */
    div.stButton > button {{
        width: 100%;
        font-weight: bold;
        padding: 0.5rem 1rem;
        border-radius: 8px;
    }}

    /* Remove default top padding for a cleaner look */
    .block-container {{
        padding-top: 2rem;
    }}
    footer {{visibility: hidden;}}
</style>
""", unsafe_allow_html=True)

# --------------------------------------------------
# 3. LOAD MODEL
# --------------------------------------------------
@st.cache_resource
def load_model():
    try:
        with open({model_literal}, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        st.error("Model file not found. Please check path.")
        return None

model = load_model()

# --------------------------------------------------
# 4. SIDEBAR
# --------------------------------------------------
with st.sidebar:
    st.title("🎛️ ezyml Control")
    st.divider()

    st.subheader("Model Status")
    if model is not None:
        st.success("🟢 Model Active")
    else:
        st.error("🔴 Model Offline")

    st.info("""
    **Mode Selection:**
    1. **Single Predict**: Test one sample.
    2. **Batch Predict**: Upload CSV.
    """)

    st.divider()
    st.caption("Generated by ezyml v2.0")

# --------------------------------------------------
# 5. MAIN CONTENT
# --------------------------------------------------

st.title("⚡ Model Inference Dashboard")
st.markdown("Enter values below to generate predictions.")

tab1, tab2 = st.tabs(["🎯 Single Prediction", "📂 Batch Processing"])

# --- TAB 1: SINGLE PREDICTION ---
with tab1:
    st.write("")  # Spacer

    # Create a container with a border for better visual separation
    with st.container(border=True):
        st.subheader("Configure Inputs")

        with st.form("prediction_form"):
            inputs = {{}}

            # Use 3 columns for better layout
            cols = st.columns(3)
            schema_keys = {schema_keys!r}

            for i, feature in enumerate(schema_keys):
                col = cols[i % 3]
                with col:
                    inputs[feature] = st.number_input(
                        label=feature,
                        value=0.0,
                        format="%.2f"
                    )

            st.markdown("---")

            # Using type="primary" makes the button colorful automatically
            submitted = st.form_submit_button("Run Prediction 🚀", type="primary")

    if submitted and model is not None:
        # Spinner instead of manual progress bar for cleaner UI
        with st.spinner("Calculating..."):
            time.sleep(0.5)  # clear visual cue

            X = pd.DataFrame([inputs])
            try:
                prediction = model.predict(X)[0]

                # Result Container
                st.write("")
                st.subheader("Results")

                res_col1, res_col2 = st.columns([1, 1])

                with res_col1:
                    # Using a container with border makes it pop
                    with st.container(border=True):
                        st.metric(
                            label="Predicted Value",
                            value=str(prediction),
                            delta="Success"
                        )

                # Confidence (Optional)
                if hasattr(model, "predict_proba"):
                    with res_col2:
                        try:
                            probs = model.predict_proba(X)[0]
                            confidence = np.max(probs) * 100
                            with st.container(border=True):
                                st.metric(label="Confidence", value=f"{{confidence:.1f}}%")
                                st.progress(int(confidence))
                        except Exception:
                            # Confidence is best-effort; the prediction above still stands.
                            pass

            except Exception as e:
                st.error(f"Prediction Error: {{e}}")

# --- TAB 2: BATCH PREDICTION ---
with tab2:
    st.write("")
    with st.container(border=True):
        st.subheader("Bulk Inference")

        uploaded_file = st.file_uploader("Upload CSV", type=["csv"])

        if uploaded_file:
            df = pd.read_csv(uploaded_file)
            st.dataframe(df.head(), use_container_width=True)

            if st.button("Process Batch", type="primary"):
                with st.spinner("Processing..."):
                    try:
                        preds = model.predict(df)
                        df['Prediction'] = preds

                        st.success("Done!")
                        st.dataframe(df, use_container_width=True)

                        csv = df.to_csv(index=False).encode('utf-8')
                        st.download_button(
                            "⬇️ Download CSV",
                            csv,
                            "predictions.csv",
                            "text/csv"
                        )
                    except Exception as e:
                        st.error(f"Batch failed: {{e}}")
'''
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(code)

    return output_path
|
ezyml/devx/__init___.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
ezyml/devx/doctor.py
ADDED
ezyml/devx/init.py
ADDED
ezyml/eda/__init__.py
ADDED
|
File without changes
|
ezyml/eda/auto_eda.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from scipy.stats import skew
|
|
3
|
+
|
|
4
|
+
def auto_eda(df, target=None):
    """Build a small exploratory-data-analysis report for ``df``.

    Args:
        df: DataFrame to profile.
        target: Optional column name; when truthy, its normalised value
            counts are added under ``"target_distribution"``.

    Returns:
        Dict with ``"shape"`` (``df.shape``), ``"missing"`` (fraction of
        nulls per column), ``"outliers"`` (result of ``outliers_iqr``) and
        ``"skewness"`` (per numeric column, nulls dropped), plus the optional
        target distribution.
    """
    report = {
        "shape": df.shape,
        "missing": df.isnull().mean().to_dict(),
        "outliers": outliers_iqr(df),
    }

    skewness = {}
    for column in df.select_dtypes(include=np.number):
        skewness[column] = float(skew(df[column].dropna()))
    report["skewness"] = skewness

    if target:
        distribution = df[target].value_counts(normalize=True)
        report["target_distribution"] = distribution.to_dict()

    return report
|
|
14
|
+
|
|
15
|
+
def outliers_iqr(df):
    """Return the fraction of IQR outliers for every numeric column of ``df``.

    A value counts as an outlier when it lies more than 1.5 interquartile
    ranges below the first quartile or above the third quartile.

    Returns:
        Dict mapping each numeric column name to a float.
    """
    fractions = {}
    for name in df.select_dtypes(include=np.number):
        series = df[name]
        lower_q, upper_q = series.quantile([0.25, 0.75])
        spread = upper_q - lower_q
        below = series < lower_q - 1.5 * spread
        above = series > upper_q + 1.5 * spread
        fractions[name] = float((below | above).mean())
    return fractions
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os, json
|
|
2
|
+
from .metrics import METRIC_REGISTRY
|
|
3
|
+
from .plots import *
|
|
4
|
+
|
|
5
|
+
class Evaluator:
    """Compute, persist and plot evaluation results for one task type.

    ``task`` selects the default metric and plot names (``"classification"``
    or ``"regression"``); extra names may be appended per instance.
    """

    DEFAULT_METRICS = {
        "classification": ["accuracy", "precision", "recall", "f1", "roc_auc"],
        "regression": ["rmse", "mae", "r2"]
    }

    DEFAULT_PLOTS = {
        "classification": ["confusion_matrix", "roc_curve", "pr_curve"],
        "regression": ["pred_vs_actual"]
    }

    # Names that are fed ``y_prob`` instead of ``y_pred``; they are skipped
    # when no probabilities are supplied rather than crashing the whole run.
    _PROB_METRICS = frozenset({"roc_auc"})
    _PROB_PLOTS = frozenset({"roc_curve", "pr_curve"})

    def __init__(self, task, extra_metrics=None, extra_plots=None):
        """Select the metric and plot names for ``task``.

        Args:
            task: Key into ``DEFAULT_METRICS`` / ``DEFAULT_PLOTS``.
            extra_metrics: Optional iterable of additional metric names.
            extra_plots: Optional iterable of additional plot names.

        Raises:
            KeyError: If ``task`` is not a known task type.
        """
        self.task = task
        # list(...) accepts any iterable and never mutates the class defaults.
        self.metrics = self.DEFAULT_METRICS[task] + list(extra_metrics or [])
        self.plots = self.DEFAULT_PLOTS[task] + list(extra_plots or [])

    def evaluate(self, y_true, y_pred, y_prob=None):
        """Compute every configured metric.

        Metrics in ``_PROB_METRICS`` receive ``y_prob``; all others receive
        ``y_pred``. Probability-based metrics are omitted from the result
        when ``y_prob`` is ``None``.

        Returns:
            Dict mapping metric name to its value.

        Raises:
            KeyError: If a configured metric name is not in ``METRIC_REGISTRY``.
        """
        results = {}
        for name in self.metrics:
            needs_prob = name in self._PROB_METRICS
            if needs_prob and y_prob is None:
                continue
            metric_fn = METRIC_REGISTRY[name]
            results[name] = metric_fn(y_true, y_prob if needs_prob else y_pred)
        return results

    def save(self, results, out_dir):
        """Write ``results`` to ``<out_dir>/metrics.json``, creating ``out_dir``."""
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, "metrics.json"), "w") as f:
            json.dump(results, f, indent=2)

    def visualize(self, y_true, y_pred, y_prob, out_dir):
        """Render every configured plot as a PNG inside ``out_dir``.

        Plots in ``_PROB_PLOTS`` are skipped when ``y_prob`` is ``None``.
        Plot names without a renderer below are ignored.
        """
        os.makedirs(out_dir, exist_ok=True)
        for p in self.plots:
            if p in self._PROB_PLOTS and y_prob is None:
                continue
            if p == "confusion_matrix":
                plot_confusion_matrix(
                    y_true, y_pred, os.path.join(out_dir, "confusion.png"))
            elif p == "roc_curve":
                plot_roc_curve(y_true, y_prob, os.path.join(out_dir, "roc.png"))
            elif p == "pr_curve":
                plot_pr_curve(y_true, y_prob, os.path.join(out_dir, "pr.png"))
            elif p == "pred_vs_actual":
                plot_pred_vs_actual(
                    y_true, y_pred, os.path.join(out_dir, "pred_vs_actual.png"))
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from sklearn.metrics import (
|
|
2
|
+
accuracy_score, precision_score, recall_score, f1_score,
|
|
3
|
+
roc_auc_score, mean_squared_error, mean_absolute_error, r2_score
|
|
4
|
+
)
|
|
5
|
+
|
|
6
|
+
def accuracy(y_true, y_pred):
    """Wrap ``sklearn.metrics.accuracy_score``."""
    return accuracy_score(y_true, y_pred)


def precision(y_true, y_pred):
    """Wrap ``precision_score`` with ``zero_division=0``."""
    return precision_score(y_true, y_pred, zero_division=0)


def recall(y_true, y_pred):
    """Wrap ``recall_score`` with ``zero_division=0``."""
    return recall_score(y_true, y_pred, zero_division=0)


def f1(y_true, y_pred):
    """Wrap ``f1_score`` with ``zero_division=0``."""
    return f1_score(y_true, y_pred, zero_division=0)


def roc_auc(y_true, y_prob):
    """Wrap ``roc_auc_score``; takes scores/probabilities, not labels."""
    return roc_auc_score(y_true, y_prob)


def rmse(y_true, y_pred):
    """Return the root mean squared error.

    ``mean_squared_error(..., squared=False)`` was deprecated in
    scikit-learn 1.4 and removed in 1.6, so the dedicated
    ``root_mean_squared_error`` is used when available and the old spelling
    is kept only as a fallback for older releases.
    """
    try:
        from sklearn.metrics import root_mean_squared_error
    except ImportError:
        # scikit-learn < 1.4
        return mean_squared_error(y_true, y_pred, squared=False)
    return root_mean_squared_error(y_true, y_pred)


def mae(y_true, y_pred):
    """Wrap ``mean_absolute_error``."""
    return mean_absolute_error(y_true, y_pred)


def r2(y_true, y_pred):
    """Wrap ``r2_score``."""
    return r2_score(y_true, y_pred)


# Name -> callable lookup; every callable takes (y_true, y_pred_or_prob).
METRIC_REGISTRY = {
    "accuracy": accuracy,
    "precision": precision,
    "recall": recall,
    "f1": f1,
    "roc_auc": roc_auc,
    "rmse": rmse,
    "mae": mae,
    "r2": r2
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import matplotlib.pyplot as plt
|
|
2
|
+
import seaborn as sns
|
|
3
|
+
from sklearn.metrics import confusion_matrix, roc_curve, precision_recall_curve
|
|
4
|
+
|
|
5
|
+
def plot_confusion_matrix(y_true, y_pred, path):
    """Save an annotated confusion-matrix heatmap to ``path``.

    The current matplotlib figure is closed after saving.
    """
    matrix = confusion_matrix(y_true, y_pred)
    sns.heatmap(matrix, annot=True, fmt="d")
    plt.savefig(path)
    plt.close()
|
|
9
|
+
|
|
10
|
+
def plot_roc_curve(y_true, y_prob, path):
    """Save a ROC curve to ``path``.

    The false-positive rate is on the x axis ("FPR") and the true-positive
    rate on the y axis ("TPR"). The current figure is closed after saving.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_prob)
    plt.plot(false_pos_rate, true_pos_rate)
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.savefig(path)
    plt.close()
|
|
14
|
+
|
|
15
|
+
def plot_pr_curve(y_true, y_prob, path):
    """Save a precision-recall curve to ``path``.

    Recall is on the x axis and precision on the y axis. The current
    figure is closed after saving.
    """
    precisions, recalls, _thresholds = precision_recall_curve(y_true, y_prob)
    plt.plot(recalls, precisions)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.savefig(path)
    plt.close()
|
|
19
|
+
|
|
20
|
+
def plot_pred_vs_actual(y_true, y_pred, path):
    """Save a scatter plot of predicted against actual values to ``path``.

    Actual values are on the x axis and predictions on the y axis. The
    current figure is closed after saving.
    """
    plt.scatter(y_true, y_pred)
    plt.xlabel("Actual")
    plt.ylabel("Predicted")
    plt.savefig(path)
    plt.close()
|
|
File without changes
|
ezyml/explain/learner.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
def explain_model_choice(model_name, profile):
    """Return a one-sentence rationale for choosing ``model_name``.

    Args:
        model_name: Name inserted at the start of the sentence.
        profile: Mapping with a ``"rows"`` entry; fewer than 1000 rows
            selects the small-dataset wording.
    """
    is_small = profile["rows"] < 1000
    reason = "due to small dataset robustness" if is_small else "for general performance"
    return f"{model_name} chosen {reason}."
|
|
5
|
+
|
|
6
|
+
def explain_metric(metric):
    """Return a short plain-English description of ``metric``.

    Names without a specific description get a generic fallback sentence.
    """
    known = dict(
        accuracy="Overall correctness of predictions.",
        f1="Balance between precision and recall.",
        roc_auc="Ability to separate classes.",
    )
    try:
        return known[metric]
    except KeyError:
        return "Standard evaluation metric."
|
|
File without changes
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
def detect_data_drift(base_df, new_df):
    """Return the absolute difference in column means between two frames.

    Iterates the columns of ``base_df`` and looks each one up in ``new_df``.

    Returns:
        Dict mapping each ``base_df`` column to ``|mean(base) - mean(new)|``.
    """
    return {
        column: abs(base_df[column].mean() - new_df[column].mean())
        for column in base_df.columns
    }
|
|
6
|
+
|
|
7
|
+
def detect_concept_drift(y_true, y_pred, threshold=0.7):
    """Flag concept drift when accuracy falls below ``threshold``.

    Args:
        y_true: Array-like of true labels (list, tuple, ndarray, Series...).
        y_pred: Array-like of predicted labels, same length as ``y_true``.
        threshold: Minimum acceptable accuracy.

    Returns:
        A truthy value when the fraction of matching labels is strictly
        below ``threshold``.
    """
    # Local import: this module has no top-level imports.
    import numpy as np

    # Converting first makes the comparison element-wise and positional for
    # every array-like; comparing two plain lists would give a single bool.
    matches = np.asarray(y_true) == np.asarray(y_pred)
    acc = np.mean(matches)
    return acc < threshold
|
|
File without changes
|
ezyml/pipeline/loader.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# # ezyml/pipeline/loader.py
|
|
2
|
+
|
|
3
|
+
# import yaml
|
|
4
|
+
# from ezyml.core import EZTrainer
|
|
5
|
+
# from ezyml.pipeline.visualize import render_ascii_dag
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# class Pipeline:
|
|
9
|
+
# def __init__(self, steps, edges):
|
|
10
|
+
# self.steps = steps
|
|
11
|
+
# self.edges = edges
|
|
12
|
+
# self.trainer = None
|
|
13
|
+
|
|
14
|
+
# def run(self, data, target=None):
|
|
15
|
+
# """
|
|
16
|
+
# Execute the pipeline.
|
|
17
|
+
# v1 assumption: last step is always EZTrainer.
|
|
18
|
+
# """
|
|
19
|
+
# if "trainer" not in self.steps:
|
|
20
|
+
# raise ValueError("Pipeline must contain a 'trainer' step")
|
|
21
|
+
|
|
22
|
+
# cfg = self.steps["trainer"]
|
|
23
|
+
# params = cfg.get("params", {})
|
|
24
|
+
|
|
25
|
+
# self.trainer = EZTrainer(
|
|
26
|
+
# data=data,
|
|
27
|
+
# target=target,
|
|
28
|
+
# model=params.get("model"),
|
|
29
|
+
# task="classification"
|
|
30
|
+
# )
|
|
31
|
+
|
|
32
|
+
# self.trainer.train()
|
|
33
|
+
# return self.trainer
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# def load_pipeline(path: str) -> Pipeline:
|
|
37
|
+
# with open(path, "r") as f:
|
|
38
|
+
# cfg = yaml.safe_load(f)
|
|
39
|
+
|
|
40
|
+
# steps = cfg.get("steps", {})
|
|
41
|
+
# edges = cfg.get("edges", [])
|
|
42
|
+
|
|
43
|
+
# render_ascii_dag(steps.keys(), _edges_to_map(edges))
|
|
44
|
+
# return Pipeline(steps=steps, edges=edges)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# def _edges_to_map(edges):
|
|
48
|
+
# graph = {}
|
|
49
|
+
# for src, dst in edges:
|
|
50
|
+
# graph.setdefault(src, []).append(dst)
|
|
51
|
+
# return graph
|
|
52
|
+
|
|
53
|
+
# ezyml/pipeline/loader.py
|
|
54
|
+
|
|
55
|
+
import yaml
|
|
56
|
+
from ezyml.core import EZTrainer
|
|
57
|
+
from ezyml.pipeline.visualize import render_ascii_dag
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Pipeline:
    """A loaded pipeline definition; currently only the ``trainer`` step runs."""

    def __init__(self, steps):
        """Store ``steps``, a mapping of step name to its configuration."""
        self.steps = steps

    def run(self, data, target):
        """Train an ``EZTrainer`` configured by the ``trainer`` step.

        The step's ``params`` may set ``model`` (default ``"random_forest"``)
        and ``task`` (default ``"classification"``).

        Args:
            data: Passed through to ``EZTrainer`` as ``data``.
            target: Passed through to ``EZTrainer`` as ``target``.

        Returns:
            The ``EZTrainer`` instance, after ``train()`` has been called.

        Raises:
            KeyError: If the pipeline has no ``trainer`` step.
        """
        if "trainer" not in self.steps:
            raise KeyError("Pipeline must contain a 'trainer' step")

        # A YAML step with an empty body (``trainer:``) or an empty ``params:``
        # loads as None; treat both as "no overrides".
        cfg = self.steps["trainer"] or {}
        params = cfg.get("params") or {}

        trainer = EZTrainer(
            data=data,
            target=target,
            model=params.get("model", "random_forest"),
            task=params.get("task", "classification"),
        )
        trainer.train()
        return trainer
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def load_pipeline(path):
    """Load a pipeline definition from a YAML file.

    Reads the top-level ``steps`` mapping, renders its step names with
    ``render_ascii_dag`` (no edges) and wraps the steps in a ``Pipeline``.

    Args:
        path: Path to the YAML file (read as UTF-8).

    Returns:
        A ``Pipeline`` holding the loaded steps; empty when the file is
        empty or has no ``steps``.
    """
    with open(path, encoding="utf-8") as f:
        # safe_load returns None for an empty document.
        cfg = yaml.safe_load(f) or {}

    # ``steps:`` with no body also loads as None.
    steps = cfg.get("steps") or {}
    render_ascii_dag(steps.keys(), {})
    return Pipeline(steps)
|