kaizenstat 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/PKG-INFO +14 -1
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/README.md +13 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat/__init__.py +1 -1
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat/core.py +304 -4
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat.egg-info/PKG-INFO +14 -1
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/setup.py +1 -1
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat/cli.py +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat.egg-info/SOURCES.txt +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat.egg-info/dependency_links.txt +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat.egg-info/entry_points.txt +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat.egg-info/requires.txt +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/kaizenstat.egg-info/top_level.txt +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/pyproject.toml +0 -0
- {kaizenstat-0.2.2 → kaizenstat-0.2.4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaizenstat
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Zero-friction AutoML + Data Cleaning Toolkit
|
|
5
5
|
Author: Masuddar Rahman
|
|
6
6
|
Requires-Python: >=3.8
|
|
@@ -84,6 +84,9 @@ KaizenStat is designed around a single unified vocabulary. Every CLI command has
|
|
|
84
84
|
| `kz export-model` | `KaizenStat.save_model()` | 💾 Trains the top pipeline and saves it directly to a `.joblib` binary. |
|
|
85
85
|
| `kz report` | `KaizenStat.report()` | 📊 Generates a beautiful, interactive HTML profiling report with Chart.js. |
|
|
86
86
|
| `kz serve` | `KaizenStat.serve()` | 🌐 Launches a local web dashboard to explore the data and run predictions. |
|
|
87
|
+
| - | `KaizenStat.analyze()` | 🧠 Executes auto-intelligence analysis over dataset context using LLM reasoning. |
|
|
88
|
+
| - | `KaizenStat.ask()` | 🤖 Answers complex developer queries about accuracy, data quality, or anomalies. |
|
|
89
|
+
| - | `KaizenStat.ask_followup()` | 🔁 Maintains multi-turn conversation memory with the data reasoning engine. |
|
|
87
90
|
|
|
88
91
|
---
|
|
89
92
|
|
|
@@ -109,6 +112,16 @@ leaderboard = KaizenStat.benchmark(clean_df, target="target_column")
|
|
|
109
112
|
|
|
110
113
|
# 4. Generate standalone code for reproduction
|
|
111
114
|
KaizenStat.codegen("dataset.csv", target="target_column", output_path="reproduce.py")
|
|
115
|
+
|
|
116
|
+
# 5. Dual-Mode Conversational AI (OpenRouter powered)
|
|
117
|
+
# Runs automated structured AI analysis
|
|
118
|
+
analysis = KaizenStat.analyze(df, target="target_column")
|
|
119
|
+
|
|
120
|
+
# Ask custom developer queries about data or pipeline
|
|
121
|
+
KaizenStat.ask("Why is model accuracy lower or what are the dataset flaws?")
|
|
122
|
+
|
|
123
|
+
# Multi-turn conversation with memory context
|
|
124
|
+
KaizenStat.ask_followup("What should I do to handle the missing values or high cardinality?")
|
|
112
125
|
```
|
|
113
126
|
|
|
114
127
|
### 2. Command Line Interface (CLI)
|
|
@@ -54,6 +54,9 @@ KaizenStat is designed around a single unified vocabulary. Every CLI command has
|
|
|
54
54
|
| `kz export-model` | `KaizenStat.save_model()` | 💾 Trains the top pipeline and saves it directly to a `.joblib` binary. |
|
|
55
55
|
| `kz report` | `KaizenStat.report()` | 📊 Generates a beautiful, interactive HTML profiling report with Chart.js. |
|
|
56
56
|
| `kz serve` | `KaizenStat.serve()` | 🌐 Launches a local web dashboard to explore the data and run predictions. |
|
|
57
|
+
| - | `KaizenStat.analyze()` | 🧠 Executes auto-intelligence analysis over dataset context using LLM reasoning. |
|
|
58
|
+
| - | `KaizenStat.ask()` | 🤖 Answers complex developer queries about accuracy, data quality, or anomalies. |
|
|
59
|
+
| - | `KaizenStat.ask_followup()` | 🔁 Maintains multi-turn conversation memory with the data reasoning engine. |
|
|
57
60
|
|
|
58
61
|
---
|
|
59
62
|
|
|
@@ -79,6 +82,16 @@ leaderboard = KaizenStat.benchmark(clean_df, target="target_column")
|
|
|
79
82
|
|
|
80
83
|
# 4. Generate standalone code for reproduction
|
|
81
84
|
KaizenStat.codegen("dataset.csv", target="target_column", output_path="reproduce.py")
|
|
85
|
+
|
|
86
|
+
# 5. Dual-Mode Conversational AI (OpenRouter powered)
|
|
87
|
+
# Runs automated structured AI analysis
|
|
88
|
+
analysis = KaizenStat.analyze(df, target="target_column")
|
|
89
|
+
|
|
90
|
+
# Ask custom developer queries about data or pipeline
|
|
91
|
+
KaizenStat.ask("Why is model accuracy lower or what are the dataset flaws?")
|
|
92
|
+
|
|
93
|
+
# Multi-turn conversation with memory context
|
|
94
|
+
KaizenStat.ask_followup("What should I do to handle the missing values or high cardinality?")
|
|
82
95
|
```
|
|
83
96
|
|
|
84
97
|
### 2. Command Line Interface (CLI)
|
|
@@ -5,6 +5,9 @@
|
|
|
5
5
|
import os
|
|
6
6
|
import time
|
|
7
7
|
import warnings
|
|
8
|
+
import json
|
|
9
|
+
import urllib.request
|
|
10
|
+
from urllib.error import URLError, HTTPError
|
|
8
11
|
from typing import Optional, Dict, List, Union
|
|
9
12
|
|
|
10
13
|
import numpy as np
|
|
@@ -171,7 +174,13 @@ class KaizenStat:
|
|
|
171
174
|
report(data, target, output_path) → Generate interactive HTML report
|
|
172
175
|
save_model(pipeline, path) → Export trained model
|
|
173
176
|
load_model(path) → Load exported model
|
|
177
|
+
analyze(df, target) → Intelligent dataset analysis
|
|
178
|
+
ask(query) → Conversational AI support
|
|
179
|
+
ask_followup(query) → Conversational AI follow-up support
|
|
174
180
|
"""
|
|
181
|
+
# OpenRouter key used when a caller passes no ``api_key``. It is read from the
# OPENROUTER_API_KEY environment variable rather than embedded in the source:
# a secret shipped inside a published package can be extracted by anyone who
# downloads it. An empty string makes the API helper raise ValueError and ask
# the caller for a key.
DEFAULT_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
# Dataset/pipeline context dict used by the AI helpers; None until one is built.
_last_context = None
# Chat turns ({"role": ..., "content": ...} dicts) remembered for ask_followup().
_conversation_history = []
|
|
175
184
|
|
|
176
185
|
# ==========================
|
|
177
186
|
# 🧠 VALIDATION
|
|
@@ -257,6 +266,7 @@ class KaizenStat:
|
|
|
257
266
|
if imbalanced:
|
|
258
267
|
print(f" ⚠️ Class Imbalance Detected (majority > 65%)")
|
|
259
268
|
|
|
269
|
+
KaizenStat._last_audit_findings = findings
|
|
260
270
|
return findings
|
|
261
271
|
|
|
262
272
|
# ==========================
|
|
@@ -362,6 +372,7 @@ class KaizenStat:
|
|
|
362
372
|
if not dropped_cols and not actions:
|
|
363
373
|
print(" ✓ Dataset was already clean")
|
|
364
374
|
|
|
375
|
+
KaizenStat._last_dropped_cols = dropped_cols
|
|
365
376
|
return df
|
|
366
377
|
|
|
367
378
|
# ==========================
|
|
@@ -519,6 +530,7 @@ class KaizenStat:
|
|
|
519
530
|
KaizenStat._last_label_encoder = label_encoder
|
|
520
531
|
KaizenStat._last_task_type = "classification" if is_classification else "regression"
|
|
521
532
|
KaizenStat._last_target = target
|
|
533
|
+
KaizenStat._last_results_df = results_df
|
|
522
534
|
|
|
523
535
|
return results_df
|
|
524
536
|
|
|
@@ -546,6 +558,9 @@ class KaizenStat:
|
|
|
546
558
|
results = KaizenStat.benchmark(df, target)
|
|
547
559
|
|
|
548
560
|
print(f"\n🏆 BEST MODEL: {results.iloc[0]['Model']} (Score: {results.iloc[0]['Score']:.4f})")
|
|
561
|
+
|
|
562
|
+
# Build and store context for conversational AI
|
|
563
|
+
KaizenStat._last_context = KaizenStat._build_context(df, target)
|
|
549
564
|
|
|
550
565
|
return results
|
|
551
566
|
|
|
@@ -718,7 +733,7 @@ for col in list(df.columns):
|
|
|
718
733
|
num_features = {num_features}
|
|
719
734
|
cat_features = {cat_features}
|
|
720
735
|
|
|
721
|
-
X = df[num_features + cat_features]
|
|
736
|
+
X = df[num_features + cat_features].copy()
|
|
722
737
|
y = df["{target}"]
|
|
723
738
|
{"" if not needs_label_encoder else """
|
|
724
739
|
# Encode string labels
|
|
@@ -727,9 +742,9 @@ y = le.fit_transform(y)
|
|
|
727
742
|
"""}
|
|
728
743
|
# Fill missing values
|
|
729
744
|
if num_features:
|
|
730
|
-
X[num_features] = X[num_features].fillna(X[num_features].median())
|
|
745
|
+
X.loc[:, num_features] = X[num_features].fillna(X[num_features].median())
|
|
731
746
|
for col in cat_features:
|
|
732
|
-
X[col] = X[col].fillna(X[col].mode().iloc[0] if not X[col].mode().empty else "Unknown")
|
|
747
|
+
X.loc[:, col] = X[col].fillna(X[col].mode().iloc[0] if not X[col].mode().empty else "Unknown")
|
|
733
748
|
|
|
734
749
|
# 4. Preprocessing Pipeline
|
|
735
750
|
preprocessor = ColumnTransformer([
|
|
@@ -1265,4 +1280,289 @@ with tab4:
|
|
|
1265
1280
|
print(f" Open: http://localhost:{port}")
|
|
1266
1281
|
print(f" Press Ctrl+C to stop\n")
|
|
1267
1282
|
|
|
1268
|
-
os.system(f"streamlit run {app_file} --server.port {port} --server.headless true")
|
|
1283
|
+
os.system(f"streamlit run {app_file} --server.port {port} --server.headless true")
|
|
1284
|
+
|
|
1285
|
+
# ==========================
|
|
1286
|
+
# 🧠 AI CHAT & ANALYZE
|
|
1287
|
+
# ==========================
|
|
1288
|
+
@staticmethod
def _build_context(df: pd.DataFrame, target: str) -> dict:
    """Summarise the dataset and the last pipeline run as a JSON-safe dict.

    Cached class attributes (``_last_audit_findings``, ``_last_dropped_cols``,
    ``_last_results_df``) are used when present; when no audit findings are
    cached, ``KaizenStat.audit`` is run with its stdout captured.

    Args:
        df: Dataset to describe.
        target: Name of the target column; excluded from the high-cardinality
            check.

    Returns:
        A dict with the keys ``shape``, ``missing``, ``dropped_cols``,
        ``model``, ``score``, ``imbalance`` and ``high_cardinality``, holding
        only built-in Python types.
    """
    findings = getattr(KaizenStat, "_last_audit_findings", {})
    if not findings:
        # Nothing cached: run the audit quietly; a failing audit is treated
        # as "no findings" instead of aborting context creation.
        import contextlib
        import io

        with contextlib.redirect_stdout(io.StringIO()):
            try:
                findings = KaizenStat.audit(df, target)
            except Exception:
                findings = {}

    # Non-numeric feature columns with more than 20 distinct values.
    non_numeric = df.select_dtypes(exclude=[np.number]).columns
    wide_categoricals = [
        name
        for name in non_numeric
        if name != target and df[name].nunique() > 20
    ]

    # Entries may be plain names or tuples whose first element is the name.
    raw_dropped = getattr(KaizenStat, "_last_dropped_cols", [])
    dropped_names = []
    if isinstance(raw_dropped, list):
        dropped_names = [
            str(entry[0]) if isinstance(entry, tuple) and len(entry) > 0 else str(entry)
            for entry in raw_dropped
        ]

    # The first leaderboard row is taken as the best model.
    leaderboard = getattr(KaizenStat, "_last_results_df", None)
    if leaderboard is not None and not leaderboard.empty:
        top_row = leaderboard.iloc[0]
        top_model = top_row["Model"]
        top_score = float(top_row["Score"])
    else:
        top_model = "None"
        top_score = 0.0

    is_imbalanced = findings.get("imbalanced", False)

    # Only columns that actually contain missing values are reported.
    na_per_column = df.isna().sum()
    columns_with_gaps = na_per_column[na_per_column > 0].to_dict()

    # Cast everything to plain Python types so json.dumps() accepts the result.
    return {
        "shape": [int(df.shape[0]), int(df.shape[1])],
        "missing": {str(col): int(count) for col, count in columns_with_gaps.items()},
        "dropped_cols": [str(name) for name in dropped_names],
        "model": str(top_model),
        "score": float(top_score),
        "imbalance": bool(is_imbalanced),
        "high_cardinality": [str(name) for name in wide_categoricals],
    }
|
|
1344
|
+
|
|
1345
|
+
@staticmethod
def _get_system_prompt(context: dict) -> str:
    """Render the fixed system prompt with the context embedded as JSON.

    Args:
        context: JSON-serialisable context dict; it is dumped with
            ``indent=2`` and substituted for the ``{context}`` placeholder.

    Returns:
        The complete system prompt text.
    """
    rendered_context = json.dumps(context, indent=2)
    instructions = """You are an expert Data Scientist AI assistant integrated inside a system called KaizenStat.

You are NOT a generic chatbot. You MUST ONLY answer based on the structured dataset context provided below.

SYSTEM CONTEXT (VERY IMPORTANT)
The following information is automatically extracted from the dataset and ML pipeline:
{context}

YOUR ROLE
You must act as:
- a senior data scientist
- a decision-making assistant
- a debugging expert

YOUR TASK
Based ONLY on the provided context:
- Identify key problems in the dataset or pipeline
- Explain WHY these problems matter
- Suggest clear, practical improvements
- If user asked a question, answer it using context
- If no question is asked, provide a structured analysis

RESPONSE STYLE
- Be concise but insightful
- Use bullet points when helpful
- Avoid generic advice
- Do NOT hallucinate missing data
- Do NOT assume anything outside the context
- Always tie your reasoning to the given dataset

Remember:
You are not ChatGPT.
You are KaizenStat’s intelligence layer."""
    # Plain str.replace (not str.format) so braces inside the JSON are harmless.
    return instructions.replace("{context}", rendered_context)
|
|
1381
|
+
|
|
1382
|
+
@staticmethod
def _build_ai_prompt(context: dict, user_query: Optional[str] = None) -> str:
    """Build the single-message prompt sent for analyze() and ask().

    Args:
        context: Context dict passed on to ``_get_system_prompt``.
        user_query: Optional question; when falsy (``None`` or empty) the
            system prompt is returned on its own.

    Returns:
        The system prompt, followed by a ``USER QUESTION:`` section when a
        question was supplied.
    """
    base_prompt = KaizenStat._get_system_prompt(context)
    if not user_query:
        return base_prompt
    return f"{base_prompt}\n\nUSER QUESTION:\n{user_query}"
|
|
1388
|
+
|
|
1389
|
+
@staticmethod
def _call_openrouter_api_messages(messages: list, api_key: Optional[str] = None) -> str:
    """Send a chat-completion request to OpenRouter and return the reply text.

    The models listed below are tried in order. The first response that
    contains a non-empty ``choices`` list is returned; any failure (HTTP
    error, network error, malformed or empty response) is printed and the
    next model is tried.

    Args:
        messages: Chat messages (``{"role": ..., "content": ...}`` dicts),
            sent unchanged as the ``messages`` field of the request payload.
        api_key: OpenRouter API key. Falls back to
            ``KaizenStat.DEFAULT_API_KEY`` when omitted or empty.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        ValueError: If no API key is available.
        RuntimeError: If every model failed; the message includes the last
            error that was seen.
    """
    import ssl

    key = api_key or getattr(KaizenStat, "DEFAULT_API_KEY", "")
    if not key:
        raise ValueError("No OpenRouter API key found. Please provide one.")

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/masuddarrahaman/KaizenStat-Library",
        "X-Title": "KaizenStat Intelligence",
    }

    # Verify the server certificate. The request carries the bearer token and
    # the dataset context, so an unverified TLS context would expose both to
    # man-in-the-middle interception.
    ssl_context = ssl.create_default_context()

    # Models list with fallback mechanisms
    models = [
        "google/gemini-2.5-flash",
        "meta-llama/llama-3-8b-instruct:free",
        "google/gemma-2-9b-it:free",
        "qwen/qwen-2.5-72b-instruct:free",
        "google/gemini-2.5-pro",
    ]

    last_error = None
    for model in models:
        payload = {
            "model": model,
            "messages": messages,
            "temperature": 0.2,
            "max_tokens": 1500,
        }
        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode("utf-8"),
            headers=headers,
            method="POST",
        )

        try:
            # 15 seconds timeout per model attempt
            with urllib.request.urlopen(req, context=ssl_context, timeout=15) as response:
                res = json.loads(response.read().decode("utf-8"))
            choices = res.get("choices") if isinstance(res, dict) else None
            if choices:
                return choices[0]["message"]["content"]
            # A 200 response without choices is still a failure; record it so
            # the final error message is not stale or None.
            last_error = "Response contained no choices"
            print(f"⚠️ Model {model} failed: {last_error}. Trying fallback model...")
        except HTTPError as e:
            # HTTPError must be handled before URLError (it is a subclass).
            err_body = e.read().decode("utf-8", errors="replace")
            try:
                error_msg = json.loads(err_body).get("error", {}).get("message", "")
            except (ValueError, AttributeError):
                # Body was not JSON, or not shaped like {"error": {...}}.
                error_msg = err_body
            last_error = f"HTTP Error {e.code}: {error_msg}"
            print(f"⚠️ Model {model} failed or server busy: {last_error}. Trying fallback model...")
        except URLError as e:
            last_error = f"Network Error: {e.reason}"
            print(f"⚠️ Model {model} network error: {last_error}. Trying fallback model...")
        except Exception as e:
            # Best-effort fallback: any other failure moves on to the next model.
            last_error = f"Unexpected Error: {e}"
            print(f"⚠️ Model {model} failed: {last_error}. Trying fallback model...")

    raise RuntimeError(
        f"Failed to query OpenRouter. Last error: {last_error}\n"
        "Server might be busy or API token has expired. "
        "Please check your internet connection or try again. "
        "Alternatively, provide your own OpenRouter / Gemini API key via the `api_key` parameter."
    )
|
|
1458
|
+
|
|
1459
|
+
@staticmethod
def analyze(data: Union[str, pd.DataFrame], target: str, api_key: Optional[str] = None) -> str:
    """Run the auto pipeline on a dataset and request a structured AI analysis.

    The reply also starts a new conversation history, so ``ask_followup`` can
    continue from it.

    Args:
        data: CSV path or DataFrame.
        target: Name of the target column.
        api_key: Optional custom OpenRouter API key.

    Returns:
        The analysis text returned by the model.
    """
    frame = DataEngine.load(data)
    # Run the pipeline first; the context is then read from
    # KaizenStat._last_context (expected to be refreshed by auto()).
    KaizenStat.auto(frame, target)

    analysis_prompt = KaizenStat._build_ai_prompt(
        KaizenStat._last_context, user_query=None
    )

    print("\n🧠 Querying KaizenStat Intelligence Engine...")
    request_messages = [{"role": "user", "content": analysis_prompt}]
    reply = KaizenStat._call_openrouter_api_messages(request_messages, api_key=api_key)

    # The full prompt is not stored; a short stand-in user turn is kept instead.
    KaizenStat._conversation_history = [
        {"role": "user", "content": "Analyze this dataset."},
        {"role": "assistant", "content": reply},
    ]

    print("\n💬 KAIZENSTAT AUTOMATIC ANALYSIS:")
    print(reply)
    return reply
|
|
1494
|
+
|
|
1495
|
+
@staticmethod
def ask(user_query: str, api_key: Optional[str] = None) -> str:
    """Answer one question using the stored dataset context.

    Each call replaces the conversation history with this question and its
    answer.

    Args:
        user_query: The question for the AI engine.
        api_key: Optional custom OpenRouter API key.

    Returns:
        The AI response.

    Raises:
        ValueError: If no dataset context has been stored yet.
    """
    stored_context = KaizenStat._last_context
    if stored_context is None:
        raise ValueError(
            "No dataset context found. Please run KaizenStat.analyze(df, target) "
            "or KaizenStat.auto(df, target) first."
        )

    question_prompt = KaizenStat._build_ai_prompt(stored_context, user_query=user_query)

    print(f"\n🧠 Querying KaizenStat Intelligence for: '{user_query}'...")
    request_messages = [{"role": "user", "content": question_prompt}]
    reply = KaizenStat._call_openrouter_api_messages(request_messages, api_key=api_key)

    # Start a fresh thread: only the bare question is stored, not the prompt.
    KaizenStat._conversation_history = [
        {"role": "user", "content": user_query},
        {"role": "assistant", "content": reply},
    ]

    print("\n💬 KAIZENSTAT RESPONSE:")
    print(reply)
    return reply
|
|
1531
|
+
|
|
1532
|
+
@staticmethod
def ask_followup(user_query: str, api_key: Optional[str] = None) -> str:
    """Ask a follow-up question, sending the stored conversation as memory.

    With an empty history this delegates to ``ask``. Otherwise the system
    prompt, the stored turns and the new question are sent together, and the
    history is extended only once a reply has been received.

    Args:
        user_query: The follow-up question.
        api_key: Optional custom OpenRouter API key.

    Returns:
        The AI response.

    Raises:
        ValueError: If no dataset context has been stored yet.
    """
    context = KaizenStat._last_context
    if context is None:
        raise ValueError(
            "No dataset context found. Please run KaizenStat.analyze(df, target) "
            "or KaizenStat.auto(df, target) first."
        )

    history = KaizenStat._conversation_history
    if not history:
        return KaizenStat.ask(user_query, api_key=api_key)

    user_turn = {"role": "user", "content": user_query}
    system_prompt = KaizenStat._get_system_prompt(context)
    messages = [{"role": "system", "content": system_prompt}, *history, user_turn]

    print(f"\n🧠 Querying KaizenStat (Follow-up) for: '{user_query}'...")
    response = KaizenStat._call_openrouter_api_messages(messages, api_key=api_key)

    # Record both turns only after the call succeeded. Appending the question
    # beforehand would leave an unanswered user message in the shared history
    # whenever the request raises, corrupting every later follow-up.
    history.extend([user_turn, {"role": "assistant", "content": response}])
    KaizenStat._conversation_history = history

    print("\n💬 KAIZENSTAT RESPONSE:")
    print(response)
    return response
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaizenstat
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Zero-friction AutoML + Data Cleaning Toolkit
|
|
5
5
|
Author: Masuddar Rahman
|
|
6
6
|
Requires-Python: >=3.8
|
|
@@ -84,6 +84,9 @@ KaizenStat is designed around a single unified vocabulary. Every CLI command has
|
|
|
84
84
|
| `kz export-model` | `KaizenStat.save_model()` | 💾 Trains the top pipeline and saves it directly to a `.joblib` binary. |
|
|
85
85
|
| `kz report` | `KaizenStat.report()` | 📊 Generates a beautiful, interactive HTML profiling report with Chart.js. |
|
|
86
86
|
| `kz serve` | `KaizenStat.serve()` | 🌐 Launches a local web dashboard to explore the data and run predictions. |
|
|
87
|
+
| - | `KaizenStat.analyze()` | 🧠 Executes auto-intelligence analysis over dataset context using LLM reasoning. |
|
|
88
|
+
| - | `KaizenStat.ask()` | 🤖 Answers complex developer queries about accuracy, data quality, or anomalies. |
|
|
89
|
+
| - | `KaizenStat.ask_followup()` | 🔁 Maintains multi-turn conversation memory with the data reasoning engine. |
|
|
87
90
|
|
|
88
91
|
---
|
|
89
92
|
|
|
@@ -109,6 +112,16 @@ leaderboard = KaizenStat.benchmark(clean_df, target="target_column")
|
|
|
109
112
|
|
|
110
113
|
# 4. Generate standalone code for reproduction
|
|
111
114
|
KaizenStat.codegen("dataset.csv", target="target_column", output_path="reproduce.py")
|
|
115
|
+
|
|
116
|
+
# 5. Dual-Mode Conversational AI (OpenRouter powered)
|
|
117
|
+
# Runs automated structured AI analysis
|
|
118
|
+
analysis = KaizenStat.analyze(df, target="target_column")
|
|
119
|
+
|
|
120
|
+
# Ask custom developer queries about data or pipeline
|
|
121
|
+
KaizenStat.ask("Why is model accuracy lower or what are the dataset flaws?")
|
|
122
|
+
|
|
123
|
+
# Multi-turn conversation with memory context
|
|
124
|
+
KaizenStat.ask_followup("What should I do to handle the missing values or high cardinality?")
|
|
112
125
|
```
|
|
113
126
|
|
|
114
127
|
### 2. Command Line Interface (CLI)
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="kaizenstat",
|
|
5
|
-
version="0.2.2",
|
|
5
|
+
version="0.2.4",
|
|
6
6
|
author="Masuddar Rahman",
|
|
7
7
|
description="Zero-friction AutoML + Data Cleaning Toolkit",
|
|
8
8
|
long_description=open("README.md").read() if open("README.md") else "",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|