nexora-prediction 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexora_prediction-0.1.0/LICENSE +21 -0
- nexora_prediction-0.1.0/PKG-INFO +327 -0
- nexora_prediction-0.1.0/README.md +250 -0
- nexora_prediction-0.1.0/nexora/__init__.py +11 -0
- nexora_prediction-0.1.0/nexora/cli/__init__.py +5 -0
- nexora_prediction-0.1.0/nexora/cli/main.py +45 -0
- nexora_prediction-0.1.0/nexora/codegen/__init__.py +5 -0
- nexora_prediction-0.1.0/nexora/codegen/docker_gen.py +113 -0
- nexora_prediction-0.1.0/nexora/codegen/fastapi_gen.py +382 -0
- nexora_prediction-0.1.0/nexora/codegen/flask_gen.py +214 -0
- nexora_prediction-0.1.0/nexora/codegen/mlflow_gen.py +191 -0
- nexora_prediction-0.1.0/nexora/codegen/notebook_gen.py +250 -0
- nexora_prediction-0.1.0/nexora/codegen/pipeline_gen.py +165 -0
- nexora_prediction-0.1.0/nexora/codegen/script.py +186 -0
- nexora_prediction-0.1.0/nexora/codegen/streamlit_gen.py +320 -0
- nexora_prediction-0.1.0/nexora/config.py +40 -0
- nexora_prediction-0.1.0/nexora/core.py +196 -0
- nexora_prediction-0.1.0/nexora/explainer/__init__.py +5 -0
- nexora_prediction-0.1.0/nexora/explainer/llm_explainer.py +155 -0
- nexora_prediction-0.1.0/nexora/explainer/pdp.py +49 -0
- nexora_prediction-0.1.0/nexora/explainer/sensitivity.py +47 -0
- nexora_prediction-0.1.0/nexora/explainer/shap_explainer.py +94 -0
- nexora_prediction-0.1.0/nexora/export/__init__.py +1 -0
- nexora_prediction-0.1.0/nexora/export/codegen.py +456 -0
- nexora_prediction-0.1.0/nexora/io/__init__.py +6 -0
- nexora_prediction-0.1.0/nexora/io/loaders.py +68 -0
- nexora_prediction-0.1.0/nexora/io/remote.py +193 -0
- nexora_prediction-0.1.0/nexora/io/serializer.py +55 -0
- nexora_prediction-0.1.0/nexora/io/versioning.py +52 -0
- nexora_prediction-0.1.0/nexora/models/__init__.py +7 -0
- nexora_prediction-0.1.0/nexora/models/registry.py +268 -0
- nexora_prediction-0.1.0/nexora/models/task_detector.py +39 -0
- nexora_prediction-0.1.0/nexora/models/trainer.py +297 -0
- nexora_prediction-0.1.0/nexora/monitor/diagnostics.py +203 -0
- nexora_prediction-0.1.0/nexora/monitor/drift.py +121 -0
- nexora_prediction-0.1.0/nexora/monitor/performance.py +69 -0
- nexora_prediction-0.1.0/nexora/preprocessing/__init__.py +8 -0
- nexora_prediction-0.1.0/nexora/preprocessing/pipeline_builder.py +195 -0
- nexora_prediction-0.1.0/nexora/preprocessing/text_processor.py +67 -0
- nexora_prediction-0.1.0/nexora/profiler/__init__.py +9 -0
- nexora_prediction-0.1.0/nexora/profiler/dataset_profile.py +252 -0
- nexora_prediction-0.1.0/nexora/report.py +927 -0
- nexora_prediction-0.1.0/nexora/testing/plugin.py +28 -0
- nexora_prediction-0.1.0/nexora/types.py +198 -0
- nexora_prediction-0.1.0/nexora_prediction.egg-info/PKG-INFO +327 -0
- nexora_prediction-0.1.0/nexora_prediction.egg-info/SOURCES.txt +59 -0
- nexora_prediction-0.1.0/nexora_prediction.egg-info/dependency_links.txt +1 -0
- nexora_prediction-0.1.0/nexora_prediction.egg-info/entry_points.txt +5 -0
- nexora_prediction-0.1.0/nexora_prediction.egg-info/requires.txt +63 -0
- nexora_prediction-0.1.0/nexora_prediction.egg-info/top_level.txt +1 -0
- nexora_prediction-0.1.0/pyproject.toml +106 -0
- nexora_prediction-0.1.0/setup.cfg +4 -0
- nexora_prediction-0.1.0/tests/test_cli.py +22 -0
- nexora_prediction-0.1.0/tests/test_codegen.py +352 -0
- nexora_prediction-0.1.0/tests/test_core.py +72 -0
- nexora_prediction-0.1.0/tests/test_explainer.py +14 -0
- nexora_prediction-0.1.0/tests/test_llm_explainer.py +72 -0
- nexora_prediction-0.1.0/tests/test_monitor.py +95 -0
- nexora_prediction-0.1.0/tests/test_pdp_sensitivity.py +54 -0
- nexora_prediction-0.1.0/tests/test_properties.py +67 -0
- nexora_prediction-0.1.0/tests/test_remote.py +135 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jeet Patel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nexora-prediction
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Autonomous predictive analytics from CSV to trained model and runnable code.
|
|
5
|
+
Author: Jeet Patel
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://nexoraprediction.netlify.app
|
|
8
|
+
Project-URL: Repository, https://github.com/jeet2005/Nexora
|
|
9
|
+
Project-URL: Issues, https://github.com/jeet2005/Nexora/issues
|
|
10
|
+
Keywords: automl,machine-learning,predictive-analytics,data-science
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: pandas>=2.0
|
|
22
|
+
Requires-Dist: numpy>=1.24
|
|
23
|
+
Requires-Dist: scikit-learn>=1.3
|
|
24
|
+
Requires-Dist: xgboost>=2.0
|
|
25
|
+
Requires-Dist: lightgbm>=4.0
|
|
26
|
+
Requires-Dist: catboost>=1.2
|
|
27
|
+
Requires-Dist: optuna>=3.0
|
|
28
|
+
Requires-Dist: shap>=0.44
|
|
29
|
+
Requires-Dist: imbalanced-learn>=0.11
|
|
30
|
+
Requires-Dist: jinja2>=3.1
|
|
31
|
+
Requires-Dist: click>=8.1
|
|
32
|
+
Requires-Dist: rich>=13.0
|
|
33
|
+
Requires-Dist: tqdm>=4.65
|
|
34
|
+
Requires-Dist: joblib>=1.3
|
|
35
|
+
Requires-Dist: pydantic>=2.0
|
|
36
|
+
Requires-Dist: requests>=2.31
|
|
37
|
+
Requires-Dist: pyarrow>=14.0
|
|
38
|
+
Requires-Dist: openpyxl>=3.1
|
|
39
|
+
Requires-Dist: matplotlib>=3.7
|
|
40
|
+
Requires-Dist: seaborn>=0.12
|
|
41
|
+
Requires-Dist: duckdb>=0.10.0
|
|
42
|
+
Requires-Dist: evidently>=0.4.0
|
|
43
|
+
Requires-Dist: huggingface_hub>=0.20.0
|
|
44
|
+
Requires-Dist: gradio>=4.0.0
|
|
45
|
+
Requires-Dist: marimo>=0.1.0
|
|
46
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
47
|
+
Requires-Dist: umap-learn>=0.5.0
|
|
48
|
+
Requires-Dist: dvc>=3.0.0
|
|
49
|
+
Provides-Extra: dev
|
|
50
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
51
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
52
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
53
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
54
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
55
|
+
Requires-Dist: mypy>=1.5; extra == "dev"
|
|
56
|
+
Requires-Dist: hypothesis>=6.0.0; extra == "dev"
|
|
57
|
+
Requires-Dist: twine>=4.0; extra == "dev"
|
|
58
|
+
Provides-Extra: sql
|
|
59
|
+
Requires-Dist: sqlalchemy>=2.0; extra == "sql"
|
|
60
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "sql"
|
|
61
|
+
Provides-Extra: mongo
|
|
62
|
+
Requires-Dist: pymongo>=4.5; extra == "mongo"
|
|
63
|
+
Provides-Extra: cloud
|
|
64
|
+
Requires-Dist: boto3>=1.34; extra == "cloud"
|
|
65
|
+
Requires-Dist: gspread>=5.12; extra == "cloud"
|
|
66
|
+
Provides-Extra: ui
|
|
67
|
+
Requires-Dist: streamlit>=1.30; extra == "ui"
|
|
68
|
+
Provides-Extra: llm
|
|
69
|
+
Requires-Dist: ollama>=0.1; extra == "llm"
|
|
70
|
+
Requires-Dist: openai>=1.0; extra == "llm"
|
|
71
|
+
Provides-Extra: export
|
|
72
|
+
Requires-Dist: weasyprint>=60.0; extra == "export"
|
|
73
|
+
Requires-Dist: nbformat>=5.9; extra == "export"
|
|
74
|
+
Provides-Extra: all
|
|
75
|
+
Requires-Dist: nexora-prediction[cloud,export,llm,mongo,sql,ui]; extra == "all"
|
|
76
|
+
Dynamic: license-file
|
|
77
|
+
|
|
78
|
+
# Nexora
|
|
79
|
+
|
|
80
|
+
An autonomous predictive analytics platform that profiles datasets, builds optimized preprocessing pipelines, trains reproducible model registries, runs batch predictions, monitors feature drift, and provides a grounded, interactive AI chat assistant for learning about your data, all from a single CSV upload.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
[](https://github.com/jeet2005/Nexora/actions/workflows/ci-backend.yml)
|
|
85
|
+
[](https://github.com/jeet2005/Nexora/actions/workflows/ci-frontend.yml)
|
|
86
|
+
[](https://github.com/jeet2005/Nexora/stargazers)
|
|
87
|
+
[](https://github.com/jeet2005/Nexora/issues)
|
|
88
|
+
[](LICENSE)
|
|
89
|
+
[](CONTRIBUTING.md)
|
|
90
|
+
[](https://fastapi.tiangolo.com/)
|
|
91
|
+
[](https://www.python.org/)
|
|
92
|
+
[](https://scikit-learn.org/)
|
|
93
|
+
[](https://xgboost.ai/)
|
|
94
|
+
[](https://lightgbm.ai/)
|
|
95
|
+
[](https://catboost.ai/)
|
|
96
|
+
[](https://github.com/slundberg/shap)
|
|
97
|
+
[](https://reactjs.org/)
|
|
98
|
+
[](https://www.typescriptlang.org/)
|
|
99
|
+
[](https://recharts.org/)
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Why Nexora?
|
|
104
|
+
|
|
105
|
+
Data scientists and developers often spend hours writing repetitive code for data profiling, exploratory analysis, preprocessing, model benchmarking, and production endpoint deployments. Nexora bridges this gap by serving as a unified prediction engine.
|
|
106
|
+
|
|
107
|
+
By uploading a single CSV dataset, developers can instantly audit dataset health, clean features, benchmark leading machine learning models side-by-side, analyze SHAP explainability insights, download compiled PDF reports, converse with a grounded AI dataset assistant, and deploy production-ready prediction API endpoints secured by unique API keys.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Live Deployments
|
|
112
|
+
|
|
113
|
+
| Component | URL | Host Provider |
|
|
114
|
+
| :--- | :--- | :--- |
|
|
115
|
+
| **Frontend Web App** | [nexoraprediction.netlify.app](https://nexoraprediction.netlify.app/) | Netlify |
|
|
116
|
+
| **Backend API** | [nexora-360r.onrender.com](https://nexora-360r.onrender.com/) | Render |
|
|
117
|
+
| **API Documentation** | [nexora-360r.onrender.com/docs](https://nexora-360r.onrender.com/docs) | Render |
|
|
118
|
+
|
|
119
|
+
*Note: The backend API runs on Render's free tier and spins down after periods of inactivity. Please allow 30 to 60 seconds for the initial cold start when first accessing the application.*
|
|
120
|
+
|
|
121
|
+
*Note: The educational assistant (Ollama integration) requires a local Ollama instance and is only active when running the application locally. See local setup guidelines below.*
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## System Architecture
|
|
126
|
+
|
|
127
|
+
The diagram below outlines the end-to-end data flow, processing components, and communication layers in Nexora:
|
|
128
|
+
|
|
129
|
+
```mermaid
|
|
130
|
+
graph TD
|
|
131
|
+
subgraph Client Layer
|
|
132
|
+
A[React Frontend]
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
subgraph Service API Layer
|
|
136
|
+
B[FastAPI Backend Gateway]
|
|
137
|
+
C[Dataset Analyzer & Validator]
|
|
138
|
+
D[Preprocessing Engine]
|
|
139
|
+
E[Training Manager & Registry]
|
|
140
|
+
F[SHAP Explainability Engine]
|
|
141
|
+
G[Grounded Chat Agent]
|
|
142
|
+
H[API Key Deployment Manager]
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
subgraph Storage & Compute
|
|
146
|
+
I[(Local Uploads / MongoDB)]
|
|
147
|
+
J[Local Ollama / Phi-3 Mini]
|
|
148
|
+
K[ML Models: XGBoost, CatBoost, LightGBM, Scikit-Learn]
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
A -->|Upload CSV & Configuration| B
|
|
152
|
+
B --> C
|
|
153
|
+
B --> D
|
|
154
|
+
B --> E
|
|
155
|
+
B --> F
|
|
156
|
+
B --> G
|
|
157
|
+
B --> H
|
|
158
|
+
|
|
159
|
+
C <-->|Read / Write Datasets| I
|
|
160
|
+
D <-->|Save Clean Pipelines| I
|
|
161
|
+
E <-->|Real-time Socket Updates| A
|
|
162
|
+
E <-->|Benchmark & Serialize| K
|
|
163
|
+
F -->|Render Report| I
|
|
164
|
+
G <-->|Dataset Context Queries| J
|
|
165
|
+
H <-->|Authorize Keys & Serve Predictions| K
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Core Features
|
|
171
|
+
|
|
172
|
+
### 1. Dataset Intelligence Engine
|
|
173
|
+
* **Automated CSV Validation** - Formats columns, assesses size boundaries, and verifies tabular file integrity.
|
|
174
|
+
* **Health Profiling** - Evaluates structural completeness, statistical anomalies, and generates per-column scorecards.
|
|
175
|
+
* **Preview and Distributions** - Offers statistical summaries, skew metrics, and categorical balance diagnostics.
|
|
176
|
+
|
|
177
|
+
### 2. Dynamic Preprocessing Pipelines
|
|
178
|
+
* **Type Parsing** - Separates numerical parameters, categorical labels, datetimes, and identifier variables.
|
|
179
|
+
* **Intelligent Preprocessing** - Implements missing values imputation, standard scaling, target-label encoding, outlier detection, and duplicate record cleaning.
|
|
180
|
+
* **Interactive Configuration** - Provides controls to select prediction targets and customize individual preprocessing steps.
|
|
181
|
+
|
|
182
|
+
### 3. Prediction Studio and Benchmarking
|
|
183
|
+
* **Model Registry** - Supports multiple algorithms including XGBoost, CatBoost, LightGBM, and Scikit-Learn ensembles.
|
|
184
|
+
* **Training Pipeline** - Executes cross-validation splits, train-test isolation, and hyperparameter sweeps.
|
|
185
|
+
* **WebSocket Leaderboard** - Streams active model training metrics and charts real-time scores directly to the UI.
|
|
186
|
+
* **Comparison Arena** - Visualizes metrics, prediction drift charts, and latency histograms of trained models.
|
|
187
|
+
|
|
188
|
+
### 4. Interactive Data Visualization
|
|
189
|
+
* **Multi-Chart Dashboard** - Displays numerical trends, categorical patterns, and completeness heatmaps.
|
|
190
|
+
* **Data Health Visualization** - Compiles data quality stats, missing-record rates, and unique-feature counts.
|
|
191
|
+
* **Correlation Insights** - Flags linear dependencies, high associations, and outlier counts.
|
|
192
|
+
|
|
193
|
+
### 5. Production Suite
|
|
194
|
+
* **API Endpoints** - Deploys production-grade prediction endpoints secured by custom API keys.
|
|
195
|
+
* **Batch Processing** - Enables bulk uploads to retrieve fully enriched output prediction sheets.
|
|
196
|
+
* **Drift Detection** - Compares historical prediction request signatures to highlight potential target concept drift.
|
|
197
|
+
* **Grounded LLM Chat** - Integrates local Ollama models (Phi-3 Mini) as a dataset-aware tutor that answers questions about data distribution trends.
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Technical Stack
|
|
202
|
+
|
|
203
|
+
| Layer | Technologies |
|
|
204
|
+
| :--- | :--- |
|
|
205
|
+
| **Frontend Web App** | React 18, Vite, TypeScript, Tailwind CSS, Framer Motion, Recharts, Axios, Lucide Icons |
|
|
206
|
+
| **Backend Service API** | Python 3.11, FastAPI, Uvicorn, Pydantic, Pandas, NumPy, Scikit-learn, CatBoost, LightGBM, XGBoost |
|
|
207
|
+
| **Data Persistence** | MongoDB Atlas / Local File Storage |
|
|
208
|
+
| **Local LLM Integration** | Ollama Engine (Phi-3 Mini) |
|
|
209
|
+
| **Infrastructure Platforms** | Netlify (Frontend), Render (Backend) |
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Local Development
|
|
214
|
+
|
|
215
|
+
### Installation Prerequisites
|
|
216
|
+
|
|
217
|
+
| Dependency | Minimum Version |
|
|
218
|
+
| :--- | :--- |
|
|
219
|
+
| Python | 3.11 or higher |
|
|
220
|
+
| Node.js | 20 or higher |
|
|
221
|
+
| npm | 10 or higher |
|
|
222
|
+
| Ollama | Latest (optional, for grounded Q&A) |
|
|
223
|
+
|
|
224
|
+
### Development Option 1: Standard Installation
|
|
225
|
+
|
|
226
|
+
#### 1. Clone the Project
|
|
227
|
+
```bash
|
|
228
|
+
git clone https://github.com/jeet2005/Nexora.git
|
|
229
|
+
cd Nexora
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
#### 2. Configure Backend Service
|
|
233
|
+
```bash
|
|
234
|
+
cd backend
|
|
235
|
+
python -m venv .venv
|
|
236
|
+
|
|
237
|
+
# Activate Virtual Environment (Windows)
|
|
238
|
+
.venv\Scripts\activate
|
|
239
|
+
|
|
240
|
+
# Activate Virtual Environment (macOS / Linux)
|
|
241
|
+
source .venv/bin/activate
|
|
242
|
+
|
|
243
|
+
# Install dependencies and setup configuration
|
|
244
|
+
pip install -r requirements.txt
|
|
245
|
+
cp .env.example .env
|
|
246
|
+
|
|
247
|
+
# Run development server
|
|
248
|
+
python run.py
|
|
249
|
+
```
|
|
250
|
+
The backend service will be available at `http://localhost:8000`. You can test endpoints on Swagger UI at `http://localhost:8000/docs`.
|
|
251
|
+
|
|
252
|
+
#### 3. Configure Frontend Application
|
|
253
|
+
```bash
|
|
254
|
+
cd ../frontend
|
|
255
|
+
npm install
|
|
256
|
+
cp .env.example .env.local
|
|
257
|
+
|
|
258
|
+
# Run development server
|
|
259
|
+
npm run dev
|
|
260
|
+
```
|
|
261
|
+
The React frontend application will be active at `http://localhost:5173`.
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
### Development Option 2: Docker Compose Setup
|
|
266
|
+
|
|
267
|
+
Run the entire stack (FastAPI, React, and MongoDB) with a single command:
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
docker compose up --build
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
* **Frontend Web App**: Access at `http://localhost:3000`
|
|
274
|
+
* **Backend API**: Access at `http://localhost:8000`
|
|
275
|
+
* **MongoDB Instance**: Running on port `27017`
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
### Development Option 3: Makefile Shortcuts
|
|
280
|
+
|
|
281
|
+
If you have Make installed, you can orchestrate development commands directly from the project root:
|
|
282
|
+
|
|
283
|
+
* Install all package dependencies: `make install`
|
|
284
|
+
* Launch backend locally: `make dev-backend`
|
|
285
|
+
* Launch frontend locally: `make dev-frontend`
|
|
286
|
+
* Run backend pytest suite: `make test`
|
|
287
|
+
* Format all file types: `make format`
|
|
288
|
+
* Spin up Docker containers: `make docker-up`
|
|
289
|
+
* Spin down Docker containers: `make docker-down`
|
|
290
|
+
|
|
291
|
+
---
|
|
292
|
+
|
|
293
|
+
## Grounded Q&A Assistant Setup (Optional)
|
|
294
|
+
|
|
295
|
+
To enable the dataset assistant using a local LLM instance:
|
|
296
|
+
|
|
297
|
+
1. Download and install [Ollama](https://ollama.com/).
|
|
298
|
+
2. Pull the default micro-LLM model in your terminal:
|
|
299
|
+
```bash
|
|
300
|
+
ollama pull phi3:mini
|
|
301
|
+
```
|
|
302
|
+
3. Keep Ollama active in the background. The assistant will detect local hosting at `http://localhost:11434` and enable custom educational conversations.
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## Repository Roadmap
|
|
307
|
+
|
|
308
|
+
- [ ] Add Pytest code coverage reports in the Backend CI pipeline.
|
|
309
|
+
- [ ] Implement multi-file comparison dashboards within the Frontend page.
|
|
310
|
+
- [ ] Add support for automated time-series forecasting hyperparameter tuning.
|
|
311
|
+
- [ ] Integrate PostgreSQL database schema mappings for enterprise persistence layers.
|
|
312
|
+
- [ ] Add REST API key rotation options inside the Production UI.
|
|
313
|
+
- [ ] Create automated end-to-end integration tests using Playwright.
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## Contributing and Governance
|
|
318
|
+
|
|
319
|
+
Contributions are welcome. Please read our [Contributing Guidelines](CONTRIBUTING.md) to understand branch conventions, pull request structures, and developer standards. Ensure all contributions align with our [Code of Conduct](CODE_OF_CONDUCT.md).
|
|
320
|
+
|
|
321
|
+
For vulnerability notifications, refer to our [Security Policy](SECURITY.md).
|
|
322
|
+
|
|
323
|
+
---
|
|
324
|
+
|
|
325
|
+
## License
|
|
326
|
+
|
|
327
|
+
Nexora is open-source software licensed under the [MIT License](LICENSE).
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# Nexora
|
|
2
|
+
|
|
3
|
+
An autonomous predictive analytics platform that profiles datasets, builds optimized preprocessing pipelines, trains reproducible model registries, runs batch predictions, monitors feature drift, and provides a grounded, interactive AI chat assistant for learning about your data, all from a single CSV upload.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
[](https://github.com/jeet2005/Nexora/actions/workflows/ci-backend.yml)
|
|
8
|
+
[](https://github.com/jeet2005/Nexora/actions/workflows/ci-frontend.yml)
|
|
9
|
+
[](https://github.com/jeet2005/Nexora/stargazers)
|
|
10
|
+
[](https://github.com/jeet2005/Nexora/issues)
|
|
11
|
+
[](LICENSE)
|
|
12
|
+
[](CONTRIBUTING.md)
|
|
13
|
+
[](https://fastapi.tiangolo.com/)
|
|
14
|
+
[](https://www.python.org/)
|
|
15
|
+
[](https://scikit-learn.org/)
|
|
16
|
+
[](https://xgboost.ai/)
|
|
17
|
+
[](https://lightgbm.ai/)
|
|
18
|
+
[](https://catboost.ai/)
|
|
19
|
+
[](https://github.com/slundberg/shap)
|
|
20
|
+
[](https://reactjs.org/)
|
|
21
|
+
[](https://www.typescriptlang.org/)
|
|
22
|
+
[](https://recharts.org/)
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Why Nexora?
|
|
27
|
+
|
|
28
|
+
Data scientists and developers often spend hours writing repetitive code for data profiling, exploratory analysis, preprocessing, model benchmarking, and production endpoint deployments. Nexora bridges this gap by serving as a unified prediction engine.
|
|
29
|
+
|
|
30
|
+
By uploading a single CSV dataset, developers can instantly audit dataset health, clean features, benchmark leading machine learning models side-by-side, analyze SHAP explainability insights, download compiled PDF reports, converse with a grounded AI dataset assistant, and deploy production-ready prediction API endpoints secured by unique API keys.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Live Deployments
|
|
35
|
+
|
|
36
|
+
| Component | URL | Host Provider |
|
|
37
|
+
| :--- | :--- | :--- |
|
|
38
|
+
| **Frontend Web App** | [nexoraprediction.netlify.app](https://nexoraprediction.netlify.app/) | Netlify |
|
|
39
|
+
| **Backend API** | [nexora-360r.onrender.com](https://nexora-360r.onrender.com/) | Render |
|
|
40
|
+
| **API Documentation** | [nexora-360r.onrender.com/docs](https://nexora-360r.onrender.com/docs) | Render |
|
|
41
|
+
|
|
42
|
+
*Note: The backend API runs on Render's free tier and spins down after periods of inactivity. Please allow 30 to 60 seconds for the initial cold start when first accessing the application.*
|
|
43
|
+
|
|
44
|
+
*Note: The educational assistant (Ollama integration) requires a local Ollama instance and is only active when running the application locally. See local setup guidelines below.*
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## System Architecture
|
|
49
|
+
|
|
50
|
+
The diagram below outlines the end-to-end data flow, processing components, and communication layers in Nexora:
|
|
51
|
+
|
|
52
|
+
```mermaid
|
|
53
|
+
graph TD
|
|
54
|
+
subgraph Client Layer
|
|
55
|
+
A[React Frontend]
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
subgraph Service API Layer
|
|
59
|
+
B[FastAPI Backend Gateway]
|
|
60
|
+
C[Dataset Analyzer & Validator]
|
|
61
|
+
D[Preprocessing Engine]
|
|
62
|
+
E[Training Manager & Registry]
|
|
63
|
+
F[SHAP Explainability Engine]
|
|
64
|
+
G[Grounded Chat Agent]
|
|
65
|
+
H[API Key Deployment Manager]
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
subgraph Storage & Compute
|
|
69
|
+
I[(Local Uploads / MongoDB)]
|
|
70
|
+
J[Local Ollama / Phi-3 Mini]
|
|
71
|
+
K[ML Models: XGBoost, CatBoost, LightGBM, Scikit-Learn]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
A -->|Upload CSV & Configuration| B
|
|
75
|
+
B --> C
|
|
76
|
+
B --> D
|
|
77
|
+
B --> E
|
|
78
|
+
B --> F
|
|
79
|
+
B --> G
|
|
80
|
+
B --> H
|
|
81
|
+
|
|
82
|
+
C <-->|Read / Write Datasets| I
|
|
83
|
+
D <-->|Save Clean Pipelines| I
|
|
84
|
+
E <-->|Real-time Socket Updates| A
|
|
85
|
+
E <-->|Benchmark & Serialize| K
|
|
86
|
+
F -->|Render Report| I
|
|
87
|
+
G <-->|Dataset Context Queries| J
|
|
88
|
+
H <-->|Authorize Keys & Serve Predictions| K
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Core Features
|
|
94
|
+
|
|
95
|
+
### 1. Dataset Intelligence Engine
|
|
96
|
+
* **Automated CSV Validation** - Formats columns, assesses size boundaries, and verifies tabular file integrity.
|
|
97
|
+
* **Health Profiling** - Evaluates structural completeness, statistical anomalies, and generates per-column scorecards.
|
|
98
|
+
* **Preview and Distributions** - Offers statistical summaries, skew metrics, and categorical balance diagnostics.
|
|
99
|
+
|
|
100
|
+
### 2. Dynamic Preprocessing Pipelines
|
|
101
|
+
* **Type Parsing** - Separates numerical parameters, categorical labels, datetimes, and identifier variables.
|
|
102
|
+
* **Intelligent Preprocessing** - Implements missing values imputation, standard scaling, target-label encoding, outlier detection, and duplicate record cleaning.
|
|
103
|
+
* **Interactive Configuration** - Provides controls to select prediction targets and customize individual preprocessing steps.
|
|
104
|
+
|
|
105
|
+
### 3. Prediction Studio and Benchmarking
|
|
106
|
+
* **Model Registry** - Supports multiple algorithms including XGBoost, CatBoost, LightGBM, and Scikit-Learn ensembles.
|
|
107
|
+
* **Training Pipeline** - Executes cross-validation splits, train-test isolation, and hyperparameter sweeps.
|
|
108
|
+
* **WebSocket Leaderboard** - Streams active model training metrics and charts real-time scores directly to the UI.
|
|
109
|
+
* **Comparison Arena** - Visualizes metrics, prediction drift charts, and latency histograms of trained models.
|
|
110
|
+
|
|
111
|
+
### 4. Interactive Data Visualization
|
|
112
|
+
* **Multi-Chart Dashboard** - Displays numerical trends, categorical patterns, and completeness heatmaps.
|
|
113
|
+
* **Data Health Visualization** - Compiles data quality stats, missing-record rates, and unique-feature counts.
|
|
114
|
+
* **Correlation Insights** - Flags linear dependencies, high associations, and outlier counts.
|
|
115
|
+
|
|
116
|
+
### 5. Production Suite
|
|
117
|
+
* **API Endpoints** - Deploys production-grade prediction endpoints secured by custom API keys.
|
|
118
|
+
* **Batch Processing** - Enables bulk uploads to retrieve fully enriched output prediction sheets.
|
|
119
|
+
* **Drift Detection** - Compares historical prediction request signatures to highlight potential target concept drift.
|
|
120
|
+
* **Grounded LLM Chat** - Integrates local Ollama models (Phi-3 Mini) as a dataset-aware tutor that answers questions about data distribution trends.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Technical Stack
|
|
125
|
+
|
|
126
|
+
| Layer | Technologies |
|
|
127
|
+
| :--- | :--- |
|
|
128
|
+
| **Frontend Web App** | React 18, Vite, TypeScript, Tailwind CSS, Framer Motion, Recharts, Axios, Lucide Icons |
|
|
129
|
+
| **Backend Service API** | Python 3.11, FastAPI, Uvicorn, Pydantic, Pandas, NumPy, Scikit-learn, CatBoost, LightGBM, XGBoost |
|
|
130
|
+
| **Data Persistence** | MongoDB Atlas / Local File Storage |
|
|
131
|
+
| **Local LLM Integration** | Ollama Engine (Phi-3 Mini) |
|
|
132
|
+
| **Infrastructure Platforms** | Netlify (Frontend), Render (Backend) |
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Local Development
|
|
137
|
+
|
|
138
|
+
### Installation Prerequisites
|
|
139
|
+
|
|
140
|
+
| Dependency | Minimum Version |
|
|
141
|
+
| :--- | :--- |
|
|
142
|
+
| Python | 3.11 or higher |
|
|
143
|
+
| Node.js | 20 or higher |
|
|
144
|
+
| npm | 10 or higher |
|
|
145
|
+
| Ollama | Latest (optional, for grounded Q&A) |
|
|
146
|
+
|
|
147
|
+
### Development Option 1: Standard Installation
|
|
148
|
+
|
|
149
|
+
#### 1. Clone the Project
|
|
150
|
+
```bash
|
|
151
|
+
git clone https://github.com/jeet2005/Nexora.git
|
|
152
|
+
cd Nexora
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
#### 2. Configure Backend Service
|
|
156
|
+
```bash
|
|
157
|
+
cd backend
|
|
158
|
+
python -m venv .venv
|
|
159
|
+
|
|
160
|
+
# Activate Virtual Environment (Windows)
|
|
161
|
+
.venv\Scripts\activate
|
|
162
|
+
|
|
163
|
+
# Activate Virtual Environment (macOS / Linux)
|
|
164
|
+
source .venv/bin/activate
|
|
165
|
+
|
|
166
|
+
# Install dependencies and setup configuration
|
|
167
|
+
pip install -r requirements.txt
|
|
168
|
+
cp .env.example .env
|
|
169
|
+
|
|
170
|
+
# Run development server
|
|
171
|
+
python run.py
|
|
172
|
+
```
|
|
173
|
+
The backend service will be available at `http://localhost:8000`. You can test endpoints on Swagger UI at `http://localhost:8000/docs`.
|
|
174
|
+
|
|
175
|
+
#### 3. Configure Frontend Application
|
|
176
|
+
```bash
|
|
177
|
+
cd ../frontend
|
|
178
|
+
npm install
|
|
179
|
+
cp .env.example .env.local
|
|
180
|
+
|
|
181
|
+
# Run development server
|
|
182
|
+
npm run dev
|
|
183
|
+
```
|
|
184
|
+
The React frontend application will be active at `http://localhost:5173`.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
### Development Option 2: Docker Compose Setup
|
|
189
|
+
|
|
190
|
+
Run the entire stack (FastAPI, React, and MongoDB) with a single command:
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
docker compose up --build
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
* **Frontend Web App**: Access at `http://localhost:3000`
|
|
197
|
+
* **Backend API**: Access at `http://localhost:8000`
|
|
198
|
+
* **MongoDB Instance**: Running on port `27017`
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
### Development Option 3: Makefile Shortcuts
|
|
203
|
+
|
|
204
|
+
If you have Make installed, you can orchestrate development commands directly from the project root:
|
|
205
|
+
|
|
206
|
+
* Install all package dependencies: `make install`
|
|
207
|
+
* Launch backend locally: `make dev-backend`
|
|
208
|
+
* Launch frontend locally: `make dev-frontend`
|
|
209
|
+
* Run backend pytest suite: `make test`
|
|
210
|
+
* Format all file types: `make format`
|
|
211
|
+
* Spin up Docker containers: `make docker-up`
|
|
212
|
+
* Spin down Docker containers: `make docker-down`
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Grounded Q&A Assistant Setup (Optional)
|
|
217
|
+
|
|
218
|
+
To enable the dataset assistant using a local LLM instance:
|
|
219
|
+
|
|
220
|
+
1. Download and install [Ollama](https://ollama.com/).
|
|
221
|
+
2. Pull the default micro-LLM model in your terminal:
|
|
222
|
+
```bash
|
|
223
|
+
ollama pull phi3:mini
|
|
224
|
+
```
|
|
225
|
+
3. Keep Ollama active in the background. The assistant will detect local hosting at `http://localhost:11434` and enable custom educational conversations.
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Repository Roadmap
|
|
230
|
+
|
|
231
|
+
- [ ] Add Pytest code coverage reports in the Backend CI pipeline.
|
|
232
|
+
- [ ] Implement multi-file comparison dashboards within the Frontend page.
|
|
233
|
+
- [ ] Add support for automated time-series forecasting hyperparameter tuning.
|
|
234
|
+
- [ ] Integrate PostgreSQL database schema mappings for enterprise persistence layers.
|
|
235
|
+
- [ ] Add REST API key rotation options inside the Production UI.
|
|
236
|
+
- [ ] Create automated end-to-end integration tests using Playwright.
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Contributing and Governance
|
|
241
|
+
|
|
242
|
+
Contributions are welcome. Please read our [Contributing Guidelines](CONTRIBUTING.md) to understand branch conventions, pull request structures, and developer standards. Ensure all contributions align with our [Code of Conduct](CODE_OF_CONDUCT.md).
|
|
243
|
+
|
|
244
|
+
For vulnerability notifications, refer to our [Security Policy](SECURITY.md).
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
Nexora is open-source software licensed under the [MIT License](LICENSE).
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Public package API for Nexora v0.1.0."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from nexora.core import Nexora
|
|
5
|
+
from nexora.report import NexoraReport
|
|
6
|
+
# Alias for backward compatibility
NexoraPrediction = Nexora

# Public API of the package. Every entry must be a name bound in this module:
# `from nexora import *` raises AttributeError for any listed name that is
# missing. "DatasetProfile" and "ModelResult" were previously listed without
# being imported here, so they are omitted.
# TODO(review): re-export them (import + list) once their defining module is
# confirmed.
__all__ = ["Nexora", "NexoraReport", "NexoraPrediction"]

__version__ = "0.1.0"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Click-based CLI for the Nexora MVP."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from nexora import Nexora
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.group()
def cli() -> None:
    """Nexora predictive analytics CLI."""
    # Root command group: it carries no logic of its own and only serves as
    # the attachment point for subcommands registered via @cli.command().
    # The docstring above is what click shows as the group's help text.
    return None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@cli.command()
@click.argument(
    "data_csv",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--target", required=True, help="Target column to predict.")
@click.option(
    "--out",
    "output_path",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Output .nx session path.",
)
@click.option(
    "--max-models",
    default=6,
    show_default=True,
    type=int,
    help="Maximum number of MVP registry models to train.",
)
def train(data_csv: Path, target: str, output_path: Path | None, max_models: int) -> None:
    """Train models from a CSV and save a Nexora session."""
    # NOTE: the docstring doubles as the command's --help text, so it is kept terse.

    # Build the engine for the given CSV/target and run it, capped at max_models.
    engine = Nexora(data_csv, target=target)
    report = engine.run(max_models=max_models)

    # Without --out, the session is written next to the input CSV with a
    # ".nx" suffix replacing the original extension.
    if output_path:
        destination = output_path
    else:
        destination = data_csv.with_suffix(".nx")
    saved = report.save(destination)

    # Summary: winning model with its score, then where the session was stored.
    headline = f"Best model: {report.best_model} ({report.best_score_label}={report.best_score:.4f})"
    click.echo(headline)
    click.echo(f"Saved session: {saved}")
    click.echo("")

    # Print at most the first ten leaderboard rows, without the index column.
    top_rows = report.leaderboard.head(10)
    click.echo(top_rows.to_string(index=False))
|