nexora-prediction 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. nexora_prediction-0.1.0/LICENSE +21 -0
  2. nexora_prediction-0.1.0/PKG-INFO +327 -0
  3. nexora_prediction-0.1.0/README.md +250 -0
  4. nexora_prediction-0.1.0/nexora/__init__.py +11 -0
  5. nexora_prediction-0.1.0/nexora/cli/__init__.py +5 -0
  6. nexora_prediction-0.1.0/nexora/cli/main.py +45 -0
  7. nexora_prediction-0.1.0/nexora/codegen/__init__.py +5 -0
  8. nexora_prediction-0.1.0/nexora/codegen/docker_gen.py +113 -0
  9. nexora_prediction-0.1.0/nexora/codegen/fastapi_gen.py +382 -0
  10. nexora_prediction-0.1.0/nexora/codegen/flask_gen.py +214 -0
  11. nexora_prediction-0.1.0/nexora/codegen/mlflow_gen.py +191 -0
  12. nexora_prediction-0.1.0/nexora/codegen/notebook_gen.py +250 -0
  13. nexora_prediction-0.1.0/nexora/codegen/pipeline_gen.py +165 -0
  14. nexora_prediction-0.1.0/nexora/codegen/script.py +186 -0
  15. nexora_prediction-0.1.0/nexora/codegen/streamlit_gen.py +320 -0
  16. nexora_prediction-0.1.0/nexora/config.py +40 -0
  17. nexora_prediction-0.1.0/nexora/core.py +196 -0
  18. nexora_prediction-0.1.0/nexora/explainer/__init__.py +5 -0
  19. nexora_prediction-0.1.0/nexora/explainer/llm_explainer.py +155 -0
  20. nexora_prediction-0.1.0/nexora/explainer/pdp.py +49 -0
  21. nexora_prediction-0.1.0/nexora/explainer/sensitivity.py +47 -0
  22. nexora_prediction-0.1.0/nexora/explainer/shap_explainer.py +94 -0
  23. nexora_prediction-0.1.0/nexora/export/__init__.py +1 -0
  24. nexora_prediction-0.1.0/nexora/export/codegen.py +456 -0
  25. nexora_prediction-0.1.0/nexora/io/__init__.py +6 -0
  26. nexora_prediction-0.1.0/nexora/io/loaders.py +68 -0
  27. nexora_prediction-0.1.0/nexora/io/remote.py +193 -0
  28. nexora_prediction-0.1.0/nexora/io/serializer.py +55 -0
  29. nexora_prediction-0.1.0/nexora/io/versioning.py +52 -0
  30. nexora_prediction-0.1.0/nexora/models/__init__.py +7 -0
  31. nexora_prediction-0.1.0/nexora/models/registry.py +268 -0
  32. nexora_prediction-0.1.0/nexora/models/task_detector.py +39 -0
  33. nexora_prediction-0.1.0/nexora/models/trainer.py +297 -0
  34. nexora_prediction-0.1.0/nexora/monitor/diagnostics.py +203 -0
  35. nexora_prediction-0.1.0/nexora/monitor/drift.py +121 -0
  36. nexora_prediction-0.1.0/nexora/monitor/performance.py +69 -0
  37. nexora_prediction-0.1.0/nexora/preprocessing/__init__.py +8 -0
  38. nexora_prediction-0.1.0/nexora/preprocessing/pipeline_builder.py +195 -0
  39. nexora_prediction-0.1.0/nexora/preprocessing/text_processor.py +67 -0
  40. nexora_prediction-0.1.0/nexora/profiler/__init__.py +9 -0
  41. nexora_prediction-0.1.0/nexora/profiler/dataset_profile.py +252 -0
  42. nexora_prediction-0.1.0/nexora/report.py +927 -0
  43. nexora_prediction-0.1.0/nexora/testing/plugin.py +28 -0
  44. nexora_prediction-0.1.0/nexora/types.py +198 -0
  45. nexora_prediction-0.1.0/nexora_prediction.egg-info/PKG-INFO +327 -0
  46. nexora_prediction-0.1.0/nexora_prediction.egg-info/SOURCES.txt +59 -0
  47. nexora_prediction-0.1.0/nexora_prediction.egg-info/dependency_links.txt +1 -0
  48. nexora_prediction-0.1.0/nexora_prediction.egg-info/entry_points.txt +5 -0
  49. nexora_prediction-0.1.0/nexora_prediction.egg-info/requires.txt +63 -0
  50. nexora_prediction-0.1.0/nexora_prediction.egg-info/top_level.txt +1 -0
  51. nexora_prediction-0.1.0/pyproject.toml +106 -0
  52. nexora_prediction-0.1.0/setup.cfg +4 -0
  53. nexora_prediction-0.1.0/tests/test_cli.py +22 -0
  54. nexora_prediction-0.1.0/tests/test_codegen.py +352 -0
  55. nexora_prediction-0.1.0/tests/test_core.py +72 -0
  56. nexora_prediction-0.1.0/tests/test_explainer.py +14 -0
  57. nexora_prediction-0.1.0/tests/test_llm_explainer.py +72 -0
  58. nexora_prediction-0.1.0/tests/test_monitor.py +95 -0
  59. nexora_prediction-0.1.0/tests/test_pdp_sensitivity.py +54 -0
  60. nexora_prediction-0.1.0/tests/test_properties.py +67 -0
  61. nexora_prediction-0.1.0/tests/test_remote.py +135 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 jeet Patel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,327 @@
1
+ Metadata-Version: 2.4
2
+ Name: nexora-prediction
3
+ Version: 0.1.0
4
+ Summary: Autonomous predictive analytics from CSV to trained model and runnable code.
5
+ Author: Jeet Patel
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://nexoraprediction.netlify.app
8
+ Project-URL: Repository, https://github.com/jeet2005/Nexora
9
+ Project-URL: Issues, https://github.com/jeet2005/Nexora/issues
10
+ Keywords: automl,machine-learning,predictive-analytics,data-science
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: pandas>=2.0
22
+ Requires-Dist: numpy>=1.24
23
+ Requires-Dist: scikit-learn>=1.3
24
+ Requires-Dist: xgboost>=2.0
25
+ Requires-Dist: lightgbm>=4.0
26
+ Requires-Dist: catboost>=1.2
27
+ Requires-Dist: optuna>=3.0
28
+ Requires-Dist: shap>=0.44
29
+ Requires-Dist: imbalanced-learn>=0.11
30
+ Requires-Dist: jinja2>=3.1
31
+ Requires-Dist: click>=8.1
32
+ Requires-Dist: rich>=13.0
33
+ Requires-Dist: tqdm>=4.65
34
+ Requires-Dist: joblib>=1.3
35
+ Requires-Dist: pydantic>=2.0
36
+ Requires-Dist: requests>=2.31
37
+ Requires-Dist: pyarrow>=14.0
38
+ Requires-Dist: openpyxl>=3.1
39
+ Requires-Dist: matplotlib>=3.7
40
+ Requires-Dist: seaborn>=0.12
41
+ Requires-Dist: duckdb>=0.10.0
42
+ Requires-Dist: evidently>=0.4.0
43
+ Requires-Dist: huggingface_hub>=0.20.0
44
+ Requires-Dist: gradio>=4.0.0
45
+ Requires-Dist: marimo>=0.1.0
46
+ Requires-Dist: sentence-transformers>=2.2.0
47
+ Requires-Dist: umap-learn>=0.5.0
48
+ Requires-Dist: dvc>=3.0.0
49
+ Provides-Extra: dev
50
+ Requires-Dist: build>=1.2; extra == "dev"
51
+ Requires-Dist: pytest>=7.4; extra == "dev"
52
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
53
+ Requires-Dist: black>=23.0; extra == "dev"
54
+ Requires-Dist: ruff>=0.1; extra == "dev"
55
+ Requires-Dist: mypy>=1.5; extra == "dev"
56
+ Requires-Dist: hypothesis>=6.0.0; extra == "dev"
57
+ Requires-Dist: twine>=4.0; extra == "dev"
58
+ Provides-Extra: sql
59
+ Requires-Dist: sqlalchemy>=2.0; extra == "sql"
60
+ Requires-Dist: psycopg2-binary>=2.9; extra == "sql"
61
+ Provides-Extra: mongo
62
+ Requires-Dist: pymongo>=4.5; extra == "mongo"
63
+ Provides-Extra: cloud
64
+ Requires-Dist: boto3>=1.34; extra == "cloud"
65
+ Requires-Dist: gspread>=5.12; extra == "cloud"
66
+ Provides-Extra: ui
67
+ Requires-Dist: streamlit>=1.30; extra == "ui"
68
+ Provides-Extra: llm
69
+ Requires-Dist: ollama>=0.1; extra == "llm"
70
+ Requires-Dist: openai>=1.0; extra == "llm"
71
+ Provides-Extra: export
72
+ Requires-Dist: weasyprint>=60.0; extra == "export"
73
+ Requires-Dist: nbformat>=5.9; extra == "export"
74
+ Provides-Extra: all
75
+ Requires-Dist: nexora-prediction[cloud,export,llm,mongo,sql,ui]; extra == "all"
76
+ Dynamic: license-file
77
+
78
+ # Nexora
79
+
80
+ An autonomous predictive analytics platform that profiles datasets, builds optimized preprocessing pipelines, trains reproducible model registries, runs batch predictions, monitors feature drift, and provides a grounded, interactive AI chat assistant for learning about your data, all from a single CSV upload.
81
+
82
+ ---
83
+
84
+ [![Backend CI](https://github.com/jeet2005/Nexora/actions/workflows/ci-backend.yml/badge.svg?branch=main)](https://github.com/jeet2005/Nexora/actions/workflows/ci-backend.yml)
85
+ [![Frontend CI](https://github.com/jeet2005/Nexora/actions/workflows/ci-frontend.yml/badge.svg?branch=main)](https://github.com/jeet2005/Nexora/actions/workflows/ci-frontend.yml)
86
+ [![GitHub stars](https://img.shields.io/github/stars/jeet2005/Nexora?style=social)](https://github.com/jeet2005/Nexora/stargazers)
87
+ [![GitHub issues](https://img.shields.io/github/issues/jeet2005/Nexora)](https://github.com/jeet2005/Nexora/issues)
88
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
89
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
90
+ [![Made with FastAPI](https://img.shields.io/badge/Made%20with-FastAPI-009688?logo=fastapi&logoColor=white)](https://fastapi.tiangolo.com/)
91
+ [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=white)](https://www.python.org/)
92
+ [![scikit-learn](https://img.shields.io/badge/scikit--learn-5C9E48?logo=scikit-learn&logoColor=white)](https://scikit-learn.org/)
93
+ [![XGBoost](https://img.shields.io/badge/XGBoost-1A4B8D?logo=xgboost&logoColor=white)](https://xgboost.ai/)
94
+ [![LightGBM](https://img.shields.io/badge/LightGBM-00A8A1?logo=lightgbm&logoColor=white)](https://lightgbm.ai/)
95
+ [![CatBoost](https://img.shields.io/badge/CatBoost-1F8E4B?logo=catboost&logoColor=white)](https://catboost.ai/)
96
+ [![SHAP](https://img.shields.io/badge/SHAP-FF6F00?logo=shap&logoColor=white)](https://github.com/slundberg/shap)
97
+ [![React](https://img.shields.io/badge/Frontend-React-61dafb?logo=react&logoColor=white)](https://reactjs.org/)
98
+ [![TypeScript](https://img.shields.io/badge/TypeScript-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
99
+ [![Recharts](https://img.shields.io/badge/Recharts-181717?logo=recharts&logoColor=white)](https://recharts.org/)
100
+
101
+ ---
102
+
103
+ ## Why Nexora?
104
+
105
+ Data scientists and developers often spend hours writing repetitive code for data profiling, exploratory analysis, preprocessing, model benchmarking, and production endpoint deployments. Nexora bridges this gap by serving as a unified prediction engine.
106
+
107
+ By uploading a single CSV dataset, developers can instantly audit dataset health, clean features, benchmark leading machine learning models side-by-side, analyze SHAP explainability insights, download compiled PDF reports, converse with a grounded AI dataset assistant, and deploy production-ready prediction API endpoints secured by unique API keys.
108
+
109
+ ---
110
+
111
+ ## Live Deployments
112
+
113
+ | Component | URL | Host Provider |
114
+ | :--- | :--- | :--- |
115
+ | **Frontend Web App** | [nexoraprediction.netlify.app](https://nexoraprediction.netlify.app/) | Netlify |
116
+ | **Backend API** | [nexora-360r.onrender.com](https://nexora-360r.onrender.com/) | Render |
117
+ | **API Documentation** | [nexora-360r.onrender.com/docs](https://nexora-360r.onrender.com/docs) | Render |
118
+
119
+ *Note: The backend API runs on Render's free tier and spins down after periods of inactivity. Please allow 30 to 60 seconds for the initial cold start when first accessing the application.*
120
+
121
+ *Note: The educational assistant (Ollama integration) requires a local Ollama instance and is only active when running the application locally. See local setup guidelines below.*
122
+
123
+ ---
124
+
125
+ ## System Architecture
126
+
127
+ The diagram below outlines the end-to-end data flow, processing components, and communication layers in Nexora:
128
+
129
+ ```mermaid
130
+ graph TD
131
+ subgraph Client Layer
132
+ A[React Frontend]
133
+ end
134
+
135
+ subgraph Service API Layer
136
+ B[FastAPI Backend Gateway]
137
+ C[Dataset Analyzer & Validator]
138
+ D[Preprocessing Engine]
139
+ E[Training Manager & Registry]
140
+ F[SHAP Explainability Engine]
141
+ G[Grounded Chat Agent]
142
+ H[API Key Deployment Manager]
143
+ end
144
+
145
+ subgraph Storage & Compute
146
+ I[(Local Uploads / MongoDB)]
147
+ J[Local Ollama / Phi-3 Mini]
148
+ K[ML Models: XGBoost, CatBoost, LightGBM, Scikit-Learn]
149
+ end
150
+
151
+ A -->|Upload CSV & Configuration| B
152
+ B --> C
153
+ B --> D
154
+ B --> E
155
+ B --> F
156
+ B --> G
157
+ B --> H
158
+
159
+ C <-->|Read / Write Datasets| I
160
+ D <-->|Save Clean Pipelines| I
161
+ E <-->|Real-time Socket Updates| A
162
+ E <-->|Benchmark & Serialize| K
163
+ F -->|Render Report| I
164
+ G <-->|Dataset Context Queries| J
165
+ H <-->|Authorize Keys & Serve Predictions| K
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Core Features
171
+
172
+ ### 1. Dataset Intelligence Engine
173
+ * **Automated CSV Validation** - Formats columns, assesses size boundaries, and verifies tabular file integrity.
174
+ * **Health Profiling** - Evaluates structural completeness, statistical anomalies, and generates per-column scorecards.
175
+ * **Preview and Distributions** - Offers statistical summaries, skew metrics, and categorical balance diagnostics.
176
+
177
+ ### 2. Dynamic Preprocessing Pipelines
178
+ * **Type Parsing** - Separates numerical parameters, categorical labels, datetimes, and identifier variables.
179
+ * **Intelligent Preprocessing** - Implements missing-value imputation, standard scaling, target-label encoding, outlier detection, and duplicate-record cleaning.
180
+ * **Interactive Configuration** - Provides controls to select prediction targets and customize individual preprocessing steps.
181
+
182
+ ### 3. Prediction Studio and Benchmarking
183
+ * **Model Registry** - Supports multiple algorithms including XGBoost, CatBoost, LightGBM, and Scikit-Learn ensembles.
184
+ * **Training Pipeline** - Executes cross-validation splits, train-test isolation, and hyperparameter sweeps.
185
+ * **WebSocket Leaderboard** - Streams active model training metrics and charts real-time scores directly to the UI.
186
+ * **Comparison Arena** - Visualizes metrics, prediction drift charts, and latency histograms of trained models.
187
+
188
+ ### 4. Interactive Data Visualization
189
+ * **Multi-Chart Dashboard** - Displays numerical trends, categorical patterns, and completeness heatmaps.
190
+ * **Data Health Visualization** - Compiles data quality stats, missing-record rates, and unique-feature counts.
191
+ * **Correlation Insights** - Flags linear dependencies, high associations, and outlier counts.
192
+
193
+ ### 5. Production Suite
194
+ * **API Endpoints** - Deploys production-grade prediction endpoints secured by custom API keys.
195
+ * **Batch Processing** - Enables bulk uploads to retrieve fully enriched output prediction sheets.
196
+ * **Drift Detection** - Compares historical prediction request signatures to highlight potential target concept drift.
197
+ * **Grounded LLM Chat** - Integrates local Ollama models (Phi-3 Mini) to act as a dataset-aware tutor that answers questions about data distribution trends.
198
+
199
+ ---
200
+
201
+ ## Technical Stack
202
+
203
+ | Layer | Technologies |
204
+ | :--- | :--- |
205
+ | **Frontend Web App** | React 18, Vite, TypeScript, Tailwind CSS, Framer Motion, Recharts, Axios, Lucide Icons |
206
+ | **Backend Service API** | Python 3.11, FastAPI, Uvicorn, Pydantic, Pandas, NumPy, Scikit-learn, CatBoost, LightGBM, XGBoost |
207
+ | **Data Persistence** | MongoDB Atlas / Local File Storage |
208
+ | **Local LLM Integration** | Ollama Engine (Phi-3 Mini) |
209
+ | **Infrastructure Platforms** | Netlify (Frontend), Render (Backend) |
210
+
211
+ ---
212
+
213
+ ## Local Development
214
+
215
+ ### Installation Prerequisites
216
+
217
+ | Dependency | Minimum Version |
218
+ | :--- | :--- |
219
+ | Python | 3.11 or higher |
220
+ | Node.js | 20 or higher |
221
+ | npm | 10 or higher |
222
+ | Ollama | Latest (optional, for grounded Q&A) |
223
+
224
+ ### Development Option 1: Standard Installation
225
+
226
+ #### 1. Clone the Project
227
+ ```bash
228
+ git clone https://github.com/jeet2005/Nexora.git
229
+ cd Nexora
230
+ ```
231
+
232
+ #### 2. Configure Backend Service
233
+ ```bash
234
+ cd backend
235
+ python -m venv .venv
236
+
237
+ # Activate Virtual Environment (Windows)
238
+ .venv\Scripts\activate
239
+
240
+ # Activate Virtual Environment (macOS / Linux)
241
+ source .venv/bin/activate
242
+
243
+ # Install dependencies and setup configuration
244
+ pip install -r requirements.txt
245
+ cp .env.example .env
246
+
247
+ # Run development server
248
+ python run.py
249
+ ```
250
+ The backend service will be available at `http://localhost:8000`. You can test endpoints on Swagger UI at `http://localhost:8000/docs`.
251
+
252
+ #### 3. Configure Frontend Application
253
+ ```bash
254
+ cd ../frontend
255
+ npm install
256
+ cp .env.example .env.local
257
+
258
+ # Run development server
259
+ npm run dev
260
+ ```
261
+ The React frontend application will be active at `http://localhost:5173`.
262
+
263
+ ---
264
+
265
+ ### Development Option 2: Docker Compose Setup
266
+
267
+ Run the entire stack (FastAPI, React, and MongoDB) with a single command:
268
+
269
+ ```bash
270
+ docker compose up --build
271
+ ```
272
+
273
+ * **Frontend Web App**: Access at `http://localhost:3000`
274
+ * **Backend API**: Access at `http://localhost:8000`
275
+ * **MongoDB Instance**: Running on port `27017`
276
+
277
+ ---
278
+
279
+ ### Development Option 3: Makefile Shortcuts
280
+
281
+ If you have Make installed, you can orchestrate development commands directly from the project root:
282
+
283
+ * Install all package dependencies: `make install`
284
+ * Launch backend locally: `make dev-backend`
285
+ * Launch frontend locally: `make dev-frontend`
286
+ * Run backend pytest suite: `make test`
287
+ * Format all file types: `make format`
288
+ * Spin up Docker containers: `make docker-up`
289
+ * Spin down Docker containers: `make docker-down`
290
+
291
+ ---
292
+
293
+ ## Grounded Q&A Assistant Setup (Optional)
294
+
295
+ To enable the dataset assistant using a local LLM instance:
296
+
297
+ 1. Download and install [Ollama](https://ollama.com/).
298
+ 2. Pull the default micro-LLM model in your terminal:
299
+ ```bash
300
+ ollama pull phi3:mini
301
+ ```
302
+ 3. Keep Ollama active in the background. The assistant will detect local hosting at `http://localhost:11434` and enable custom educational conversations.
303
+
304
+ ---
305
+
306
+ ## Repository Roadmap
307
+
308
+ - [ ] Add Pytest code coverage reports in the Backend CI pipeline.
309
+ - [ ] Implement multi-file comparison dashboards within the Frontend page.
310
+ - [ ] Add support for automated time-series forecasting hyperparameter tuning.
311
+ - [ ] Integrate PostgreSQL database schema mappings for enterprise persistence layers.
312
+ - [ ] Add REST API key rotation options inside the Production UI.
313
+ - [ ] Create automated end-to-end integration tests using Playwright.
314
+
315
+ ---
316
+
317
+ ## Contributing and Governance
318
+
319
+ Contributions are welcome. Please read our [Contributing Guidelines](CONTRIBUTING.md) to understand branch conventions, pull request structures, and developer standards. Ensure all contributions align with our [Code of Conduct](CODE_OF_CONDUCT.md).
320
+
321
+ For vulnerability notifications, refer to our [Security Policy](SECURITY.md).
322
+
323
+ ---
324
+
325
+ ## License
326
+
327
+ Nexora is open-source software licensed under the [MIT License](LICENSE).
@@ -0,0 +1,250 @@
1
+ # Nexora
2
+
3
+ An autonomous predictive analytics platform that profiles datasets, builds optimized preprocessing pipelines, trains reproducible model registries, runs batch predictions, monitors feature drift, and provides a grounded, interactive AI chat assistant for learning about your data, all from a single CSV upload.
4
+
5
+ ---
6
+
7
+ [![Backend CI](https://github.com/jeet2005/Nexora/actions/workflows/ci-backend.yml/badge.svg?branch=main)](https://github.com/jeet2005/Nexora/actions/workflows/ci-backend.yml)
8
+ [![Frontend CI](https://github.com/jeet2005/Nexora/actions/workflows/ci-frontend.yml/badge.svg?branch=main)](https://github.com/jeet2005/Nexora/actions/workflows/ci-frontend.yml)
9
+ [![GitHub stars](https://img.shields.io/github/stars/jeet2005/Nexora?style=social)](https://github.com/jeet2005/Nexora/stargazers)
10
+ [![GitHub issues](https://img.shields.io/github/issues/jeet2005/Nexora)](https://github.com/jeet2005/Nexora/issues)
11
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
12
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
13
+ [![Made with FastAPI](https://img.shields.io/badge/Made%20with-FastAPI-009688?logo=fastapi&logoColor=white)](https://fastapi.tiangolo.com/)
14
+ [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=white)](https://www.python.org/)
15
+ [![scikit-learn](https://img.shields.io/badge/scikit--learn-5C9E48?logo=scikit-learn&logoColor=white)](https://scikit-learn.org/)
16
+ [![XGBoost](https://img.shields.io/badge/XGBoost-1A4B8D?logo=xgboost&logoColor=white)](https://xgboost.ai/)
17
+ [![LightGBM](https://img.shields.io/badge/LightGBM-00A8A1?logo=lightgbm&logoColor=white)](https://lightgbm.ai/)
18
+ [![CatBoost](https://img.shields.io/badge/CatBoost-1F8E4B?logo=catboost&logoColor=white)](https://catboost.ai/)
19
+ [![SHAP](https://img.shields.io/badge/SHAP-FF6F00?logo=shap&logoColor=white)](https://github.com/slundberg/shap)
20
+ [![React](https://img.shields.io/badge/Frontend-React-61dafb?logo=react&logoColor=white)](https://reactjs.org/)
21
+ [![TypeScript](https://img.shields.io/badge/TypeScript-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
22
+ [![Recharts](https://img.shields.io/badge/Recharts-181717?logo=recharts&logoColor=white)](https://recharts.org/)
23
+
24
+ ---
25
+
26
+ ## Why Nexora?
27
+
28
+ Data scientists and developers often spend hours writing repetitive code for data profiling, exploratory analysis, preprocessing, model benchmarking, and production endpoint deployments. Nexora bridges this gap by serving as a unified prediction engine.
29
+
30
+ By uploading a single CSV dataset, developers can instantly audit dataset health, clean features, benchmark leading machine learning models side-by-side, analyze SHAP explainability insights, download compiled PDF reports, converse with a grounded AI dataset assistant, and deploy production-ready prediction API endpoints secured by unique API keys.
31
+
32
+ ---
33
+
34
+ ## Live Deployments
35
+
36
+ | Component | URL | Host Provider |
37
+ | :--- | :--- | :--- |
38
+ | **Frontend Web App** | [nexoraprediction.netlify.app](https://nexoraprediction.netlify.app/) | Netlify |
39
+ | **Backend API** | [nexora-360r.onrender.com](https://nexora-360r.onrender.com/) | Render |
40
+ | **API Documentation** | [nexora-360r.onrender.com/docs](https://nexora-360r.onrender.com/docs) | Render |
41
+
42
+ *Note: The backend API runs on Render's free tier and spins down after periods of inactivity. Please allow 30 to 60 seconds for the initial cold start when first accessing the application.*
43
+
44
+ *Note: The educational assistant (Ollama integration) requires a local Ollama instance and is only active when running the application locally. See local setup guidelines below.*
45
+
46
+ ---
47
+
48
+ ## System Architecture
49
+
50
+ The diagram below outlines the end-to-end data flow, processing components, and communication layers in Nexora:
51
+
52
+ ```mermaid
53
+ graph TD
54
+ subgraph Client Layer
55
+ A[React Frontend]
56
+ end
57
+
58
+ subgraph Service API Layer
59
+ B[FastAPI Backend Gateway]
60
+ C[Dataset Analyzer & Validator]
61
+ D[Preprocessing Engine]
62
+ E[Training Manager & Registry]
63
+ F[SHAP Explainability Engine]
64
+ G[Grounded Chat Agent]
65
+ H[API Key Deployment Manager]
66
+ end
67
+
68
+ subgraph Storage & Compute
69
+ I[(Local Uploads / MongoDB)]
70
+ J[Local Ollama / Phi-3 Mini]
71
+ K[ML Models: XGBoost, CatBoost, LightGBM, Scikit-Learn]
72
+ end
73
+
74
+ A -->|Upload CSV & Configuration| B
75
+ B --> C
76
+ B --> D
77
+ B --> E
78
+ B --> F
79
+ B --> G
80
+ B --> H
81
+
82
+ C <-->|Read / Write Datasets| I
83
+ D <-->|Save Clean Pipelines| I
84
+ E <-->|Real-time Socket Updates| A
85
+ E <-->|Benchmark & Serialize| K
86
+ F -->|Render Report| I
87
+ G <-->|Dataset Context Queries| J
88
+ H <-->|Authorize Keys & Serve Predictions| K
89
+ ```
90
+
91
+ ---
92
+
93
+ ## Core Features
94
+
95
+ ### 1. Dataset Intelligence Engine
96
+ * **Automated CSV Validation** - Formats columns, assesses size boundaries, and verifies tabular file integrity.
97
+ * **Health Profiling** - Evaluates structural completeness, statistical anomalies, and generates per-column scorecards.
98
+ * **Preview and Distributions** - Offers statistical summaries, skew metrics, and categorical balance diagnostics.
99
+
100
+ ### 2. Dynamic Preprocessing Pipelines
101
+ * **Type Parsing** - Separates numerical parameters, categorical labels, datetimes, and identifier variables.
102
+ * **Intelligent Preprocessing** - Implements missing-value imputation, standard scaling, target-label encoding, outlier detection, and duplicate-record cleaning.
103
+ * **Interactive Configuration** - Provides controls to select prediction targets and customize individual preprocessing steps.
104
+
105
+ ### 3. Prediction Studio and Benchmarking
106
+ * **Model Registry** - Supports multiple algorithms including XGBoost, CatBoost, LightGBM, and Scikit-Learn ensembles.
107
+ * **Training Pipeline** - Executes cross-validation splits, train-test isolation, and hyperparameter sweeps.
108
+ * **WebSocket Leaderboard** - Streams active model training metrics and charts real-time scores directly to the UI.
109
+ * **Comparison Arena** - Visualizes metrics, prediction drift charts, and latency histograms of trained models.
110
+
111
+ ### 4. Interactive Data Visualization
112
+ * **Multi-Chart Dashboard** - Displays numerical trends, categorical patterns, and completeness heatmaps.
113
+ * **Data Health Visualization** - Compiles data quality stats, missing-record rates, and unique-feature counts.
114
+ * **Correlation Insights** - Flags linear dependencies, high associations, and outlier counts.
115
+
116
+ ### 5. Production Suite
117
+ * **API Endpoints** - Deploys production-grade prediction endpoints secured by custom API keys.
118
+ * **Batch Processing** - Enables bulk uploads to retrieve fully enriched output prediction sheets.
119
+ * **Drift Detection** - Compares historical prediction request signatures to highlight potential target concept drift.
120
+ * **Grounded LLM Chat** - Integrates local Ollama models (Phi-3 Mini) to act as a dataset-aware tutor that answers questions about data distribution trends.
121
+
122
+ ---
123
+
124
+ ## Technical Stack
125
+
126
+ | Layer | Technologies |
127
+ | :--- | :--- |
128
+ | **Frontend Web App** | React 18, Vite, TypeScript, Tailwind CSS, Framer Motion, Recharts, Axios, Lucide Icons |
129
+ | **Backend Service API** | Python 3.11, FastAPI, Uvicorn, Pydantic, Pandas, NumPy, Scikit-learn, CatBoost, LightGBM, XGBoost |
130
+ | **Data Persistence** | MongoDB Atlas / Local File Storage |
131
+ | **Local LLM Integration** | Ollama Engine (Phi-3 Mini) |
132
+ | **Infrastructure Platforms** | Netlify (Frontend), Render (Backend) |
133
+
134
+ ---
135
+
136
+ ## Local Development
137
+
138
+ ### Installation Prerequisites
139
+
140
+ | Dependency | Minimum Version |
141
+ | :--- | :--- |
142
+ | Python | 3.11 or higher |
143
+ | Node.js | 20 or higher |
144
+ | npm | 10 or higher |
145
+ | Ollama | Latest (optional, for grounded Q&A) |
146
+
147
+ ### Development Option 1: Standard Installation
148
+
149
+ #### 1. Clone the Project
150
+ ```bash
151
+ git clone https://github.com/jeet2005/Nexora.git
152
+ cd Nexora
153
+ ```
154
+
155
+ #### 2. Configure Backend Service
156
+ ```bash
157
+ cd backend
158
+ python -m venv .venv
159
+
160
+ # Activate Virtual Environment (Windows)
161
+ .venv\Scripts\activate
162
+
163
+ # Activate Virtual Environment (macOS / Linux)
164
+ source .venv/bin/activate
165
+
166
+ # Install dependencies and setup configuration
167
+ pip install -r requirements.txt
168
+ cp .env.example .env
169
+
170
+ # Run development server
171
+ python run.py
172
+ ```
173
+ The backend service will be available at `http://localhost:8000`. You can test endpoints on Swagger UI at `http://localhost:8000/docs`.
174
+
175
+ #### 3. Configure Frontend Application
176
+ ```bash
177
+ cd ../frontend
178
+ npm install
179
+ cp .env.example .env.local
180
+
181
+ # Run development server
182
+ npm run dev
183
+ ```
184
+ The React frontend application will be active at `http://localhost:5173`.
185
+
186
+ ---
187
+
188
+ ### Development Option 2: Docker Compose Setup
189
+
190
+ Run the entire stack (FastAPI, React, and MongoDB) with a single command:
191
+
192
+ ```bash
193
+ docker compose up --build
194
+ ```
195
+
196
+ * **Frontend Web App**: Access at `http://localhost:3000`
197
+ * **Backend API**: Access at `http://localhost:8000`
198
+ * **MongoDB Instance**: Running on port `27017`
199
+
200
+ ---
201
+
202
+ ### Development Option 3: Makefile Shortcuts
203
+
204
+ If you have Make installed, you can orchestrate development commands directly from the project root:
205
+
206
+ * Install all package dependencies: `make install`
207
+ * Launch backend locally: `make dev-backend`
208
+ * Launch frontend locally: `make dev-frontend`
209
+ * Run backend pytest suite: `make test`
210
+ * Format all file types: `make format`
211
+ * Spin up Docker containers: `make docker-up`
212
+ * Spin down Docker containers: `make docker-down`
213
+
214
+ ---
215
+
216
+ ## Grounded Q&A Assistant Setup (Optional)
217
+
218
+ To enable the dataset assistant using a local LLM instance:
219
+
220
+ 1. Download and install [Ollama](https://ollama.com/).
221
+ 2. Pull the default micro-LLM model in your terminal:
222
+ ```bash
223
+ ollama pull phi3:mini
224
+ ```
225
+ 3. Keep Ollama active in the background. The assistant will detect local hosting at `http://localhost:11434` and enable custom educational conversations.
226
+
227
+ ---
228
+
229
+ ## Repository Roadmap
230
+
231
+ - [ ] Add Pytest code coverage reports in the Backend CI pipeline.
232
+ - [ ] Implement multi-file comparison dashboards within the Frontend page.
233
+ - [ ] Add support for automated time-series forecasting hyperparameter tuning.
234
+ - [ ] Integrate PostgreSQL database schema mappings for enterprise persistence layers.
235
+ - [ ] Add REST API key rotation options inside the Production UI.
236
+ - [ ] Create automated end-to-end integration tests using Playwright.
237
+
238
+ ---
239
+
240
+ ## Contributing and Governance
241
+
242
+ Contributions are welcome. Please read our [Contributing Guidelines](CONTRIBUTING.md) to understand branch conventions, pull request structures, and developer standards. Ensure all contributions align with our [Code of Conduct](CODE_OF_CONDUCT.md).
243
+
244
+ For vulnerability notifications, refer to our [Security Policy](SECURITY.md).
245
+
246
+ ---
247
+
248
+ ## License
249
+
250
+ Nexora is open-source software licensed under the [MIT License](LICENSE).
@@ -0,0 +1,11 @@
"""Public package API for Nexora v0.1.0."""

from nexora.core import Nexora
from nexora.report import NexoraReport

# Alias for backward compatibility
NexoraPrediction = Nexora

# Every name in ``__all__`` must be bound in this module; an unbound entry makes
# ``from nexora import *`` fail with AttributeError.
# NOTE(review): "DatasetProfile" and "ModelResult" used to be listed here but
# are not imported in this module. If they are meant to be public, import them
# here and add them back to the list together.
__all__ = ["Nexora", "NexoraReport", "NexoraPrediction"]

__version__ = "0.1.0"
@@ -0,0 +1,5 @@
1
+ """Nexora command line interface."""
2
+
3
+ from nexora.cli.main import cli
4
+
5
+ __all__ = ["cli"]
@@ -0,0 +1,45 @@
1
+ """Click-based CLI for the Nexora MVP."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from nexora import Nexora
10
+
11
+
# Root command group for the CLI; subcommands register on it via
# ``@cli.command()`` (see ``train`` in this module).
# The docstring is kept to one line because Click displays it as the group's
# help text.
@click.group()
def cli() -> None:
    """Nexora predictive analytics CLI."""
15
+
16
+
@cli.command()
@click.argument(
    "data_csv",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--target", required=True, help="Target column to predict.")
@click.option(
    "--out",
    "output_path",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Output .nx session path.",
)
@click.option(
    "--max-models",
    type=int,
    default=6,
    show_default=True,
    help="Maximum number of MVP registry models to train.",
)
def train(data_csv: Path, target: str, output_path: Path | None, max_models: int) -> None:
    """Train models from a CSV and save a Nexora session."""
    # The docstring above is what Click prints as this command's help text, so
    # per-parameter details live in the ``help=`` strings of the options.

    # Run the pipeline on the CSV for the requested target column.
    pipeline = Nexora(data_csv, target=target)
    outcome = pipeline.run(max_models=max_models)

    # Without --out, the session is written next to the input CSV with the
    # suffix swapped for ``.nx``.
    if output_path:
        destination = output_path
    else:
        destination = data_csv.with_suffix(".nx")
    written_to = outcome.save(destination)

    summary = f"Best model: {outcome.best_model} ({outcome.best_score_label}={outcome.best_score:.4f})"
    click.echo(summary)
    click.echo(f"Saved session: {written_to}")
    click.echo("")
    # Print at most the first ten leaderboard rows, without the index column.
    click.echo(outcome.leaderboard.head(10).to_string(index=False))
@@ -0,0 +1,5 @@
1
+ """Standalone code generation."""
2
+
3
+ from nexora.codegen.script import generate_script
4
+
5
+ __all__ = ["generate_script"]