mudra-ml 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Private working notes, never tracked
2
+ .agent/
3
+
4
+ # Build artifacts
5
+ dist/
6
+ build/
7
+ *.egg-info/
8
+ .eggs/
9
+
10
+ # Python
11
+ __pycache__/
12
+ *.py[cod]
13
+ *.so
14
+ .pytest_cache/
15
+ .ruff_cache/
16
+ .mypy_cache/
17
+ .coverage
18
+ htmlcov/
19
+ coverage.xml
20
+
21
+ # Environments
22
+ .venv*/
23
+ venv/
24
+ env/
25
+
26
+ # Credentials
27
+ .pypirc
28
+
29
+ # Editor and OS
30
+ .vscode/
31
+ .idea/
32
+ .DS_Store
33
+ Thumbs.db
34
+
35
+ # Generated run output
36
+ mudra_ml_report.*
37
+ run_artifact*
38
+ *.joblib
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are recorded here. The format follows
4
+ Keep a Changelog, and the project uses semantic versioning.
5
+
6
+ ## [0.1.0] - 2026-06-04
7
+
8
+ First public release.
9
+
10
+ ### Added
11
+ - File ingestion for csv, tsv, excel, json, and parquet, with delimiter,
12
+ encoding, and header auto-detection.
13
+ - Data profiler with per-column type inference (numeric, categorical,
14
+ datetime, boolean, id, text), missingness, cardinality, distribution stats,
15
+ and candidate-target ranking.
16
+ - Goal object and rule-based goal inference for task, target, and metric, with
17
+ operator-set fields taking precedence over inference.
18
+ - Leakage-safe cleaning and preprocessing as a scikit-learn Pipeline and
19
+ ColumnTransformer: statistical imputation, datetime part extraction, IQR or
20
+ z-score outlier clipping, one-hot and frequency encoding, and scaling.
21
+ - Rule-based algorithm recommendation keyed on task, dataset size, feature
22
+ count, cardinality, and operator constraints.
23
+ - Cross-validated training and tuning with RandomizedSearchCV at a fixed seed,
24
+ task-appropriate evaluation, best-model selection, and feature importance.
25
+ - Markdown and HTML run reports that log every decision and the rule behind it.
26
+ - Model and pipeline persistence through joblib, with a predict path on loaded
27
+ artifacts.
28
+ - Command line interface with `run` and `profile` commands.
29
+ - Optional xgboost and lightgbm candidates through the `boost` extra.
mudra_ml-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mudit Nautiyal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,213 @@
1
+ Metadata-Version: 2.4
2
+ Name: mudra-ml
3
+ Version: 0.1.0
4
+ Summary: Glass-box autonomous data science: profile, clean, model, and explain every decision.
5
+ Project-URL: Homepage, https://github.com/MuditNautiyal-21/MudraML
6
+ Project-URL: Repository, https://github.com/MuditNautiyal-21/MudraML
7
+ Project-URL: Issues, https://github.com/MuditNautiyal-21/MudraML/issues
8
+ Project-URL: Changelog, https://github.com/MuditNautiyal-21/MudraML/blob/main/CHANGELOG.md
9
+ Author: Mudit Nautiyal
10
+ License: MIT License
11
+
12
+ Copyright (c) 2026 Mudit Nautiyal
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Keywords: automl,data-science,explainable,machine-learning,pipeline,scikit-learn
33
+ Classifier: Development Status :: 4 - Beta
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: Intended Audience :: Science/Research
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Operating System :: OS Independent
38
+ Classifier: Programming Language :: Python :: 3
39
+ Classifier: Programming Language :: Python :: 3.10
40
+ Classifier: Programming Language :: Python :: 3.11
41
+ Classifier: Programming Language :: Python :: 3.12
42
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
43
+ Classifier: Typing :: Typed
44
+ Requires-Python: >=3.10
45
+ Requires-Dist: jinja2>=3.1
46
+ Requires-Dist: joblib>=1.2
47
+ Requires-Dist: numpy>=1.23
48
+ Requires-Dist: pandas>=1.5
49
+ Requires-Dist: scikit-learn>=1.2
50
+ Requires-Dist: typer>=0.9
51
+ Provides-Extra: boost
52
+ Requires-Dist: lightgbm>=4.0; extra == 'boost'
53
+ Requires-Dist: xgboost>=1.7; extra == 'boost'
54
+ Provides-Extra: dev
55
+ Requires-Dist: build>=1.0; extra == 'dev'
56
+ Requires-Dist: mypy>=1.8; extra == 'dev'
57
+ Requires-Dist: openpyxl>=3.1; extra == 'dev'
58
+ Requires-Dist: pyarrow>=12.0; extra == 'dev'
59
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
60
+ Requires-Dist: pytest>=7.4; extra == 'dev'
61
+ Requires-Dist: ruff>=0.4; extra == 'dev'
62
+ Requires-Dist: twine>=5.0; extra == 'dev'
63
+ Provides-Extra: excel
64
+ Requires-Dist: openpyxl>=3.1; extra == 'excel'
65
+ Provides-Extra: files
66
+ Requires-Dist: openpyxl>=3.1; extra == 'files'
67
+ Requires-Dist: pyarrow>=12.0; extra == 'files'
68
+ Provides-Extra: parquet
69
+ Requires-Dist: pyarrow>=12.0; extra == 'parquet'
70
+ Description-Content-Type: text/markdown
71
+
72
+ # MudraML
73
+
74
+ Automated, glass-box data science. Point it at a data file, get a fitted model and a report of every decision behind it.
75
+
76
+ [![PyPI version](https://img.shields.io/pypi/v/mudra-ml.svg)](https://pypi.org/project/mudra-ml/)
77
+ [![Python versions](https://img.shields.io/pypi/pyversions/mudra-ml.svg)](https://pypi.org/project/mudra-ml/)
78
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/MuditNautiyal-21/MudraML/blob/main/LICENSE)
79
+ [![CI](https://github.com/MuditNautiyal-21/MudraML/actions/workflows/ci.yml/badge.svg)](https://github.com/MuditNautiyal-21/MudraML/actions/workflows/ci.yml)
80
+
81
+ MudraML automates the common data science workflow and shows its work. You point it at a data file, optionally state a goal, and it ingests the data, profiles it, cleans it, picks an algorithm, trains and tunes a shortlist of models, evaluates them, and returns the best fitted model together with a report of every decision it made and the rule behind that decision.
82
+
83
+ The point of difference is the decision engine. It is rule-based and statistical, not another model. Outlier handling uses IQR or z-score rules. Missing values are filled by median, mode, or a constant, or the column is dropped past a missingness threshold. The algorithm shortlist comes from a documented rule set keyed on the task, the dataset size, the feature count, and your constraints. Every one of those choices is written into the report, so a person can read why the pipeline did what it did and disagree with it if they want.
84
+
85
+ This is the glass-box position: the models are the product, and the way the pipeline reaches them is auditable rather than hidden inside a search.
86
+
87
+ ## Install
88
+
89
+ ```
90
+ pip install mudra-ml
91
+ ```
92
+
93
+ Optional extras:
94
+
95
+ ```
96
+ pip install "mudra-ml[files]" # parquet and excel readers
97
+ pip install "mudra-ml[boost]" # xgboost and lightgbm candidates
98
+ ```
99
+
100
+ The library runs fully on the scikit-learn core. The boosters are added to the shortlist only when the extra is installed.
101
+
102
+ ## Quickstart
103
+
104
+ Fully automatic. MudraML infers the task, the target, and the metric:
105
+
106
+ ```python
107
+ from mudra_ml import Mudra
108
+
109
+ m = Mudra()
110
+ result = m.run("data.csv")
111
+ print(result.report_path) # markdown and HTML report on disk
112
+ model = result.best_model # fitted, ready to predict
113
+ ```
114
+
115
+ Operator-defined goal. You set what you care about and MudraML honors it:
116
+
117
+ ```python
118
+ result = m.run(
119
+ "churn.csv",
120
+ target="churn",
121
+ task="classification",
122
+ metric="f1",
123
+ constraints={"interpretable": True, "max_train_seconds": 120},
124
+ )
125
+ ```
126
+
127
+ When `interpretable` is set, the shortlist is limited to models you can read directly, such as logistic regression and a single decision tree. The report states which goal fields you set and which ones were inferred.
128
+
129
+ ## What the report looks like
130
+
131
+ Every run writes a report that records each decision and the rule that produced it. An excerpt:
132
+
133
+ ```
134
+ GOAL
135
+ task classification (inferred: target has 2 unique values)
136
+ target churn (operator-set)
137
+ metric f1 (default for classification)
138
+
139
+ PROFILE
140
+ 19 columns: 12 numeric, 5 categorical, 1 datetime, 1 id
141
+ dropped customer_id (id column, 100 percent unique)
142
+ missing: tenure 3.2 percent, region 0.1 percent
143
+
144
+ PREPROCESS (fit on train split only)
145
+ tenure median imputation (3.2 percent missing, below 40 percent drop threshold)
146
+ region most-frequent imputation, one-hot (4 categories, low cardinality)
147
+ signup_date extracted year, month, day-of-week
148
+ numeric standard scaling
149
+
150
+ RECOMMEND
151
+ shortlist logistic regression, random forest, gradient boosting
152
+ rule classification, 5000 rows, 23 features after encoding, no interpretable constraint
153
+
154
+ EVALUATE (held-out test)
155
+ best gradient boosting f1 0.81
156
+ also random forest 0.79, logistic regression 0.74
157
+ top features tenure, monthly_charges, contract_type
158
+ ```
159
+
160
+ The numbers above are illustrative. Your run writes the real values for your data.
161
+
162
+ ## Predict and reuse
163
+
164
+ ```python
165
+ result.save("run_artifact") # pipeline + model + metadata
166
+ loaded = Mudra.load("run_artifact")
167
+ preds = loaded.predict(new_dataframe)
168
+ ```
169
+
170
+ The preprocessing pipeline travels with the model, so new rows are transformed the same way the training rows were.
171
+
172
+ ## Command line
173
+
174
+ ```
175
+ mudra-ml run data.csv --target churn --task classification --metric f1
176
+ mudra-ml profile data.csv
177
+ ```
178
+
179
+ `run` writes the report and prints the selected model and its held-out metrics. `profile` prints the inferred column types, missingness, cardinality, and the candidate target columns.
180
+
181
+ ## What it does, stage by stage
182
+
183
+ 1. Ingest. Readers for csv, tsv, excel, json, and parquet. For delimited text the delimiter, encoding, and header row are detected.
184
+ 2. Profile. Per-column type inference (numeric, categorical, datetime, boolean, id, text), missingness, cardinality, distribution stats, and candidate-target ranking.
185
+ 3. Goal. Rule-based inference of the task, target, and metric, with any field you set taking precedence.
186
+ 4. Preprocess. A leakage-safe scikit-learn Pipeline and ColumnTransformer. Imputation, datetime part extraction, outlier clipping, encoding, and scaling are all fit on the training split only.
187
+ 5. Recommend. A documented rule set returns a candidate shortlist.
188
+ 6. Train and evaluate. Cross-validated training, tuning with RandomizedSearchCV at a fixed seed, held-out scoring, best-model selection, and feature importance where the model exposes it.
189
+ 7. Report. Markdown and HTML that log every decision and the rule that produced it.
190
+
191
+ ## Why leakage safety matters here
192
+
193
+ Every statistic that preprocessing needs (a median, a category frequency, an outlier bound, a scaler mean) is learned during `fit`. MudraML fits the pipeline on the training split and only transforms the test split. No information from the test data reaches the model through preprocessing. The test suite checks this property directly: it fits on a slice with a known mean and confirms the learned imputation value matches the train slice rather than the whole dataset.
194
+
195
+ ## Determinism
196
+
197
+ One `random_state` is threaded through every stochastic step (the split, the search, the estimators) and defaults to a fixed value. Two runs on the same data and the same goal produce the same result and the same report.
198
+
199
+ ## Tasks and metrics
200
+
201
+ | Task | Default metric | Also reported |
202
+ | --- | --- | --- |
203
+ | classification | f1 | accuracy, precision, recall, roc_auc, confusion matrix |
204
+ | regression | rmse | mae, mse, r2 |
205
+ | clustering | silhouette | davies_bouldin |
206
+
207
+ ## Scope
208
+
209
+ This release covers the supervised classification and regression cases and KMeans clustering, end to end, with the decision log and the report. Deep text modeling, time series, model-based imputation, and a search beyond curated grids are out of scope by design, since the engine is meant to stay explainable. See the [changelog](https://github.com/MuditNautiyal-21/MudraML/blob/main/CHANGELOG.md) for the version history.
210
+
211
+ ## License
212
+
213
+ MIT. See [LICENSE](https://github.com/MuditNautiyal-21/MudraML/blob/main/LICENSE).
@@ -0,0 +1,142 @@
1
+ # MudraML
2
+
3
+ Automated, glass-box data science. Point it at a data file, get a fitted model and a report of every decision behind it.
4
+
5
+ [![PyPI version](https://img.shields.io/pypi/v/mudra-ml.svg)](https://pypi.org/project/mudra-ml/)
6
+ [![Python versions](https://img.shields.io/pypi/pyversions/mudra-ml.svg)](https://pypi.org/project/mudra-ml/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/MuditNautiyal-21/MudraML/blob/main/LICENSE)
8
+ [![CI](https://github.com/MuditNautiyal-21/MudraML/actions/workflows/ci.yml/badge.svg)](https://github.com/MuditNautiyal-21/MudraML/actions/workflows/ci.yml)
9
+
10
+ MudraML automates the common data science workflow and shows its work. You point it at a data file, optionally state a goal, and it ingests the data, profiles it, cleans it, picks an algorithm, trains and tunes a shortlist of models, evaluates them, and returns the best fitted model together with a report of every decision it made and the rule behind that decision.
11
+
12
+ The point of difference is the decision engine. It is rule-based and statistical, not another model. Outlier handling uses IQR or z-score rules. Missing values are filled by median, mode, or a constant, or the column is dropped past a missingness threshold. The algorithm shortlist comes from a documented rule set keyed on the task, the dataset size, the feature count, and your constraints. Every one of those choices is written into the report, so a person can read why the pipeline did what it did and disagree with it if they want.
13
+
14
+ This is the glass-box position: the models are the product, and the way the pipeline reaches them is auditable rather than hidden inside a search.
15
+
16
+ ## Install
17
+
18
+ ```
19
+ pip install mudra-ml
20
+ ```
21
+
22
+ Optional extras:
23
+
24
+ ```
25
+ pip install "mudra-ml[files]" # parquet and excel readers
26
+ pip install "mudra-ml[boost]" # xgboost and lightgbm candidates
27
+ ```
28
+
29
+ The library runs fully on the scikit-learn core. The boosters are added to the shortlist only when the extra is installed.
30
+
31
+ ## Quickstart
32
+
33
+ Fully automatic. MudraML infers the task, the target, and the metric:
34
+
35
+ ```python
36
+ from mudra_ml import Mudra
37
+
38
+ m = Mudra()
39
+ result = m.run("data.csv")
40
+ print(result.report_path) # markdown and HTML report on disk
41
+ model = result.best_model # fitted, ready to predict
42
+ ```
43
+
44
+ Operator-defined goal. You set what you care about and MudraML honors it:
45
+
46
+ ```python
47
+ result = m.run(
48
+ "churn.csv",
49
+ target="churn",
50
+ task="classification",
51
+ metric="f1",
52
+ constraints={"interpretable": True, "max_train_seconds": 120},
53
+ )
54
+ ```
55
+
56
+ When `interpretable` is set, the shortlist is limited to models you can read directly, such as logistic regression and a single decision tree. The report states which goal fields you set and which ones were inferred.
57
+
58
+ ## What the report looks like
59
+
60
+ Every run writes a report that records each decision and the rule that produced it. An excerpt:
61
+
62
+ ```
63
+ GOAL
64
+ task classification (inferred: target has 2 unique values)
65
+ target churn (operator-set)
66
+ metric f1 (default for classification)
67
+
68
+ PROFILE
69
+ 19 columns: 12 numeric, 5 categorical, 1 datetime, 1 id
70
+ dropped customer_id (id column, 100 percent unique)
71
+ missing: tenure 3.2 percent, region 0.1 percent
72
+
73
+ PREPROCESS (fit on train split only)
74
+ tenure median imputation (3.2 percent missing, below 40 percent drop threshold)
75
+ region most-frequent imputation, one-hot (4 categories, low cardinality)
76
+ signup_date extracted year, month, day-of-week
77
+ numeric standard scaling
78
+
79
+ RECOMMEND
80
+ shortlist logistic regression, random forest, gradient boosting
81
+ rule classification, 5000 rows, 23 features after encoding, no interpretable constraint
82
+
83
+ EVALUATE (held-out test)
84
+ best gradient boosting f1 0.81
85
+ also random forest 0.79, logistic regression 0.74
86
+ top features tenure, monthly_charges, contract_type
87
+ ```
88
+
89
+ The numbers above are illustrative. Your run writes the real values for your data.
90
+
91
+ ## Predict and reuse
92
+
93
+ ```python
94
+ result.save("run_artifact") # pipeline + model + metadata
95
+ loaded = Mudra.load("run_artifact")
96
+ preds = loaded.predict(new_dataframe)
97
+ ```
98
+
99
+ The preprocessing pipeline travels with the model, so new rows are transformed the same way the training rows were.
100
+
101
+ ## Command line
102
+
103
+ ```
104
+ mudra-ml run data.csv --target churn --task classification --metric f1
105
+ mudra-ml profile data.csv
106
+ ```
107
+
108
+ `run` writes the report and prints the selected model and its held-out metrics. `profile` prints the inferred column types, missingness, cardinality, and the candidate target columns.
109
+
110
+ ## What it does, stage by stage
111
+
112
+ 1. Ingest. Readers for csv, tsv, excel, json, and parquet. For delimited text the delimiter, encoding, and header row are detected.
113
+ 2. Profile. Per-column type inference (numeric, categorical, datetime, boolean, id, text), missingness, cardinality, distribution stats, and candidate-target ranking.
114
+ 3. Goal. Rule-based inference of the task, target, and metric, with any field you set taking precedence.
115
+ 4. Preprocess. A leakage-safe scikit-learn Pipeline and ColumnTransformer. Imputation, datetime part extraction, outlier clipping, encoding, and scaling are all fit on the training split only.
116
+ 5. Recommend. A documented rule set returns a candidate shortlist.
117
+ 6. Train and evaluate. Cross-validated training, tuning with RandomizedSearchCV at a fixed seed, held-out scoring, best-model selection, and feature importance where the model exposes it.
118
+ 7. Report. Markdown and HTML that log every decision and the rule that produced it.
119
+
120
+ ## Why leakage safety matters here
121
+
122
+ Every statistic that preprocessing needs (a median, a category frequency, an outlier bound, a scaler mean) is learned during `fit`. MudraML fits the pipeline on the training split and only transforms the test split. No information from the test data reaches the model through preprocessing. The test suite checks this property directly: it fits on a slice with a known mean and confirms the learned imputation value matches the train slice rather than the whole dataset.
123
+
124
+ ## Determinism
125
+
126
+ One `random_state` is threaded through every stochastic step (the split, the search, the estimators) and defaults to a fixed value. Two runs on the same data and the same goal produce the same result and the same report.
127
+
128
+ ## Tasks and metrics
129
+
130
+ | Task | Default metric | Also reported |
131
+ | --- | --- | --- |
132
+ | classification | f1 | accuracy, precision, recall, roc_auc, confusion matrix |
133
+ | regression | rmse | mae, mse, r2 |
134
+ | clustering | silhouette | davies_bouldin |
135
+
136
+ ## Scope
137
+
138
+ This release covers the supervised classification and regression cases and KMeans clustering, end to end, with the decision log and the report. Deep text modeling, time series, model-based imputation, and a search beyond curated grids are out of scope by design, since the engine is meant to stay explainable. See the [changelog](https://github.com/MuditNautiyal-21/MudraML/blob/main/CHANGELOG.md) for the version history.
139
+
140
+ ## License
141
+
142
+ MIT. See [LICENSE](https://github.com/MuditNautiyal-21/MudraML/blob/main/LICENSE).
@@ -0,0 +1,106 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mudra-ml"
7
+ version = "0.1.0"
8
+ description = "Glass-box autonomous data science: profile, clean, model, and explain every decision."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "Mudit Nautiyal" }]
13
+ keywords = [
14
+ "automl",
15
+ "machine-learning",
16
+ "data-science",
17
+ "scikit-learn",
18
+ "explainable",
19
+ "pipeline",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 4 - Beta",
23
+ "Intended Audience :: Science/Research",
24
+ "Intended Audience :: Developers",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Operating System :: OS Independent",
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.10",
29
+ "Programming Language :: Python :: 3.11",
30
+ "Programming Language :: Python :: 3.12",
31
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
32
+ "Typing :: Typed",
33
+ ]
34
+ dependencies = [
35
+ "pandas>=1.5",
36
+ "numpy>=1.23",
37
+ "scikit-learn>=1.2",
38
+ "joblib>=1.2",
39
+ "jinja2>=3.1",
40
+ "typer>=0.9",
41
+ ]
42
+
43
+ [project.optional-dependencies]
44
+ parquet = ["pyarrow>=12.0"]
45
+ excel = ["openpyxl>=3.1"]
46
+ boost = ["xgboost>=1.7", "lightgbm>=4.0"]
47
+ files = ["pyarrow>=12.0", "openpyxl>=3.1"]
48
+ dev = [
49
+ "pytest>=7.4",
50
+ "pytest-cov>=4.1",
51
+ "ruff>=0.4",
52
+ "mypy>=1.8",
53
+ "build>=1.0",
54
+ "twine>=5.0",
55
+ "pyarrow>=12.0",
56
+ "openpyxl>=3.1",
57
+ ]
58
+
59
+ [project.urls]
60
+ Homepage = "https://github.com/MuditNautiyal-21/MudraML"
61
+ Repository = "https://github.com/MuditNautiyal-21/MudraML"
62
+ Issues = "https://github.com/MuditNautiyal-21/MudraML/issues"
63
+ Changelog = "https://github.com/MuditNautiyal-21/MudraML/blob/main/CHANGELOG.md"
64
+
65
+ [project.scripts]
66
+ mudra-ml = "mudra_ml.cli:app"
67
+
68
+ [tool.hatch.build.targets.wheel]
69
+ packages = ["src/mudra_ml"]
70
+
71
+ [tool.hatch.build.targets.sdist]
72
+ include = [
73
+ "src/mudra_ml",
74
+ "tests",
75
+ "README.md",
76
+ "CHANGELOG.md",
77
+ "LICENSE",
78
+ "pyproject.toml",
79
+ ]
80
+
81
+ [tool.ruff]
82
+ line-length = 100
83
+ target-version = "py310"
84
+ src = ["src", "tests"]
85
+
86
+ [tool.ruff.lint]
87
+ select = ["E", "F", "I", "UP", "B", "W"]
88
+ ignore = ["B008"]
89
+
90
+ [tool.ruff.lint.per-file-ignores]
91
+ # The report module holds long inline HTML and markdown templates.
92
+ "src/mudra_ml/report.py" = ["E501"]
93
+ "tests/*" = ["E501"]
94
+
95
+ [tool.pytest.ini_options]
96
+ testpaths = ["tests"]
97
+ addopts = "-q"
98
+
99
+ [tool.coverage.run]
100
+ source = ["mudra_ml"]
101
+ branch = true
102
+
103
+ [tool.mypy]
104
+ python_version = "3.10"
105
+ ignore_missing_imports = true
106
+ warn_unused_ignores = false
@@ -0,0 +1,34 @@
1
+ """MudraML: glass-box autonomous data science.
2
+
3
+ The decision engine that drives the pipeline is rule-based and statistical.
4
+ It is deterministic, logged, and explainable. The machine learning models are
5
+ the output it produces, not the mechanism by which it chooses what to do.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from .core import Mudra, RunResult
11
+ from .evaluate import evaluate
12
+ from .goal import Goal, infer_goal
13
+ from .ingest import load
14
+ from .preprocess import build_pipeline
15
+ from .profile import DataProfile, DataProfiler
16
+ from .recommend import recommend_models
17
+ from .report import write_report
18
+
19
+ __version__ = "0.1.0"
20
+
21
+ __all__ = [
22
+ "Mudra",
23
+ "RunResult",
24
+ "Goal",
25
+ "infer_goal",
26
+ "load",
27
+ "DataProfiler",
28
+ "DataProfile",
29
+ "build_pipeline",
30
+ "recommend_models",
31
+ "evaluate",
32
+ "write_report",
33
+ "__version__",
34
+ ]