dsbro 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsbro-0.1.0/.gitignore +32 -0
- dsbro-0.1.0/CHANGELOG.md +9 -0
- dsbro-0.1.0/LICENSE +22 -0
- dsbro-0.1.0/PKG-INFO +187 -0
- dsbro-0.1.0/README.md +140 -0
- dsbro-0.1.0/dsbro/__init__.py +124 -0
- dsbro-0.1.0/dsbro/_helpers.py +106 -0
- dsbro-0.1.0/dsbro/_themes.py +109 -0
- dsbro-0.1.0/dsbro/_version.py +4 -0
- dsbro-0.1.0/dsbro/eda.py +1133 -0
- dsbro-0.1.0/dsbro/io.py +497 -0
- dsbro-0.1.0/dsbro/metrics.py +243 -0
- dsbro-0.1.0/dsbro/ml.py +895 -0
- dsbro-0.1.0/dsbro/prep.py +971 -0
- dsbro-0.1.0/dsbro/text.py +229 -0
- dsbro-0.1.0/dsbro/utils.py +461 -0
- dsbro-0.1.0/dsbro/viz.py +731 -0
- dsbro-0.1.0/examples/.gitkeep +1 -0
- dsbro-0.1.0/examples/quickstart.ipynb +194 -0
- dsbro-0.1.0/imgs/dsbro.png +0 -0
- dsbro-0.1.0/pyproject.toml +81 -0
- dsbro-0.1.0/tests/conftest.py +46 -0
- dsbro-0.1.0/tests/test_eda.py +176 -0
- dsbro-0.1.0/tests/test_io.py +102 -0
- dsbro-0.1.0/tests/test_metrics.py +62 -0
- dsbro-0.1.0/tests/test_ml.py +161 -0
- dsbro-0.1.0/tests/test_package.py +30 -0
- dsbro-0.1.0/tests/test_prep.py +170 -0
- dsbro-0.1.0/tests/test_text.py +59 -0
- dsbro-0.1.0/tests/test_utils.py +116 -0
- dsbro-0.1.0/tests/test_viz.py +225 -0
dsbro-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# local planning / AI docs
|
|
2
|
+
CLAUDE.md
|
|
3
|
+
dsbro_master_plan.md
|
|
4
|
+
|
|
5
|
+
# python cache
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.py[cod]
|
|
8
|
+
|
|
9
|
+
# virtual envs
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
env/
|
|
13
|
+
|
|
14
|
+
# build artifacts
|
|
15
|
+
build/
|
|
16
|
+
dist/
|
|
17
|
+
*.egg-info/
|
|
18
|
+
|
|
19
|
+
# test / lint cache
|
|
20
|
+
.pytest_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
.coverage
|
|
23
|
+
htmlcov/
|
|
24
|
+
|
|
25
|
+
# notebook junk
|
|
26
|
+
.ipynb_checkpoints/
|
|
27
|
+
|
|
28
|
+
# OS / editor
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
.vscode/
|
|
32
|
+
.idea/
|
dsbro-0.1.0/CHANGELOG.md
ADDED
dsbro-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Muhammad Ibrahim Qasmi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
dsbro-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dsbro
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Your Data Science Bro. One import away.
|
|
5
|
+
Project-URL: Homepage, https://github.com/muhammadibrahim313/dsbro
|
|
6
|
+
Project-URL: Documentation, https://github.com/muhammadibrahim313/dsbro
|
|
7
|
+
Project-URL: Repository, https://github.com/muhammadibrahim313/dsbro
|
|
8
|
+
Project-URL: Issues, https://github.com/muhammadibrahim313/dsbro/issues
|
|
9
|
+
Author: Muhammad Ibrahim Qasmi
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: data-science,eda,kaggle,machine-learning,python
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Requires-Dist: matplotlib>=3.7
|
|
26
|
+
Requires-Dist: numpy>=1.24
|
|
27
|
+
Requires-Dist: pandas>=2.0
|
|
28
|
+
Requires-Dist: scikit-learn>=1.3
|
|
29
|
+
Requires-Dist: seaborn>=0.12
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: catboost>=1.2; extra == 'all'
|
|
32
|
+
Requires-Dist: lightgbm>=4.0; extra == 'all'
|
|
33
|
+
Requires-Dist: optuna>=3.6; extra == 'all'
|
|
34
|
+
Requires-Dist: plotly>=5.22; extra == 'all'
|
|
35
|
+
Requires-Dist: xgboost>=2.0; extra == 'all'
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: ruff>=0.11; extra == 'dev'
|
|
39
|
+
Provides-Extra: ml
|
|
40
|
+
Requires-Dist: catboost>=1.2; extra == 'ml'
|
|
41
|
+
Requires-Dist: lightgbm>=4.0; extra == 'ml'
|
|
42
|
+
Requires-Dist: optuna>=3.6; extra == 'ml'
|
|
43
|
+
Requires-Dist: xgboost>=2.0; extra == 'ml'
|
|
44
|
+
Provides-Extra: plotly
|
|
45
|
+
Requires-Dist: plotly>=5.22; extra == 'plotly'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
<p align="center">
|
|
49
|
+
<img src="https://raw.githubusercontent.com/muhammadibrahim313/dsbro/main/imgs/dsbro.png" alt="dsbro logo" width="320">
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
# dsbro
|
|
53
|
+
|
|
54
|
+
[Python 3.9+](https://www.python.org/)
|
|
55
|
+
[License: MIT](https://github.com/muhammadibrahim313/dsbro/blob/main/LICENSE)
|
|
56
|
+
[GitHub repository](https://github.com/muhammadibrahim313/dsbro)
|
|
57
|
+
|
|
58
|
+
Your Data Science Bro. One import away.
|
|
59
|
+
|
|
60
|
+
`dsbro` is a lightweight Python toolkit for notebook-heavy data science work. It is built for the
|
|
61
|
+
repeated workflow most people have in Kaggle, Colab, and local Jupyter notebooks: setup, load
|
|
62
|
+
data, inspect it, clean it, visualize it, and train a baseline model fast.
|
|
63
|
+
|
|
64
|
+
## What dsbro covers
|
|
65
|
+
|
|
66
|
+
- `dsbro.utils`: notebook setup, seeding, timers, system info, downloads, simple parallel work
|
|
67
|
+
- `dsbro.io`: file loading, saving, previews, directory trees, file search, submission helpers
|
|
68
|
+
- `dsbro.eda`: overview tables, missing-value analysis, drift checks, target analysis, comparisons
|
|
69
|
+
- `dsbro.prep`: encoding, scaling, missing-value filling, feature engineering, memory reduction
|
|
70
|
+
- `dsbro.viz`: themed matplotlib and optional plotly charts for fast notebook visuals
|
|
71
|
+
- `dsbro.metrics`: classification and regression metrics in one place
|
|
72
|
+
- `dsbro.ml`: model comparison, cross-validation, training, tuning, stacking, pseudo-labeling
|
|
73
|
+
- `dsbro.text`: text cleaning, tokenization, word frequencies, and TF-IDF features
|
|
74
|
+
|
|
75
|
+
## Current status
|
|
76
|
+
|
|
77
|
+
Implemented now:
|
|
78
|
+
|
|
79
|
+
- Core package scaffold and packaging
|
|
80
|
+
- `utils`, `io`, `eda`, `prep`, `viz`, `metrics`, `ml`, and `text`
|
|
81
|
+
- Built-in help and about/version entry points
|
|
82
|
+
- Tests across the implemented modules
|
|
83
|
+
- Quickstart notebook in [examples/quickstart.ipynb](https://github.com/muhammadibrahim313/dsbro/blob/main/examples/quickstart.ipynb)
|
|
84
|
+
|
|
85
|
+
Still planned:
|
|
86
|
+
|
|
87
|
+
- Final polish for docs/examples
|
|
88
|
+
- Additional ML/deep-learning extras over time
|
|
89
|
+
|
|
90
|
+
## Installation
|
|
91
|
+
|
|
92
|
+
From PyPI:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pip install dsbro
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
From GitHub:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pip install git+https://github.com/muhammadibrahim313/dsbro.git
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
For local development:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pip install -e ".[dev]"
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Optional extras:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
pip install -e ".[ml]"
|
|
114
|
+
pip install -e ".[plotly]"
|
|
115
|
+
pip install -e ".[all]"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
dsbro is installable from PyPI, but the project is still in alpha and under active development.
|
|
119
|
+
|
|
120
|
+
## Quick example
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
import dsbro
|
|
124
|
+
import pandas as pd
|
|
125
|
+
|
|
126
|
+
dsbro.setup()
|
|
127
|
+
|
|
128
|
+
train = pd.DataFrame(
|
|
129
|
+
{
|
|
130
|
+
"age": [22, 35, 41, 28],
|
|
131
|
+
"city": ["lahore", "karachi", "lahore", "islamabad"],
|
|
132
|
+
"purchased": [0, 1, 1, 0],
|
|
133
|
+
}
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
overview = dsbro.eda.overview(train)
|
|
137
|
+
processed, report = dsbro.prep.auto_preprocess(train, target="purchased")
|
|
138
|
+
leaderboard = dsbro.ml.compare(train, target="purchased", cv=2)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Help system
|
|
142
|
+
|
|
143
|
+
`dsbro` includes a built-in cheatsheet:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
dsbro.help()
|
|
147
|
+
dsbro.help("viz")
|
|
148
|
+
dsbro.help("encode")
|
|
149
|
+
dsbro.about()
|
|
150
|
+
dsbro.version()
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Notebook example
|
|
154
|
+
|
|
155
|
+
The repository includes a walkthrough notebook:
|
|
156
|
+
|
|
157
|
+
- [examples/quickstart.ipynb](https://github.com/muhammadibrahim313/dsbro/blob/main/examples/quickstart.ipynb)
|
|
158
|
+
|
|
159
|
+
It demonstrates:
|
|
160
|
+
|
|
161
|
+
- `dsbro.setup()`
|
|
162
|
+
- `dsbro.eda.overview()`
|
|
163
|
+
- `dsbro.prep.datetime_features()`
|
|
164
|
+
- `dsbro.prep.text_features()`
|
|
165
|
+
- `dsbro.prep.auto_preprocess()`
|
|
166
|
+
- `dsbro.viz.bar()`
|
|
167
|
+
- `dsbro.ml.compare()`
|
|
168
|
+
|
|
169
|
+
## Development
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
pytest tests/ -v
|
|
173
|
+
ruff check dsbro/ tests/
|
|
174
|
+
ruff format dsbro/ tests/
|
|
175
|
+
python -m build
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Roadmap
|
|
179
|
+
|
|
180
|
+
- Expand example notebooks
|
|
181
|
+
- Add GitHub Actions CI
|
|
182
|
+
- Publish to TestPyPI, then PyPI
|
|
183
|
+
- Continue polishing module docs and tutorial coverage
|
|
184
|
+
|
|
185
|
+
## License
|
|
186
|
+
|
|
187
|
+
MIT. See [LICENSE](https://github.com/muhammadibrahim313/dsbro/blob/main/LICENSE).
|
dsbro-0.1.0/README.md
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/muhammadibrahim313/dsbro/main/imgs/dsbro.png" alt="dsbro logo" width="320">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
# dsbro
|
|
6
|
+
|
|
7
|
+
[Python 3.9+](https://www.python.org/)
|
|
8
|
+
[License: MIT](https://github.com/muhammadibrahim313/dsbro/blob/main/LICENSE)
|
|
9
|
+
[GitHub repository](https://github.com/muhammadibrahim313/dsbro)
|
|
10
|
+
|
|
11
|
+
Your Data Science Bro. One import away.
|
|
12
|
+
|
|
13
|
+
`dsbro` is a lightweight Python toolkit for notebook-heavy data science work. It is built for the
|
|
14
|
+
repeated workflow most people have in Kaggle, Colab, and local Jupyter notebooks: setup, load
|
|
15
|
+
data, inspect it, clean it, visualize it, and train a baseline model fast.
|
|
16
|
+
|
|
17
|
+
## What dsbro covers
|
|
18
|
+
|
|
19
|
+
- `dsbro.utils`: notebook setup, seeding, timers, system info, downloads, simple parallel work
|
|
20
|
+
- `dsbro.io`: file loading, saving, previews, directory trees, file search, submission helpers
|
|
21
|
+
- `dsbro.eda`: overview tables, missing-value analysis, drift checks, target analysis, comparisons
|
|
22
|
+
- `dsbro.prep`: encoding, scaling, missing-value filling, feature engineering, memory reduction
|
|
23
|
+
- `dsbro.viz`: themed matplotlib and optional plotly charts for fast notebook visuals
|
|
24
|
+
- `dsbro.metrics`: classification and regression metrics in one place
|
|
25
|
+
- `dsbro.ml`: model comparison, cross-validation, training, tuning, stacking, pseudo-labeling
|
|
26
|
+
- `dsbro.text`: text cleaning, tokenization, word frequencies, and TF-IDF features
|
|
27
|
+
|
|
28
|
+
## Current status
|
|
29
|
+
|
|
30
|
+
Implemented now:
|
|
31
|
+
|
|
32
|
+
- Core package scaffold and packaging
|
|
33
|
+
- `utils`, `io`, `eda`, `prep`, `viz`, `metrics`, `ml`, and `text`
|
|
34
|
+
- Built-in help and about/version entry points
|
|
35
|
+
- Tests across the implemented modules
|
|
36
|
+
- Quickstart notebook in [examples/quickstart.ipynb](https://github.com/muhammadibrahim313/dsbro/blob/main/examples/quickstart.ipynb)
|
|
37
|
+
|
|
38
|
+
Still planned:
|
|
39
|
+
|
|
40
|
+
- Final polish for docs/examples
|
|
41
|
+
- Additional ML/deep-learning extras over time
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
From PyPI:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install dsbro
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
From GitHub:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install git+https://github.com/muhammadibrahim313/dsbro.git
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
For local development:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install -e ".[dev]"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Optional extras:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install -e ".[ml]"
|
|
67
|
+
pip install -e ".[plotly]"
|
|
68
|
+
pip install -e ".[all]"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
dsbro is installable from PyPI, but the project is still in alpha and under active development.
|
|
72
|
+
|
|
73
|
+
## Quick example
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import dsbro
|
|
77
|
+
import pandas as pd
|
|
78
|
+
|
|
79
|
+
dsbro.setup()
|
|
80
|
+
|
|
81
|
+
train = pd.DataFrame(
|
|
82
|
+
{
|
|
83
|
+
"age": [22, 35, 41, 28],
|
|
84
|
+
"city": ["lahore", "karachi", "lahore", "islamabad"],
|
|
85
|
+
"purchased": [0, 1, 1, 0],
|
|
86
|
+
}
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
overview = dsbro.eda.overview(train)
|
|
90
|
+
processed, report = dsbro.prep.auto_preprocess(train, target="purchased")
|
|
91
|
+
leaderboard = dsbro.ml.compare(train, target="purchased", cv=2)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Help system
|
|
95
|
+
|
|
96
|
+
`dsbro` includes a built-in cheatsheet:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
dsbro.help()
|
|
100
|
+
dsbro.help("viz")
|
|
101
|
+
dsbro.help("encode")
|
|
102
|
+
dsbro.about()
|
|
103
|
+
dsbro.version()
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Notebook example
|
|
107
|
+
|
|
108
|
+
The repository includes a walkthrough notebook:
|
|
109
|
+
|
|
110
|
+
- [examples/quickstart.ipynb](https://github.com/muhammadibrahim313/dsbro/blob/main/examples/quickstart.ipynb)
|
|
111
|
+
|
|
112
|
+
It demonstrates:
|
|
113
|
+
|
|
114
|
+
- `dsbro.setup()`
|
|
115
|
+
- `dsbro.eda.overview()`
|
|
116
|
+
- `dsbro.prep.datetime_features()`
|
|
117
|
+
- `dsbro.prep.text_features()`
|
|
118
|
+
- `dsbro.prep.auto_preprocess()`
|
|
119
|
+
- `dsbro.viz.bar()`
|
|
120
|
+
- `dsbro.ml.compare()`
|
|
121
|
+
|
|
122
|
+
## Development
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pytest tests/ -v
|
|
126
|
+
ruff check dsbro/ tests/
|
|
127
|
+
ruff format dsbro/ tests/
|
|
128
|
+
python -m build
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Roadmap
|
|
132
|
+
|
|
133
|
+
- Expand example notebooks
|
|
134
|
+
- Add GitHub Actions CI
|
|
135
|
+
- Publish to TestPyPI, then PyPI
|
|
136
|
+
- Continue polishing module docs and tutorial coverage
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
MIT. See [LICENSE](https://github.com/muhammadibrahim313/dsbro/blob/main/LICENSE).
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Top-level package exports for dsbro."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from dsbro import eda, io, metrics, ml, prep, text, utils, viz
|
|
9
|
+
from dsbro._version import __version__
|
|
10
|
+
from dsbro.utils import setup
|
|
11
|
+
|
|
12
|
+
# Submodules covered by ``help``. Insertion order matters: it is the order in
# which modules are listed and in which a function name is searched.
_MODULES = {
    "io": io,
    "eda": eda,
    "prep": prep,
    "viz": viz,
    "ml": ml,
    "metrics": metrics,
    "utils": utils,
    "text": text,
}

# One-line description printed next to each module name by ``help``.
# Keys mirror ``_MODULES``.
_MODULE_SUMMARIES = {
    "io": "File and directory utilities",
    "eda": "Exploratory data analysis helpers",
    "prep": "Preprocessing and feature engineering",
    "viz": "Visualization helpers with dsbro theming",
    "ml": "Model comparison, training, tuning, and ensembles",
    "metrics": "Classification and regression metrics",
    "utils": "Notebook and environment utilities",
    "text": "Text cleaning and NLP-style helpers",
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def version() -> str:
    """Report the dsbro release string stored in ``__version__``."""
    return __version__
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def about() -> str:
    """Print and return basic project metadata.

    Returns:
        The multi-line text that was printed: package name and version,
        tagline, author, and homepage URL.
    """
    # The homepage is the same repository URL that the package metadata
    # (Project-URL entries) and the README point to.
    message = (
        f"dsbro {__version__}\n"
        "Your Data Science Bro. One import away.\n"
        "Author: Muhammad Ibrahim Qasmi\n"
        "Homepage: https://github.com/muhammadibrahim313/dsbro"
    )
    print(message)
    return message
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _function_lines(module: Any, prefix: str) -> list[str]:
    """Return one ``<prefix>name(signature): summary`` line per public function."""
    lines: list[str] = []
    for function_name in _public_functions(module):
        function = getattr(module, function_name)
        signature = inspect.signature(function)
        summary = _first_line(inspect.getdoc(function))
        lines.append(f"{prefix}{function_name}{signature}: {summary}")
    return lines


def help(topic: str | None = None) -> str:
    """Print and return help text for dsbro modules or functions.

    Args:
        topic: ``None`` lists every module together with its public functions.
            A module name (for example ``"viz"``) lists only that module. Any
            other value is looked up as a public callable in each module, in
            ``_MODULES`` order, and that callable's full docstring is shown.
            The topic is stripped and lower-cased before the lookup.

    Returns:
        The text that was printed.

    Raises:
        ValueError: If ``topic`` names neither a module nor a public callable.
    """
    if topic is None:
        lines = ["dsbro available modules:"]
        for module_name, module in _MODULES.items():
            lines.append(f"- {module_name}: {_MODULE_SUMMARIES.get(module_name, '')}")
            lines.extend(_function_lines(module, " "))
        message = "\n".join(lines)
        print(message)
        return message

    normalized = topic.strip().lower()
    if normalized in _MODULES:
        lines = [f"dsbro.{normalized} - {_MODULE_SUMMARIES.get(normalized, '')}"]
        lines.extend(_function_lines(_MODULES[normalized], "- "))
        message = "\n".join(lines)
        print(message)
        return message

    # Only public callables count as help topics. A bare ``hasattr`` check would
    # also match private helpers and non-callable module attributes (for
    # example the ``annotations`` object left by ``from __future__ import ...``).
    if not normalized.startswith("_"):
        for module_name, module in _MODULES.items():
            candidate = getattr(module, normalized, None)
            if not callable(candidate):
                continue
            message = inspect.getdoc(candidate) or (
                f"No help text available for dsbro.{module_name}.{normalized}."
            )
            print(message)
            return message

    raise ValueError(f"Unknown help topic: {topic}")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _public_functions(module: Any) -> list[str]:
|
|
93
|
+
"""Return public functions defined directly in a module."""
|
|
94
|
+
names: list[str] = []
|
|
95
|
+
for name, member in inspect.getmembers(module, inspect.isfunction):
|
|
96
|
+
if name.startswith("_"):
|
|
97
|
+
continue
|
|
98
|
+
if inspect.getmodule(member) is module:
|
|
99
|
+
names.append(name)
|
|
100
|
+
return sorted(names)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _first_line(docstring: str | None) -> str:
|
|
104
|
+
"""Return the first line of a docstring."""
|
|
105
|
+
if not docstring:
|
|
106
|
+
return ""
|
|
107
|
+
return docstring.strip().splitlines()[0]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Names exported by ``from dsbro import *``: the submodules, the ``setup``
# shortcut, and the help/about/version entry points. Kept in sorted order.
__all__ = [
    "__version__",
    "about",
    "eda",
    "help",
    "io",
    "metrics",
    "ml",
    "prep",
    "setup",
    "text",
    "utils",
    "version",
    "viz",
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Internal helpers used across the dsbro package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ctypes
|
|
6
|
+
import importlib
|
|
7
|
+
import os
|
|
8
|
+
import platform
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _ensure_path(path: str | Path, *, exists: bool = True) -> Path:
|
|
14
|
+
"""Return a normalized Path and optionally validate existence."""
|
|
15
|
+
normalized = Path(path).expanduser()
|
|
16
|
+
if exists and not normalized.exists():
|
|
17
|
+
raise FileNotFoundError(f"Path does not exist: {normalized}")
|
|
18
|
+
return normalized
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _format_size(size_bytes: int) -> str:
|
|
22
|
+
"""Format bytes into a human-readable size string."""
|
|
23
|
+
units = ["B", "KB", "MB", "GB", "TB"]
|
|
24
|
+
size = float(size_bytes)
|
|
25
|
+
for unit in units:
|
|
26
|
+
if size < 1024 or unit == units[-1]:
|
|
27
|
+
return f"{size:.1f} {unit}"
|
|
28
|
+
size /= 1024
|
|
29
|
+
return f"{size_bytes} B"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _safe_import(module_name: str, install_hint: str | None = None) -> Any:
|
|
33
|
+
"""Import a module and raise a helpful message when missing."""
|
|
34
|
+
try:
|
|
35
|
+
return importlib.import_module(module_name)
|
|
36
|
+
except ImportError as exc:
|
|
37
|
+
message = f"Optional dependency '{module_name}' is required."
|
|
38
|
+
if install_hint:
|
|
39
|
+
message = f"{message} Install with: {install_hint}"
|
|
40
|
+
raise ImportError(message) from exc
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _detect_text_encoding(path: Path) -> str | None:
|
|
44
|
+
"""Best-effort encoding detection for small text files."""
|
|
45
|
+
for encoding in ("utf-8", "utf-8-sig", "latin-1"):
|
|
46
|
+
try:
|
|
47
|
+
path.read_text(encoding=encoding)
|
|
48
|
+
return encoding
|
|
49
|
+
except UnicodeDecodeError:
|
|
50
|
+
continue
|
|
51
|
+
return None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _count_lines(path: Path, encoding: str | None = None) -> int | None:
|
|
55
|
+
"""Count lines in a text file."""
|
|
56
|
+
encodings = [encoding] if encoding else ["utf-8", "utf-8-sig", "latin-1"]
|
|
57
|
+
for candidate in encodings:
|
|
58
|
+
try:
|
|
59
|
+
with path.open("r", encoding=candidate) as handle:
|
|
60
|
+
return sum(1 for _ in handle)
|
|
61
|
+
except UnicodeDecodeError:
|
|
62
|
+
continue
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_package_version(name: str) -> str | None:
|
|
67
|
+
"""Return an installed package version or None."""
|
|
68
|
+
try:
|
|
69
|
+
module = importlib.import_module(name)
|
|
70
|
+
except ImportError:
|
|
71
|
+
return None
|
|
72
|
+
return getattr(module, "__version__", None)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _get_total_memory_bytes() -> int | None:
|
|
76
|
+
"""Return total system memory in bytes without requiring psutil."""
|
|
77
|
+
if hasattr(os, "sysconf"):
|
|
78
|
+
page_size = os.sysconf_names.get("SC_PAGE_SIZE")
|
|
79
|
+
page_count = os.sysconf_names.get("SC_PHYS_PAGES")
|
|
80
|
+
if page_size is not None and page_count is not None:
|
|
81
|
+
try:
|
|
82
|
+
return os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
|
|
83
|
+
except (OSError, ValueError):
|
|
84
|
+
pass
|
|
85
|
+
|
|
86
|
+
if platform.system() == "Windows":
|
|
87
|
+
class _MemoryStatus(ctypes.Structure):
|
|
88
|
+
_fields_ = [
|
|
89
|
+
("length", ctypes.c_ulong),
|
|
90
|
+
("memory_load", ctypes.c_ulong),
|
|
91
|
+
("total_phys", ctypes.c_ulonglong),
|
|
92
|
+
("avail_phys", ctypes.c_ulonglong),
|
|
93
|
+
("total_page_file", ctypes.c_ulonglong),
|
|
94
|
+
("avail_page_file", ctypes.c_ulonglong),
|
|
95
|
+
("total_virtual", ctypes.c_ulonglong),
|
|
96
|
+
("avail_virtual", ctypes.c_ulonglong),
|
|
97
|
+
("avail_extended_virtual", ctypes.c_ulonglong),
|
|
98
|
+
]
|
|
99
|
+
|
|
100
|
+
status = _MemoryStatus()
|
|
101
|
+
status.length = ctypes.sizeof(_MemoryStatus)
|
|
102
|
+
if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(status)):
|
|
103
|
+
return int(status.total_phys)
|
|
104
|
+
|
|
105
|
+
return None
|
|
106
|
+
|