haphazard 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. haphazard-0.1.0/LICENSE +21 -0
  2. haphazard-0.1.0/MANIFEST.in +2 -0
  3. haphazard-0.1.0/PKG-INFO +255 -0
  4. haphazard-0.1.0/README.md +214 -0
  5. haphazard-0.1.0/haphazard/__init__.py +25 -0
  6. haphazard-0.1.0/haphazard/data/__init__.py +26 -0
  7. haphazard-0.1.0/haphazard/data/base_dataset.py +250 -0
  8. haphazard-0.1.0/haphazard/data/datasets/__init__.py +116 -0
  9. haphazard-0.1.0/haphazard/data/datasets/dry_bean/__init__.py +90 -0
  10. haphazard-0.1.0/haphazard/data/datasets/dummy_dataset/__init__.py +84 -0
  11. haphazard-0.1.0/haphazard/data/datasets/gas/__init__.py +73 -0
  12. haphazard-0.1.0/haphazard/data/datasets/magic04/__init__.py +65 -0
  13. haphazard-0.1.0/haphazard/data/mask.py +164 -0
  14. haphazard-0.1.0/haphazard/models/__init__.py +25 -0
  15. haphazard-0.1.0/haphazard/models/base_model.py +253 -0
  16. haphazard-0.1.0/haphazard/models/model_zoo/__init__.py +105 -0
  17. haphazard-0.1.0/haphazard/models/model_zoo/dummy_model/__init__.py +149 -0
  18. haphazard-0.1.0/haphazard/models/model_zoo/olvf/__init__.py +187 -0
  19. haphazard-0.1.0/haphazard/models/model_zoo/olvf/olvf.py +215 -0
  20. haphazard-0.1.0/haphazard/utils/__init__.py +41 -0
  21. haphazard-0.1.0/haphazard/utils/file_utils.py +50 -0
  22. haphazard-0.1.0/haphazard/utils/metrics.py +279 -0
  23. haphazard-0.1.0/haphazard/utils/seeding.py +62 -0
  24. haphazard-0.1.0/haphazard.egg-info/PKG-INFO +255 -0
  25. haphazard-0.1.0/haphazard.egg-info/SOURCES.txt +29 -0
  26. haphazard-0.1.0/haphazard.egg-info/dependency_links.txt +1 -0
  27. haphazard-0.1.0/haphazard.egg-info/requires.txt +5 -0
  28. haphazard-0.1.0/haphazard.egg-info/top_level.txt +1 -0
  29. haphazard-0.1.0/pyproject.toml +3 -0
  30. haphazard-0.1.0/setup.cfg +4 -0
  31. haphazard-0.1.0/setup.py +38 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Arijit Das
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ include README.md
2
+ recursive-include haphazard *.py
@@ -0,0 +1,255 @@
1
+ Metadata-Version: 2.4
2
+ Name: haphazard
3
+ Version: 0.1.0
4
+ Summary: A modular framework for registering and running haphazard datasets and models.
5
+ Home-page: https://github.com/theArijitDas/Haphazard-Package/
6
+ Author: Arijit Das
7
+ Author-email: dasarijitjnv@gmail.com
8
+ License: MIT
9
+ Project-URL: Bug Tracker, https://github.com/theArijitDas/Haphazard-Package/issues
10
+ Project-URL: Source Code, https://github.com/theArijitDas/Haphazard-Package/
11
+ Keywords: machine-learning haphazard models datasets registration framework
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: numpy
24
+ Requires-Dist: pandas
25
+ Requires-Dist: tqdm
26
+ Requires-Dist: scikit-learn
27
+ Requires-Dist: torch
28
+ Dynamic: author
29
+ Dynamic: author-email
30
+ Dynamic: classifier
31
+ Dynamic: description
32
+ Dynamic: description-content-type
33
+ Dynamic: home-page
34
+ Dynamic: keywords
35
+ Dynamic: license
36
+ Dynamic: license-file
37
+ Dynamic: project-url
38
+ Dynamic: requires-dist
39
+ Dynamic: requires-python
40
+ Dynamic: summary
41
+
42
+ # Haphazard
43
+
44
+ A Python package for **haphazard dataset and model management**.
45
+ Provides a standardized interface for loading datasets and models, running experiments, and extending with custom datasets or models.
46
+
47
+ ---
48
+
49
+ ## Table of Contents
50
+
51
+ - [Installation](#installation)
52
+ - [Quick Start](#quick-start)
53
+ - [Datasets](#datasets)
54
+ - [Models](#models)
55
+ - [Contributing](#contributing)
56
+ - [License](#license)
57
+
58
+ ---
59
+
60
+ ## Installation
61
+
62
+ Install via pip:
63
+
64
+ ```bash
65
+ pip install haphazard
66
+ ```
67
+
68
+ Or for local development:
69
+
70
+ ```bash
71
+ git clone https://github.com/theArijitDas/Haphazard-Package.git
71
+ cd Haphazard-Package
73
+ pip install -e .
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Project Structure
79
+
80
+ The Haphazard package has a modular layout:
81
+
82
+ ```
83
+ haphazard/
84
+ ├── __init__.py # Top-level package
85
+ ├── data/ # Dataset related modules
86
+ │ ├── __init__.py
87
+ │ ├── base_dataset.py # Abstract BaseDataset class
88
+ │ └── datasets/ # All dataset implementations
89
+ │ ├── __init__.py
90
+ │ └── dummy_dataset/
91
+ │ └── __init__.py
92
+ ├── models/ # Model related modules
93
+ │ ├── __init__.py
94
+ │ ├── base_model.py # Abstract BaseModel class
95
+ │ └── model_zoo/ # All model implementations
96
+ │ ├── __init__.py
97
+ │ └── dummy_model/
98
+ │ └── __init__.py
99
+ └── utils/ # Optional helper functions
100
+ └── ...
101
+ ```
102
+
103
+ **Notes:**
104
+
105
+ * `data/base_dataset.py` defines `BaseDataset`.
106
+ * `data/datasets/` contains registered datasets; each dataset is a submodule with `__init__.py`.
107
+ * `models/base_model.py` defines `BaseModel`.
108
+ * `models/model_zoo/` contains registered models; each model is a submodule with `__init__.py`.
109
+ * `utils/` is optional, for shared helpers.
110
+
111
+ This layout allows **dynamic registration** of datasets and models via decorators.
112
+
113
+ ---
114
+
115
+
116
+ ## Quick Start
117
+
118
+ ```python
119
+ from haphazard import load_dataset, load_model
120
+
121
+ # Load dataset
122
+ dataset = load_dataset("dummy", n_samples=100, n_features=10)
123
+
124
+ # Load model
125
+ model = load_model("dummy")
126
+
127
+ # Run model
128
+ outputs = model(dataset)
129
+ print(outputs)
130
+ ```
131
+
132
+ ---
133
+
134
+ ## Datasets
135
+
136
+ * All datasets must inherit from `BaseDataset`.
137
+ * Example dataset: `DummyDataset`.
138
+ * Main interface:
139
+
140
+ ```python
141
+ from haphazard import load_dataset
142
+
143
+ dataset = load_dataset("dummy", base_path="./data")
144
+ x, y = dataset.load_data()
145
+ mask = dataset.load_mask(scheme="probabilistic", availability_prob=0.5)
146
+ ```
147
+
148
+ ### Dataset Attributes
149
+
150
+ * `name` : str — Dataset name.
151
+ * `task` : `"classification"` | `"regression"`.
152
+ * `haphazard_type` : `"controlled"` | `"intrinsic"`.
153
+ * `n_samples`, `n_features` : int.
154
+ * `num_classes` : int (for classification).
155
+
156
+ ---
157
+
158
+ ## Models
159
+
160
+ * All models must inherit from `BaseModel`.
161
+ * Example model: `DummyModel`.
162
+ * Main interface:
163
+
164
+ ```python
165
+ from haphazard import load_model
166
+
167
+ model = load_model("dummy")
168
+ outputs = model(dataset)
169
+ ```
170
+
171
+ ### Output
172
+
173
+ * **Classification**: `labels`, `preds`, `logits`, `time_taken`, `is_logit`.
174
+ * **Regression**: `targets`, `preds`, `time_taken`.
175
+
176
+ ---
177
+
178
+ ## Contributing
179
+
180
+ Haphazard is designed for **easy extensibility**. You can add new datasets and models.
181
+
182
+ ### Adding a new dataset
183
+
184
+ 1. Create a new folder under `haphazard/data/datasets/`, e.g., `my_dataset/`.
185
+ 2. Add `__init__.py`:
186
+
187
+ ```python
188
+ from ...base_dataset import BaseDataset
189
+ from ...datasets import register_dataset
190
+ import numpy as np
191
+
192
+ @register_dataset("my_dataset")
193
+ class MyDataset(BaseDataset):
194
+ def __init__(self, base_path="./", **kwargs):
195
+ self.name = "my_dataset"
196
+ self.haphazard_type = "controlled"
197
+ self.task = "classification"
198
+ super().__init__(base_path=base_path, **kwargs)
199
+
200
+ def read_data(self, base_path="./"):
201
+ # Load or generate x, y
202
+ x = np.random.random((100, 10))
203
+ y = np.random.randint(0, 2, 100)
204
+ return x, y
205
+ ```
206
+
207
+ 3. The dataset is automatically registered and can be loaded with `load_dataset("my_dataset")`.
208
+
209
+ ### Adding a new model
210
+
211
+ 1. Create a new folder under `haphazard/models/model_zoo/`, e.g., `my_model/`.
212
+ 2. Add `__init__.py`:
213
+
214
+ ```python
215
+ from ...base_model import BaseModel, BaseDataset
216
+ from ...model_zoo import register_model
217
+ import numpy as np
218
+
219
+ @register_model("my_model")
220
+ class MyModel(BaseModel):
221
+ def __init__(self, **kwargs):
222
+ self.name = "MyModel"
223
+ self.tasks = {"classification", "regression"}
224
+ self.deterministic = True
225
+ self.hyperparameters = set()
226
+ super().__init__(**kwargs)
227
+
228
+ def fit(self, dataset: BaseDataset, mask_params=None, model_params=None, seed=42):
229
+ # Dummy implementation
230
+ x, y = dataset.load_data()
231
+ mask = dataset.load_mask(**(mask_params or {}))
232
+ preds = np.random.randint(0, 2, size=y.shape[0])
233
+ if dataset.task == "classification":
234
+ return {
235
+ "labels": y,
236
+ "preds": preds,
237
+ "logits": preds.astype(float),
238
+ "time_taken": 0.0,
239
+ "is_logit": True
240
+ }
241
+ elif dataset.task == "regression":
242
+ return {
243
+ "targets": y,
244
+ "preds": preds,
245
+ "time_taken": 0.0,
246
+ }
247
+ ```
248
+
249
+ 3. The model is automatically registered and can be loaded with `load_model("my_model")`.
250
+
251
+ ---
252
+
253
+ ## License
254
+
255
+ MIT License.
@@ -0,0 +1,214 @@
1
+ # Haphazard
2
+
3
+ A Python package for **haphazard dataset and model management**.
4
+ Provides a standardized interface for loading datasets and models, running experiments, and extending with custom datasets or models.
5
+
6
+ ---
7
+
8
+ ## Table of Contents
9
+
10
+ - [Installation](#installation)
11
+ - [Quick Start](#quick-start)
12
+ - [Datasets](#datasets)
13
+ - [Models](#models)
14
+ - [Contributing](#contributing)
15
+ - [License](#license)
16
+
17
+ ---
18
+
19
+ ## Installation
20
+
21
+ Install via pip:
22
+
23
+ ```bash
24
+ pip install haphazard
25
+ ```
26
+
27
+ Or for local development:
28
+
29
+ ```bash
30
+ git clone https://github.com/theArijitDas/Haphazard-Package.git
30
+ cd Haphazard-Package
32
+ pip install -e .
33
+ ```
34
+
35
+ ---
36
+
37
+ ## Project Structure
38
+
39
+ The Haphazard package has a modular layout:
40
+
41
+ ```
42
+ haphazard/
43
+ ├── __init__.py # Top-level package
44
+ ├── data/ # Dataset related modules
45
+ │ ├── __init__.py
46
+ │ ├── base_dataset.py # Abstract BaseDataset class
47
+ │ └── datasets/ # All dataset implementations
48
+ │ ├── __init__.py
49
+ │ └── dummy_dataset/
50
+ │ └── __init__.py
51
+ ├── models/ # Model related modules
52
+ │ ├── __init__.py
53
+ │ ├── base_model.py # Abstract BaseModel class
54
+ │ └── model_zoo/ # All model implementations
55
+ │ ├── __init__.py
56
+ │ └── dummy_model/
57
+ │ └── __init__.py
58
+ └── utils/ # Optional helper functions
59
+ └── ...
60
+ ```
61
+
62
+ **Notes:**
63
+
64
+ * `data/base_dataset.py` defines `BaseDataset`.
65
+ * `data/datasets/` contains registered datasets; each dataset is a submodule with `__init__.py`.
66
+ * `models/base_model.py` defines `BaseModel`.
67
+ * `models/model_zoo/` contains registered models; each model is a submodule with `__init__.py`.
68
+ * `utils/` is optional, for shared helpers.
69
+
70
+ This layout allows **dynamic registration** of datasets and models via decorators.
71
+
72
+ ---
73
+
74
+
75
+ ## Quick Start
76
+
77
+ ```python
78
+ from haphazard import load_dataset, load_model
79
+
80
+ # Load dataset
81
+ dataset = load_dataset("dummy", n_samples=100, n_features=10)
82
+
83
+ # Load model
84
+ model = load_model("dummy")
85
+
86
+ # Run model
87
+ outputs = model(dataset)
88
+ print(outputs)
89
+ ```
90
+
91
+ ---
92
+
93
+ ## Datasets
94
+
95
+ * All datasets must inherit from `BaseDataset`.
96
+ * Example dataset: `DummyDataset`.
97
+ * Main interface:
98
+
99
+ ```python
100
+ from haphazard import load_dataset
101
+
102
+ dataset = load_dataset("dummy", base_path="./data")
103
+ x, y = dataset.load_data()
104
+ mask = dataset.load_mask(scheme="probabilistic", availability_prob=0.5)
105
+ ```
106
+
107
+ ### Dataset Attributes
108
+
109
+ * `name` : str — Dataset name.
110
+ * `task` : `"classification"` | `"regression"`.
111
+ * `haphazard_type` : `"controlled"` | `"intrinsic"`.
112
+ * `n_samples`, `n_features` : int.
113
+ * `num_classes` : int (for classification).
114
+
115
+ ---
116
+
117
+ ## Models
118
+
119
+ * All models must inherit from `BaseModel`.
120
+ * Example model: `DummyModel`.
121
+ * Main interface:
122
+
123
+ ```python
124
+ from haphazard import load_model
125
+
126
+ model = load_model("dummy")
127
+ outputs = model(dataset)
128
+ ```
129
+
130
+ ### Output
131
+
132
+ * **Classification**: `labels`, `preds`, `logits`, `time_taken`, `is_logit`.
133
+ * **Regression**: `targets`, `preds`, `time_taken`.
134
+
135
+ ---
136
+
137
+ ## Contributing
138
+
139
+ Haphazard is designed for **easy extensibility**. You can add new datasets and models.
140
+
141
+ ### Adding a new dataset
142
+
143
+ 1. Create a new folder under `haphazard/data/datasets/`, e.g., `my_dataset/`.
144
+ 2. Add `__init__.py`:
145
+
146
+ ```python
147
+ from ...base_dataset import BaseDataset
148
+ from ...datasets import register_dataset
149
+ import numpy as np
150
+
151
+ @register_dataset("my_dataset")
152
+ class MyDataset(BaseDataset):
153
+ def __init__(self, base_path="./", **kwargs):
154
+ self.name = "my_dataset"
155
+ self.haphazard_type = "controlled"
156
+ self.task = "classification"
157
+ super().__init__(base_path=base_path, **kwargs)
158
+
159
+ def read_data(self, base_path="./"):
160
+ # Load or generate x, y
161
+ x = np.random.random((100, 10))
162
+ y = np.random.randint(0, 2, 100)
163
+ return x, y
164
+ ```
165
+
166
+ 3. The dataset is automatically registered and can be loaded with `load_dataset("my_dataset")`.
167
+
168
+ ### Adding a new model
169
+
170
+ 1. Create a new folder under `haphazard/models/model_zoo/`, e.g., `my_model/`.
171
+ 2. Add `__init__.py`:
172
+
173
+ ```python
174
+ from ...base_model import BaseModel, BaseDataset
175
+ from ...model_zoo import register_model
176
+ import numpy as np
177
+
178
+ @register_model("my_model")
179
+ class MyModel(BaseModel):
180
+ def __init__(self, **kwargs):
181
+ self.name = "MyModel"
182
+ self.tasks = {"classification", "regression"}
183
+ self.deterministic = True
184
+ self.hyperparameters = set()
185
+ super().__init__(**kwargs)
186
+
187
+ def fit(self, dataset: BaseDataset, mask_params=None, model_params=None, seed=42):
188
+ # Dummy implementation
189
+ x, y = dataset.load_data()
190
+ mask = dataset.load_mask(**(mask_params or {}))
191
+ preds = np.random.randint(0, 2, size=y.shape[0])
192
+ if dataset.task == "classification":
193
+ return {
194
+ "labels": y,
195
+ "preds": preds,
196
+ "logits": preds.astype(float),
197
+ "time_taken": 0.0,
198
+ "is_logit": True
199
+ }
200
+ elif dataset.task == "regression":
201
+ return {
202
+ "targets": y,
203
+ "preds": preds,
204
+ "time_taken": 0.0,
205
+ }
206
+ ```
207
+
208
+ 3. The model is automatically registered and can be loaded with `load_model("my_model")`.
209
+
210
+ ---
211
+
212
+ ## License
213
+
214
+ MIT License.
@@ -0,0 +1,25 @@
1
+ """
2
+ haphazard
3
+ ---------
4
+ Top-level imports for main API access.
5
+ """
6
+
7
+ # User-facing APIs
8
+ from .data import load_dataset
9
+ from .models import load_model
10
+
11
+ # Optional APIs for contributors
12
+ from .data import BaseDataset, register_dataset
13
+ from .models import BaseModel, register_model
14
+
15
+ # -------------------------------------------------------------------------
16
+ # Public exports
17
+ # -------------------------------------------------------------------------
18
+ __all__ = [
19
+ "load_dataset",
20
+ "load_model",
21
+ "BaseDataset",
22
+ "register_dataset",
23
+ "BaseModel",
24
+ "register_model",
25
+ ]
@@ -0,0 +1,26 @@
1
+ """
2
+ haphazard.data
3
+ --------------
4
+ Dataset abstraction and registry subpackage.
5
+
6
+ This subpackage provides:
7
+
8
+ - The :class:`BaseDataset` abstract base class, which all datasets must inherit from.
9
+ - Dataset registration and loading utilities via the `datasets` submodule.
10
+ """
11
+
12
+ from .base_dataset import BaseDataset, TaskType, HaphazardType
13
+ from .datasets import load_dataset, _DATASET_REGISTRY, register_dataset
14
+
15
+
16
+ # -------------------------------------------------------------------------
17
+ # Public exports
18
+ # -------------------------------------------------------------------------
19
+ __all__ = [
20
+ "BaseDataset",
21
+ "TaskType",
22
+ "HaphazardType",
23
+ "load_dataset",
24
+ "_DATASET_REGISTRY",
25
+ "register_dataset",
26
+ ]