dataeval 0.86.8__py3-none-any.whl → 0.86.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,7 +51,7 @@ VOCClassStringMap = Literal[
51
51
  TVOCClassMap = TypeVar("TVOCClassMap", VOCClassStringMap, int, list[VOCClassStringMap], list[int])
52
52
 
53
53
 
54
- class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
54
+ class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str], str]):
55
55
  _resources = [
56
56
  DataLocation(
57
57
  url="https://data.brainchip.com/dataset-mirror/voc/VOCtrainval_11-May-2012.tar",
@@ -412,7 +412,7 @@ class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
412
412
 
413
413
  class VOCDetection(
414
414
  BaseVOCDataset[NDArray[Any], ObjectDetectionTarget[NDArray[Any]]],
415
- BaseODDataset[NDArray[Any]],
415
+ BaseODDataset[NDArray[Any], list[str], str],
416
416
  BaseDatasetNumpyMixin,
417
417
  ):
418
418
  """
@@ -467,7 +467,7 @@ class VOCDetection(
467
467
 
468
468
  class VOCDetectionTorch(
469
469
  BaseVOCDataset[torch.Tensor, ObjectDetectionTarget[torch.Tensor]],
470
- BaseODDataset[torch.Tensor],
470
+ BaseODDataset[torch.Tensor, list[str], str],
471
471
  BaseDatasetTorchMixin,
472
472
  ):
473
473
  """
@@ -57,22 +57,29 @@ class Sufficiency(Generic[T]):
57
57
  test_ds : torch.Dataset
58
58
  Data that will be used for every run's evaluation
59
59
  train_fn : Callable[[nn.Module, Dataset, Sequence[int]], None]
60
- Function which takes a model (torch.nn.Module), a dataset
61
- (torch.utils.data.Dataset), indices to train on and executes model
60
+ Function which takes a model, a dataset, and indices to train on and then executes model
62
61
  training against the data.
63
62
  eval_fn : Callable[[nn.Module, Dataset], Mapping[str, float | ArrayLike]]
64
- Function which takes a model (torch.nn.Module), a dataset
65
- (torch.utils.data.Dataset) and returns a dictionary of metric
66
- values (Mapping[str, float]) which is used to assess model performance
63
+ Function which takes a model and a dataset and returns a dictionary of metric
64
+ values which is used to assess model performance
67
65
  given the model and data.
68
66
  runs : int, default 1
69
- Number of models to run over all subsets
67
+ Number of models to train over the entire dataset.
70
68
  substeps : int, default 5
71
- Total number of dataset partitions that each model will train on
69
+ The number of steps that each model will be trained and evaluated on.
72
70
  train_kwargs : Mapping | None, default None
73
71
  Additional arguments required for custom training function
74
72
  eval_kwargs : Mapping | None, default None
75
73
  Additional arguments required for custom evaluation function
74
+
75
+ Warning
76
+ -------
77
+ Since each run is trained sequentially, increasing the parameter `runs` can significantly increase runtime.
78
+
79
+ Note
80
+ ----
81
+ The `substeps` parameter is overridden by the `eval_at` parameter of :meth:`.Sufficiency.evaluate`.
82
+
76
83
  """
77
84
 
78
85
  def __init__(
@@ -159,13 +166,22 @@ class Sufficiency(Generic[T]):
159
166
  @set_metadata(state=["runs", "substeps"])
160
167
  def evaluate(self, eval_at: int | Iterable[int] | None = None) -> SufficiencyOutput:
161
168
  """
162
- Creates data indices, trains models, and returns plotting data
169
+ Train and evaluate a model over multiple substeps
170
+
171
+ This function trains a model up to each step calculated from substeps. The model is then evaluated
172
+ at that step and trained from 0 to the next step. This repeats for all substeps. Once a model has been
173
+ trained and evaluated at all substeps, if runs is greater than one, the model weights are reset and
174
+ the process is repeated.
175
+
176
+ During each evaluation, the metrics returned as a dictionary by the given evaluation function are stored
177
+ and then averaged once all runs are complete.
163
178
 
164
179
  Parameters
165
180
  ----------
166
181
  eval_at : int | Iterable[int] | None, default None
167
- Specify this to collect accuracies over a specific set of dataset lengths, rather
168
- than letting :term:`sufficiency<Sufficiency>` internally create the lengths to evaluate at.
182
+ Specify this to collect metrics over a specific set of dataset lengths.
183
+ If `None`, the steps to evaluate at are calculated by
184
+ `np.geomspace` over the length of the dataset, using `self.substeps` steps.
169
185
 
170
186
  Returns
171
187
  -------
@@ -179,6 +195,8 @@ class Sufficiency(Generic[T]):
179
195
 
180
196
  Examples
181
197
  --------
198
+ Default runs and substeps
199
+
182
200
  >>> suff = Sufficiency(
183
201
  ... model=model,
184
202
  ... train_ds=train_ds,
@@ -190,6 +208,31 @@ class Sufficiency(Generic[T]):
190
208
  ... )
191
209
  >>> suff.evaluate()
192
210
  SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
211
+
212
+ Evaluate at a single value
213
+
214
+ >>> suff = Sufficiency(
215
+ ... model=model,
216
+ ... train_ds=train_ds,
217
+ ... test_ds=test_ds,
218
+ ... train_fn=train_fn,
219
+ ... eval_fn=eval_fn,
220
+ ... )
221
+ >>> suff.evaluate(eval_at=50)
222
+ SufficiencyOutput(steps=array([50]), measures={'test': array([1.])}, n_iter=1000)
223
+
224
+ Evaluate at linear steps from 0-100 inclusive
225
+
226
+ >>> suff = Sufficiency(
227
+ ... model=model,
228
+ ... train_ds=train_ds,
229
+ ... test_ds=test_ds,
230
+ ... train_fn=train_fn,
231
+ ... eval_fn=eval_fn,
232
+ ... )
233
+ >>> suff.evaluate(eval_at=np.arange(0, 101, 20))
234
+ SufficiencyOutput(steps=array([ 0, 20, 40, 60, 80, 100]), measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
235
+
193
236
  """ # noqa: E501
194
237
  if eval_at is not None:
195
238
  ranges = np.asarray(list(eval_at) if isinstance(eval_at, Iterable) else [eval_at])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataeval
3
- Version: 0.86.8
3
+ Version: 0.86.9
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Project-URL: Homepage, https://dataeval.ai/
6
6
  Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -1,6 +1,6 @@
1
1
  dataeval/__init__.py,sha256=dEDltdHOnbk4-XAbQwJLOZtCbRLZsDMnptWRwbF2r54,1773
2
2
  dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
3
- dataeval/_version.py,sha256=IPUOExUy8nF4kYGtCPV5bg6_IYDRLVOKnFJcNllcO1M,513
3
+ dataeval/_version.py,sha256=NKlNIBKyuGsE6TJjC6ieMwWJh-T6f3KPhk_0sXgjByQ,513
4
4
  dataeval/config.py,sha256=g3Np0Q3J5Rzij6Gsz7tJh7eOxgwNPf6NsFYmAR8Atfs,4219
5
5
  dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  dataeval/typing.py,sha256=W8rqFFkAqE5a5ar3MmB-O5gcMJqvoDKXC8Y0ggBqAKo,7216
@@ -89,17 +89,18 @@ dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rN
89
89
  dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
90
90
  dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
91
91
  dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
92
- dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
93
- dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
94
- dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
92
+ dataeval/utils/datasets/__init__.py,sha256=8sEQwOixx9OMkwaU0u9Hl2Cdcb5095tJzz5dgqgdNKc,643
93
+ dataeval/utils/datasets/_antiuav.py,sha256=CvqFIBEu8X1MmKzyUBTw1uzog2DWppiJ6ZynVNp8mv0,8320
94
+ dataeval/utils/datasets/_base.py,sha256=hMVza1lN9yhLVLSR7ucw1cQKn3s8UGdq74NV8MN6ZYo,9285
95
95
  dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
96
- dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
97
- dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
96
+ dataeval/utils/datasets/_fileio.py,sha256=hMxGm-OnsDMj8_xbbHgv9YSxpqm_8NXWQrj53rDg-nQ,5451
97
+ dataeval/utils/datasets/_milco.py,sha256=yWAqH-Dxe2ZYWG8dW89j4SQHh8O_Ys90LurGKFYfSU4,7885
98
98
  dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
99
99
  dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
100
+ dataeval/utils/datasets/_seadrone.py,sha256=daRjeRNaa5CPhwr1nelbTXaJrF5H6nUbz4scH3gCl8g,270979
100
101
  dataeval/utils/datasets/_ships.py,sha256=6U04HAoM3jgLl1qv-NnxjZeSsBipcqWJBMhBMn5iIUY,5115
101
102
  dataeval/utils/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
102
- dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8SxtsR0,24586
103
+ dataeval/utils/datasets/_voc.py,sha256=P11jLIMo87_f8xBLWCMAX1-lA5OGCowmfWpVxpQJFEc,24623
103
104
  dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
104
105
  dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
105
106
  dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
@@ -107,8 +108,8 @@ dataeval/utils/torch/_internal.py,sha256=9rzlMeM8i3p-ctulh9WDQATMXtlp-Jk2pBX7NGC
107
108
  dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
108
109
  dataeval/utils/torch/trainer.py,sha256=DRyPScGdE4o5Xo3BmD9p2PGOApzi1E-QfsBRNZ5IXW8,5544
109
110
  dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
110
- dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
111
- dataeval-0.86.8.dist-info/METADATA,sha256=rCf58-uzgjsTNZkY3LOBMSi5fhQ2cdAtnrrDI_eYR_I,5925
112
- dataeval-0.86.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
113
- dataeval-0.86.8.dist-info/licenses/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
114
- dataeval-0.86.8.dist-info/RECORD,,
111
+ dataeval/workflows/sufficiency.py,sha256=UAPjowFrmM6IJJaOk9GkH3nfQTyDy2_zOY55o2g3G1M,10072
112
+ dataeval-0.86.9.dist-info/METADATA,sha256=qUho4Ureh4Pfo91py79pNNUK5yu0x0c6K8R29Al5yQ4,5925
113
+ dataeval-0.86.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
114
+ dataeval-0.86.9.dist-info/licenses/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
115
+ dataeval-0.86.9.dist-info/RECORD,,