data-forager 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -168,11 +168,14 @@ class SubsampledDataset:
168
168
 
169
169
  # Sample indices without replacement
170
170
  rng = np.random.default_rng(seed)
171
- self._indices = rng.choice(n_full, size=n_sub, replace=False)
171
+ indices = rng.choice(n_full, size=n_sub, replace=False)
172
172
 
173
173
  # Sort for cache locality unless random order is requested
174
174
  if not random_order:
175
- self._indices.sort()
175
+ indices.sort()
176
+
177
+ # Convert to Python list of ints for underlying dataset compatibility
178
+ self._indices: list[int] = indices.tolist()
176
179
 
177
180
  def __len__(self) -> int:
178
181
  return len(self._indices)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-forager
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Enabling random access to large datasets on disk for PyTorch training and other use cases
5
5
  Author-email: Freddy Snijder <forager@visionscapers.com>
6
6
  License-Expression: MIT
@@ -3,7 +3,7 @@ data_forager/sample_index.py,sha256=72J4_AZtmgyMd6AXMxkfz5BnZ3tf6iZBk962DeFGVcI,
3
3
  data_forager/unzip_files.py,sha256=f3rUUN31NdScQiau_uiw1fNeIHobvGfExSG0KqW9kok,2695
4
4
  data_forager/utils.py,sha256=Vbp-wA4Tf0Y4rHRIFaf_uU7MA6xzfFI2jjzmnlNGwRk,454
5
5
  data_forager/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- data_forager/datasets/common.py,sha256=8IXsOCZ5rXd1_8LY77PpRmZvkvefD1af_PHgim9Ufaw,5746
6
+ data_forager/datasets/common.py,sha256=gA9Q_2nXp1cvYm7zK99puAVg6rWARZ0eqSO7YrP8rr4,5865
7
7
  data_forager/datasets/jsonl.py,sha256=enOjWRT-AJTF3tWtNlonCqdDpZfVsK8If6yEtlA8tns,630
8
8
  data_forager/datasets/tokens.py,sha256=OP5MNb9uBDSX_Of6lNVLs5CAj46RwkP4gGDk-94lD40,597
9
9
  data_forager/index_stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -13,8 +13,8 @@ data_forager/indexers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
13
13
  data_forager/indexers/jsonl_indexer.py,sha256=xvogFjEcKPNr-GBkKEr8WnWPVvJbkWQzUBrGwVELlA4,1395
14
14
  data_forager/indexers/text_lines.py,sha256=XMm5oc0btP7I16z87g1fmq9AqJyVhDOvR2cDu_zFZio,5093
15
15
  data_forager/indexers/tokenization_indexer.py,sha256=t-7Q3PLAJ0DYZT6LWdHeahk9Hz9OQsvWfoPvhHIneMk,13927
16
- data_forager-0.1.4.dist-info/licenses/LICENSE,sha256=If0vYAiJJUtbASoyZPVhvTu3e3m4WB1cQmUpvo9HRTc,1071
17
- data_forager-0.1.4.dist-info/METADATA,sha256=87KLSqkidJw1F7i7q9xnIwdBsAV5amRQ_4O1RxY8KBc,9090
18
- data_forager-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- data_forager-0.1.4.dist-info/top_level.txt,sha256=mLcF2mYnfdaeJ_vIa7hT-MtTpUvn7kgyaWNuxXZ1Ds8,13
20
- data_forager-0.1.4.dist-info/RECORD,,
16
+ data_forager-0.1.5.dist-info/licenses/LICENSE,sha256=If0vYAiJJUtbASoyZPVhvTu3e3m4WB1cQmUpvo9HRTc,1071
17
+ data_forager-0.1.5.dist-info/METADATA,sha256=V2844BjLgxIwaLuRb-78AUDFXpW9YfMb2ieyUDrKx4Q,9090
18
+ data_forager-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ data_forager-0.1.5.dist-info/top_level.txt,sha256=mLcF2mYnfdaeJ_vIa7hT-MtTpUvn7kgyaWNuxXZ1Ds8,13
20
+ data_forager-0.1.5.dist-info/RECORD,,