data-forager 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_forager/datasets/tokens_with_aux.py +91 -0
- data_forager/index_stores/fs_based.py +77 -4
- data_forager/indexers/text_lines.py +28 -73
- data_forager/indexers/tokenization_indexer.py +158 -191
- data_forager/sample_generators/__init__.py +30 -0
- data_forager/sample_generators/aux/__init__.py +18 -0
- data_forager/sample_generators/aux/common.py +77 -0
- data_forager/sample_generators/aux/loss_mask.py +78 -0
- data_forager/sample_generators/common.py +117 -0
- data_forager/sample_generators/schema.py +54 -0
- data_forager/sample_generators/tokenization.py +210 -0
- data_forager/sample_generators/tokenization_with_aux.py +250 -0
- data_forager/sample_index.py +34 -2
- {data_forager-0.1.6.dist-info → data_forager-0.2.0.dist-info}/METADATA +1 -1
- data_forager-0.2.0.dist-info/RECORD +29 -0
- {data_forager-0.1.6.dist-info → data_forager-0.2.0.dist-info}/WHEEL +1 -1
- data_forager-0.1.6.dist-info/RECORD +0 -20
- {data_forager-0.1.6.dist-info → data_forager-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {data_forager-0.1.6.dist-info → data_forager-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
data_forager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
data_forager/sample_index.py,sha256=hLBLBIXHUXACwggIrS5rSOyYuHIaWrDrWMYgmcpy0yQ,2289
|
|
3
|
+
data_forager/unzip_files.py,sha256=f3rUUN31NdScQiau_uiw1fNeIHobvGfExSG0KqW9kok,2695
|
|
4
|
+
data_forager/utils.py,sha256=Vbp-wA4Tf0Y4rHRIFaf_uU7MA6xzfFI2jjzmnlNGwRk,454
|
|
5
|
+
data_forager/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
data_forager/datasets/common.py,sha256=gA9Q_2nXp1cvYm7zK99puAVg6rWARZ0eqSO7YrP8rr4,5865
|
|
7
|
+
data_forager/datasets/jsonl.py,sha256=enOjWRT-AJTF3tWtNlonCqdDpZfVsK8If6yEtlA8tns,630
|
|
8
|
+
data_forager/datasets/tokens.py,sha256=OP5MNb9uBDSX_Of6lNVLs5CAj46RwkP4gGDk-94lD40,597
|
|
9
|
+
data_forager/datasets/tokens_with_aux.py,sha256=AE2KaFAhMvNGuS25Slus2N6xkzeUawQo2Lvptjkjejo,2815
|
|
10
|
+
data_forager/index_stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
data_forager/index_stores/common.py,sha256=HYPZfCtmbLDxjPgCoUseahicTEI1el7V1s5tSyZxkfs,780
|
|
12
|
+
data_forager/index_stores/fs_based.py,sha256=gJrbbuQy0gWChDZ3dCK2gOK7ndmZAgzhNRTTUbZ4O1Y,6858
|
|
13
|
+
data_forager/indexers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
data_forager/indexers/jsonl_indexer.py,sha256=xvogFjEcKPNr-GBkKEr8WnWPVvJbkWQzUBrGwVELlA4,1395
|
|
15
|
+
data_forager/indexers/text_lines.py,sha256=A-c8t-OxCQ53UDVq9JtPNRxk2zc4yckHtkDV2r6sPLU,3665
|
|
16
|
+
data_forager/indexers/tokenization_indexer.py,sha256=UAV-eqH5l5OzL_4J-SJqWw_DSuGpoJHx2fxwOlDcTqc,11623
|
|
17
|
+
data_forager/sample_generators/__init__.py,sha256=iZ1t4xlPQT5NXo4fN7MyYzXBOY0Ow-C9I151HouQxQ8,817
|
|
18
|
+
data_forager/sample_generators/common.py,sha256=1qHVFJubrzMvhAuuVoetsdcsgEBe_0kQPfAOqjt-dhc,3713
|
|
19
|
+
data_forager/sample_generators/schema.py,sha256=OjCFJ_D-sa7xR8Qj5bD2OU4Xf_RtXNT05L4yHk9CDvw,1676
|
|
20
|
+
data_forager/sample_generators/tokenization.py,sha256=X6Vq_Xx17Vz1ljZjUX9UzCBvLUMzwnaYOMp2LzxpIIA,8535
|
|
21
|
+
data_forager/sample_generators/tokenization_with_aux.py,sha256=WyUyNtiEfwOOVxEnENolaI8gEbOdclQ1yIduSSzHRjQ,9696
|
|
22
|
+
data_forager/sample_generators/aux/__init__.py,sha256=FbcIQ7t78evP5gXKOTxnKYUWWnfB0Br92-AT8RFs7E4,434
|
|
23
|
+
data_forager/sample_generators/aux/common.py,sha256=6Wm_VZtYWaCkyZtUCpol5VpsP6xWxI-wd7O-pRcz_vc,2070
|
|
24
|
+
data_forager/sample_generators/aux/loss_mask.py,sha256=2IknQr0sFYE2P6IWPhaqI7CKWmS1tYJbVShtLgsb_GU,2303
|
|
25
|
+
data_forager-0.2.0.dist-info/licenses/LICENSE,sha256=If0vYAiJJUtbASoyZPVhvTu3e3m4WB1cQmUpvo9HRTc,1071
|
|
26
|
+
data_forager-0.2.0.dist-info/METADATA,sha256=VqBIBhs14gZaetu3sUlSa6tFANrFmbQb0TPiLuorn5M,9090
|
|
27
|
+
data_forager-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
28
|
+
data_forager-0.2.0.dist-info/top_level.txt,sha256=mLcF2mYnfdaeJ_vIa7hT-MtTpUvn7kgyaWNuxXZ1Ds8,13
|
|
29
|
+
data_forager-0.2.0.dist-info/RECORD,,
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
data_forager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
data_forager/sample_index.py,sha256=72J4_AZtmgyMd6AXMxkfz5BnZ3tf6iZBk962DeFGVcI,1020
|
|
3
|
-
data_forager/unzip_files.py,sha256=f3rUUN31NdScQiau_uiw1fNeIHobvGfExSG0KqW9kok,2695
|
|
4
|
-
data_forager/utils.py,sha256=Vbp-wA4Tf0Y4rHRIFaf_uU7MA6xzfFI2jjzmnlNGwRk,454
|
|
5
|
-
data_forager/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
data_forager/datasets/common.py,sha256=gA9Q_2nXp1cvYm7zK99puAVg6rWARZ0eqSO7YrP8rr4,5865
|
|
7
|
-
data_forager/datasets/jsonl.py,sha256=enOjWRT-AJTF3tWtNlonCqdDpZfVsK8If6yEtlA8tns,630
|
|
8
|
-
data_forager/datasets/tokens.py,sha256=OP5MNb9uBDSX_Of6lNVLs5CAj46RwkP4gGDk-94lD40,597
|
|
9
|
-
data_forager/index_stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
data_forager/index_stores/common.py,sha256=HYPZfCtmbLDxjPgCoUseahicTEI1el7V1s5tSyZxkfs,780
|
|
11
|
-
data_forager/index_stores/fs_based.py,sha256=usTE_eUdcfvBTgIX6fMrdXWqrHuUgNPmtRbGweKh2g8,4106
|
|
12
|
-
data_forager/indexers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
data_forager/indexers/jsonl_indexer.py,sha256=xvogFjEcKPNr-GBkKEr8WnWPVvJbkWQzUBrGwVELlA4,1395
|
|
14
|
-
data_forager/indexers/text_lines.py,sha256=XMm5oc0btP7I16z87g1fmq9AqJyVhDOvR2cDu_zFZio,5093
|
|
15
|
-
data_forager/indexers/tokenization_indexer.py,sha256=t-7Q3PLAJ0DYZT6LWdHeahk9Hz9OQsvWfoPvhHIneMk,13927
|
|
16
|
-
data_forager-0.1.6.dist-info/licenses/LICENSE,sha256=If0vYAiJJUtbASoyZPVhvTu3e3m4WB1cQmUpvo9HRTc,1071
|
|
17
|
-
data_forager-0.1.6.dist-info/METADATA,sha256=djaXS9HRCi_ei2Sm-f_yqWgGwe2JX6UZphbTfsHN2vw,9090
|
|
18
|
-
data_forager-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
19
|
-
data_forager-0.1.6.dist-info/top_level.txt,sha256=mLcF2mYnfdaeJ_vIa7hT-MtTpUvn7kgyaWNuxXZ1Ds8,13
|
|
20
|
-
data_forager-0.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|