oxenai 0.42.4__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oxen/workspace.py ADDED
@@ -0,0 +1,210 @@
1
+ import os
2
+
3
+ from typing import Optional, TYPE_CHECKING
4
+
5
+ from .oxen import PyWorkspace, PyCommit
6
+
7
+ # Use TYPE_CHECKING for type hints to avoid runtime circular imports
8
+ if TYPE_CHECKING:
9
+ from .remote_repo import RemoteRepo
10
+
11
+
12
class Workspace:
    """
    The Workspace class allows you to interact with an Oxen workspace
    without downloading the data locally.

    A workspace is created off a branch and is tied to the commit id of the
    branch at the time of creation.

    You can commit a Workspace back to the same branch if the branch has not
    advanced, otherwise you will have to commit to a new branch and merge.

    ## Examples

    ### Adding Files to a Workspace

    Create a workspace from a branch.

    ```python
    from oxen import RemoteRepo
    from oxen import Workspace

    # Connect to the remote repo
    repo = RemoteRepo("ox/CatDogBBox")

    # Create the workspace
    workspace = Workspace(repo, "my-branch")

    # Add a file to the workspace
    workspace.add("my-image.png")

    # Print the status of the workspace
    status = workspace.status()
    print(status.added_files())

    # Commit the workspace
    workspace.commit("Adding my image to the workspace.")
    ```
    """

    def __init__(
        self,
        repo: "RemoteRepo",
        branch: str,
        workspace_id: Optional[str] = None,
        workspace_name: Optional[str] = None,
        path: Optional[str] = None,
    ):
        """
        Create a new Workspace.

        If `repo.revision` differs from `branch`,
        `repo.create_checkout_branch(branch)` is called on the passed-in repo
        before the workspace is created.

        Args:
            repo: `RemoteRepo`
                The remote repo to create the workspace from.
            branch: `str`
                The branch name to create the workspace from. The workspace
                will be tied to the commit id of the branch at the time of creation.
            workspace_id: `Optional[str]`
                The workspace id to create the workspace from.
                If left empty, will create a unique workspace id.
            workspace_name: `Optional[str]`
                The name of the workspace. If left empty, the workspace will have no name.
            path: `Optional[str]`
                The path to the workspace. If left empty, the workspace will be created in the root of the remote repo.

        Raises:
            ValueError: Re-raised unchanged when the underlying workspace
                cannot be created, after printing the error and a hint
                about write access.
        """
        self._repo = repo
        # Make sure the repo is on the requested branch before creating the workspace.
        if self._repo.revision != branch:
            self._repo.create_checkout_branch(branch)
        try:
            self._workspace = PyWorkspace(
                repo._repo, branch, workspace_id, workspace_name, path
            )
        except ValueError as e:
            print(e)
            # Print this error in red
            print(
                f"\033[91mMake sure that you have write access to `{repo.namespace}/{repo.name}`\033[0m\n"
            )
            # Bare `raise` re-raises the active exception with its original traceback.
            raise

    def __repr__(self) -> str:
        return f"Workspace(id={self._workspace.id()}, branch={self._workspace.branch()}, commit_id={self._workspace.commit_id()})"

    @property
    def id(self):
        """
        Get the id of the workspace.
        """
        return self._workspace.id()

    @property
    def name(self):
        """
        Get the name of the workspace.
        """
        return self._workspace.name()

    @property
    def branch(self):
        """
        Get the branch that the workspace is tied to.
        """
        return self._workspace.branch()

    @property
    def commit_id(self):
        """
        Get the commit id of the workspace.
        """
        return self._workspace.commit_id()

    @property
    def repo(self) -> "RemoteRepo":
        """
        Get the remote repo that the workspace is tied to.
        """
        return self._repo

    def status(self, path: str = ""):
        """
        Get the status of the workspace.

        Args:
            path: `str`
                The path to check the status of.
        """
        return self._workspace.status(path)

    def add(self, src: str, dst: str = "") -> None:
        """
        Add a file, or every file under a directory, to the workspace

        Args:
            src: `str`
                The path to the local file to be staged. If it is a directory,
                it is walked recursively and all files found are staged together.
            dst: `str`
                The path in the remote repo where the file will be added
        """
        if os.path.isdir(src):
            # Collect every file below `src` and stage them in a single call.
            paths = [
                os.path.join(dir_path, file_name)
                for dir_path, _, files in os.walk(src)
                for file_name in files
            ]
            self._workspace.add_many(paths, dst)
        else:
            # Add a single file
            self._workspace.add(src, dst)

    def add_bytes(self, src: str, buf: bytes, dst: str = "") -> None:
        """
        Adds from a memory buffer to the workspace

        Args:
            src: `str`
                The relative path to be used as the entry's name in the workspace
            buf: `bytes`
                The memory buffer to be read from for this entry
            dst: `str`
                The path in the remote repo where the file will be added
        """
        self._workspace.add_bytes(src, buf, dst)

    def rm(self, path: str) -> None:
        """
        Remove a file from the workspace

        Args:
            path: `str`
                The path to the file on workspace to be removed
        """
        self._workspace.rm(path)

    def commit(
        self,
        message: str,
        branch_name: Optional[str] = None,
    ) -> PyCommit:
        """
        Commit the workspace to a branch

        Args:
            message: `str`
                The message to commit with
            branch_name: `Optional[str]`
                The name of the branch to commit to. If left empty, will commit to the branch
                the workspace was created from.

        Returns:
            The `PyCommit` returned by the underlying workspace commit.
        """
        if branch_name is None:
            # Default to the branch the workspace is tied to.
            branch_name = self._workspace.branch()
        return self._workspace.commit(message, branch_name)

    def delete(self) -> None:
        """
        Delete the workspace
        """
        self._workspace.delete()
@@ -0,0 +1,92 @@
1
+ Metadata-Version: 2.4
2
+ Name: oxenai
3
+ Version: 0.42.4
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: Implementation :: CPython
6
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
7
+ Classifier: Topic :: Software Development :: Version Control
8
+ Requires-Dist: fsspec>=2025.3.0
9
+ Requires-Dist: maturin>=1.9.3
10
+ Requires-Dist: pandas>=2.3.1
11
+ Requires-Dist: polars>=1.32.0
12
+ Requires-Dist: pyarrow>=21.0.0
13
+ Requires-Dist: pytest>=8.4.1
14
+ Requires-Dist: pytest-datadir>=1.8.0
15
+ Requires-Dist: requests>=2.32.4
16
+ Requires-Dist: ruff>=0.12.7
17
+ Requires-Dist: toml>=0.10.2
18
+ Requires-Dist: tqdm>=4.67.1
19
+ Summary: Data version control for machine learning
20
+ Keywords: oxen,version control
21
+ License-Expression: Apache-2.0
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
24
+ Project-URL: Homepage, https://www.oxen.ai/
25
+ Project-URL: Documentation, https://docs.oxen.ai/
26
+ Project-URL: Repository, https://github.com/Oxen-AI/Oxen
27
+
28
+ # 🐂 🐍 Oxen Python Interface
29
+
30
+ The Oxen Python interface makes it easy to integrate Oxen datasets directly into machine learning dataloaders or other data pipelines.
31
+
32
+ ## Repositories
33
+
34
+ There are two types of repositories you can interact with: a `Repo` and a `RemoteRepo`.
35
+
36
+
37
+ ## Local Repo
38
+
39
+ To fully clone all the data to your local machine, you can use the `Repo` class.
40
+
41
+ ```python
42
+ import oxen
43
+
44
+ repo = oxen.Repo("path/to/repository")
45
+ repo.clone("https://hub.oxen.ai/ox/CatDogBBox")
46
+ ```
47
+
48
+ If there is a specific version of your data you want to access, you can specify the `branch` when cloning.
49
+
50
+ ```python
51
+ repo.clone("https://hub.oxen.ai/ox/CatDogBBox", branch="my-pets")
52
+ ```
53
+
54
+ Once you have a repository locally, you can perform the same operations through the Python API that you would via the command line.
55
+
56
+ For example, you can checkout a branch, add a file, commit, and push the data to the same remote you cloned it from.
57
+
58
+ ```python
59
+ import oxen
60
+
61
+ repo = oxen.Repo("path/to/repository")
62
+ repo.clone("https://hub.oxen.ai/ox/CatDogBBox")
63
+ repo.checkout()
64
+ ```
65
+
66
+ ## Remote Repo
67
+
68
+ If you don't want to download the data locally, you can use the `RemoteRepo` class to interact with a remote repository on OxenHub.
69
+
70
+ ```python
71
+ import oxen
72
+
73
+ repo = oxen.RemoteRepo("https://hub.oxen.ai/ox/CatDogBBox")
74
+ ```
75
+
76
+ To stage and commit files to a specific version of the data, you can `checkout` an existing branch or create a new one.
77
+
78
+ ```python
79
+ repo.create_branch("dev")
80
+ repo.checkout("dev")
81
+ ```
82
+
83
+ You can then stage files to the remote repository by specifying the file path and destination directory.
84
+
85
+ ```python
86
+ repo.add("new-cat.png", "images") # Stage to images/new-cat.png on remote
87
+ repo.commit("Adding another training image")
88
+ ```
89
+
90
+ Note that no "push" command is required here, since the above code creates a commit directly on the remote branch.
91
+
92
+
@@ -0,0 +1,32 @@
1
+ oxen/__init__.py,sha256=fnWDRsrC_1osHMXikyIDVgP1_LP8RTW89EhC-f-df-s,1036
2
+ oxen/auth.py,sha256=sEcWP2BVmWBd1mRmQNRts8CY0-1NlopcjLljy8wFTK0,1387
3
+ oxen/clone.py,sha256=SIRSkG8qnuz2H0yV9RuFIk99i_V4EQoD1nJtnvXljJY,1963
4
+ oxen/config.py,sha256=XolGXGFLjW-Zt6apnnGrKlUSvcbkwFSkxx9qIIFEiYI,430
5
+ oxen/data_frame.py,sha256=E7yoqpj75NtD642Tsp3DxIw0Y3W47oCJoJNkRmfo648,15719
6
+ oxen/datasets.py,sha256=DLsZg1GsGVTihu9gVyx2_QUazA7iHoQ2JDS7mexCviw,3401
7
+ oxen/df_utils.py,sha256=ZmDY8mJm_0LCu9OEg8ve6uQ7Yy_ZZ_3TqW4Xh9kAvyI,1099
8
+ oxen/diff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ oxen/diff/change_type.py,sha256=MPExZECVhoU5pNvM8GmnoF0Mcvki-lvN0nUon3mktEk,218
10
+ oxen/diff/diff.py,sha256=54uoGrHZqhRigrEuTjktj7rVdl1JcKFpekbTaDI00zc,5003
11
+ oxen/diff/line_diff.py,sha256=gsfN1P4bEMhRUesshwfyQ-THE8jP8IIChZSULQBMmBo,1128
12
+ oxen/diff/tabular_diff.py,sha256=NwglOL4KAdJKbSSkxCBGbeehhKqGd8Rh9AhMCyJPQTA,525
13
+ oxen/diff/text_diff.py,sha256=94mVJtuJAoef9mpxSW4ZapiY_fo-Bru8foZktAfm_Y4,1389
14
+ oxen/features.py,sha256=5zLscxbX_4OcpjqZz2mkj5obX-6AMyMNCXASJzo_qx4,1378
15
+ oxen/fs.py,sha256=yfDX7jcIUUXAzfoj67znKoPNGUS8C0tWAOczEnSQAKI,1485
16
+ oxen/init.py,sha256=OLNE_EaIpJuX5Z7oTot_k7gPQJpCqyVqRtNg1WqCQ3w,398
17
+ oxen/oxen.cpython-312-darwin.so,sha256=Lqh2BkRqsoICgWiK4H6S4F7dExWDvrc6fa9EWpkj5uU,144970920
18
+ oxen/oxen_fs.py,sha256=bk3BQy9KPOFIg87T-0pFmXpvR67K9TmU6BVdoD2ULhk,11095
19
+ oxen/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ oxen/providers/dataset_path_provider.py,sha256=3hrzA2mGROIgY4et2HB5sSmZyyGdkHsy-uLwzTs9KIw,674
21
+ oxen/providers/mock_provider.py,sha256=JmuR5lDVc3fTv-0qGVl0hBKKYo98vxkYCVhKqKW-bDo,2048
22
+ oxen/providers/oxen_data_frame_provider.py,sha256=g6_eO5gTD6v1u4TxIcjGq4JGdR0LRxcjQ4ZX0PrQMtc,1593
23
+ oxen/remote_repo.py,sha256=uIQk0tsmtqJpOMQjbLtL_-uX7CAlwerTh4gwCIY1Hrg,20417
24
+ oxen/repo.py,sha256=hZv_2zlBD5yYetrYhh0RxqmpJmjjd97Sa4T7MSIfTY4,7031
25
+ oxen/streaming_dataset.py,sha256=vHCZ6LuLMwKKHtySbPaekTj1xDJ5NeuXzMB_sjXtIeg,7978
26
+ oxen/user.py,sha256=4KeGuoywCneA-ocTyqv-Da1Ho2W1xcdPlHB8h7Rt1HY,1165
27
+ oxen/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ oxen/workspace.py,sha256=lOaGZARIB2f6Yg7cvSeeAP-4vxypFZ-MR7OX434Phn8,6036
29
+ oxenai-0.42.4.dist-info/METADATA,sha256=2XlR7tfdBidw9ddchEPk34_WSL25onJNrJgaKp9gNk8,2816
30
+ oxenai-0.42.4.dist-info/WHEEL,sha256=RzpM5MCae0zSGyiAZroxbkepvywsAm9pLWSENAg7iow,107
31
+ oxenai-0.42.4.dist-info/entry_points.txt,sha256=CvslGiUZsEt-5k6cJ19pRFHGicXb4XrR0CKhrzq1aEU,40
32
+ oxenai-0.42.4.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.10.2)
3
+ Root-Is-Purelib: false
4
+ Tag: cp312-cp312-macosx_10_13_x86_64
@@ -0,0 +1,2 @@
1
+ [fsspec.specs]
2
+ oxen=oxen.oxen_fs:OxenFS