oxenai 0.42.4__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oxen/__init__.py +55 -0
- oxen/auth.py +40 -0
- oxen/clone.py +58 -0
- oxen/config.py +16 -0
- oxen/data_frame.py +462 -0
- oxen/datasets.py +106 -0
- oxen/df_utils.py +54 -0
- oxen/diff/__init__.py +0 -0
- oxen/diff/change_type.py +12 -0
- oxen/diff/diff.py +143 -0
- oxen/diff/line_diff.py +41 -0
- oxen/diff/tabular_diff.py +22 -0
- oxen/diff/text_diff.py +48 -0
- oxen/features.py +58 -0
- oxen/fs.py +57 -0
- oxen/init.py +19 -0
- oxen/oxen.cpython-312-darwin.so +0 -0
- oxen/oxen_fs.py +351 -0
- oxen/providers/__init__.py +0 -0
- oxen/providers/dataset_path_provider.py +26 -0
- oxen/providers/mock_provider.py +73 -0
- oxen/providers/oxen_data_frame_provider.py +61 -0
- oxen/remote_repo.py +656 -0
- oxen/repo.py +239 -0
- oxen/streaming_dataset.py +242 -0
- oxen/user.py +40 -0
- oxen/util/__init__.py +0 -0
- oxen/workspace.py +210 -0
- oxenai-0.42.4.dist-info/METADATA +92 -0
- oxenai-0.42.4.dist-info/RECORD +32 -0
- oxenai-0.42.4.dist-info/WHEEL +4 -0
- oxenai-0.42.4.dist-info/entry_points.txt +2 -0
oxen/workspace.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from typing import Optional, TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from .oxen import PyWorkspace, PyCommit
|
|
6
|
+
|
|
7
|
+
# Use TYPE_CHECKING for type hints to avoid runtime circular imports
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from .remote_repo import RemoteRepo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Workspace:
    """
    The Workspace class allows you to interact with an Oxen workspace
    without downloading the data locally.

    A workspace is created off a branch and is tied to the commit id of that
    branch at the time of creation.

    You can commit a Workspace back to the same branch if the branch has not
    advanced, otherwise you will have to commit to a new branch and merge.

    ## Examples

    ### Adding Files to a Workspace

    Create a workspace from a branch.

    ```python
    from oxen import RemoteRepo
    from oxen import Workspace

    # Connect to the remote repo
    repo = RemoteRepo("ox/CatDogBBox")

    # Create the workspace
    workspace = Workspace(repo, "my-branch")

    # Add a file to the workspace
    workspace.add("my-image.png")

    # Print the status of the workspace
    status = workspace.status()
    print(status.added_files())

    # Commit the workspace
    workspace.commit("Adding my image to the workspace.")
    ```
    """

    def __init__(
        self,
        repo: "RemoteRepo",
        branch: str,
        workspace_id: Optional[str] = None,
        workspace_name: Optional[str] = None,
        path: Optional[str] = None,
    ) -> None:
        """
        Create a new Workspace.

        Args:
            repo: `RemoteRepo`
                The remote repo to create the workspace from.
            branch: `str`
                The branch name to create the workspace from. The workspace
                will be tied to the commit id of the branch at the time of creation.
            workspace_id: `Optional[str]`
                The workspace id to create the workspace from.
                If left empty, will create a unique workspace id.
            workspace_name: `Optional[str]`
                The name of the workspace. If left empty, the workspace will have no name.
            path: `Optional[str]`
                The path to the workspace. If left empty, the workspace will be created in the root of the remote repo.

        Raises:
            ValueError: Re-raised from the underlying `PyWorkspace` constructor
                after printing the error and a hint about write access.
        """
        self._repo = repo
        # Only switch the repo when it is not already on the requested branch.
        # NOTE(review): `create_checkout_branch` presumably creates the branch
        # if it does not exist yet -- confirm against RemoteRepo.
        if self._repo.revision != branch:
            self._repo.create_checkout_branch(branch)
        try:
            self._workspace = PyWorkspace(
                repo._repo, branch, workspace_id, workspace_name, path
            )
        except ValueError as e:
            print(e)
            # Print this error in red
            print(
                f"\033[91mMake sure that you have write access to `{repo.namespace}/{repo.name}`\033[0m\n"
            )
            # Bare `raise` re-raises the active exception unchanged.
            raise

    def __repr__(self) -> str:
        return f"Workspace(id={self._workspace.id()}, branch={self._workspace.branch()}, commit_id={self._workspace.commit_id()})"

    @property
    def id(self):
        """
        Get the id of the workspace.
        """
        return self._workspace.id()

    @property
    def name(self):
        """
        Get the name of the workspace.
        """
        return self._workspace.name()

    @property
    def branch(self):
        """
        Get the branch that the workspace is tied to.
        """
        return self._workspace.branch()

    @property
    def commit_id(self):
        """
        Get the commit id of the workspace.
        """
        return self._workspace.commit_id()

    @property
    def repo(self) -> "RemoteRepo":
        """
        Get the remote repo that the workspace is tied to.
        """
        return self._repo

    def status(self, path: str = ""):
        """
        Get the status of the workspace.

        Args:
            path: `str`
                The path to check the status of.
        """
        return self._workspace.status(path)

    def add(self, src: str, dst: str = "") -> None:
        """
        Add a file, or every file under a directory, to the workspace.

        Args:
            src: `str`
                The path to the local file to be staged. If it is a directory,
                it is walked recursively and all files found are staged
                in a single `add_many` call.
            dst: `str`
                The path in the remote repo where the file will be added
        """
        if os.path.isdir(src):
            # Collect every regular file below `src` and stage them together.
            paths = []
            for dir_path, _, files in os.walk(src):
                paths.extend(os.path.join(dir_path, file_name) for file_name in files)
            self._workspace.add_many(paths, dst)
        else:
            # Add a single file
            self._workspace.add(src, dst)

    def add_bytes(self, src: str, buf: bytes, dst: str = "") -> None:
        """
        Adds from a memory buffer to the workspace

        Args:
            src: `str`
                The relative path to be used as the entry's name in the workspace
            buf: `bytes`
                The memory buffer to be read from for this entry
            dst: `str`
                The path in the remote repo where the file will be added
        """
        self._workspace.add_bytes(src, buf, dst)

    def rm(self, path: str) -> None:
        """
        Remove a file from the workspace

        Args:
            path: `str`
                The path to the file on workspace to be removed
        """
        self._workspace.rm(path)

    def commit(
        self,
        message: str,
        branch_name: Optional[str] = None,
    ) -> "PyCommit":
        """
        Commit the workspace to a branch

        Args:
            message: `str`
                The message to commit with
            branch_name: `Optional[str]`
                The name of the branch to commit to. If left empty, will commit to the branch
                the workspace was created from.

        Returns:
            The value returned by the underlying workspace's `commit` call.
        """
        if branch_name is None:
            # Default to the branch this workspace is tied to.
            branch_name = self._workspace.branch()
        return self._workspace.commit(message, branch_name)

    def delete(self) -> None:
        """
        Delete the workspace
        """
        self._workspace.delete()
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oxenai
|
|
3
|
+
Version: 0.42.4
|
|
4
|
+
Classifier: Programming Language :: Rust
|
|
5
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
7
|
+
Classifier: Topic :: Software Development :: Version Control
|
|
8
|
+
Requires-Dist: fsspec>=2025.3.0
|
|
9
|
+
Requires-Dist: maturin>=1.9.3
|
|
10
|
+
Requires-Dist: pandas>=2.3.1
|
|
11
|
+
Requires-Dist: polars>=1.32.0
|
|
12
|
+
Requires-Dist: pyarrow>=21.0.0
|
|
13
|
+
Requires-Dist: pytest>=8.4.1
|
|
14
|
+
Requires-Dist: pytest-datadir>=1.8.0
|
|
15
|
+
Requires-Dist: requests>=2.32.4
|
|
16
|
+
Requires-Dist: ruff>=0.12.7
|
|
17
|
+
Requires-Dist: toml>=0.10.2
|
|
18
|
+
Requires-Dist: tqdm>=4.67.1
|
|
19
|
+
Summary: Data version control for machine learning
|
|
20
|
+
Keywords: oxen,version control
|
|
21
|
+
License-Expression: Apache-2.0
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
24
|
+
Project-URL: Homepage, https://www.oxen.ai/
|
|
25
|
+
Project-URL: Documentation, https://docs.oxen.ai/
|
|
26
|
+
Project-URL: Repository, https://github.com/Oxen-AI/Oxen
|
|
27
|
+
|
|
28
|
+
# 🐂 🐍 Oxen Python Interface
|
|
29
|
+
|
|
30
|
+
The Oxen Python interface makes it easy to integrate Oxen datasets directly into machine learning dataloaders or other data pipelines.
|
|
31
|
+
|
|
32
|
+
## Repositories
|
|
33
|
+
|
|
34
|
+
There are two types of repositories one can interact with: a `Repo` and a `RemoteRepo`.
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
## Local Repo
|
|
38
|
+
|
|
39
|
+
To fully clone all the data to your local machine, you can use the `Repo` class.
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import oxen
|
|
43
|
+
|
|
44
|
+
repo = oxen.Repo("path/to/repository")
|
|
45
|
+
repo.clone("https://hub.oxen.ai/ox/CatDogBBox")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
If there is a specific version of your data you want to access, you can specify the `branch` when cloning.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
repo.clone("https://hub.oxen.ai/ox/CatDogBBox", branch="my-pets")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Once you have a repository locally, you can perform the same operations through the Python API that you would via the command line.
|
|
55
|
+
|
|
56
|
+
For example, you can checkout a branch, add a file, commit, and push the data to the same remote you cloned it from.
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import oxen
|
|
60
|
+
|
|
61
|
+
repo = oxen.Repo("path/to/repository")
|
|
62
|
+
repo.clone("https://hub.oxen.ai/ox/CatDogBBox")
|
|
63
|
+
repo.checkout()
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Remote Repo
|
|
67
|
+
|
|
68
|
+
If you don't want to download the data locally, you can use the `RemoteRepo` class to interact with a remote repository on OxenHub.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import oxen
|
|
72
|
+
|
|
73
|
+
repo = oxen.RemoteRepo("https://hub.oxen.ai/ox/CatDogBBox")
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
To stage and commit files to a specific version of the data, you can `checkout` an existing branch or create a new one.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
repo.create_branch("dev")
|
|
80
|
+
repo.checkout("dev")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
You can then stage files to the remote repository by specifying the file path and destination directory.
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
repo.add("new-cat.png", "images") # Stage to images/new-cat.png on remote
|
|
87
|
+
repo.commit("Adding another training image")
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Note that no "push" command is required here, since the above code creates a commit directly on the remote branch.
|
|
91
|
+
|
|
92
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
oxen/__init__.py,sha256=fnWDRsrC_1osHMXikyIDVgP1_LP8RTW89EhC-f-df-s,1036
|
|
2
|
+
oxen/auth.py,sha256=sEcWP2BVmWBd1mRmQNRts8CY0-1NlopcjLljy8wFTK0,1387
|
|
3
|
+
oxen/clone.py,sha256=SIRSkG8qnuz2H0yV9RuFIk99i_V4EQoD1nJtnvXljJY,1963
|
|
4
|
+
oxen/config.py,sha256=XolGXGFLjW-Zt6apnnGrKlUSvcbkwFSkxx9qIIFEiYI,430
|
|
5
|
+
oxen/data_frame.py,sha256=E7yoqpj75NtD642Tsp3DxIw0Y3W47oCJoJNkRmfo648,15719
|
|
6
|
+
oxen/datasets.py,sha256=DLsZg1GsGVTihu9gVyx2_QUazA7iHoQ2JDS7mexCviw,3401
|
|
7
|
+
oxen/df_utils.py,sha256=ZmDY8mJm_0LCu9OEg8ve6uQ7Yy_ZZ_3TqW4Xh9kAvyI,1099
|
|
8
|
+
oxen/diff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
oxen/diff/change_type.py,sha256=MPExZECVhoU5pNvM8GmnoF0Mcvki-lvN0nUon3mktEk,218
|
|
10
|
+
oxen/diff/diff.py,sha256=54uoGrHZqhRigrEuTjktj7rVdl1JcKFpekbTaDI00zc,5003
|
|
11
|
+
oxen/diff/line_diff.py,sha256=gsfN1P4bEMhRUesshwfyQ-THE8jP8IIChZSULQBMmBo,1128
|
|
12
|
+
oxen/diff/tabular_diff.py,sha256=NwglOL4KAdJKbSSkxCBGbeehhKqGd8Rh9AhMCyJPQTA,525
|
|
13
|
+
oxen/diff/text_diff.py,sha256=94mVJtuJAoef9mpxSW4ZapiY_fo-Bru8foZktAfm_Y4,1389
|
|
14
|
+
oxen/features.py,sha256=5zLscxbX_4OcpjqZz2mkj5obX-6AMyMNCXASJzo_qx4,1378
|
|
15
|
+
oxen/fs.py,sha256=yfDX7jcIUUXAzfoj67znKoPNGUS8C0tWAOczEnSQAKI,1485
|
|
16
|
+
oxen/init.py,sha256=OLNE_EaIpJuX5Z7oTot_k7gPQJpCqyVqRtNg1WqCQ3w,398
|
|
17
|
+
oxen/oxen.cpython-312-darwin.so,sha256=Lqh2BkRqsoICgWiK4H6S4F7dExWDvrc6fa9EWpkj5uU,144970920
|
|
18
|
+
oxen/oxen_fs.py,sha256=bk3BQy9KPOFIg87T-0pFmXpvR67K9TmU6BVdoD2ULhk,11095
|
|
19
|
+
oxen/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
oxen/providers/dataset_path_provider.py,sha256=3hrzA2mGROIgY4et2HB5sSmZyyGdkHsy-uLwzTs9KIw,674
|
|
21
|
+
oxen/providers/mock_provider.py,sha256=JmuR5lDVc3fTv-0qGVl0hBKKYo98vxkYCVhKqKW-bDo,2048
|
|
22
|
+
oxen/providers/oxen_data_frame_provider.py,sha256=g6_eO5gTD6v1u4TxIcjGq4JGdR0LRxcjQ4ZX0PrQMtc,1593
|
|
23
|
+
oxen/remote_repo.py,sha256=uIQk0tsmtqJpOMQjbLtL_-uX7CAlwerTh4gwCIY1Hrg,20417
|
|
24
|
+
oxen/repo.py,sha256=hZv_2zlBD5yYetrYhh0RxqmpJmjjd97Sa4T7MSIfTY4,7031
|
|
25
|
+
oxen/streaming_dataset.py,sha256=vHCZ6LuLMwKKHtySbPaekTj1xDJ5NeuXzMB_sjXtIeg,7978
|
|
26
|
+
oxen/user.py,sha256=4KeGuoywCneA-ocTyqv-Da1Ho2W1xcdPlHB8h7Rt1HY,1165
|
|
27
|
+
oxen/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
oxen/workspace.py,sha256=lOaGZARIB2f6Yg7cvSeeAP-4vxypFZ-MR7OX434Phn8,6036
|
|
29
|
+
oxenai-0.42.4.dist-info/METADATA,sha256=2XlR7tfdBidw9ddchEPk34_WSL25onJNrJgaKp9gNk8,2816
|
|
30
|
+
oxenai-0.42.4.dist-info/WHEEL,sha256=RzpM5MCae0zSGyiAZroxbkepvywsAm9pLWSENAg7iow,107
|
|
31
|
+
oxenai-0.42.4.dist-info/entry_points.txt,sha256=CvslGiUZsEt-5k6cJ19pRFHGicXb4XrR0CKhrzq1aEU,40
|
|
32
|
+
oxenai-0.42.4.dist-info/RECORD,,
|