oxenai 0.39.1__cp313-cp313-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of oxenai might be problematic. Click here for more details.

oxen/remote_repo.py ADDED
@@ -0,0 +1,626 @@
1
+ import os
2
+
3
+ from typing import Optional
4
+ from typing import List, Tuple
5
+ from .oxen import PyRemoteRepo, remote, PyCommit
6
+ from . import user as oxen_user
7
+ from .workspace import Workspace
8
+
9
+
10
def get_repo(name: str, host: str = "hub.oxen.ai", scheme: str = "https"):
    """
    Look up an existing repository and wrap it in a RemoteRepo.
    For example 'ox/CatDogBBox'.

    Args:
        name: `str`
            Name of the repository in the format 'namespace/repo_name'.
        host: `str`
            The host to connect to. Defaults to 'hub.oxen.ai'
        scheme: `str`
            The scheme to use for the remote url. Default: 'https'
    Returns:
        [RemoteRepo](/python-api/remote_repo)
    Raises:
        ValueError: if the lookup returns no repository for `name`.
    """
    found = remote.get_repo(name, host, scheme)
    if found is None:
        raise ValueError(f"Repository {name} not found")

    # Rebuild the identifier from what the lookup reports rather than
    # reusing the caller-supplied `name`.
    namespace, repo_name = found.namespace(), found.name()
    return RemoteRepo(
        f"{namespace}/{repo_name}", found.host, found.revision, found.scheme
    )
29
+
30
+
31
def create_repo(
    name: str,
    description="",
    is_public: bool = True,
    host: str = "hub.oxen.ai",
    scheme: str = "https",
    files: Optional[List[Tuple[str, str]]] = None,
):
    """
    Create a new repository on the remote server.

    Args:
        name: `str`
            Name of the repository in the format 'namespace/repo_name'.
        description: `str`
            Description of the repository.
            Only applicable to [OxenHub](https://oxen.ai).
        is_public: `bool`
            Whether the repository is public or private.
            Only applicable to [OxenHub](https://oxen.ai).
        host: `str`
            The host to connect to. Defaults to 'hub.oxen.ai'
        scheme: `str`
            The scheme to use for the remote url. Default: 'https'
        files: `List[Tuple[str, str]] | None`
            A list of tuples containing the path to the file and the contents
            of the file that you would like to seed the repository with.
            If None, no seed files are sent.
    Returns:
        [RemoteRepo](/python-api/remote_repo)
    """
    # A fresh list per call avoids sharing one mutable default object
    # between invocations.
    seed_files = [] if files is None else files

    py_repo = remote.create_repo(
        name, description, is_public, host, scheme, seed_files
    )
    repo_id = f"{py_repo.namespace()}/{py_repo.name()}"
    # The returned RemoteRepo is always pointed at the 'main' revision.
    return RemoteRepo(repo_id, py_repo.host, "main", py_repo.scheme)
64
+
65
+
66
class RemoteRepo:
    """
    The RemoteRepo class allows you to interact with an Oxen repository
    without downloading the data locally.

    ## Examples

    ### Add & Commit Files

    Adding and committing a file to a remote workspace.

    ```python
    from oxen import RemoteRepo

    repo = RemoteRepo("ox/CatDogBBox")
    repo.add("/path/to/image.png")
    status = repo.status()
    print(status.added_files())
    repo.commit("Adding my image to the remote workspace.")
    ```

    ### Downloading Specific Files

    Grab a specific file revision and load it into pandas.

    ```python
    from oxen import RemoteRepo
    import pandas as pd

    # Connect to the remote repo
    repo = RemoteRepo("ox/CatDogBBox")
    # Specify the version of the file you want to download
    branch = repo.get_branch("my-pets")
    # Download takes a file or directory and a commit id
    repo.download("annotations", revision=branch.commit_id)
    # Once you have the data locally, use whatever library you want to explore the data
    df = pd.read_csv("annotations/train.csv")
    print(df.head())
    ```
    """

    def __init__(
        self,
        repo_id: str,
        host: str = "hub.oxen.ai",
        revision: str = "main",
        scheme: str = "https",
    ):
        """
        Create a new RemoteRepo object to interact with.

        Args:
            repo_id: `str`
                Name of the repository in the format 'namespace/repo_name'.
                For example 'ox/chatbot'
            host: `str`
                The host to connect to. Defaults to 'hub.oxen.ai'
            revision: `str`
                The branch name or commit id to checkout. Defaults to 'main'
            scheme: `str`
                The scheme to use for the remote url. Default: 'https'
        """
        self._repo = PyRemoteRepo(repo_id, host, revision, scheme)
        # An internal workspace gets created on the first add() call
        self._workspace = None

    def __repr__(self):
        return f"RemoteRepo({self._repo.url()})"

    def create(self, empty: bool = False, is_public: bool = False):
        """
        Will create the repo on the remote server.

        Args:
            empty: `bool`
                Whether to create an empty repo or not. Default: False
            is_public: `bool`
                Whether the repository is public or private. Default: False
        """
        self._repo.create(empty, is_public)

    def exists(self) -> bool:
        """
        Checks if this remote repo exists on the server.
        """
        return self._repo.exists()

    def delete(self):
        """
        Delete this remote repo from the server.
        """
        self._repo.delete()

    def checkout(self, revision: str, create=False):
        """
        Switches the remote repo to the specified revision.

        Args:
            revision: `str`
                The name of the branch or commit id to checkout.
            create: `bool`
                Whether to create a new branch if it doesn't exist. Default: False
        """
        if create:
            self._repo.create_branch(revision)

        return self._repo.checkout(revision)

    def ls(
        self, directory: Optional[str] = None, page_num: int = 1, page_size: int = 100
    ):
        """
        Lists the contents of a directory in the remote repo.

        Args:
            directory: `str`
                The directory to list. If None, will list the root directory.
            page_num: `int`
                The page number to return. Default: 1
            page_size: `int`
                The number of items to return per page. Default: 100
        """
        # The root directory is addressed with an empty path.
        target = "" if directory is None else directory
        return self._repo.ls(target, page_num, page_size)

    def scan(self, directory: Optional[str] = None, page_size: int = 100):
        """
        Generator over the contents of a directory in the remote repo

        Args:
            directory: `str`
                The directory to list. If None, will list the root directory
            page_size: `int`
                The number of items to return per page. Default: 100
        """
        if directory is None:
            directory = ""

        current_page = 1

        while True:
            contents = self._repo.ls(
                directory, page_num=current_page, page_size=page_size
            )

            # An empty page means there is nothing (more) to yield.
            if not contents.entries:
                return

            yield from contents.entries

            # Stop once the last page reported by the listing has been consumed.
            if current_page >= contents.total_pages:
                return

            current_page += 1

    def download(
        self, src: str, dst: Optional[str] = None, revision: Optional[str] = None
    ):
        """
        Download a file or directory from the remote repo.

        Args:
            src: `str`
                The path to the remote file
            dst: `str | None`
                The path to the local file. If None, will download to
                the same path as `src`
            revision: `str | None`
                The branch or commit id to download. Defaults to `self.revision`
        """
        if dst is None:
            dst = src

        # create parent dir if it does not exist
        parent_dir = os.path.dirname(dst)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)

        target_revision = self.revision if revision is None else revision
        self._repo.download(src, dst, target_revision)

    def get_file(self, src: str, revision: Optional[str] = None):
        """
        Get a file from the remote repo.

        Args:
            src: `str`
                The path to the remote file
            revision: `str | None`
                The branch or commit id to download. Defaults to `self.revision`
        """
        target_revision = self.revision if revision is None else revision
        return self._repo.get_file(src, target_revision)

    def create_workspace(
        self, branch: Optional[str] = None, workspace_name: Optional[str] = None
    ):
        """
        Create a new workspace in the remote repo. If the workspace already exists, it will just be returned.

        Args:
            branch: `str | None`
                The branch to create the workspace on. Defaults to `self.revision`
            workspace_name: `str | None`
                The named workspace to use when adding the file. If None, will create a temporary workspace

        Returns:
            [Workspace](/python-api/workspace)

        Raises:
            ValueError: if a workspace with a different branch or name is
                already open on this object.
        """
        if branch is None or branch == "":
            branch = self.revision

        if self._workspace is None:
            self._workspace = Workspace(self, branch, workspace_name=workspace_name)
            print(
                f"Workspace '{self._workspace.id}' created from commit '{self._workspace.commit_id}'"
            )
            # Record the commit the new workspace was created from.
            self._repo.set_commit_id(self._workspace.commit_id)
            return self._workspace
        elif (
            self._workspace.branch == branch and self._workspace.name == workspace_name
        ):
            # workspace already exists
            return self._workspace
        else:
            raise ValueError(
                "A different workspace is already open for this repo, commit or delete it first"
            )

    def delete_workspace(self):
        """
        Delete the current workspace in the remote repo.

        Does nothing if no workspace is open.
        """
        if self._workspace is not None:
            self._workspace.delete()
            self._workspace = None

    def add(
        self,
        src: str,
        dst: Optional[str] = "",
        branch: Optional[str] = None,
        workspace_name: Optional[str] = None,
    ):
        """
        Stage a file to a workspace in the remote repo.

        Args:
            src: `str`
                The path to the local file to upload
            dst: `str | None`
                The directory to upload the file to. If None, will upload to the root directory.
            branch: `str | None`
                The branch to upload the file to. Defaults to `self.revision`
            workspace_name: `str | None`
                The named workspace to use when adding the file. If None, will create a temporary workspace

        Returns:
            [Workspace](/python-api/workspace)
        """
        # If the workspace already exists, this is a no-op
        self.create_workspace(branch, workspace_name)
        # None is documented as "root directory"; pass it on as the empty
        # path, the same way ls()/scan() address the root.
        self._workspace.add(src, "" if dst is None else dst)
        return self._workspace

    def status(self):
        """
        Get the status of the workspace.

        Raises:
            ValueError: if no workspace is open on this object.
        """
        if self._workspace is None:
            raise ValueError("No workspace found. Please call add() first.")

        return self._workspace.status()

    def commit(self, message: str, branch: Optional[str] = None):
        """
        Commit the workspace to the remote repo.

        Args:
            message: `str`
                The message to commit with
            branch: `str | None`
                The branch to commit to. Defaults to the branch the workspace was created on.

        Returns:
            The commit returned by the workspace.

        Raises:
            ValueError: if no workspace is open on this object.
        """
        if self._workspace is None:
            raise ValueError("No workspace found. Please call add() first.")

        commit = self._workspace.commit(message, branch)
        self._repo.set_commit_id(commit.id)

        # If it's not a named workspace, it's deleted after commit
        if self._workspace.name is None:
            self._workspace = None
        return commit

    def upload(
        self,
        src: str,
        commit_message: str,
        file_name: Optional[str] = None,
        dst_dir: Optional[str] = "",
        branch: Optional[str] = None,
    ):
        """
        Upload a file to the remote repo.

        Args:
            src: `str`
                The path to the local file to upload
            commit_message: `str`
                The message to commit the uploaded file with
            file_name: `str | None`
                The name of the file to upload. If None, will use the name of the file in `src`
            dst_dir: `str | None`
                The directory to upload the file to. If None, will upload to the root directory.
            branch: `str | None`
                The branch to upload the file to. Defaults to `self.revision`
        """
        if branch is None:
            branch = self.revision
        if file_name is None:
            file_name = os.path.basename(src)
        # None is documented as "root directory"; pass it on as the empty path.
        if dst_dir is None:
            dst_dir = ""
        user = oxen_user.current_user()

        self._repo.put_file(branch, dst_dir, src, file_name, commit_message, user)

    def metadata(self, path: str):
        """
        Get the metadata for a file in the remote repo.
        """
        return self._repo.metadata(path)

    def file_exists(self, path: str, revision: Optional[str] = None):
        """
        Check if a file exists in the remote repo.

        Args:
            path: `str`
                The path to the file to check
            revision: `str | None`
                The revision to check against, defaults to `self.revision`
        """
        if revision is None:
            revision = self.revision

        return self._repo.file_exists(path, revision)

    def file_has_changes(
        self,
        local_path: str,
        remote_path: Optional[str] = None,
        revision: Optional[str] = None,
    ):
        """
        Check if a local file has changed compared to a remote revision

        Args:
            local_path: `str`
                The local path to the file to check
            remote_path: `str | None`
                The remote path to the file to check, will default to `local_path` if not provided
            revision: `str | None`
                The revision to check against, defaults to `self.revision`
        """
        if remote_path is None:
            remote_path = local_path

        if revision is None:
            revision = self.revision

        # If the file doesn't exist on the remote repo, it's a new file, hence has changes
        if not self.file_exists(remote_path, revision):
            return True

        return self._repo.file_has_changes(local_path, remote_path, revision)

    def log(
        self,
        revision: Optional[str] = None,
        path: Optional[str] = None,
        page_num: int = 1,
        page_size: int = 10,
    ):
        """
        Get the commit history for a remote repo

        Args:
            revision: `str | None`
                The revision to get the commit history for. Defaults to `self.revision`
            path: `str | None`
                The path to the file to get the commit history for. Defaults to
                None, which will return the commit history for the entire repo
            page_num: `int`
                The page number to return. Defaults to 1
            page_size: `int`
                The number of items to return per page. Defaults to 10
        """
        if revision is None:
            revision = self.revision

        return self._repo.log(revision, path, page_num, page_size)

    def branch_exists(self, name: str) -> bool:
        """
        Check if a branch exists in the remote repo.

        Args:
            name: `str`
                The name of the branch to check
        """
        return self._repo.branch_exists(name)

    def branch(self):
        """
        Get the current branch for a remote repo
        """
        return self.get_branch(self.revision)

    def branches(self):
        """
        List all branches for a remote repo
        """
        return self._repo.list_branches()

    def list_workspaces(self):
        """
        List all workspaces for a remote repo
        """
        return self._repo.list_workspaces()

    def get_branch(self, branch: str):
        """
        Return a branch by name on this repo, if exists

        Args:
            branch: `str`
                The name of the branch to return
        """
        return self._repo.get_branch(branch)

    def create_branch(self, branch: str):
        """
        Return a branch by name on this repo,
        creating it from the currently checked out branch if it doesn't exist

        Args:
            branch: `str`
                The name to assign to the created branch
        """
        print(f"Creating branch '{branch}' from commit '{self._repo.commit_id}'")
        return self._repo.create_branch(branch)

    def delete_branch(self, branch: str):
        """
        Delete a branch from the remote repo.

        Args:
            branch: `str`
                The name of the branch to delete
        """
        return self._repo.delete_branch(branch)

    def create_checkout_branch(self, branch: str):
        """
        Create a new branch from the currently checked out branch,
        and switch to it

        Args:
            branch: `str`
                The name to assign to the created branch
        """
        # Only create the branch when it is missing; always switch to it.
        if not self.branch_exists(branch):
            self.create_branch(branch)
        return self.checkout(branch)

    def merge(self, base_branch: str, head_branch: str):
        """
        Merge the head branch into the base branch on the remote repo.

        Args:
            base_branch: `str`
                The base branch to merge into
            head_branch: `str`
                The head branch to merge
        """
        return self._repo.merge(base_branch, head_branch)

    def mergeable(self, base_branch: str, head_branch: str):
        """
        Check if a branch is mergeable into another branch.

        Args:
            base_branch: `str`
                The target branch to merge into
            head_branch: `str`
                The source branch to merge from
        """
        return self._repo.mergeable(base_branch, head_branch)

    def diff(
        self,
        base: "str | PyCommit",
        head: "str | PyCommit",
        path: str,
    ):
        """
        Get the diff between two refs on the remote repo.

        Args:
            base: `str | PyCommit`
                The base ref to diff (branch or commit)
            head: `str | PyCommit`
                The head ref to diff (branch or commit)
            path: `str`
                The path to the file to diff

        Returns:
            The text of the diff.

        Raises:
            NotImplementedError: if the diff is not in text format.
        """
        # Both refs are converted with str() before being handed to the
        # native diff call.
        diff = self._repo.diff_file(str(base), str(head), path)
        if diff.format == "text":
            return diff.text
        else:
            raise NotImplementedError(
                "Only text diffs are supported in RemoteRepo right now"
            )

    @property
    def namespace(self) -> str:
        """
        The namespace for the repo.
        """
        return self._repo.namespace()

    @property
    def name(self) -> str:
        """
        The name of the repo.
        """
        return self._repo.name()

    @property
    def identifier(self) -> str:
        """
        The namespace/name of the repo.
        """
        return f"{self.namespace}/{self.name}"

    @property
    def url(self) -> str:
        """
        The remote url for the repo.
        """
        return self._repo.url()

    @property
    def revision(self) -> str:
        """
        The branch or commit id for the repo
        """
        return self._repo.revision