oxenai 0.42.4__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oxen/remote_repo.py ADDED
@@ -0,0 +1,656 @@
1
+ import os
2
+
3
+ from typing import Optional
4
+ from typing import List, Tuple
5
+ from .oxen import PyRemoteRepo, remote, PyCommit
6
+ from . import user as oxen_user
7
+ from .workspace import Workspace
8
+
9
+
10
def get_repo(name: str, host: str = "hub.oxen.ai", scheme: str = "https"):
    """
    Look up an existing repository on a remote server and wrap it in a
    RemoteRepo. For example 'ox/CatDogBBox'.

    Args:
        name: `str`
            Name of the repository in the format 'namespace/repo_name'.
        host: `str`
            The host to connect to. Defaults to 'hub.oxen.ai'
        scheme: `str`
            The scheme to use for the remote url. Default: 'https'
    Returns:
        [RemoteRepo](/python-api/remote_repo)
    Raises:
        ValueError: if the remote lookup returns `None`.
    """
    found = remote.get_repo(name, host, scheme)
    if found is None:
        raise ValueError(f"Repository {name} not found")

    # Rebuild the identifier from what the server reported rather than
    # reusing the caller-supplied `name`.
    namespace = found.namespace()
    repo_name = found.name()
    return RemoteRepo(
        f"{namespace}/{repo_name}",
        found.host,
        found.revision,
        found.scheme,
    )
29
+
30
+
31
def create_repo(
    name: str,
    description="",
    is_public: bool = True,
    host: str = "hub.oxen.ai",
    scheme: str = "https",
    files: Optional[List[Tuple[str, str]]] = None,
):
    """
    Create a new repository on the remote server.

    Args:
        name: `str`
            Name of the repository in the format 'namespace/repo_name'.
        description: `str`
            Description of the repository.
            Only applicable to [OxenHub](https://oxen.ai).
        is_public: `bool`
            Whether the repository is public or private.
            Only applicable to [OxenHub](https://oxen.ai).
        host: `str`
            The host to connect to. Defaults to 'hub.oxen.ai'
        scheme: `str`
            The scheme to use for the remote url. Default: 'https'
        files: `List[Tuple[str, str]] | None`
            A list of tuples containing the path to the file and the contents
            of the file that you would like to seed the repository with.
            `None` (the default) seeds the repository with no files.
    Returns:
        [RemoteRepo](/python-api/remote_repo)
    """
    # A `None` sentinel replaces the former `files=[]` default so that no list
    # object is shared between calls; the binding still receives a list.
    seed_files = [] if files is None else files

    py_repo = remote.create_repo(
        name, description, is_public, host, scheme, seed_files
    )
    repo_id = f"{py_repo.namespace()}/{py_repo.name()}"
    # The returned RemoteRepo is always pointed at the "main" revision.
    return RemoteRepo(repo_id, py_repo.host, "main", py_repo.scheme)
64
+
65
+
66
class RemoteRepo:
    """
    The RemoteRepo class allows you to interact with an Oxen repository
    without downloading the data locally.

    ## Examples

    ### Add & Commit Files

    Adding and committing a file to a remote workspace.

    ```python
    from oxen import RemoteRepo

    repo = RemoteRepo("ox/CatDogBBox")
    repo.add("/path/to/image.png")
    status = repo.status()
    print(status.added_files())
    repo.commit("Adding my image to the remote workspace.")
    ```

    ### Downloading Specific Files

    Grab a specific file revision and load it into pandas.

    ```python
    from oxen import RemoteRepo
    import pandas as pd

    # Connect to the remote repo
    repo = RemoteRepo("ox/CatDogBBox")
    # Specify the version of the file you want to download
    branch = repo.get_branch("my-pets")
    # Download takes a file or directory and a commit id
    repo.download("annotations", revision=branch.commit_id)
    # Once you have the data locally, use whatever library you want to explore the data
    df = pd.read_csv("annotations/train.csv")
    print(df.head())
    ```
    """

    def __init__(
        self,
        repo_id: str,
        host: str = "hub.oxen.ai",
        revision: str = "main",
        scheme: str = "https",
    ):
        """
        Create a new RemoteRepo object to interact with.

        Args:
            repo_id: `str`
                Name of the repository in the format 'namespace/repo_name'.
                For example 'ox/chatbot'
            host: `str`
                The host to connect to. Defaults to 'hub.oxen.ai'
            revision: `str`
                The branch name or commit id to checkout. Defaults to 'main'
            scheme: `str`
                The scheme to use for the remote url. Default: 'https'
        """
        self._repo = PyRemoteRepo(repo_id, host, revision, scheme)
        # The workspace is created lazily, by create_workspace() or the
        # first add() call; until then there is nothing to stage or commit.
        self._workspace = None

    def __repr__(self):
        return f"RemoteRepo({self._repo.url()})"

    def create(self, empty: bool = False, is_public: bool = False):
        """
        Will create the repo on the remote server.

        Args:
            empty: `bool`
                Whether to create an empty repo or not. Default: False
            is_public: `bool`
                Whether the repository is public or private. Default: False
        """
        self._repo.create(empty, is_public)

    def exists(self) -> bool:
        """
        Checks if this remote repo exists on the server.
        """
        return self._repo.exists()

    def delete(self):
        """
        Delete this remote repo from the server.
        """
        self._repo.delete()

    def checkout(self, revision: str, create=False):
        """
        Switches the remote repo to the specified revision.

        Args:
            revision: `str`
                The name of the branch or commit id to checkout.
            create: `bool`
                Whether to create a new branch if it doesn't exist. Default: False
        """
        if create:
            self._repo.create_branch(revision)

        return self._repo.checkout(revision)

    def ls(
        self, directory: Optional[str] = None, page_num: int = 1, page_size: int = 100
    ):
        """
        Lists the contents of a directory in the remote repo.

        Args:
            directory: `str`
                The directory to list. If None, will list the root directory.
            page_num: `int`
                The page number to return. Default: 1
            page_size: `int`
                The number of items to return per page. Default: 100
        """
        # The root directory is addressed with an empty path.
        target = "" if directory is None else directory
        return self._repo.ls(target, page_num, page_size)

    def scan(self, directory: Optional[str] = None, page_size: int = 100):
        """
        Generator over the contents of a directory in the remote repo

        Pages are fetched one at a time, so only `page_size` entries are
        requested per call to the server.

        Args:
            directory: `str`
                The directory to list. If None, will list the root directory
            page_size: `int`
                The number of items to return per page. Default: 100
        """
        if directory is None:
            directory = ""

        current_page = 1

        while True:
            contents = self._repo.ls(
                directory, page_num=current_page, page_size=page_size
            )

            # An empty page ends the scan even if total_pages claims more.
            if not contents.entries:
                return

            yield from contents.entries

            if current_page >= contents.total_pages:
                return

            current_page += 1

    def download(
        self, src: str, dst: Optional[str] = None, revision: Optional[str] = None
    ):
        """
        Download a file or directory from the remote repo.

        Args:
            src: `str`
                The path to the remote file
            dst: `str | None`
                The path to the local file. If None, will download to
                the same path as `src`
            revision: `str | None`
                The branch or commit id to download. Defaults to `self.revision`
        """
        if dst is None:
            dst = src

        # Create the parent dir if it does not exist. A bare file name has an
        # empty dirname, in which case there is nothing to create.
        directory = os.path.dirname(dst)
        if directory and not os.path.exists(directory):
            os.makedirs(directory, exist_ok=True)

        target_revision = self.revision if revision is None else revision
        self._repo.download(src, dst, target_revision)

    def get_file(self, src: str, revision: Optional[str] = None):
        """
        Get a file from the remote repo.

        Args:
            src: `str`
                The path to the remote file
            revision: `str | None`
                The branch or commit id to download. Defaults to `self.revision`
        """
        target_revision = self.revision if revision is None else revision
        return self._repo.get_file(src, target_revision)

    def create_workspace(
        self, branch: Optional[str] = None, workspace_name: Optional[str] = None
    ):
        """
        Create a new workspace in the remote repo. If the workspace already exists, it will just be returned.

        Args:
            branch: `str | None`
                The branch to create the workspace on. Defaults to `self.revision`
            workspace_name: `str | None`
                The named workspace to use when adding the file. If None, will create a temporary workspace

        Returns:
            [Workspace](/python-api/workspace)

        Raises:
            ValueError: if a workspace with a different branch or name is
                already open on this object.
        """
        # Both None and the empty string mean "use the current revision".
        if branch is None or branch == "":
            branch = self.revision

        if self._workspace is None:
            self._workspace = Workspace(self, branch, workspace_name=workspace_name)
            print(
                f"Workspace '{self._workspace.id}' created from commit '{self._workspace.commit_id}'"
            )
            # Record the commit the workspace was created from on the repo handle.
            self._repo.set_commit_id(self._workspace.commit_id)
            return self._workspace
        elif (
            self._workspace.branch == branch and self._workspace.name == workspace_name
        ):
            # workspace already exists
            return self._workspace
        else:
            raise ValueError(
                "A different workspace is already open for this repo, commit or delete it first"
            )

    def delete_workspace(self):
        """
        Delete the current workspace in the remote repo.

        Does nothing when no workspace is open.
        """
        if self._workspace is not None:
            self._workspace.delete()
            self._workspace = None

    def add(
        self,
        src: str,
        dst: Optional[str] = "",
        branch: Optional[str] = None,
        workspace_name: Optional[str] = None,
    ):
        """
        Stage a file to a workspace in the remote repo.

        Args:
            src: `str`
                The path to the local file to upload
            dst: `str | None`
                The directory to upload the file to. If None, will upload to the root directory.
            branch: `str | None`
                The branch to upload the file to. Defaults to `self.revision`
            workspace_name: `str | None`
                The named workspace to use when adding the file. If None, will create a temporary workspace

        Returns:
            [Workspace](/python-api/workspace)
        """
        # Honour the documented contract: None means the root directory,
        # which is spelled as the empty string (the parameter's default).
        if dst is None:
            dst = ""

        # If the workspace already exists, this is a no-op
        self.create_workspace(branch, workspace_name)
        self._workspace.add(src, dst)
        return self._workspace

    def status(self):
        """
        Get the status of the workspace.

        Raises:
            ValueError: if no workspace is open on this object.
        """
        if self._workspace is None:
            raise ValueError("No workspace found. Please call add() first.")

        return self._workspace.status()

    def commit(self, message: str, branch: Optional[str] = None):
        """
        Commit the workspace to the remote repo.

        Args:
            message: `str`
                The message to commit with
            branch: `str | None`
                The branch to commit to. Defaults to the branch the workspace was created on.

        Returns:
            The commit returned by the workspace.

        Raises:
            ValueError: if no workspace is open on this object.
        """
        if self._workspace is None:
            raise ValueError("No workspace found. Please call add() first.")

        commit = self._workspace.commit(message, branch)
        self._repo.set_commit_id(commit.id)

        # If it's not a named workspace, it's deleted after commit
        if self._workspace.name is None:
            self._workspace = None
        return commit

    def delete_file(
        self,
        path: str,
        commit_message: Optional[str] = None,
        branch: Optional[str] = None,
    ):
        """
        Delete from the remote repo. This can be used with a committed file or dir

        Args:
            path: `str`
                The path to the remote file to remove
            commit_message: `str | None`
                The message to commit with. Defaults to "Removed '{path}'"
            branch: `str | None`
                The branch to remove the file from. Defaults to `self.revision`
        """
        if branch is None:
            branch = self.revision
        message = f"Removed '{path}'" if commit_message is None else commit_message
        user = oxen_user.current_user()

        self._repo.delete_file(branch, path, message, user)

    def upload(
        self,
        src: str,
        commit_message: Optional[str] = None,
        file_name: Optional[str] = None,
        dst_dir: Optional[str] = "",
        branch: Optional[str] = None,
    ):
        """
        Upload a file to the remote repo.

        Args:
            src: `str`
                The path to the local file to upload
            commit_message: `str | None`
                The message to commit with. Defaults to "Uploaded {src}"
            file_name: `str | None`
                The name of the file to upload. If None, will use the name of the file in `src`
            dst_dir: `str | None`
                The directory to upload the file to. If None, will upload to the root directory.
            branch: `str | None`
                The branch to upload the file to. Defaults to `self.revision`
        """
        if branch is None:
            branch = self.revision
        if file_name is None:
            file_name = os.path.basename(src)
        if commit_message is None:
            commit_message = f"Uploaded {src}"
        # None means the root directory, spelled as the empty string.
        if dst_dir is None:
            dst_dir = ""
        user = oxen_user.current_user()

        self._repo.put_file(branch, dst_dir, src, file_name, commit_message, user)

    def metadata(self, path: str):
        """
        Get the metadata for a file in the remote repo.
        """
        return self._repo.metadata(path)

    def file_exists(self, path: str, revision: Optional[str] = None):
        """
        Check if a file exists in the remote repo.

        Args:
            path: `str`
                The path to the file to check
            revision: `str | None`
                The revision to check against, defaults to `self.revision`
        """
        if revision is None:
            revision = self.revision

        return self._repo.file_exists(path, revision)

    def file_has_changes(
        self,
        local_path: str,
        remote_path: Optional[str] = None,
        revision: Optional[str] = None,
    ):
        """
        Check if a local file has changed compared to a remote revision

        Args:
            local_path: `str`
                The local path to the file to check
            remote_path: `str | None`
                The remote path to the file to check, will default to `local_path` if not provided
            revision: `str | None`
                The revision to check against, defaults to `self.revision`
        """
        if remote_path is None:
            remote_path = local_path

        if revision is None:
            revision = self.revision

        # If the file doesn't exist on the remote repo, it's a new file, hence has changes
        if not self.file_exists(remote_path, revision):
            return True

        return self._repo.file_has_changes(local_path, remote_path, revision)

    def log(
        self,
        revision: Optional[str] = None,
        path: Optional[str] = None,
        page_num: int = 1,
        page_size: int = 10,
    ):
        """
        Get the commit history for a remote repo

        Args:
            revision: `str | None`
                The revision to get the commit history for. Defaults to `self.revision`
            path: `str | None`
                The path to the file to get the commit history for. Defaults to
                None, which will return the commit history for the entire repo
            page_num: `int`
                The page number to return. Defaults to 1
            page_size: `int`
                The number of items to return per page. Defaults to 10
        """
        if revision is None:
            revision = self.revision

        return self._repo.log(revision, path, page_num, page_size)

    def branch_exists(self, name: str) -> bool:
        """
        Check if a branch exists in the remote repo.

        Args:
            name: `str`
                The name of the branch to check
        """
        return self._repo.branch_exists(name)

    def branch(self):
        """
        Get the current branch for a remote repo

        Looks up `self.revision` as a branch name.
        """
        return self.get_branch(self.revision)

    def branches(self):
        """
        List all branches for a remote repo
        """
        return self._repo.list_branches()

    def list_workspaces(self):
        """
        List all workspaces for a remote repo
        """
        return self._repo.list_workspaces()

    def get_branch(self, branch: str):
        """
        Return a branch by name on this repo, if exists

        Args:
            branch: `str`
                The name of the branch to return
        """
        return self._repo.get_branch(branch)

    def create_branch(self, branch: str):
        """
        Return a branch by name on this repo,
        creating it from the currently checked out branch if it doesn't exist

        Args:
            branch: `str`
                The name to assign to the created branch
        """
        print(f"Creating branch '{branch}' from commit '{self._repo.commit_id}'")
        return self._repo.create_branch(branch)

    def delete_branch(self, branch: str):
        """
        Delete a branch from the remote repo.

        Args:
            branch: `str`
                The name of the branch to delete
        """
        return self._repo.delete_branch(branch)

    def create_checkout_branch(self, branch: str):
        """
        Create a new branch from the currently checked out branch,
        and switch to it

        The branch is only created when it does not already exist.

        Args:
            branch: `str`
                The name to assign to the created branch
        """
        if not self.branch_exists(branch):
            self.create_branch(branch)
        return self.checkout(branch)

    def merge(self, base_branch: str, head_branch: str):
        """
        Merge the head branch into the base branch on the remote repo.

        Args:
            base_branch: `str`
                The base branch to merge into
            head_branch: `str`
                The head branch to merge
        """
        return self._repo.merge(base_branch, head_branch)

    def mergeable(self, base_branch: str, head_branch: str):
        """
        Check if a branch is mergeable into another branch.

        Args:
            base_branch: `str`
                The target branch to merge into
            head_branch: `str`
                The source branch to merge from
        """
        return self._repo.mergeable(base_branch, head_branch)

    def diff(
        self,
        base: str | PyCommit,
        head: str | PyCommit,
        path: str,
    ):
        """
        Get the diff between two refs on the remote repo.

        Args:
            base: `str | PyCommit`
                The base ref to diff (branch or commit)
            head: `str | PyCommit`
                The head ref to diff (branch or commit)
            path: `str`
                The path to the file to diff

        Returns:
            The text of the diff.

        Raises:
            NotImplementedError: if the diff format is anything other than "text".
        """
        # Both refs are passed through str() so either type is accepted.
        # NOTE(review): this relies on str(PyCommit) yielding a usable ref — confirm.
        diff = self._repo.diff_file(str(base), str(head), path)
        if diff.format != "text":
            raise NotImplementedError(
                "Only text diffs are supported in RemoteRepo right now"
            )
        return diff.text

    @property
    def namespace(self) -> str:
        """
        The namespace for the repo.
        """
        return self._repo.namespace()

    @property
    def name(self) -> str:
        """
        The name of the repo.
        """
        return self._repo.name()

    @property
    def identifier(self) -> str:
        """
        The namespace/name of the repo.
        """
        return f"{self.namespace}/{self.name}"

    @property
    def url(self) -> str:
        """
        The remote url for the repo.
        """
        return self._repo.url()

    @property
    def revision(self) -> str:
        """
        The branch or commit id for the repo
        """
        return self._repo.revision