modaic 0.10.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modaic/__init__.py +27 -0
- modaic/auto.py +301 -0
- modaic/constants.py +18 -0
- modaic/datasets.py +22 -0
- modaic/exceptions.py +59 -0
- modaic/hub.py +671 -0
- modaic/module_utils.py +560 -0
- modaic/observability.py +259 -0
- modaic/precompiled.py +608 -0
- modaic/programs/__init__.py +1 -0
- modaic/programs/predict.py +51 -0
- modaic/programs/rag_program.py +35 -0
- modaic/programs/registry.py +104 -0
- modaic/serializers.py +222 -0
- modaic/utils.py +115 -0
- modaic-0.10.4.dist-info/METADATA +138 -0
- modaic-0.10.4.dist-info/RECORD +19 -0
- modaic-0.10.4.dist-info/WHEEL +4 -0
- modaic-0.10.4.dist-info/licenses/LICENSE +31 -0
modaic/hub.py
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import shutil
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple, Union
|
|
8
|
+
|
|
9
|
+
import git
|
|
10
|
+
import requests
|
|
11
|
+
from dotenv import find_dotenv, load_dotenv
|
|
12
|
+
from git.repo.fun import BadName, BadObject, name_to_object
|
|
13
|
+
|
|
14
|
+
from .constants import (
|
|
15
|
+
MODAIC_API_URL,
|
|
16
|
+
MODAIC_CACHE,
|
|
17
|
+
MODAIC_GIT_URL,
|
|
18
|
+
MODAIC_HUB_CACHE,
|
|
19
|
+
MODAIC_TOKEN,
|
|
20
|
+
STAGING_DIR,
|
|
21
|
+
USE_GITHUB,
|
|
22
|
+
)
|
|
23
|
+
from .exceptions import (
|
|
24
|
+
AuthenticationError,
|
|
25
|
+
ModaicError,
|
|
26
|
+
RepositoryExistsError,
|
|
27
|
+
RepositoryNotFoundError,
|
|
28
|
+
RevisionNotFoundError,
|
|
29
|
+
)
|
|
30
|
+
from .module_utils import (
|
|
31
|
+
copy_update_from,
|
|
32
|
+
copy_update_program_dir,
|
|
33
|
+
create_sync_dir,
|
|
34
|
+
smart_link,
|
|
35
|
+
sync_dir_from,
|
|
36
|
+
)
|
|
37
|
+
from .utils import aggresive_rmtree
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from .precompiled import PrecompiledProgram, Retriever
|
|
41
|
+
|
|
42
|
+
# Locate a .env file (search starts from the current working directory) and
# load its variables into the process environment at import time.
env_file = find_dotenv(usecwd=True)
load_dotenv(env_file)

# Module-wide cache filled in by get_user_info(); stays None until the first lookup.
user_info: Optional[Dict[str, Any]] = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class Commit:
    """
    Identifies one commit of a repository.

    Args:
        repo: The repository the commit belongs to.
        sha: The full commit SHA.
    """

    repo: str
    sha: str

    def __repr__(self):
        # Rendered as "<repo>@<sha>".
        return "{}@{}".format(self.repo, self.sha)

    def __str__(self):
        # Same rendering as __repr__.
        return "{}@{}".format(self.repo, self.sha)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def create_remote_repo(repo_path: str, access_token: str, exist_ok: bool = False, private: bool = False) -> bool:
    """
    Creates a remote repository in modaic hub on the given repo_path. e.g. "user/repo"

    Args:
        repo_path: The path on Modaic hub to create the remote repository.
        access_token: User's access token for authentication.
        exist_ok: If True, an already existing repository is not an error and False is returned.
        private: Whether the new repository should be private.

    Raises:
        RepositoryExistsError: If the repository already exists on the hub and exist_ok is False.
        AuthenticationError: If authentication fails (401) or access is denied (403).
        ValueError: If repo_path is empty or blank.
        Exception: If the HTTP request itself fails or the hub answers with any other error status.

    Returns:
        True if a new repository was created, False if it already existed.
    """
    if not repo_path or not repo_path.strip():
        raise ValueError("Repository ID cannot be empty")

    api_url = get_repos_endpoint()
    headers = get_headers(access_token)
    payload = get_repo_payload(repo_path, private=private)
    # TODO: Implement orgs path. Also switch to using gitea's push-to-create

    # Only the network call can raise RequestException, so only it sits in the try.
    try:
        response = requests.post(api_url, json=payload, headers=headers, timeout=30)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Request failed: {str(e)}") from e

    if response.ok:
        return True

    # The error body is best-effort: it may be missing, not JSON, or not a JSON object.
    try:
        error_data = response.json()
    except (ValueError, requests.exceptions.RequestException):
        error_data = {}
    message = error_data.get("message") if isinstance(error_data, dict) else None
    error_message = str(message) if message else f"HTTP {response.status_code}"

    if response.status_code in (409, 422) or "already exists" in error_message.lower():
        if exist_ok:
            return False
        raise RepositoryExistsError(f"Repository '{repo_path}' already exists")
    if response.status_code == 401:
        raise AuthenticationError("Invalid access token or authentication failed")
    if response.status_code == 403:
        raise AuthenticationError("Access denied - insufficient permissions")
    raise Exception(f"Failed to create repository: {error_message}")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _has_ref(repo: git.Repo, ref: str) -> bool:
    """
    Returns True if `ref` can be resolved to an object in `repo`, False otherwise.

    Args:
        repo: The git.Repo object to look in.
        ref: A revision string (branch, tag, or commit SHA).
    """
    try:
        repo.rev_parse(ref)
    except (BadName, BadObject, ValueError):
        # BadName covers unknown names. A well-formed SHA that is absent from the
        # object database may instead surface as BadObject or ValueError; all of
        # them mean the ref is not available here.
        return False
    return True
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _attempt_push(repo: git.Repo, branch: str, tag: Optional[str] = None) -> None:
    """
    Pushes `branch` (and optionally a newly created `tag`) to the 'origin' remote.

    Args:
        repo: The local git.Repo to push from.
        branch: Name of the branch to push.
        tag: Optional tag to create on the current HEAD and push along with the branch.

    Raises:
        ModaicError: If the tag cannot be created, the push command fails, or the
            remote rejects any of the pushed refs.
    """
    refs = [branch]
    if tag:
        try:
            repo.git.tag(tag)
        except git.exc.GitCommandError:
            raise ModaicError(f"tag: {tag} already exists") from None
        refs.append(tag)

    try:
        results = repo.remotes.origin.push(refs)
    except git.exc.GitCommandError as e:  # handle nothing to push error
        raise ModaicError(f"Git push failed: {e.stderr}") from None

    # GitPython reports per-ref failures (e.g. non-fast-forward rejections) through
    # PushInfo flags instead of raising, so they have to be checked explicitly.
    failed = [info for info in results if info.flags & git.PushInfo.ERROR]
    if failed:
        details = "; ".join(str(info.summary).strip() for info in failed)
        raise ModaicError(f"Git push failed: {details}")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def sync_and_push(
    module: Union["PrecompiledProgram", "Retriever"],
    repo_path: str,
    access_token: Optional[str] = None,
    commit_message: str = "(no commit message)",
    private: bool = False,
    branch: str = "main",
    tag: Optional[str] = None,
    with_code: bool = False,
) -> Commit:
    """
    1. Syncs a non-git repository to a git repository.
    2. Pushes the git repository to modaic hub.

    Args:
        module: The program or retriever to save and push.
        repo_path: The path on Modaic hub to create the remote repository. e.g. "user/repo"
        access_token: The access token to use for authentication. Falls back to MODAIC_TOKEN.
        commit_message: The message to use for the commit.
        private: Whether the repository should be private. Defaults to False.
        branch: The branch to push to. Defaults to "main".
        tag: The tag to push to. Defaults to None.
        with_code: Passed to create_sync_dir; also controls whether the auto classes json
            is saved (only when the module was not loaded from a source repo).

    Returns:
        A Commit holding repo_path and the SHA of the local HEAD after the push.

    Raises:
        AuthenticationError: If no access token is given and MODAIC_TOKEN is not set.
        ModaicError: If the branch name or repo_path is invalid, or a git step fails.
        NotImplementedError: If repo_path has no "user/" or "org/" prefix.
        RepositoryNotFoundError: If the fetch reports that the repository was not found.

    Warning:
        This is not the standard pull/push workflow. No merging/rebasing is done.
        This simply pushes new changes to make main mirror the local directory.

    Warning:
        On any failure the local staging directory for repo_path is deleted.
    """
    # First create the sync directory which will be used to update the git repository.
    # if module was loaded from AutoProgram/AutoRetriever, we will use its source repo from MODAIC_CACHE/modaic_hub to update the repo_dir
    # other wise bootstrap sync_dir from working directory.
    if module._from_auto:
        sync_dir = sync_dir_from(module._source)
    else:
        sync_dir = create_sync_dir(repo_path, with_code=with_code)
    save_auto_json = with_code and not module._from_auto
    module.save_precompiled(sync_dir, _with_auto_classes=save_auto_json)

    if not access_token and MODAIC_TOKEN:
        access_token = MODAIC_TOKEN
    elif not access_token and not MODAIC_TOKEN:
        raise AuthenticationError("MODAIC_TOKEN is not set")

    if "/" in branch:
        raise ModaicError(
            f"Branch name '{branch}' is invalid. Must be a single branch name without any remote prefix (e.g., 'main', not 'origin/main')"
        )

    if "/" not in repo_path:
        raise NotImplementedError(
            "Modaic fast paths not yet implemented. Please load programs with 'user/repo' or 'org/repo' format"
        )
    # Explicit raise instead of assert so the check survives `python -O`.
    if repo_path.count("/") > 1:
        raise ModaicError(f"Extra '/' in repo_path: {repo_path}")
    # TODO: try pushing first and on error create the repo. create_remote_repo currently takes ~1.5 seconds to run
    create_remote_repo(repo_path, access_token, exist_ok=True, private=private)

    repo_dir = STAGING_DIR / repo_path
    repo_dir.mkdir(parents=True, exist_ok=True)

    # Initialize git as git repo if not already initialized.
    repo = git.Repo.init(repo_dir)
    remote_url = _make_git_url(repo_path, access_token)
    try:
        if "origin" not in [r.name for r in repo.remotes]:
            repo.create_remote("origin", remote_url)
        else:
            repo.remotes.origin.set_url(remote_url)

        try:
            repo.remotes.origin.fetch()
        except git.exc.GitCommandError as e:
            if "repository" in e.stderr.lower() and "not found" in e.stderr.lower():
                raise RepositoryNotFoundError(f"Repository '{repo_path}' does not exist") from None
            else:
                raise ModaicError(f"Git fetch failed: {e.stderr}") from None

        # Handle main branch separately. Get latest version of main, add changes, and push.
        if branch == "main":
            try:
                repo.git.switch("-C", "main", "origin/main")
            except git.exc.GitCommandError:
                # origin/main may not exist yet; continue on the current branch.
                pass
            _sync_repo(sync_dir, repo_dir)
            repo.git.add("-A")
            # _smart_commit raises ModaicError("Nothing to commit") on a clean tree
            # and wraps any other commit failure in ModaicError.
            _smart_commit(repo, commit_message, access_token)
            _attempt_push(repo, "main", tag)
            return Commit(repo_path, repo.head.commit.hexsha)

        # Ensure existence of main branch.
        # first attempt to sync main branch with origin
        try:
            repo.git.switch("-C", "main", "origin/main")
        # if that fails we must add changes to main and push.
        except git.exc.GitCommandError:
            _sync_repo(sync_dir, repo_dir)
            repo.git.add("-A")
            _smart_commit(repo, commit_message, access_token)
            repo.remotes.origin.push("main")

        # Now that main exists, switch to target branch and sync.
        # Switch to the branch or create it if it doesn't exist. And ensure it is up to date.
        try:
            repo.git.switch("-C", branch, f"origin/{branch}")
        except git.exc.GitCommandError:
            # if origin/branch does not exist this is a new branch
            # if source_commit is provided, start the new branch there
            if module._source_commit and _has_ref(repo, module._source_commit.sha):
                repo.git.switch("-C", branch, module._source_commit.sha)
            # otherwise start the new branch from main
            else:
                repo.git.switch("-C", branch)

        _sync_repo(sync_dir, repo_dir)
        repo.git.add("-A")

        # Raises ModaicError when the working tree is clean (nothing to commit)
        _smart_commit(repo, commit_message, access_token)
        _attempt_push(repo, branch, tag)
        return Commit(repo_path, repo.head.commit.hexsha)
    except Exception as e:
        # Any failure leaves the staging repo in an unknown state, so drop it.
        try:
            aggresive_rmtree(repo_dir)
        except Exception:
            raise ModaicError(
                f"Failed to cleanup MODAIC_CACHE after a failed operation. We recommend manually deleting your modaic cache as it may be corrupted. Your cache is located at {MODAIC_CACHE}"
            ) from e
        raise
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _smart_commit(repo: git.Repo, commit_message: str, access_token: str) -> None:
    """
    Commits the staged changes of `repo`, authored as the owner of `access_token`.

    Args:
        repo: The git.Repo whose index is committed.
        commit_message: The commit message.
        access_token: Token used to look up the author's name and email.

    Raises:
        ModaicError: "Nothing to commit" when git reports nothing to commit,
            otherwise a message carrying git's stderr.
    """
    author = get_user_info(access_token)
    # Set the commit identity from the hub account: email first, then name.
    for git_key, info_key in (("user.email", "email"), ("user.name", "name")):
        repo.git.config(git_key, author[info_key])

    try:
        repo.git.commit("-m", commit_message)
    except git.exc.GitCommandError as e:
        clean_tree = "nothing to commit" in str(e).lower()
        if not clean_tree:
            raise ModaicError(f"Git commit failed: {e.stderr}") from e
        raise ModaicError("Nothing to commit") from None
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def get_headers(access_token: str) -> Dict[str, str]:
    """
    Builds the HTTP headers for an authenticated API request.

    Args:
        access_token: The token placed in the Authorization header.

    Returns:
        GitHub-style headers when USE_GITHUB is set, otherwise token-auth JSON headers.
    """
    if not USE_GITHUB:
        hub_headers = {
            "Authorization": f"token {access_token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "User-Agent": "ModaicClient/1.0",
        }
        return hub_headers

    github_headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {access_token}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    return github_headers
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def get_repos_endpoint() -> str:
    """Returns the URL that repository-creation requests are posted to."""
    return "https://api.github.com/user/repos" if USE_GITHUB else f"{MODAIC_API_URL}/api/v1/agents/create"
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_repo_payload(repo_path: str, private: bool = False) -> Dict[str, Any]:
    """
    Builds the JSON payload for a repository-creation request.

    Args:
        repo_path: Hub path in "user/repo" form. Surrounding whitespace is ignored.
        private: Whether the repository should be private.

    Returns:
        The payload dict; "trust_model" is added when USE_GITHUB is not set.

    Raises:
        ValueError: If repo_path is not in "user/repo" form or the repository
            name is longer than 100 characters.
    """
    parts = repo_path.strip().split("/")
    # Validate before indexing: a path without "/" used to escape as IndexError.
    if len(parts) < 2 or not parts[0] or not parts[1]:
        raise ValueError(f"Invalid repository path '{repo_path}'. Expected 'user/repo' format")
    repo_user, repo_name = parts[0], parts[1]

    if len(repo_name) > 100:
        raise ValueError("Repository name too long (max 100 characters)")
    payload = {
        "username": repo_user,
        "name": repo_name,
        "description": "",
        "private": private,
        "auto_init": True,
        "default_branch": "main",
    }
    if not USE_GITHUB:
        payload["trust_model"] = "default"
    return payload
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# TODO: add persistent filesystem based cache mapping access_token to user_info. Currently takes ~1 second
|
|
334
|
+
# Token the cached `user_info` was fetched with; None until this module has fetched once.
_user_info_token: Optional[str] = None


def get_user_info(access_token: str) -> Dict[str, Any]:
    """
    Returns the user info for the given access token.
    Caches the user info in the global user_info variable. The cache is reused
    only for the token it was fetched with, so a different token is looked up again.

    Args:
        access_token: The access token to get the user info for.

    Returns:
        ```python
        {
            "login": str,
            "email": str,
            "avatar_url": str,
            "name": str,
        }
        ```

    Raises:
        AuthenticationError: If the server answers 401 or 403 for the token.
        ModaicError: If the server answers with any other error status.
    """
    global user_info, _user_info_token
    # A cache filled before any token was recorded is still honoured.
    if user_info and _user_info_token in (None, access_token):
        return user_info

    if USE_GITHUB:
        url = "https://api.github.com/user"
        name_key = "name"
    else:
        protocol = "https://" if MODAIC_GIT_URL.startswith("https://") else "http://"
        url = f"{protocol}{MODAIC_GIT_URL.replace('https://', '').replace('http://', '')}/api/v1/user"
        name_key = "full_name"

    response = requests.get(url, headers=get_headers(access_token), timeout=30)
    # Check the status first so a rejected token is not reported as a KeyError below.
    if response.status_code in (401, 403):
        raise AuthenticationError("Invalid access token or authentication failed")
    if not response.ok:
        raise ModaicError(f"Failed to fetch user info: HTTP {response.status_code}")

    data = response.json()
    user_info = {
        "login": data["login"],
        "email": data["email"],
        "avatar_url": data["avatar_url"],
        "name": data[name_key],
    }
    _user_info_token = access_token
    return user_info
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# TODO:
|
|
379
|
+
def git_snapshot(
    repo_path: str,
    *,
    rev: str = "main",
    access_token: Optional[str] = None,
) -> Tuple[Path, Optional[Commit]]:
    """
    Ensure a local cached checkout of a hub repository and return its path and commit.

    Args:
        repo_path: Hub path ("user/repo").
        rev: Branch, tag, or commit SHA to checkout; defaults to "main".
        access_token: Token used to build the remote URL. Falls back to MODAIC_TOKEN when None.

    Returns:
        A tuple (rev_dir, commit):
        - rev_dir: the local checkout under MODAIC_HUB_CACHE/repo_path. For a commit or tag
          this is the directory named after the full SHA; for a branch it is the directory
          named after the branch.
        - commit: Commit(repo_path, sha) for the resolved revision.

    Warning:
        On any failure the whole cache directory for repo_path is deleted.
    """

    if access_token is None and MODAIC_TOKEN is not None:
        access_token = MODAIC_TOKEN

    program_dir = Path(MODAIC_HUB_CACHE) / repo_path
    main_dir = program_dir / "main"

    try:
        main_dir.parent.mkdir(parents=True, exist_ok=True)
        remote_url = _make_git_url(repo_path, access_token)

        # Ensure we have a main checkout at program_dir/main
        if not (main_dir / ".git").exists():
            shutil.rmtree(main_dir, ignore_errors=True)
            git.Repo.clone_from(remote_url, main_dir, multi_options=["--branch", "main"])

        # Attach origin
        main_repo = git.Repo(main_dir)
        if "origin" not in [r.name for r in main_repo.remotes]:
            main_repo.create_remote("origin", remote_url)
        else:
            main_repo.remotes.origin.set_url(remote_url)

        main_repo.remotes.origin.fetch()

        revision = resolve_revision(main_repo, rev)

        if revision.type == "commit" or revision.type == "tag":
            rev_dir = program_dir / revision.sha

            if not rev_dir.exists():
                main_repo.git.worktree("add", str(rev_dir.resolve()), revision.sha)

            # program_dir/<name> is refreshed as a shortcut to the SHA directory.
            shortcut_dir = program_dir / revision.name
            shortcut_dir.unlink(missing_ok=True)
            smart_link(shortcut_dir, rev_dir)

        elif revision.type == "branch":
            rev_dir = program_dir / revision.name

            if not rev_dir.exists():
                main_repo.git.worktree("add", str(rev_dir.resolve()), f"origin/{revision.name}")
            else:
                repo = git.Repo(rev_dir)
                repo.remotes.origin.pull(revision.name)

            # get the up to date sha for the branch
            revision = resolve_revision(main_repo, f"origin/{revision.name}")

        return rev_dir, Commit(repo_path, revision.sha)

    except Exception as e:
        # The cached checkout may be half-built, so remove it before re-raising.
        try:
            aggresive_rmtree(program_dir)
        except Exception:
            raise ModaicError(
                f"Failed to cleanup MODAIC_CACHE after a failed operation. We recommend manually deleting your modaic cache as it may be corrupted. Your cache is located at {MODAIC_CACHE}"
            ) from e
        raise
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def _move_to_commit_sha_folder(repo: git.Repo) -> git.Repo:
    """
    Moves the repo to a new path based on the commit SHA. (Unused for now)

    The working directory is renamed to a sibling directory named after the
    SHA of the current HEAD commit.

    Args:
        repo: The git.Repo object.

    Returns:
        The new git.Repo object.
    """
    commit = repo.head.commit
    repo_dir = Path(repo.working_dir)
    # The target must sit next to repo_dir: a directory cannot be renamed
    # into a path inside itself.
    new_path = repo_dir.parent / commit.hexsha
    repo_dir.rename(new_path)
    return git.Repo(new_path)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def load_repo(
    repo_path: str, access_token: Optional[str] = None, is_local: bool = False, rev: str = "main"
) -> Tuple[Path, Optional[Commit]]:
    """
    Resolves a repository to a local directory.

    Args:
        repo_path: A local filesystem path when is_local is True, otherwise a hub path.
        access_token: Forwarded to git_snapshot for hub repositories.
        is_local: Treat repo_path as an existing local directory instead of a hub path.
        rev: Revision forwarded to git_snapshot for hub repositories.

    Returns:
        (path, None) for a local repo, otherwise whatever git_snapshot returns.

    Raises:
        FileNotFoundError: If is_local is True and repo_path does not exist.
    """
    if not is_local:
        return git_snapshot(repo_path, access_token=access_token, rev=rev)

    local_path = Path(repo_path)
    if local_path.exists():
        return local_path, None
    raise FileNotFoundError(f"Local repo path {repo_path} does not exist")
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
@dataclass
class Revision:
    """
    A resolved revision of a git repository.

    Args:
        type: What the revision is: "branch", "tag", or "commit".
        name: The revision's name. e.g. "main", "v1.0.0", "1234567"
        sha: Commit SHA of the revision. e.g. "1234567890abcdef1234567890abcdef12345678"
    """

    # Kind of reference this revision was resolved to.
    type: Literal["branch", "tag", "commit"]
    # Branch name, tag name, or commit identifier.
    name: str
    # SHA of the commit the revision points at.
    sha: str
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def resolve_revision(repo: git.Repo, rev: str) -> Revision:
    """
    Resolves the revision to a branch, tag, or commit SHA.

    Args:
        repo: The git.Repo object. Its 'origin' remote is fetched first.
        rev: The revision to resolve.

    Returns:
        Revision dataclass where:
        - type ∈ {"branch", "tag", "commit"}
        - name is the normalized name:
            - branch: branch name without any remote prefix (e.g., "main", not "origin/main")
            - tag: tag name (e.g., "v1.0.0")
            - commit: the first 7 characters of the commit SHA
        - sha is the target commit SHA for branch/tag, or the commit SHA itself for commit

    Raises:
        RevisionNotFoundError: If the revision is not a valid branch, tag, or commit SHA.

    Example:
        >>> resolve_revision(repo, "main")
        Revision(type="branch", name="main", sha="<sha>")
        >>> resolve_revision(repo, "v1.0.0")
        Revision(type="tag", name="v1.0.0", sha="<sha>")
        >>> resolve_revision(repo, "1234567890")
        Revision(type="commit", name="<short sha>", sha="<sha>")
    """
    not_found_message = f"Revision '{rev}' is not a valid branch, tag, or commit SHA"
    # A missing revision is usually BadName, but a well-formed SHA that is absent
    # from the object database may surface as BadObject or ValueError instead.
    lookup_errors = (BadName, BadObject, ValueError)

    repo.remotes.origin.fetch()

    # Fast validation of rev; if not found, try origin/<rev> for branches existing only on remote
    try:
        target = repo.rev_parse(rev)
    except lookup_errors:
        try:
            target = repo.rev_parse(f"origin/{rev}")
        except lookup_errors:
            raise RevisionNotFoundError(not_found_message, rev=rev) from None
        else:
            rev = f"origin/{rev}"

    # rev_parse returns the tag object for an annotated tag; peel it to its target.
    while isinstance(target, git.objects.TagObject):
        target = target.object

    if not isinstance(target, git.objects.Commit):
        raise RevisionNotFoundError(f"Revision '{rev}' is not a valid branch, tag, or commit SHA", rev=rev)

    # Try to resolve to a reference where possible (branch/tag), else fallback to commit
    ref = target
    try:
        ref = name_to_object(repo, rev, return_ref=True)
    except (BadObject, BadName):
        # No reference carries this name (e.g. a SHA); keep the commit.
        pass

    # Commit SHA case
    if isinstance(ref, git.objects.Commit):
        full_sha = ref.hexsha
        return Revision(type="commit", name=full_sha[:7], sha=full_sha)

    # Checked in order: refs/tags/<tag>, refs/heads/<branch>,
    # refs/remotes/<remote>/<branch> (remote prefix dropped), then the bare
    # "<remote>/<branch>" form.
    ref_patterns = (
        ("tag", r"^refs/tags/(?P<name>.+)$"),
        ("branch", r"^refs/heads/(?P<name>.+)$"),
        ("branch", r"^refs/remotes/[^/]+/(?P<name>.+)$"),
        ("branch", r"^[^/]+/(?P<name>.+)$"),
    )
    for kind, pattern in ref_patterns:
        matched = re.match(pattern, ref.name)
        if matched:
            # ref.commit is the peeled commit, also for tag references.
            return Revision(type=kind, name=matched.group("name"), sha=ref.commit.hexsha)

    # If we still haven't matched, attempt to treat as a branch, then a tag, name directly
    for kind, prefix in (("branch", "refs/heads/"), ("tag", "refs/tags/")):
        try:
            possible_ref = name_to_object(repo, f"{prefix}{ref.name}", return_ref=True)
            return Revision(type=kind, name=ref.name, sha=possible_ref.commit.hexsha)
        except Exception:
            continue

    # As a last resort, if it peels to a commit, return commit
    try:
        full_sha = repo.commit(ref.name).hexsha
    except Exception:
        raise RevisionNotFoundError(f"Revision '{rev}' is not a valid branch, tag, or commit SHA", rev=rev) from None
    return Revision(type="commit", name=full_sha, sha=full_sha)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
# Not in use currently
|
|
608
|
+
def _update_staging_dir(
    module: Union["PrecompiledProgram", "Retriever"],
    repo_dir: Path,
    repo_path: str,
    with_code: bool = False,
    source: Optional[Path] = None,
):
    """
    Brings repo_dir up to date with the module's code and saves the module into it.

    Args:
        module: The program or retriever to save into repo_dir.
        repo_dir: The staging git directory to update.
        repo_path: Hub path, used when the code is taken from the workspace.
        with_code: Whether code from the workspace is included.
        source: Source repo directory to update from instead of the workspace, if given.
    """
    on_windows = sys.platform.startswith("win")

    # Windows copies files directly; other platforms build a sync dir and sync it.
    if on_windows:
        if source:
            copy_update_from(repo_dir, source)
        else:
            copy_update_program_dir(repo_dir, repo_path, with_code=with_code)
    else:
        if source:
            staged = sync_dir_from(source)
        else:
            staged = create_sync_dir(repo_path, with_code=with_code)
        _sync_repo(staged, repo_dir)

    # save auto_classes.json only if we are saving the code and not using a source repo
    module.save_precompiled(repo_dir, _with_auto_classes=with_code and not source)
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def _sync_repo(sync_dir: Path, repo_dir: Path) -> None:
    """
    Mirrors the contents of `sync_dir` into `repo_dir`, leaving `.git` untouched.

    Uses robocopy on Windows and rsync elsewhere. Files in repo_dir that are not
    present in sync_dir are deleted.

    Args:
        sync_dir: Directory holding the desired layout of the source files.
        repo_dir: Directory of the git repository to update.

    Raises:
        ModaicError: If the copy tool reports a failure.
    """
    src = f"{sync_dir.resolve()}/"
    dst = f"{repo_dir.resolve()}/"

    if sys.platform.startswith("win"):
        result = subprocess.run(
            [
                "robocopy",
                src,
                dst,
                "/MIR",
                "/XD",
                ".git",  # make sure .git is not deleted
            ],
        )
        # robocopy uses exit codes below 8 for successful outcomes; 8 and above mean failure.
        if result.returncode >= 8:
            raise ModaicError(f"robocopy failed with exit code {result.returncode}")
    else:
        result = subprocess.run(
            [
                "rsync",
                "-aL",
                "--delete",
                "--ignore-times",  # rsync usually looks at edit times to determine if it should skip a file. Disabling this behavior is useful for our pytest-suite.
                src,
                dst,
                "--exclude",
                ".git",  # make sure .git is not deleted
            ],
        )
        # A non-zero status means the repo dir may be only partially synced.
        if result.returncode != 0:
            raise ModaicError(f"rsync failed with exit code {result.returncode}")
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def _make_git_url(repo_path: str, access_token: Optional[str] = None) -> str:
    """
    Builds the git remote URL for a hub repository.

    Args:
        repo_path: Hub path ("user/repo").
        access_token: When given, the token owner's login and the token are
            embedded in the URL as credentials.

    Returns:
        The remote URL ending in "<repo_path>.git".
    """
    scheme = "https://" if MODAIC_GIT_URL.startswith("https://") else "http://"
    # Host part of MODAIC_GIT_URL with any scheme text removed.
    host = MODAIC_GIT_URL.replace("https://", "").replace("http://", "")

    if access_token is not None:
        username = get_user_info(access_token)["login"]
        return f"{scheme}{username}:{access_token}@{host}/{repo_path}.git"
    return f"{scheme}{host}/{repo_path}.git"
|