furu 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/__init__.py +82 -0
- furu/adapters/__init__.py +3 -0
- furu/adapters/submitit.py +195 -0
- furu/config.py +98 -0
- furu/core/__init__.py +4 -0
- furu/core/furu.py +999 -0
- furu/core/list.py +123 -0
- furu/dashboard/__init__.py +9 -0
- furu/dashboard/__main__.py +7 -0
- furu/dashboard/api/__init__.py +7 -0
- furu/dashboard/api/models.py +170 -0
- furu/dashboard/api/routes.py +135 -0
- furu/dashboard/frontend/dist/assets/index-CbdDfSOZ.css +1 -0
- furu/dashboard/frontend/dist/assets/index-DDv_TYB_.js +67 -0
- furu/dashboard/frontend/dist/favicon.svg +10 -0
- furu/dashboard/frontend/dist/index.html +22 -0
- furu/dashboard/main.py +134 -0
- furu/dashboard/scanner.py +931 -0
- furu/errors.py +76 -0
- furu/migrate.py +48 -0
- furu/migration.py +926 -0
- furu/runtime/__init__.py +27 -0
- furu/runtime/env.py +8 -0
- furu/runtime/logging.py +301 -0
- furu/runtime/tracebacks.py +64 -0
- furu/serialization/__init__.py +20 -0
- furu/serialization/migrations.py +246 -0
- furu/serialization/serializer.py +233 -0
- furu/storage/__init__.py +32 -0
- furu/storage/metadata.py +282 -0
- furu/storage/migration.py +81 -0
- furu/storage/state.py +1107 -0
- furu-0.0.1.dist-info/METADATA +502 -0
- furu-0.0.1.dist-info/RECORD +36 -0
- furu-0.0.1.dist-info/WHEEL +4 -0
- furu-0.0.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import enum
|
|
3
|
+
import hashlib
|
|
4
|
+
import importlib
|
|
5
|
+
import json
|
|
6
|
+
import pathlib
|
|
7
|
+
import textwrap
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import chz
|
|
12
|
+
|
|
13
|
+
from ..errors import _FuruMissing
|
|
14
|
+
from pydantic import BaseModel as PydanticBaseModel
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Type alias for JSON-serializable values. We use Any here because this serialization
|
|
18
|
+
# library handles arbitrary user-defined objects that we cannot know at compile time.
|
|
19
|
+
JsonValue = Any
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FuruSerializer:
    """Handles serialization, deserialization, and hashing of Furu objects.

    The class is used purely as a namespace (all methods are class/static
    methods). ``compute_hash`` defines the canonical identity of a Furu
    object, so its canonical form must remain byte-stable across releases —
    do not reorder or reformat the canonicalization logic casually.
    """

    # Key used in serialized dicts to record the originating class path.
    CLASS_MARKER = "__class__"

    @staticmethod
    def get_classname(obj: object) -> str:
        """Get fully qualified class name (``module.QualName``).

        For enum members the member name is appended after a colon, i.e.
        ``module.QualName:MEMBER``.

        Raises:
            ValueError: if the object's class lives in ``__main__`` — such a
                name could not be re-imported later by ``from_dict``.
        """
        classname = obj.__class__.__module__
        if classname == "__main__":
            raise ValueError("Cannot serialize objects from __main__ module")

        if isinstance(obj, enum.Enum):
            # NOTE(review): the ":member" suffix is not an importable dotted
            # path; presumably consumers split on ":" before importing —
            # confirm (from_dict's rpartition(".") would not handle it).
            return f"{classname}.{obj.__class__.__qualname__}:{obj.name}"
        return f"{classname}.{obj.__class__.__qualname__}"

    @classmethod
    def to_dict(cls, obj: object) -> JsonValue:
        """Convert object to JSON-serializable dictionary.

        chz objects become dicts tagged with ``CLASS_MARKER``; paths become
        strings; lists/tuples/dicts recurse.

        Raises:
            ValueError: when *obj* is the ``Furu.MISSING`` sentinel.
        """
        if isinstance(obj, _FuruMissing):
            raise ValueError("Cannot serialize Furu.MISSING")

        if chz.is_chz(obj):
            # Tag with the class path so from_dict can reconstruct the type.
            result = {cls.CLASS_MARKER: cls.get_classname(obj)}
            for field_name in chz.chz_fields(obj):
                result[field_name] = cls.to_dict(getattr(obj, field_name))
            return result

        if isinstance(obj, pathlib.Path):
            return str(obj)

        if isinstance(obj, (list, tuple)):
            # Tuples are flattened to lists (JSON has no tuple type).
            return [cls.to_dict(v) for v in obj]

        if isinstance(obj, dict):
            return {k: cls.to_dict(v) for k, v in obj.items()}

        # Anything else is returned as-is; assumes it is already
        # JSON-serializable. NOTE(review): enums and bytes fall through here
        # unconverted — presumably handled (or disallowed) upstream; confirm.
        return obj

    @classmethod
    def from_dict(cls, data: JsonValue) -> JsonValue:
        """Reconstruct object from dictionary.

        Inverse of ``to_dict``: dicts carrying ``CLASS_MARKER`` are turned
        back into instances by importing the recorded class and passing the
        remaining keys as keyword arguments.
        """
        if isinstance(data, dict) and cls.CLASS_MARKER in data:
            # "pkg.mod.Class" -> import "pkg.mod", fetch attribute "Class".
            module_path, _, class_name = data[cls.CLASS_MARKER].rpartition(".")
            data_class = getattr(importlib.import_module(module_path), class_name)

            kwargs = {
                k: cls.from_dict(v) for k, v in data.items() if k != cls.CLASS_MARKER
            }

            path_types = (Path, pathlib.Path)

            if chz.is_chz(data_class):
                # to_dict stored Path fields as plain strings; coerce them
                # back based on the declared chz field type.
                for name, field in chz.chz_fields(data_class).items():
                    if field.final_type in path_types and isinstance(
                        kwargs.get(name), str
                    ):
                        kwargs[name] = pathlib.Path(kwargs[name])
            return data_class(**kwargs)

        if isinstance(data, list):
            return [cls.from_dict(v) for v in data]

        if isinstance(data, dict):
            return {k: cls.from_dict(v) for k, v in data.items()}

        return data

    @classmethod
    def compute_hash(cls, obj: object, verbose: bool = False) -> str:
        """Compute deterministic hash of object.

        The object is first reduced to a canonical JSON-compatible form
        (sorted keys, sorted sets, UTC timestamps, digests for raw bytes),
        then serialized with a fixed JSON layout and hashed with BLAKE2s.

        Args:
            obj: the object to hash.
            verbose: when True, print the canonical JSON string (debugging).

        Raises:
            ValueError: when *obj* contains the ``Furu.MISSING`` sentinel.
            TypeError: when an unsupported type is encountered.
        """

        def canonicalize(item: object) -> JsonValue:
            if isinstance(item, _FuruMissing):
                raise ValueError("Cannot hash Furu.MISSING")

            if chz.is_chz(item):
                # Underscore-prefixed chz fields are excluded from identity.
                fields = chz.chz_fields(item)
                return {
                    "__class__": cls.get_classname(item),
                    **{
                        name: canonicalize(getattr(item, name))
                        for name in fields
                        if not name.startswith("_")
                    },
                }

            if isinstance(item, dict):
                filtered = item
                if cls.CLASS_MARKER in item:
                    # A marker dict is a serialized object: drop private
                    # (underscore) string keys, but keep the marker itself.
                    filtered = {
                        k: v
                        for k, v in item.items()
                        if not (isinstance(k, str) and k.startswith("_"))
                        or k == cls.CLASS_MARKER
                    }
                # Sort items so key order never affects the hash.
                return {k: canonicalize(v) for k, v in sorted(filtered.items())}

            if isinstance(item, (list, tuple)):
                return [canonicalize(v) for v in item]

            if isinstance(item, Path):
                return str(item)

            if isinstance(item, enum.Enum):
                # get_classname already embeds the member name (":MEMBER").
                return {"__enum__": cls.get_classname(item)}

            if isinstance(item, (set, frozenset)):
                # Sort to make unordered collections deterministic.
                return sorted(canonicalize(v) for v in item)

            if isinstance(item, (bytes, bytearray, memoryview)):
                # Hash raw bytes instead of embedding them in the JSON.
                return {"__bytes__": hashlib.sha256(item).hexdigest()}

            if isinstance(item, datetime.datetime):
                # Normalize to UTC with fixed precision for determinism.
                return item.astimezone(datetime.timezone.utc).isoformat(
                    timespec="microseconds"
                )

            if isinstance(item, (str, int, float, bool)) or item is None:
                return item

            if isinstance(item, PydanticBaseModel):
                return {
                    "__class__": cls.get_classname(item),
                    **{k: canonicalize(v) for k, v in item.model_dump().items()},
                }

            raise TypeError(f"Cannot hash type: {type(item)}")

        canonical = canonicalize(obj)
        # Fixed separators + sorted keys: the JSON layout is part of the
        # hash contract.
        json_str = json.dumps(canonical, sort_keys=True, separators=(",", ":"))

        if verbose:
            print(json_str)

        # 10-byte digest -> 20 hex chars; used as directory names elsewhere.
        return hashlib.blake2s(json_str.encode(), digest_size=10).hexdigest()

    @classmethod
    def to_python(cls, obj: object, multiline: bool = True) -> str:
        """Convert object to Python code representation.

        Produces a best-effort, human-readable constructor expression for
        *obj* (used for display/metadata, not guaranteed to round-trip).

        Args:
            obj: the object to render.
            multiline: when True, nest chz objects and dicts across lines
                with 4-space indentation; otherwise emit a single line.

        Raises:
            ValueError: when *obj* contains the ``Furu.MISSING`` sentinel.
        """

        def to_py_recursive(item: object, indent: int = 0) -> str:
            if isinstance(item, _FuruMissing):
                raise ValueError("Cannot convert Furu.MISSING to Python")

            pad = "" if not multiline else " " * indent
            next_indent = indent + (4 if multiline else 0)

            if chz.is_chz(item):
                cls_path = cls.get_classname(item)
                fields = []
                for name, field in chz.chz_fields(item).items():
                    fields.append(
                        f"{name}={to_py_recursive(getattr(item, name), next_indent)}"
                    )

                if multiline:
                    inner = (",\n" + " " * next_indent).join(fields)
                    return f"{cls_path}(\n{pad} {inner}\n{pad})"
                return f"{cls_path}({', '.join(fields)})"

            if isinstance(item, enum.Enum):
                # NOTE(review): renders as "module.Class:MEMBER" (see
                # get_classname), which is not valid Python syntax — confirm
                # this output is display-only.
                return cls.get_classname(item)

            if isinstance(item, pathlib.Path):
                return f"pathlib.Path({str(item)!r})"

            if isinstance(item, datetime.datetime):
                iso = item.astimezone(datetime.timezone.utc).isoformat(
                    timespec="microseconds"
                )
                return f"datetime.datetime.fromisoformat({iso!r})"

            if isinstance(item, (bytes, bytearray, memoryview)):
                # NOTE(review): this emits the SHA-256 digest of the bytes,
                # not the original content — evaluating the result does NOT
                # reconstruct the value. Presumably intentional (avoids
                # embedding large blobs); confirm.
                hex_str = hashlib.sha256(item).hexdigest()
                return f"bytes.fromhex({hex_str!r})"

            if isinstance(item, list):
                items = ", ".join(to_py_recursive(v, next_indent) for v in item)
                return f"[{items}]"

            if isinstance(item, tuple):
                items = ", ".join(to_py_recursive(v, next_indent) for v in item)
                # Single-element tuples need the trailing comma: "(x,)".
                comma = "," if len(item) == 1 else ""
                return f"({items}{comma})"

            if isinstance(item, set):
                items = ", ".join(to_py_recursive(v, next_indent) for v in item)
                return f"{{{items}}}"

            if isinstance(item, frozenset):
                items = ", ".join(to_py_recursive(v, next_indent) for v in item)
                return f"frozenset({{{items}}})"

            if isinstance(item, dict):
                kv_pairs = [
                    f"{to_py_recursive(k, next_indent)}: {to_py_recursive(v, next_indent)}"
                    for k, v in item.items()
                ]

                if multiline:
                    joined = (",\n" + " " * (indent + 4)).join(kv_pairs)
                    return f"{{\n{pad} {joined}\n{pad}}}"
                else:
                    return "{" + ", ".join(kv_pairs) + "}"

            # Fallback for scalars (str/int/float/bool/None and anything
            # with a sensible repr).
            return repr(item)

        result = to_py_recursive(obj, indent=0)
        if multiline:
            result = textwrap.dedent(result).strip()
        return result
|
furu/storage/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from .metadata import (
|
|
2
|
+
EnvironmentInfo,
|
|
3
|
+
GitInfo,
|
|
4
|
+
FuruMetadata,
|
|
5
|
+
MetadataManager,
|
|
6
|
+
clear_metadata_cache,
|
|
7
|
+
)
|
|
8
|
+
from .migration import MigrationManager, MigrationRecord
|
|
9
|
+
from .state import (
|
|
10
|
+
ComputeLockContext,
|
|
11
|
+
FuruErrorState,
|
|
12
|
+
StateAttempt,
|
|
13
|
+
StateManager,
|
|
14
|
+
StateOwner,
|
|
15
|
+
compute_lock,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"ComputeLockContext",
|
|
20
|
+
"EnvironmentInfo",
|
|
21
|
+
"GitInfo",
|
|
22
|
+
"FuruErrorState",
|
|
23
|
+
"FuruMetadata",
|
|
24
|
+
"MetadataManager",
|
|
25
|
+
"MigrationManager",
|
|
26
|
+
"MigrationRecord",
|
|
27
|
+
"StateAttempt",
|
|
28
|
+
"StateManager",
|
|
29
|
+
"StateOwner",
|
|
30
|
+
"clear_metadata_cache",
|
|
31
|
+
"compute_lock",
|
|
32
|
+
]
|
furu/storage/metadata.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import getpass
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import platform
|
|
6
|
+
import socket
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
|
+
|
|
14
|
+
from ..config import FURU_CONFIG
|
|
15
|
+
from ..serialization import BaseModel as PydanticBaseModel
|
|
16
|
+
from ..serialization import FuruSerializer
|
|
17
|
+
from ..serialization.serializer import JsonValue
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from ..core.furu import Furu
|
|
21
|
+
|
|
22
|
+
# Module-level cache for metadata (controlled via FURU_CACHE_METADATA)
|
|
23
|
+
_cached_git_info: "GitInfo | None" = None
|
|
24
|
+
_cached_git_info_time: float = 0.0
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def clear_metadata_cache() -> None:
    """Reset the module-level git-info cache to its pristine state.

    Handy in tests, and in long-running processes that want the next
    metadata collection to re-query git instead of reusing a stale result.
    """
    global _cached_git_info, _cached_git_info_time
    _cached_git_info_time = 0.0
    _cached_git_info = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GitInfo(BaseModel):
    """Git repository information captured at collection time."""

    # Reject unknown keys and require exact types during validation.
    model_config = ConfigDict(extra="forbid", strict=True)

    git_commit: str  # HEAD commit SHA, or "<no-git>" when no repo is found
    git_branch: str  # current branch name, or "<no-git>"
    git_remote: str | None  # URL of the "origin" remote, if configured
    git_patch: str  # combined unstaged/staged/untracked diff, or a placeholder
    git_submodules: dict[str, str]  # submodule path -> commit, per `git submodule status`
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class EnvironmentInfo(BaseModel):
    """Runtime environment information captured at collection time."""

    # Reject unknown keys and require exact types during validation.
    model_config = ConfigDict(extra="forbid", strict=True)

    timestamp: str  # collection time, ISO-8601 in UTC (microsecond precision)
    command: str  # space-joined sys.argv, or "<unknown>" when argv is empty
    python_version: str  # sys.version string
    executable: str  # path to the running Python interpreter
    platform: str  # platform.platform() string
    hostname: str  # socket.gethostname()
    user: str  # getpass.getuser()
    pid: int  # process id of the collecting process
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class FuruMetadata(BaseModel):
    """Complete metadata for a Furu experiment.

    Flat union of the Furu object description, GitInfo fields, and
    EnvironmentInfo fields (kept flat so the JSON on disk is one level).
    """

    # Reject unknown keys and require exact types during validation.
    model_config = ConfigDict(extra="forbid", strict=True)

    # Furu-specific fields
    furu_python_def: str  # single-line Python repr from FuruSerializer.to_python
    furu_obj: JsonValue  # Serialized Furu object from FuruSerializer.to_dict()
    furu_hash: str  # deterministic hash from FuruSerializer.compute_hash
    furu_path: str  # resolved absolute path of the experiment directory

    # Git info (mirrors GitInfo)
    git_commit: str
    git_branch: str
    git_remote: str | None
    git_patch: str
    git_submodules: dict[str, str]

    # Environment info (mirrors EnvironmentInfo)
    timestamp: str
    command: str
    python_version: str
    executable: str
    platform: str
    hostname: str
    user: str
    pid: int
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class MetadataManager:
    """Handles metadata collection and storage.

    Collects git + environment snapshots and persists them as JSON under
    ``<experiment-dir>/.furu/metadata.json``.
    """

    # Hidden subdirectory that holds Furu-internal files.
    INTERNAL_DIR = ".furu"
    METADATA_FILE = "metadata.json"

    @classmethod
    def get_metadata_path(cls, directory: Path) -> Path:
        """Return ``directory/.furu/metadata.json``."""
        return directory / cls.INTERNAL_DIR / cls.METADATA_FILE

    @staticmethod
    def run_git_command(args: list[str]) -> str:
        """Run git command, return stripped stdout.

        Exit code 1 is tolerated (e.g. ``git diff`` exits 1 when differences
        exist); any other nonzero code raises CalledProcessError. A hung git
        raises subprocess.TimeoutExpired after 10s.
        """
        proc = subprocess.run(
            ["git", *args], text=True, capture_output=True, timeout=10
        )
        if proc.returncode not in (0, 1):
            proc.check_returncode()
        return proc.stdout.strip()

    @classmethod
    def collect_git_info(cls, ignore_diff: bool = False) -> GitInfo:
        """Collect git repository information.

        Results are cached at module level for FURU_CONFIG.cache_metadata_ttl_sec
        seconds (caching disabled when the TTL is None).

        Args:
            ignore_diff: skip computing the working-tree patch (stores the
                "<ignored-diff>" placeholder instead).

        Raises:
            subprocess.CalledProcessError: when git fails and
                FURU_CONFIG.require_git is set.
            ValueError: when the collected patch exceeds 50,000 bytes.
        """
        global _cached_git_info, _cached_git_info_time
        # Local import: only needed for the TTL bookkeeping below.
        import time

        ttl = FURU_CONFIG.cache_metadata_ttl_sec
        # Return cached result if caching is enabled and not expired
        if ttl is not None and _cached_git_info is not None:
            age = time.time() - _cached_git_info_time
            if age < ttl:
                return _cached_git_info

        if not FURU_CONFIG.require_git:
            # Best-effort mode: fall back to placeholder values when the
            # current directory is not a git repo.
            try:
                head = cls.run_git_command(["rev-parse", "HEAD"])
                branch = cls.run_git_command(["rev-parse", "--abbrev-ref", "HEAD"])
            except subprocess.CalledProcessError:
                # NOTE(review): this no-git fallback bypasses the cache
                # update below — presumably intentional; confirm.
                return GitInfo(
                    git_commit="<no-git>",
                    git_branch="<no-git>",
                    git_remote=None,
                    git_patch="<no-git>",
                    git_submodules={},
                )
        else:
            # Strict mode: let any git failure propagate to the caller.
            head = cls.run_git_command(["rev-parse", "HEAD"])
            branch = cls.run_git_command(["rev-parse", "--abbrev-ref", "HEAD"])

        if FURU_CONFIG.require_git_remote:
            remote = cls.run_git_command(["remote", "get-url", "origin"])
        else:
            # Remote is optional: tolerate repos without an "origin".
            try:
                remote = cls.run_git_command(["remote", "get-url", "origin"])
            except subprocess.CalledProcessError:
                remote = None

        if ignore_diff:
            patch = "<ignored-diff>"
        else:
            unstaged = cls.run_git_command(["diff"])
            staged = cls.run_git_command(["diff", "--cached"])
            untracked = cls.run_git_command(
                ["ls-files", "--others", "--exclude-standard"]
            ).splitlines()

            # `diff --no-index /dev/null <file>` renders each untracked file
            # as an all-additions patch (exits 1, tolerated above).
            untracked_patches = "\n".join(
                cls.run_git_command(["diff", "--no-index", "/dev/null", f])
                for f in untracked
            )

            # filter(None, ...) drops empty sections so headers without
            # content still join cleanly.
            patch = "\n".join(
                filter(
                    None,
                    [
                        "# === unstaged ==================================================",
                        unstaged,
                        "# === staged ====================================================",
                        staged,
                        "# === untracked ================================================",
                        untracked_patches,
                    ],
                )
            )

        # Guard against blowing up metadata.json with a huge patch.
        if len(patch) > 50_000:
            raise ValueError(
                f"Git diff too large ({len(patch):,} bytes). "
                "Use ignore_diff=True or FURU_IGNORE_DIFF=1"
            )

        # `git submodule status` lines look like "<commit> <path> (<ref>)";
        # map path -> commit (commit may carry a +/-/U status prefix).
        submodules: dict[str, str] = {}
        for line in cls.run_git_command(["submodule", "status"]).splitlines():
            parts = line.split()
            if len(parts) >= 2:
                submodules[parts[1]] = parts[0]

        result = GitInfo(
            git_commit=head,
            git_branch=branch,
            git_remote=remote,
            git_patch=patch,
            git_submodules=submodules,
        )

        # Cache result if caching is enabled
        if ttl is not None:
            _cached_git_info = result
            _cached_git_info_time = time.time()

        return result

    @staticmethod
    def collect_environment_info() -> EnvironmentInfo:
        """Collect environment information (time, command line, host, process)."""
        return EnvironmentInfo(
            timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(
                timespec="microseconds"
            ),
            command=" ".join(sys.argv) if sys.argv else "<unknown>",
            python_version=sys.version,
            executable=sys.executable,
            platform=platform.platform(),
            hostname=socket.gethostname(),
            user=getpass.getuser(),
            pid=os.getpid(),
        )

    @classmethod
    def create_metadata(
        cls, furu_obj: "Furu", directory: Path, ignore_diff: bool = False
    ) -> FuruMetadata:
        """Create complete metadata for a Furu object.

        Args:
            furu_obj: the experiment object to describe.
            directory: the experiment directory (stored resolved/absolute).
            ignore_diff: forwarded to collect_git_info.

        Raises:
            TypeError: when the serializer does not produce a dict for
                *furu_obj* (indicates a non-chz object was passed).
        """
        git_info = cls.collect_git_info(ignore_diff)
        env_info = cls.collect_environment_info()

        serialized_obj = FuruSerializer.to_dict(furu_obj)
        if not isinstance(serialized_obj, dict):
            raise TypeError(
                f"Expected FuruSerializer.to_dict to return dict, got {type(serialized_obj)}"
            )

        # Flatten GitInfo and EnvironmentInfo into the single-level record.
        return FuruMetadata(
            furu_python_def=FuruSerializer.to_python(furu_obj, multiline=False),
            furu_obj=serialized_obj,
            furu_hash=FuruSerializer.compute_hash(furu_obj),
            furu_path=str(directory.resolve()),
            git_commit=git_info.git_commit,
            git_branch=git_info.git_branch,
            git_remote=git_info.git_remote,
            git_patch=git_info.git_patch,
            git_submodules=git_info.git_submodules,
            timestamp=env_info.timestamp,
            command=env_info.command,
            python_version=env_info.python_version,
            executable=env_info.executable,
            platform=env_info.platform,
            hostname=env_info.hostname,
            user=env_info.user,
            pid=env_info.pid,
        )

    @classmethod
    def write_metadata(cls, metadata: FuruMetadata, directory: Path) -> None:
        """Write metadata to ``directory/.furu/metadata.json``, creating dirs."""
        metadata_path = cls.get_metadata_path(directory)
        metadata_path.parent.mkdir(parents=True, exist_ok=True)
        metadata_path.write_text(
            json.dumps(
                metadata.model_dump(mode="json"),
                indent=2,
                # Fallback serializer for values model_dump leaves opaque:
                # nested pydantic models are dumped, anything else is
                # stringified. NOTE(review): default=str-style stringification
                # is lossy — confirm acceptable for all stored values.
                default=lambda o: o.model_dump()
                if PydanticBaseModel is not None and isinstance(o, PydanticBaseModel)
                else str(o),
            )
        )

    @classmethod
    def read_metadata(cls, directory: Path) -> FuruMetadata:
        """Read and validate metadata from file.

        Raises:
            FileNotFoundError: when no metadata file exists for *directory*.
        """
        metadata_path = cls.get_metadata_path(directory)
        if not metadata_path.is_file():
            raise FileNotFoundError(f"Metadata not found: {metadata_path}")
        data = json.loads(metadata_path.read_text())
        return FuruMetadata.model_validate(data)

    @classmethod
    def read_metadata_raw(cls, directory: Path) -> dict[str, JsonValue] | None:
        """Read raw metadata JSON from file, returning None if not found."""
        metadata_path = cls.get_metadata_path(directory)
        if not metadata_path.is_file():
            return None
        return json.loads(metadata_path.read_text())
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict
|
|
8
|
+
|
|
9
|
+
from ..config import FURU_CONFIG
|
|
10
|
+
from ..serialization.serializer import JsonValue
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
RootKind = Literal["data", "git"]
|
|
14
|
+
MigrationPolicy = Literal["alias", "move", "copy"]
|
|
15
|
+
MigrationKind = Literal["alias", "moved", "copied", "migrated"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MigrationRecord(BaseModel):
    """Record describing how one Furu directory was migrated to another."""

    # Unknown keys are ignored (forward compatibility); types are strict.
    model_config = ConfigDict(extra="ignore", strict=True)

    kind: MigrationKind  # what happened: "alias", "moved", "copied", "migrated"
    policy: MigrationPolicy  # requested policy: "alias", "move", or "copy"
    from_namespace: str  # dotted namespace of the source entry
    from_hash: str  # furu hash of the source entry
    from_root: RootKind  # root the source lived under: "data" or "git"
    to_namespace: str  # dotted namespace of the destination entry
    to_hash: str  # furu hash of the destination entry
    to_root: RootKind  # root the destination lives under
    migrated_at: str  # timestamp of the migration (format not enforced here)
    overwritten_at: str | None = None  # set when the record was later overwritten
    # Values filled in during migration — presumably defaults for fields the
    # new schema added; verify against the migration module.
    default_values: dict[str, JsonValue] | None = None
    origin: str | None = None  # free-form provenance string
    note: str | None = None  # free-form human note
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class MigrationManager:
    """Reads, writes, and resolves per-directory migration records.

    Records live at ``<dir>/.furu/migration.json`` and are modeled by
    ``MigrationRecord``.
    """

    INTERNAL_DIR = ".furu"
    MIGRATION_FILE = "migration.json"

    @classmethod
    def get_migration_path(cls, directory: Path) -> Path:
        """Location of the migration record inside *directory*."""
        return directory / cls.INTERNAL_DIR / cls.MIGRATION_FILE

    @classmethod
    def read_migration(cls, directory: Path) -> MigrationRecord | None:
        """Load the record for *directory*, or None when none exists."""
        record_file = cls.get_migration_path(directory)
        if record_file.is_file():
            return MigrationRecord.model_validate(json.loads(record_file.read_text()))
        return None

    @classmethod
    def write_migration(cls, record: MigrationRecord, directory: Path) -> None:
        """Persist *record*, writing to a temp file and renaming into place."""
        destination = cls.get_migration_path(directory)
        destination.parent.mkdir(parents=True, exist_ok=True)
        scratch = destination.with_suffix(".tmp")
        payload = json.dumps(record.model_dump(mode="json"), indent=2)
        scratch.write_text(payload)
        scratch.replace(destination)

    @classmethod
    def resolve_dir(
        cls, record: MigrationRecord, *, target: Literal["from", "to"]
    ) -> Path:
        """Directory the record's "from" or "to" side points at under its root."""
        if target == "from":
            ns, digest, kind = (
                record.from_namespace,
                record.from_hash,
                record.from_root,
            )
        else:
            ns, digest, kind = record.to_namespace, record.to_hash, record.to_root
        base = FURU_CONFIG.get_root(version_controlled=(kind == "git"))
        # Dotted namespace maps to nested directories, then the hash leaf.
        return base / Path(*ns.split(".")) / digest

    @classmethod
    def root_kind_for_dir(cls, directory: Path) -> RootKind:
        """Classify *directory* as living under the data root or the git root."""
        for vc in (False, True):
            if directory.is_relative_to(FURU_CONFIG.get_root(version_controlled=vc)):
                return "git" if vc else "data"
        raise ValueError(f"Directory {directory} is not under a Furu root")
|