lamindb 0.76.8__py3-none-any.whl → 0.76.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1205
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +389 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -308
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +631 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +581 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -90
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -172
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/LICENSE +201 -201
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/METADATA +4 -4
- lamindb-0.76.9.dist-info/RECORD +60 -0
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/WHEEL +1 -1
- lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/core/_settings.py
CHANGED
@@ -1,187 +1,187 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import os
|
4
|
-
from typing import TYPE_CHECKING, Literal, Mapping
|
5
|
-
|
6
|
-
import lamindb_setup as ln_setup
|
7
|
-
from lamin_utils import logger
|
8
|
-
from lamindb_setup._set_managed_storage import set_managed_storage
|
9
|
-
from lamindb_setup.core._settings import settings as setup_settings
|
10
|
-
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
|
-
|
12
|
-
from .subsettings._creation_settings import CreationSettings, creation_settings
|
13
|
-
from .subsettings._transform_settings import TransformSettings, transform_settings
|
14
|
-
|
15
|
-
if TYPE_CHECKING:
|
16
|
-
from pathlib import Path
|
17
|
-
|
18
|
-
from lamindb_setup.core._settings_storage import StorageSettings
|
19
|
-
from upath import UPath
|
20
|
-
|
21
|
-
VERBOSITY_TO_INT = {
|
22
|
-
"error": 0, # 40
|
23
|
-
"warning": 1, # 30
|
24
|
-
"success": 2, # 25
|
25
|
-
"info": 3, # 20
|
26
|
-
"hint": 4, # 15
|
27
|
-
"debug": 5, # 10
|
28
|
-
}
|
29
|
-
VERBOSITY_TO_STR: dict[int, str] = dict(
|
30
|
-
[reversed(i) for i in VERBOSITY_TO_INT.items()] # type: ignore
|
31
|
-
)
|
32
|
-
|
33
|
-
|
34
|
-
class Settings:
|
35
|
-
"""Settings.
|
36
|
-
|
37
|
-
Use ``lamindb.settings`` instead of instantiating this class yourself.
|
38
|
-
"""
|
39
|
-
|
40
|
-
def __init__(self, git_repo: str | None):
|
41
|
-
self._verbosity_int: int = 1 # warning-level logging
|
42
|
-
logger.set_verbosity(self._verbosity_int)
|
43
|
-
self._sync_git_repo: str | None = git_repo
|
44
|
-
|
45
|
-
@property
|
46
|
-
def creation(self) -> CreationSettings:
|
47
|
-
"""Record creation settings.
|
48
|
-
|
49
|
-
For example, `ln.settings.creation.search_names = False` will disable
|
50
|
-
searching for records with similar names during creation.
|
51
|
-
"""
|
52
|
-
return creation_settings
|
53
|
-
|
54
|
-
track_run_inputs: bool = True
|
55
|
-
"""Track files as input upon `.load()`, `.cache()` and `.open()`.
|
56
|
-
|
57
|
-
Requires a global run context with :func:`~lamindb.core.Context.track` was created!
|
58
|
-
|
59
|
-
FAQ: :doc:`/faq/track-run-inputs`
|
60
|
-
"""
|
61
|
-
__using_key: str | None = None
|
62
|
-
_using_storage: str | None = None
|
63
|
-
|
64
|
-
@property
|
65
|
-
def _using_key(self) -> str | None:
|
66
|
-
"""Key for Django database settings."""
|
67
|
-
return self.__using_key
|
68
|
-
|
69
|
-
@_using_key.setter
|
70
|
-
def _using_key(self, value: str | None):
|
71
|
-
ln_setup.settings._using_key = value
|
72
|
-
self.__using_key = value
|
73
|
-
|
74
|
-
@property
|
75
|
-
def _storage_settings(self) -> ln_setup.core.StorageSettings:
|
76
|
-
if self._using_storage is None:
|
77
|
-
storage_settings = ln_setup.settings.storage
|
78
|
-
else:
|
79
|
-
storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
|
80
|
-
return storage_settings
|
81
|
-
|
82
|
-
@property
|
83
|
-
def transform(self) -> TransformSettings:
|
84
|
-
"""Transform settings.
|
85
|
-
|
86
|
-
Is deprecated since version 0.76.1.
|
87
|
-
"""
|
88
|
-
# enable warning soon
|
89
|
-
# logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
|
90
|
-
return transform_settings
|
91
|
-
|
92
|
-
@property
|
93
|
-
def sync_git_repo(self) -> str | None:
|
94
|
-
"""Sync transforms with scripts in git repository.
|
95
|
-
|
96
|
-
Provide the full git repo URL.
|
97
|
-
"""
|
98
|
-
return self._sync_git_repo
|
99
|
-
|
100
|
-
@sync_git_repo.setter
|
101
|
-
def sync_git_repo(self, value) -> None:
|
102
|
-
"""Sync transforms with scripts in git repository.
|
103
|
-
|
104
|
-
For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
|
105
|
-
"""
|
106
|
-
self._sync_git_repo = sanitize_git_repo_url(value)
|
107
|
-
if not self._sync_git_repo.startswith("https://"): # pragma: nocover
|
108
|
-
raise ValueError("git repository URL must start with 'https://'.")
|
109
|
-
|
110
|
-
@property
|
111
|
-
def storage(self) -> StorageSettings:
|
112
|
-
"""Default storage location.
|
113
|
-
|
114
|
-
Examples:
|
115
|
-
|
116
|
-
>>> ln.settings.storage
|
117
|
-
StorageSettings(root='s3://my-bucket', uid='j7MaPxtLxPeE')
|
118
|
-
|
119
|
-
>>> ln.settings.storage.root
|
120
|
-
UPath('s3://my-bucket')
|
121
|
-
|
122
|
-
You can switch the default storage location to another managed storage
|
123
|
-
location by passing a string:
|
124
|
-
|
125
|
-
>>> ln.settings.storage = "s3://some-bucket"
|
126
|
-
|
127
|
-
You can also pass additional fsspec kwargs via:
|
128
|
-
|
129
|
-
>>> kwargs = dict(
|
130
|
-
>>> profile="some_profile", # fsspec arg
|
131
|
-
>>> cache_regions=True # fsspec arg for s3
|
132
|
-
>>> )
|
133
|
-
>>> ln.settings.storage = "s3://some-bucket", kwargs
|
134
|
-
"""
|
135
|
-
return self._storage_settings
|
136
|
-
|
137
|
-
@storage.setter
|
138
|
-
def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
|
139
|
-
if isinstance(path_kwargs, tuple):
|
140
|
-
path, kwargs = path_kwargs
|
141
|
-
else:
|
142
|
-
path, kwargs = path_kwargs, {}
|
143
|
-
set_managed_storage(path, **kwargs)
|
144
|
-
|
145
|
-
@property
|
146
|
-
def storage_local(self) -> StorageSettings:
|
147
|
-
"""An additional local default storage (a path to its root).
|
148
|
-
|
149
|
-
Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
|
150
|
-
|
151
|
-
Guide: :doc:`faq/keep-artifacts-local`
|
152
|
-
"""
|
153
|
-
return ln_setup.settings.instance.storage_local
|
154
|
-
|
155
|
-
@storage_local.setter
|
156
|
-
def storage_local(self, local_root: Path):
|
157
|
-
ln_setup.settings.instance.storage_local = local_root
|
158
|
-
|
159
|
-
@property
|
160
|
-
def verbosity(self) -> str:
|
161
|
-
"""Logger verbosity (default `'warning'`).
|
162
|
-
|
163
|
-
- `'error'`: ❌ only show error messages
|
164
|
-
- `'warning'`: ❗ also show warning messages
|
165
|
-
- `'success'`: ✅ also show success and save messages
|
166
|
-
- `'info'`: 💡 also show info messages
|
167
|
-
- `'hint'`: 💡 also show hint messages
|
168
|
-
- `'debug'`: 🐛 also show detailed debug messages
|
169
|
-
"""
|
170
|
-
return VERBOSITY_TO_STR[self._verbosity_int]
|
171
|
-
|
172
|
-
@verbosity.setter
|
173
|
-
def verbosity(self, verbosity: str | int):
|
174
|
-
if isinstance(verbosity, str):
|
175
|
-
verbosity_int = VERBOSITY_TO_INT[verbosity]
|
176
|
-
else:
|
177
|
-
verbosity_int = verbosity
|
178
|
-
self._verbosity_int = verbosity_int
|
179
|
-
logger.set_verbosity(verbosity_int)
|
180
|
-
|
181
|
-
|
182
|
-
if os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
|
183
|
-
git_repo = None
|
184
|
-
else:
|
185
|
-
git_repo = setup_settings.instance.git_repo
|
186
|
-
|
187
|
-
settings = Settings(git_repo=git_repo)
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
from typing import TYPE_CHECKING, Literal, Mapping
|
5
|
+
|
6
|
+
import lamindb_setup as ln_setup
|
7
|
+
from lamin_utils import logger
|
8
|
+
from lamindb_setup._set_managed_storage import set_managed_storage
|
9
|
+
from lamindb_setup.core._settings import settings as setup_settings
|
10
|
+
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
|
+
|
12
|
+
from .subsettings._creation_settings import CreationSettings, creation_settings
|
13
|
+
from .subsettings._transform_settings import TransformSettings, transform_settings
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from pathlib import Path
|
17
|
+
|
18
|
+
from lamindb_setup.core._settings_storage import StorageSettings
|
19
|
+
from upath import UPath
|
20
|
+
|
21
|
+
VERBOSITY_TO_INT = {
|
22
|
+
"error": 0, # 40
|
23
|
+
"warning": 1, # 30
|
24
|
+
"success": 2, # 25
|
25
|
+
"info": 3, # 20
|
26
|
+
"hint": 4, # 15
|
27
|
+
"debug": 5, # 10
|
28
|
+
}
|
29
|
+
VERBOSITY_TO_STR: dict[int, str] = dict(
|
30
|
+
[reversed(i) for i in VERBOSITY_TO_INT.items()] # type: ignore
|
31
|
+
)
|
32
|
+
|
33
|
+
|
34
|
+
class Settings:
|
35
|
+
"""Settings.
|
36
|
+
|
37
|
+
Use ``lamindb.settings`` instead of instantiating this class yourself.
|
38
|
+
"""
|
39
|
+
|
40
|
+
def __init__(self, git_repo: str | None):
|
41
|
+
self._verbosity_int: int = 1 # warning-level logging
|
42
|
+
logger.set_verbosity(self._verbosity_int)
|
43
|
+
self._sync_git_repo: str | None = git_repo
|
44
|
+
|
45
|
+
@property
|
46
|
+
def creation(self) -> CreationSettings:
|
47
|
+
"""Record creation settings.
|
48
|
+
|
49
|
+
For example, `ln.settings.creation.search_names = False` will disable
|
50
|
+
searching for records with similar names during creation.
|
51
|
+
"""
|
52
|
+
return creation_settings
|
53
|
+
|
54
|
+
track_run_inputs: bool = True
|
55
|
+
"""Track files as input upon `.load()`, `.cache()` and `.open()`.
|
56
|
+
|
57
|
+
Requires a global run context with :func:`~lamindb.core.Context.track` was created!
|
58
|
+
|
59
|
+
FAQ: :doc:`/faq/track-run-inputs`
|
60
|
+
"""
|
61
|
+
__using_key: str | None = None
|
62
|
+
_using_storage: str | None = None
|
63
|
+
|
64
|
+
@property
|
65
|
+
def _using_key(self) -> str | None:
|
66
|
+
"""Key for Django database settings."""
|
67
|
+
return self.__using_key
|
68
|
+
|
69
|
+
@_using_key.setter
|
70
|
+
def _using_key(self, value: str | None):
|
71
|
+
ln_setup.settings._using_key = value
|
72
|
+
self.__using_key = value
|
73
|
+
|
74
|
+
@property
|
75
|
+
def _storage_settings(self) -> ln_setup.core.StorageSettings:
|
76
|
+
if self._using_storage is None:
|
77
|
+
storage_settings = ln_setup.settings.storage
|
78
|
+
else:
|
79
|
+
storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
|
80
|
+
return storage_settings
|
81
|
+
|
82
|
+
@property
|
83
|
+
def transform(self) -> TransformSettings:
|
84
|
+
"""Transform settings.
|
85
|
+
|
86
|
+
Is deprecated since version 0.76.1.
|
87
|
+
"""
|
88
|
+
# enable warning soon
|
89
|
+
# logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
|
90
|
+
return transform_settings
|
91
|
+
|
92
|
+
@property
|
93
|
+
def sync_git_repo(self) -> str | None:
|
94
|
+
"""Sync transforms with scripts in git repository.
|
95
|
+
|
96
|
+
Provide the full git repo URL.
|
97
|
+
"""
|
98
|
+
return self._sync_git_repo
|
99
|
+
|
100
|
+
@sync_git_repo.setter
|
101
|
+
def sync_git_repo(self, value) -> None:
|
102
|
+
"""Sync transforms with scripts in git repository.
|
103
|
+
|
104
|
+
For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
|
105
|
+
"""
|
106
|
+
self._sync_git_repo = sanitize_git_repo_url(value)
|
107
|
+
if not self._sync_git_repo.startswith("https://"): # pragma: nocover
|
108
|
+
raise ValueError("git repository URL must start with 'https://'.")
|
109
|
+
|
110
|
+
@property
|
111
|
+
def storage(self) -> StorageSettings:
|
112
|
+
"""Default storage location.
|
113
|
+
|
114
|
+
Examples:
|
115
|
+
|
116
|
+
>>> ln.settings.storage
|
117
|
+
StorageSettings(root='s3://my-bucket', uid='j7MaPxtLxPeE')
|
118
|
+
|
119
|
+
>>> ln.settings.storage.root
|
120
|
+
UPath('s3://my-bucket')
|
121
|
+
|
122
|
+
You can switch the default storage location to another managed storage
|
123
|
+
location by passing a string:
|
124
|
+
|
125
|
+
>>> ln.settings.storage = "s3://some-bucket"
|
126
|
+
|
127
|
+
You can also pass additional fsspec kwargs via:
|
128
|
+
|
129
|
+
>>> kwargs = dict(
|
130
|
+
>>> profile="some_profile", # fsspec arg
|
131
|
+
>>> cache_regions=True # fsspec arg for s3
|
132
|
+
>>> )
|
133
|
+
>>> ln.settings.storage = "s3://some-bucket", kwargs
|
134
|
+
"""
|
135
|
+
return self._storage_settings
|
136
|
+
|
137
|
+
@storage.setter
|
138
|
+
def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
|
139
|
+
if isinstance(path_kwargs, tuple):
|
140
|
+
path, kwargs = path_kwargs
|
141
|
+
else:
|
142
|
+
path, kwargs = path_kwargs, {}
|
143
|
+
set_managed_storage(path, **kwargs)
|
144
|
+
|
145
|
+
@property
|
146
|
+
def storage_local(self) -> StorageSettings:
|
147
|
+
"""An additional local default storage (a path to its root).
|
148
|
+
|
149
|
+
Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
|
150
|
+
|
151
|
+
Guide: :doc:`faq/keep-artifacts-local`
|
152
|
+
"""
|
153
|
+
return ln_setup.settings.instance.storage_local
|
154
|
+
|
155
|
+
@storage_local.setter
|
156
|
+
def storage_local(self, local_root: Path):
|
157
|
+
ln_setup.settings.instance.storage_local = local_root
|
158
|
+
|
159
|
+
@property
|
160
|
+
def verbosity(self) -> str:
|
161
|
+
"""Logger verbosity (default `'warning'`).
|
162
|
+
|
163
|
+
- `'error'`: ❌ only show error messages
|
164
|
+
- `'warning'`: ❗ also show warning messages
|
165
|
+
- `'success'`: ✅ also show success and save messages
|
166
|
+
- `'info'`: 💡 also show info messages
|
167
|
+
- `'hint'`: 💡 also show hint messages
|
168
|
+
- `'debug'`: 🐛 also show detailed debug messages
|
169
|
+
"""
|
170
|
+
return VERBOSITY_TO_STR[self._verbosity_int]
|
171
|
+
|
172
|
+
@verbosity.setter
|
173
|
+
def verbosity(self, verbosity: str | int):
|
174
|
+
if isinstance(verbosity, str):
|
175
|
+
verbosity_int = VERBOSITY_TO_INT[verbosity]
|
176
|
+
else:
|
177
|
+
verbosity_int = verbosity
|
178
|
+
self._verbosity_int = verbosity_int
|
179
|
+
logger.set_verbosity(verbosity_int)
|
180
|
+
|
181
|
+
|
182
|
+
if os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
|
183
|
+
git_repo = None
|
184
|
+
else:
|
185
|
+
git_repo = setup_settings.instance.git_repo
|
186
|
+
|
187
|
+
settings = Settings(git_repo=git_repo)
|
lamindb/core/_sync_git.py
CHANGED
@@ -1,138 +1,138 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import subprocess
|
4
|
-
from pathlib import Path
|
5
|
-
|
6
|
-
from lamin_utils import logger
|
7
|
-
from lamindb_setup import settings as setup_settings
|
8
|
-
from lamindb_setup.core.hashing import hash_code
|
9
|
-
|
10
|
-
from ._settings import sanitize_git_repo_url, settings
|
11
|
-
|
12
|
-
|
13
|
-
class BlobHashNotFound(SystemExit):
|
14
|
-
pass
|
15
|
-
|
16
|
-
|
17
|
-
def get_git_repo_from_remote() -> Path:
|
18
|
-
repo_url = settings.sync_git_repo
|
19
|
-
repo_dir = setup_settings.storage.cache_dir / repo_url.split("/")[-1]
|
20
|
-
if repo_dir.exists():
|
21
|
-
logger.warning(f"git repo {repo_dir} already exists locally")
|
22
|
-
return repo_dir
|
23
|
-
logger.important(
|
24
|
-
f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
|
25
|
-
)
|
26
|
-
result = subprocess.run(
|
27
|
-
["git", "clone", "--depth", "10", f"{repo_url}.git"],
|
28
|
-
capture_output=True,
|
29
|
-
cwd=setup_settings.storage.cache_dir,
|
30
|
-
)
|
31
|
-
if result.returncode != 0 or not repo_dir.exists():
|
32
|
-
raise RuntimeError(result.stderr.decode())
|
33
|
-
return repo_dir
|
34
|
-
|
35
|
-
|
36
|
-
def check_local_git_repo() -> bool:
|
37
|
-
result = subprocess.run(
|
38
|
-
["git", "config", "--get remote.origin.url"],
|
39
|
-
capture_output=True,
|
40
|
-
)
|
41
|
-
result_str = result.stdout.decode().strip()
|
42
|
-
if result_str == "":
|
43
|
-
# running-not-in-a-git-repo
|
44
|
-
return False
|
45
|
-
else:
|
46
|
-
remote_url = sanitize_git_repo_url(result_str)
|
47
|
-
if remote_url == settings.sync_git_repo:
|
48
|
-
# running-in-correct-git-repo
|
49
|
-
return True
|
50
|
-
else:
|
51
|
-
# running-outside-of-correct-git-repo
|
52
|
-
return False
|
53
|
-
|
54
|
-
|
55
|
-
def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
|
56
|
-
command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
|
57
|
-
result = subprocess.run(
|
58
|
-
command,
|
59
|
-
capture_output=True,
|
60
|
-
cwd=repo_dir,
|
61
|
-
)
|
62
|
-
# we just care to find one commit
|
63
|
-
# hence, we split by new line ("\n") and use the first one
|
64
|
-
commit_hash = result.stdout.decode().split("\n")[0]
|
65
|
-
if commit_hash == "" or result.returncode == 1:
|
66
|
-
return None
|
67
|
-
else:
|
68
|
-
assert ( # noqa: S101
|
69
|
-
len(commit_hash) == 40
|
70
|
-
), f"commit hash |{commit_hash}| is not 40 characters long"
|
71
|
-
return commit_hash
|
72
|
-
|
73
|
-
|
74
|
-
def get_filepath_within_git_repo(
|
75
|
-
commit_hash: str, blob_hash: str, repo_dir: Path | None
|
76
|
-
) -> str:
|
77
|
-
# repo_dir might not point to the root of the
|
78
|
-
# the git repository because git log --find-object works
|
79
|
-
# from anywhere in the repo, hence, let's get the root
|
80
|
-
repo_root = (
|
81
|
-
subprocess.run(
|
82
|
-
["git", "rev-parse", "--show-toplevel"],
|
83
|
-
capture_output=True,
|
84
|
-
cwd=repo_dir,
|
85
|
-
)
|
86
|
-
.stdout.decode()
|
87
|
-
.strip()
|
88
|
-
)
|
89
|
-
# Run the git commands separately to circumvent spawning a shell
|
90
|
-
git_command = ["git", "ls-tree", "-r", commit_hash]
|
91
|
-
git_process = subprocess.Popen(
|
92
|
-
git_command,
|
93
|
-
stdout=subprocess.PIPE,
|
94
|
-
cwd=repo_root,
|
95
|
-
)
|
96
|
-
|
97
|
-
grep_command = ["grep", "-E", blob_hash]
|
98
|
-
result = subprocess.run(
|
99
|
-
grep_command,
|
100
|
-
stdin=git_process.stdout,
|
101
|
-
capture_output=True,
|
102
|
-
cwd=repo_root,
|
103
|
-
)
|
104
|
-
|
105
|
-
# Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
|
106
|
-
git_process.stdout.close()
|
107
|
-
git_process.wait()
|
108
|
-
|
109
|
-
command = " ".join(git_command) + " | " + " ".join(grep_command)
|
110
|
-
if result.returncode != 0 and result.stderr.decode() != "":
|
111
|
-
raise RuntimeError(f"{command}\n{result.stderr.decode()}")
|
112
|
-
if len(result.stdout.decode()) == 0:
|
113
|
-
raise RuntimeError(
|
114
|
-
f"Could not find path in git repo {settings.sync_git_repo} running:\n{command}"
|
115
|
-
f"\nin local clone: {repo_root}"
|
116
|
-
)
|
117
|
-
filepath = result.stdout.decode().split()[-1]
|
118
|
-
return filepath
|
119
|
-
|
120
|
-
|
121
|
-
def get_transform_reference_from_git_repo(path: Path) -> str:
|
122
|
-
blob_hash = hash_code(path).hexdigest()
|
123
|
-
commit_hash = None
|
124
|
-
if check_local_git_repo():
|
125
|
-
repo_dir = None
|
126
|
-
else:
|
127
|
-
repo_dir = get_git_repo_from_remote()
|
128
|
-
commit_hash = get_git_commit_hash(blob_hash, repo_dir=repo_dir)
|
129
|
-
if commit_hash is None:
|
130
|
-
if repo_dir is None:
|
131
|
-
repo_dir = Path.cwd()
|
132
|
-
raise BlobHashNotFound(
|
133
|
-
f"❌ Did not find blob hash {blob_hash} in git repo ({settings.sync_git_repo}) {repo_dir}\n"
|
134
|
-
f"Did you commit the script? -> {path}"
|
135
|
-
)
|
136
|
-
gitpath = get_filepath_within_git_repo(commit_hash, blob_hash, repo_dir)
|
137
|
-
reference = f"{settings.sync_git_repo}/blob/{commit_hash}/{gitpath}"
|
138
|
-
return reference
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import subprocess
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
from lamin_utils import logger
|
7
|
+
from lamindb_setup import settings as setup_settings
|
8
|
+
from lamindb_setup.core.hashing import hash_code
|
9
|
+
|
10
|
+
from ._settings import sanitize_git_repo_url, settings
|
11
|
+
|
12
|
+
|
13
|
+
class BlobHashNotFound(SystemExit):
|
14
|
+
pass
|
15
|
+
|
16
|
+
|
17
|
+
def get_git_repo_from_remote() -> Path:
|
18
|
+
repo_url = settings.sync_git_repo
|
19
|
+
repo_dir = setup_settings.storage.cache_dir / repo_url.split("/")[-1]
|
20
|
+
if repo_dir.exists():
|
21
|
+
logger.warning(f"git repo {repo_dir} already exists locally")
|
22
|
+
return repo_dir
|
23
|
+
logger.important(
|
24
|
+
f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
|
25
|
+
)
|
26
|
+
result = subprocess.run(
|
27
|
+
["git", "clone", "--depth", "10", f"{repo_url}.git"],
|
28
|
+
capture_output=True,
|
29
|
+
cwd=setup_settings.storage.cache_dir,
|
30
|
+
)
|
31
|
+
if result.returncode != 0 or not repo_dir.exists():
|
32
|
+
raise RuntimeError(result.stderr.decode())
|
33
|
+
return repo_dir
|
34
|
+
|
35
|
+
|
36
|
+
def check_local_git_repo() -> bool:
|
37
|
+
result = subprocess.run(
|
38
|
+
["git", "config", "--get remote.origin.url"],
|
39
|
+
capture_output=True,
|
40
|
+
)
|
41
|
+
result_str = result.stdout.decode().strip()
|
42
|
+
if result_str == "":
|
43
|
+
# running-not-in-a-git-repo
|
44
|
+
return False
|
45
|
+
else:
|
46
|
+
remote_url = sanitize_git_repo_url(result_str)
|
47
|
+
if remote_url == settings.sync_git_repo:
|
48
|
+
# running-in-correct-git-repo
|
49
|
+
return True
|
50
|
+
else:
|
51
|
+
# running-outside-of-correct-git-repo
|
52
|
+
return False
|
53
|
+
|
54
|
+
|
55
|
+
def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
|
56
|
+
command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
|
57
|
+
result = subprocess.run(
|
58
|
+
command,
|
59
|
+
capture_output=True,
|
60
|
+
cwd=repo_dir,
|
61
|
+
)
|
62
|
+
# we just care to find one commit
|
63
|
+
# hence, we split by new line ("\n") and use the first one
|
64
|
+
commit_hash = result.stdout.decode().split("\n")[0]
|
65
|
+
if commit_hash == "" or result.returncode == 1:
|
66
|
+
return None
|
67
|
+
else:
|
68
|
+
assert ( # noqa: S101
|
69
|
+
len(commit_hash) == 40
|
70
|
+
), f"commit hash |{commit_hash}| is not 40 characters long"
|
71
|
+
return commit_hash
|
72
|
+
|
73
|
+
|
74
|
+
def get_filepath_within_git_repo(
|
75
|
+
commit_hash: str, blob_hash: str, repo_dir: Path | None
|
76
|
+
) -> str:
|
77
|
+
# repo_dir might not point to the root of the
|
78
|
+
# the git repository because git log --find-object works
|
79
|
+
# from anywhere in the repo, hence, let's get the root
|
80
|
+
repo_root = (
|
81
|
+
subprocess.run(
|
82
|
+
["git", "rev-parse", "--show-toplevel"],
|
83
|
+
capture_output=True,
|
84
|
+
cwd=repo_dir,
|
85
|
+
)
|
86
|
+
.stdout.decode()
|
87
|
+
.strip()
|
88
|
+
)
|
89
|
+
# Run the git commands separately to circumvent spawning a shell
|
90
|
+
git_command = ["git", "ls-tree", "-r", commit_hash]
|
91
|
+
git_process = subprocess.Popen(
|
92
|
+
git_command,
|
93
|
+
stdout=subprocess.PIPE,
|
94
|
+
cwd=repo_root,
|
95
|
+
)
|
96
|
+
|
97
|
+
grep_command = ["grep", "-E", blob_hash]
|
98
|
+
result = subprocess.run(
|
99
|
+
grep_command,
|
100
|
+
stdin=git_process.stdout,
|
101
|
+
capture_output=True,
|
102
|
+
cwd=repo_root,
|
103
|
+
)
|
104
|
+
|
105
|
+
# Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
|
106
|
+
git_process.stdout.close()
|
107
|
+
git_process.wait()
|
108
|
+
|
109
|
+
command = " ".join(git_command) + " | " + " ".join(grep_command)
|
110
|
+
if result.returncode != 0 and result.stderr.decode() != "":
|
111
|
+
raise RuntimeError(f"{command}\n{result.stderr.decode()}")
|
112
|
+
if len(result.stdout.decode()) == 0:
|
113
|
+
raise RuntimeError(
|
114
|
+
f"Could not find path in git repo {settings.sync_git_repo} running:\n{command}"
|
115
|
+
f"\nin local clone: {repo_root}"
|
116
|
+
)
|
117
|
+
filepath = result.stdout.decode().split()[-1]
|
118
|
+
return filepath
|
119
|
+
|
120
|
+
|
121
|
+
def get_transform_reference_from_git_repo(path: Path) -> str:
|
122
|
+
blob_hash = hash_code(path).hexdigest()
|
123
|
+
commit_hash = None
|
124
|
+
if check_local_git_repo():
|
125
|
+
repo_dir = None
|
126
|
+
else:
|
127
|
+
repo_dir = get_git_repo_from_remote()
|
128
|
+
commit_hash = get_git_commit_hash(blob_hash, repo_dir=repo_dir)
|
129
|
+
if commit_hash is None:
|
130
|
+
if repo_dir is None:
|
131
|
+
repo_dir = Path.cwd()
|
132
|
+
raise BlobHashNotFound(
|
133
|
+
f"❌ Did not find blob hash {blob_hash} in git repo ({settings.sync_git_repo}) {repo_dir}\n"
|
134
|
+
f"Did you commit the script? -> {path}"
|
135
|
+
)
|
136
|
+
gitpath = get_filepath_within_git_repo(commit_hash, blob_hash, repo_dir)
|
137
|
+
reference = f"{settings.sync_git_repo}/blob/{commit_hash}/{gitpath}"
|
138
|
+
return reference
|