lamindb-0.76.8-py3-none-any.whl → lamindb-0.76.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/core/_settings.py CHANGED
@@ -1,187 +1,188 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- from typing import TYPE_CHECKING, Literal, Mapping
5
-
6
- import lamindb_setup as ln_setup
7
- from lamin_utils import logger
8
- from lamindb_setup._set_managed_storage import set_managed_storage
9
- from lamindb_setup.core._settings import settings as setup_settings
10
- from lamindb_setup.core._settings_instance import sanitize_git_repo_url
11
-
12
- from .subsettings._creation_settings import CreationSettings, creation_settings
13
- from .subsettings._transform_settings import TransformSettings, transform_settings
14
-
15
- if TYPE_CHECKING:
16
- from pathlib import Path
17
-
18
- from lamindb_setup.core._settings_storage import StorageSettings
19
- from upath import UPath
20
-
21
- VERBOSITY_TO_INT = {
22
- "error": 0, # 40
23
- "warning": 1, # 30
24
- "success": 2, # 25
25
- "info": 3, # 20
26
- "hint": 4, # 15
27
- "debug": 5, # 10
28
- }
29
- VERBOSITY_TO_STR: dict[int, str] = dict(
30
- [reversed(i) for i in VERBOSITY_TO_INT.items()] # type: ignore
31
- )
32
-
33
-
34
- class Settings:
35
- """Settings.
36
-
37
- Use ``lamindb.settings`` instead of instantiating this class yourself.
38
- """
39
-
40
- def __init__(self, git_repo: str | None):
41
- self._verbosity_int: int = 1 # warning-level logging
42
- logger.set_verbosity(self._verbosity_int)
43
- self._sync_git_repo: str | None = git_repo
44
-
45
- @property
46
- def creation(self) -> CreationSettings:
47
- """Record creation settings.
48
-
49
- For example, `ln.settings.creation.search_names = False` will disable
50
- searching for records with similar names during creation.
51
- """
52
- return creation_settings
53
-
54
- track_run_inputs: bool = True
55
- """Track files as input upon `.load()`, `.cache()` and `.open()`.
56
-
57
- Requires a global run context with :func:`~lamindb.core.Context.track` was created!
58
-
59
- FAQ: :doc:`/faq/track-run-inputs`
60
- """
61
- __using_key: str | None = None
62
- _using_storage: str | None = None
63
-
64
- @property
65
- def _using_key(self) -> str | None:
66
- """Key for Django database settings."""
67
- return self.__using_key
68
-
69
- @_using_key.setter
70
- def _using_key(self, value: str | None):
71
- ln_setup.settings._using_key = value
72
- self.__using_key = value
73
-
74
- @property
75
- def _storage_settings(self) -> ln_setup.core.StorageSettings:
76
- if self._using_storage is None:
77
- storage_settings = ln_setup.settings.storage
78
- else:
79
- storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
80
- return storage_settings
81
-
82
- @property
83
- def transform(self) -> TransformSettings:
84
- """Transform settings.
85
-
86
- Is deprecated since version 0.76.1.
87
- """
88
- # enable warning soon
89
- # logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
90
- return transform_settings
91
-
92
- @property
93
- def sync_git_repo(self) -> str | None:
94
- """Sync transforms with scripts in git repository.
95
-
96
- Provide the full git repo URL.
97
- """
98
- return self._sync_git_repo
99
-
100
- @sync_git_repo.setter
101
- def sync_git_repo(self, value) -> None:
102
- """Sync transforms with scripts in git repository.
103
-
104
- For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
105
- """
106
- self._sync_git_repo = sanitize_git_repo_url(value)
107
- if not self._sync_git_repo.startswith("https://"): # pragma: nocover
108
- raise ValueError("git repository URL must start with 'https://'.")
109
-
110
- @property
111
- def storage(self) -> StorageSettings:
112
- """Default storage location.
113
-
114
- Examples:
115
-
116
- >>> ln.settings.storage
117
- StorageSettings(root='s3://my-bucket', uid='j7MaPxtLxPeE')
118
-
119
- >>> ln.settings.storage.root
120
- UPath('s3://my-bucket')
121
-
122
- You can switch the default storage location to another managed storage
123
- location by passing a string:
124
-
125
- >>> ln.settings.storage = "s3://some-bucket"
126
-
127
- You can also pass additional fsspec kwargs via:
128
-
129
- >>> kwargs = dict(
130
- >>> profile="some_profile", # fsspec arg
131
- >>> cache_regions=True # fsspec arg for s3
132
- >>> )
133
- >>> ln.settings.storage = "s3://some-bucket", kwargs
134
- """
135
- return self._storage_settings
136
-
137
- @storage.setter
138
- def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
139
- if isinstance(path_kwargs, tuple):
140
- path, kwargs = path_kwargs
141
- else:
142
- path, kwargs = path_kwargs, {}
143
- set_managed_storage(path, **kwargs)
144
-
145
- @property
146
- def storage_local(self) -> StorageSettings:
147
- """An additional local default storage (a path to its root).
148
-
149
- Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
150
-
151
- Guide: :doc:`faq/keep-artifacts-local`
152
- """
153
- return ln_setup.settings.instance.storage_local
154
-
155
- @storage_local.setter
156
- def storage_local(self, local_root: Path):
157
- ln_setup.settings.instance.storage_local = local_root
158
-
159
- @property
160
- def verbosity(self) -> str:
161
- """Logger verbosity (default `'warning'`).
162
-
163
- - `'error'`: ❌ only show error messages
164
- - `'warning'`: also show warning messages
165
- - `'success'`: also show success and save messages
166
- - `'info'`: 💡 also show info messages
167
- - `'hint'`: 💡 also show hint messages
168
- - `'debug'`: 🐛 also show detailed debug messages
169
- """
170
- return VERBOSITY_TO_STR[self._verbosity_int]
171
-
172
- @verbosity.setter
173
- def verbosity(self, verbosity: str | int):
174
- if isinstance(verbosity, str):
175
- verbosity_int = VERBOSITY_TO_INT[verbosity]
176
- else:
177
- verbosity_int = verbosity
178
- self._verbosity_int = verbosity_int
179
- logger.set_verbosity(verbosity_int)
180
-
181
-
182
- if os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
183
- git_repo = None
184
- else:
185
- git_repo = setup_settings.instance.git_repo
186
-
187
- settings = Settings(git_repo=git_repo)
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import TYPE_CHECKING, Literal
5
+
6
+ import lamindb_setup as ln_setup
7
+ from lamin_utils import logger
8
+ from lamindb_setup._set_managed_storage import set_managed_storage
9
+ from lamindb_setup.core._settings import settings as setup_settings
10
+ from lamindb_setup.core._settings_instance import sanitize_git_repo_url
11
+
12
+ from .subsettings._creation_settings import CreationSettings, creation_settings
13
+ from .subsettings._transform_settings import TransformSettings, transform_settings
14
+
15
+ if TYPE_CHECKING:
16
+ from collections.abc import Mapping
17
+ from pathlib import Path
18
+
19
+ from lamindb_setup.core._settings_storage import StorageSettings
20
+ from upath import UPath
21
+
22
+ VERBOSITY_TO_INT = {
23
+ "error": 0, # 40
24
+ "warning": 1, # 30
25
+ "success": 2, # 25
26
+ "info": 3, # 20
27
+ "hint": 4, # 15
28
+ "debug": 5, # 10
29
+ }
30
+ VERBOSITY_TO_STR: dict[int, str] = dict(
31
+ [reversed(i) for i in VERBOSITY_TO_INT.items()] # type: ignore
32
+ )
33
+
34
+
35
+ class Settings:
36
+ """Settings.
37
+
38
+ Use ``lamindb.settings`` instead of instantiating this class yourself.
39
+ """
40
+
41
+ def __init__(self, git_repo: str | None):
42
+ self._verbosity_int: int = 1 # warning-level logging
43
+ logger.set_verbosity(self._verbosity_int)
44
+ self._sync_git_repo: str | None = git_repo
45
+
46
+ @property
47
+ def creation(self) -> CreationSettings:
48
+ """Record creation settings.
49
+
50
+ For example, `ln.settings.creation.search_names = False` will disable
51
+ searching for records with similar names during creation.
52
+ """
53
+ return creation_settings
54
+
55
+ track_run_inputs: bool = True
56
+ """Track files as input upon `.load()`, `.cache()` and `.open()`.
57
+
58
+ Requires a global run context with :func:`~lamindb.core.Context.track` was created!
59
+
60
+ FAQ: :doc:`/faq/track-run-inputs`
61
+ """
62
+ __using_key: str | None = None
63
+ _using_storage: str | None = None
64
+
65
+ @property
66
+ def _using_key(self) -> str | None:
67
+ """Key for Django database settings."""
68
+ return self.__using_key
69
+
70
+ @_using_key.setter
71
+ def _using_key(self, value: str | None):
72
+ ln_setup.settings._using_key = value
73
+ self.__using_key = value
74
+
75
+ @property
76
+ def _storage_settings(self) -> ln_setup.core.StorageSettings:
77
+ if self._using_storage is None:
78
+ storage_settings = ln_setup.settings.storage
79
+ else:
80
+ storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
81
+ return storage_settings
82
+
83
+ @property
84
+ def transform(self) -> TransformSettings:
85
+ """Transform settings.
86
+
87
+ Is deprecated since version 0.76.1.
88
+ """
89
+ # enable warning soon
90
+ # logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
91
+ return transform_settings
92
+
93
+ @property
94
+ def sync_git_repo(self) -> str | None:
95
+ """Sync transforms with scripts in git repository.
96
+
97
+ Provide the full git repo URL.
98
+ """
99
+ return self._sync_git_repo
100
+
101
+ @sync_git_repo.setter
102
+ def sync_git_repo(self, value) -> None:
103
+ """Sync transforms with scripts in git repository.
104
+
105
+ For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
106
+ """
107
+ self._sync_git_repo = sanitize_git_repo_url(value)
108
+ if not self._sync_git_repo.startswith("https://"): # pragma: nocover
109
+ raise ValueError("git repository URL must start with 'https://'.")
110
+
111
+ @property
112
+ def storage(self) -> StorageSettings:
113
+ """Default storage location.
114
+
115
+ Examples:
116
+
117
+ >>> ln.settings.storage
118
+ StorageSettings(root='s3://my-bucket', uid='j7MaPxtLxPeE')
119
+
120
+ >>> ln.settings.storage.root
121
+ UPath('s3://my-bucket')
122
+
123
+ You can switch the default storage location to another managed storage
124
+ location by passing a string:
125
+
126
+ >>> ln.settings.storage = "s3://some-bucket"
127
+
128
+ You can also pass additional fsspec kwargs via:
129
+
130
+ >>> kwargs = dict(
131
+ >>> profile="some_profile", # fsspec arg
132
+ >>> cache_regions=True # fsspec arg for s3
133
+ >>> )
134
+ >>> ln.settings.storage = "s3://some-bucket", kwargs
135
+ """
136
+ return self._storage_settings
137
+
138
+ @storage.setter
139
+ def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
140
+ if isinstance(path_kwargs, tuple):
141
+ path, kwargs = path_kwargs
142
+ else:
143
+ path, kwargs = path_kwargs, {}
144
+ set_managed_storage(path, **kwargs)
145
+
146
+ @property
147
+ def storage_local(self) -> StorageSettings:
148
+ """An additional local default storage (a path to its root).
149
+
150
+ Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
151
+
152
+ Guide: :doc:`faq/keep-artifacts-local`
153
+ """
154
+ return ln_setup.settings.instance.storage_local
155
+
156
+ @storage_local.setter
157
+ def storage_local(self, local_root: Path):
158
+ ln_setup.settings.instance.storage_local = local_root
159
+
160
+ @property
161
+ def verbosity(self) -> str:
162
+ """Logger verbosity (default `'warning'`).
163
+
164
+ - `'error'`: only show error messages
165
+ - `'warning'`: also show warning messages
166
+ - `'success'`: also show success and save messages
167
+ - `'info'`: 💡 also show info messages
168
+ - `'hint'`: 💡 also show hint messages
169
+ - `'debug'`: 🐛 also show detailed debug messages
170
+ """
171
+ return VERBOSITY_TO_STR[self._verbosity_int]
172
+
173
+ @verbosity.setter
174
+ def verbosity(self, verbosity: str | int):
175
+ if isinstance(verbosity, str):
176
+ verbosity_int = VERBOSITY_TO_INT[verbosity]
177
+ else:
178
+ verbosity_int = verbosity
179
+ self._verbosity_int = verbosity_int
180
+ logger.set_verbosity(verbosity_int)
181
+
182
+
183
+ if os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
184
+ git_repo = None
185
+ else:
186
+ git_repo = setup_settings.instance.git_repo
187
+
188
+ settings = Settings(git_repo=git_repo)
lamindb/core/_sync_git.py CHANGED
@@ -1,138 +1,138 @@
1
- from __future__ import annotations
2
-
3
- import subprocess
4
- from pathlib import Path
5
-
6
- from lamin_utils import logger
7
- from lamindb_setup import settings as setup_settings
8
- from lamindb_setup.core.hashing import hash_code
9
-
10
- from ._settings import sanitize_git_repo_url, settings
11
-
12
-
13
- class BlobHashNotFound(SystemExit):
14
- pass
15
-
16
-
17
- def get_git_repo_from_remote() -> Path:
18
- repo_url = settings.sync_git_repo
19
- repo_dir = setup_settings.storage.cache_dir / repo_url.split("/")[-1]
20
- if repo_dir.exists():
21
- logger.warning(f"git repo {repo_dir} already exists locally")
22
- return repo_dir
23
- logger.important(
24
- f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
25
- )
26
- result = subprocess.run(
27
- ["git", "clone", "--depth", "10", f"{repo_url}.git"],
28
- capture_output=True,
29
- cwd=setup_settings.storage.cache_dir,
30
- )
31
- if result.returncode != 0 or not repo_dir.exists():
32
- raise RuntimeError(result.stderr.decode())
33
- return repo_dir
34
-
35
-
36
- def check_local_git_repo() -> bool:
37
- result = subprocess.run(
38
- ["git", "config", "--get remote.origin.url"],
39
- capture_output=True,
40
- )
41
- result_str = result.stdout.decode().strip()
42
- if result_str == "":
43
- # running-not-in-a-git-repo
44
- return False
45
- else:
46
- remote_url = sanitize_git_repo_url(result_str)
47
- if remote_url == settings.sync_git_repo:
48
- # running-in-correct-git-repo
49
- return True
50
- else:
51
- # running-outside-of-correct-git-repo
52
- return False
53
-
54
-
55
- def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
56
- command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
57
- result = subprocess.run(
58
- command,
59
- capture_output=True,
60
- cwd=repo_dir,
61
- )
62
- # we just care to find one commit
63
- # hence, we split by new line ("\n") and use the first one
64
- commit_hash = result.stdout.decode().split("\n")[0]
65
- if commit_hash == "" or result.returncode == 1:
66
- return None
67
- else:
68
- assert ( # noqa: S101
69
- len(commit_hash) == 40
70
- ), f"commit hash |{commit_hash}| is not 40 characters long"
71
- return commit_hash
72
-
73
-
74
- def get_filepath_within_git_repo(
75
- commit_hash: str, blob_hash: str, repo_dir: Path | None
76
- ) -> str:
77
- # repo_dir might not point to the root of the
78
- # the git repository because git log --find-object works
79
- # from anywhere in the repo, hence, let's get the root
80
- repo_root = (
81
- subprocess.run(
82
- ["git", "rev-parse", "--show-toplevel"],
83
- capture_output=True,
84
- cwd=repo_dir,
85
- )
86
- .stdout.decode()
87
- .strip()
88
- )
89
- # Run the git commands separately to circumvent spawning a shell
90
- git_command = ["git", "ls-tree", "-r", commit_hash]
91
- git_process = subprocess.Popen(
92
- git_command,
93
- stdout=subprocess.PIPE,
94
- cwd=repo_root,
95
- )
96
-
97
- grep_command = ["grep", "-E", blob_hash]
98
- result = subprocess.run(
99
- grep_command,
100
- stdin=git_process.stdout,
101
- capture_output=True,
102
- cwd=repo_root,
103
- )
104
-
105
- # Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
106
- git_process.stdout.close()
107
- git_process.wait()
108
-
109
- command = " ".join(git_command) + " | " + " ".join(grep_command)
110
- if result.returncode != 0 and result.stderr.decode() != "":
111
- raise RuntimeError(f"{command}\n{result.stderr.decode()}")
112
- if len(result.stdout.decode()) == 0:
113
- raise RuntimeError(
114
- f"Could not find path in git repo {settings.sync_git_repo} running:\n{command}"
115
- f"\nin local clone: {repo_root}"
116
- )
117
- filepath = result.stdout.decode().split()[-1]
118
- return filepath
119
-
120
-
121
- def get_transform_reference_from_git_repo(path: Path) -> str:
122
- blob_hash = hash_code(path).hexdigest()
123
- commit_hash = None
124
- if check_local_git_repo():
125
- repo_dir = None
126
- else:
127
- repo_dir = get_git_repo_from_remote()
128
- commit_hash = get_git_commit_hash(blob_hash, repo_dir=repo_dir)
129
- if commit_hash is None:
130
- if repo_dir is None:
131
- repo_dir = Path.cwd()
132
- raise BlobHashNotFound(
133
- f"❌ Did not find blob hash {blob_hash} in git repo ({settings.sync_git_repo}) {repo_dir}\n"
134
- f"Did you commit the script? -> {path}"
135
- )
136
- gitpath = get_filepath_within_git_repo(commit_hash, blob_hash, repo_dir)
137
- reference = f"{settings.sync_git_repo}/blob/{commit_hash}/{gitpath}"
138
- return reference
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+
6
+ from lamin_utils import logger
7
+ from lamindb_setup import settings as setup_settings
8
+ from lamindb_setup.core.hashing import hash_code
9
+
10
+ from ._settings import sanitize_git_repo_url, settings
11
+
12
+
13
+ class BlobHashNotFound(SystemExit):
14
+ pass
15
+
16
+
17
+ def get_git_repo_from_remote() -> Path:
18
+ repo_url = settings.sync_git_repo
19
+ repo_dir = setup_settings.storage.cache_dir / repo_url.split("/")[-1]
20
+ if repo_dir.exists():
21
+ logger.warning(f"git repo {repo_dir} already exists locally")
22
+ return repo_dir
23
+ logger.important(
24
+ f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
25
+ )
26
+ result = subprocess.run(
27
+ ["git", "clone", "--depth", "10", f"{repo_url}.git"],
28
+ capture_output=True,
29
+ cwd=setup_settings.storage.cache_dir,
30
+ )
31
+ if result.returncode != 0 or not repo_dir.exists():
32
+ raise RuntimeError(result.stderr.decode())
33
+ return repo_dir
34
+
35
+
36
+ def check_local_git_repo() -> bool:
37
+ result = subprocess.run(
38
+ ["git", "config", "--get remote.origin.url"],
39
+ capture_output=True,
40
+ )
41
+ result_str = result.stdout.decode().strip()
42
+ if result_str == "":
43
+ # running-not-in-a-git-repo
44
+ return False
45
+ else:
46
+ remote_url = sanitize_git_repo_url(result_str)
47
+ if remote_url == settings.sync_git_repo:
48
+ # running-in-correct-git-repo
49
+ return True
50
+ else:
51
+ # running-outside-of-correct-git-repo
52
+ return False
53
+
54
+
55
+ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
56
+ command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
57
+ result = subprocess.run(
58
+ command,
59
+ capture_output=True,
60
+ cwd=repo_dir,
61
+ )
62
+ # we just care to find one commit
63
+ # hence, we split by new line ("\n") and use the first one
64
+ commit_hash = result.stdout.decode().split("\n")[0]
65
+ if commit_hash == "" or result.returncode == 1:
66
+ return None
67
+ else:
68
+ assert ( # noqa: S101
69
+ len(commit_hash) == 40
70
+ ), f"commit hash |{commit_hash}| is not 40 characters long"
71
+ return commit_hash
72
+
73
+
74
+ def get_filepath_within_git_repo(
75
+ commit_hash: str, blob_hash: str, repo_dir: Path | None
76
+ ) -> str:
77
+ # repo_dir might not point to the root of the
78
+ # the git repository because git log --find-object works
79
+ # from anywhere in the repo, hence, let's get the root
80
+ repo_root = (
81
+ subprocess.run(
82
+ ["git", "rev-parse", "--show-toplevel"],
83
+ capture_output=True,
84
+ cwd=repo_dir,
85
+ )
86
+ .stdout.decode()
87
+ .strip()
88
+ )
89
+ # Run the git commands separately to circumvent spawning a shell
90
+ git_command = ["git", "ls-tree", "-r", commit_hash]
91
+ git_process = subprocess.Popen(
92
+ git_command,
93
+ stdout=subprocess.PIPE,
94
+ cwd=repo_root,
95
+ )
96
+
97
+ grep_command = ["grep", "-E", blob_hash]
98
+ result = subprocess.run(
99
+ grep_command,
100
+ stdin=git_process.stdout,
101
+ capture_output=True,
102
+ cwd=repo_root,
103
+ )
104
+
105
+ # Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
106
+ git_process.stdout.close()
107
+ git_process.wait()
108
+
109
+ command = " ".join(git_command) + " | " + " ".join(grep_command)
110
+ if result.returncode != 0 and result.stderr.decode() != "":
111
+ raise RuntimeError(f"{command}\n{result.stderr.decode()}")
112
+ if len(result.stdout.decode()) == 0:
113
+ raise RuntimeError(
114
+ f"Could not find path in git repo {settings.sync_git_repo} running:\n{command}"
115
+ f"\nin local clone: {repo_root}"
116
+ )
117
+ filepath = result.stdout.decode().split()[-1]
118
+ return filepath
119
+
120
+
121
+ def get_transform_reference_from_git_repo(path: Path) -> str:
122
+ blob_hash = hash_code(path).hexdigest()
123
+ commit_hash = None
124
+ if check_local_git_repo():
125
+ repo_dir = None
126
+ else:
127
+ repo_dir = get_git_repo_from_remote()
128
+ commit_hash = get_git_commit_hash(blob_hash, repo_dir=repo_dir)
129
+ if commit_hash is None:
130
+ if repo_dir is None:
131
+ repo_dir = Path.cwd()
132
+ raise BlobHashNotFound(
133
+ f"❌ Did not find blob hash {blob_hash} in git repo ({settings.sync_git_repo}) {repo_dir}\n"
134
+ f"Did you commit the script? -> {path}"
135
+ )
136
+ gitpath = get_filepath_within_git_repo(commit_hash, blob_hash, repo_dir)
137
+ reference = f"{settings.sync_git_repo}/blob/{commit_hash}/{gitpath}"
138
+ return reference