lamindb_setup 1.12.2__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb_setup/__init__.py CHANGED
@@ -35,7 +35,7 @@ Modules & settings:
35
35
 
36
36
  """
37
37
 
38
- __version__ = "1.12.2" # denote a release candidate for 0.1.0 with 0.1rc1
38
+ __version__ = "1.13.0" # denote a release candidate for 0.1.0 with 0.1rc1
39
39
 
40
40
  import os
41
41
  import warnings
@@ -23,7 +23,7 @@ from .core._settings import settings
23
23
  from .core._settings_instance import InstanceSettings
24
24
  from .core._settings_load import load_instance_settings
25
25
  from .core._settings_storage import StorageSettings
26
- from .core._settings_store import instance_settings_file, settings_dir
26
+ from .core._settings_store import instance_settings_file
27
27
  from .core.cloud_sqlite_locker import unlock_cloud_sqlite_upon_exception
28
28
  from .core.django import reset_django
29
29
  from .errors import CannotSwitchDefaultInstance
lamindb_setup/_migrate.py CHANGED
@@ -98,6 +98,27 @@ class migrate:
98
98
 
99
99
  @classmethod
100
100
  def deploy(cls, package_name: str | None = None, number: int | None = None) -> None:
101
+ import os
102
+
103
+ # NOTE: this is a temporary solution to avoid breaking tests
104
+ LAMIN_MIGRATE_ON_LAMBDA = (
105
+ os.environ.get("LAMIN_MIGRATE_ON_LAMBDA", "false") == "true"
106
+ )
107
+
108
+ if settings.instance.is_on_hub and LAMIN_MIGRATE_ON_LAMBDA:
109
+ response = httpx.post(
110
+ f"{settings.instance.api_url}/instances/{settings.instance._id}/migrate",
111
+ headers={"Authorization": f"Bearer {settings.user.access_token}"},
112
+ )
113
+ if response.status_code != 200:
114
+ raise Exception(f"Failed to migrate instance: {response.text}")
115
+ else:
116
+ cls._deploy(package_name=package_name, number=number)
117
+
118
+ @classmethod
119
+ def _deploy(
120
+ cls, package_name: str | None = None, number: int | None = None
121
+ ) -> None:
101
122
  """Deploy a migration."""
102
123
  from lamindb_setup._connect_instance import connect
103
124
  from lamindb_setup._schema_metadata import update_schema_in_hub
@@ -143,13 +164,6 @@ class migrate:
143
164
  logger.important(f"updating lamindb version in hub: {lamindb.__version__}")
144
165
  if settings.instance.dialect != "sqlite":
145
166
  update_schema_in_hub()
146
- logger.warning(
147
- "clearing instance cache in hub; if this fails, re-run with latest lamindb version"
148
- )
149
- httpx.delete(
150
- f"{settings.instance.api_url}/cache/instances/{settings.instance._id.hex}",
151
- headers={"Authorization": f"Bearer {settings.user.access_token}"},
152
- )
153
167
  call_with_fallback_auth(
154
168
  update_instance,
155
169
  instance_id=settings.instance._id.hex,
@@ -6,16 +6,6 @@ import json
6
6
  from typing import TYPE_CHECKING, Literal
7
7
  from uuid import UUID
8
8
 
9
- from django.db.models import (
10
- Field,
11
- ForeignKey,
12
- ForeignObjectRel,
13
- ManyToManyField,
14
- ManyToManyRel,
15
- ManyToOneRel,
16
- OneToOneField,
17
- OneToOneRel,
18
- )
19
9
  from lamin_utils import logger
20
10
  from pydantic import BaseModel
21
11
 
@@ -34,6 +24,16 @@ except Exception:
34
24
 
35
25
 
36
26
  if TYPE_CHECKING:
27
+ from django.db.models import (
28
+ Field,
29
+ ForeignKey,
30
+ ForeignObjectRel,
31
+ ManyToManyField,
32
+ ManyToManyRel,
33
+ ManyToOneRel,
34
+ OneToOneField,
35
+ OneToOneRel,
36
+ )
37
37
  from supabase import Client
38
38
 
39
39
 
@@ -285,6 +285,9 @@ class _ModelHandler:
285
285
 
286
286
  column = None
287
287
  if relation_type not in ["many-to-many", "one-to-many"]:
288
+ # have to reload it here in case reset happened
289
+ from django.db.models import ForeignObjectRel
290
+
288
291
  if not isinstance(field, ForeignObjectRel):
289
292
  column = field.column
290
293
 
@@ -316,6 +319,8 @@ class _ModelHandler:
316
319
 
317
320
  @staticmethod
318
321
  def _get_through_many_to_many(field_or_rel: ManyToManyField | ManyToManyRel):
322
+ # have to reload it here in case reset happened
323
+ from django.db.models import ManyToManyField, ManyToManyRel
319
324
  from lamindb.models import Registry
320
325
 
321
326
  if isinstance(field_or_rel, ManyToManyField):
@@ -349,6 +354,9 @@ class _ModelHandler:
349
354
  def _get_through(
350
355
  self, field_or_rel: ForeignKey | OneToOneField | ManyToOneRel | OneToOneRel
351
356
  ):
357
+ # have to reload it here in case reset happened
358
+ from django.db.models import ForeignObjectRel
359
+
352
360
  if isinstance(field_or_rel, ForeignObjectRel):
353
361
  rel_1 = field_or_rel.field.related_fields[0][0]
354
362
  rel_2 = field_or_rel.field.related_fields[0][1]
@@ -13,6 +13,7 @@ Settings:
13
13
  """
14
14
 
15
15
  from . import django, upath
16
+ from ._clone import connect_local_sqlite, init_local_sqlite
16
17
  from ._deprecated import deprecated
17
18
  from ._docs import doc_args
18
19
  from ._settings import SetupSettings
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  import time
6
+ from typing import Any
6
7
 
7
8
  from lamin_utils import logger
8
9
  from upath import UPath
@@ -55,11 +56,17 @@ class AWSOptionsManager:
55
56
  self._credentials_cache = {}
56
57
  self._parameters_cache = {} # this is not refreshed
57
58
 
59
+ from aiobotocore.session import AioSession
58
60
  from s3fs import S3FileSystem
59
61
 
60
62
  # this is cached so will be reused with the connection initialized
63
+ # these options are set for paths in _path_inject_options
64
+ # here we set the same options to cache the filesystem
61
65
  fs = S3FileSystem(
62
- cache_regions=True, use_listings_cache=True, version_aware=False
66
+ cache_regions=True,
67
+ use_listings_cache=True,
68
+ version_aware=False,
69
+ config_kwargs={"max_pool_connections": 64},
63
70
  )
64
71
 
65
72
  self._suppress_aiobotocore_traceback_logging()
@@ -82,6 +89,12 @@ class AWSOptionsManager:
82
89
  except Exception:
83
90
  self.anon_public = True
84
91
 
92
+ empty_session = AioSession(profile="lamindb_empty_profile")
93
+ empty_session.full_config["profiles"]["lamindb_empty_profile"] = {}
94
+ # this is set downstream to avoid using local configs when we provide credentials
95
+ # or when we set anon=True
96
+ self.empty_session = empty_session
97
+
85
98
  def _find_root(self, path_str: str) -> str | None:
86
99
  roots = self._credentials_cache.keys()
87
100
  if path_str in roots:
@@ -109,35 +122,47 @@ class AWSOptionsManager:
109
122
  def _path_inject_options(
110
123
  self, path: UPath, credentials: dict, extra_parameters: dict | None = None
111
124
  ) -> UPath:
125
+ connection_options: dict[str, Any] = {}
126
+ storage_options = path.storage_options
112
127
  if credentials == {}:
113
- # credentials were specified manually for the path
114
- if "anon" in path.storage_options:
115
- anon = path.storage_options["anon"]
116
- elif path.fs.key is not None and path.fs.secret is not None:
117
- anon = False
118
- else:
128
+ # otherwise credentials were specified manually for the path
129
+ if "anon" not in storage_options and (
130
+ path.fs.key is None or path.fs.secret is None
131
+ ):
119
132
  anon = self.anon
120
133
  if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
121
134
  anon = True
122
- connection_options = {"anon": anon}
135
+ if anon:
136
+ connection_options["anon"] = anon
137
+ connection_options["session"] = self.empty_session
123
138
  else:
124
- connection_options = credentials
139
+ connection_options.update(credentials)
140
+ connection_options["session"] = self.empty_session
125
141
 
126
- if "cache_regions" in path.storage_options:
127
- connection_options["cache_regions"] = path.storage_options["cache_regions"]
142
+ if "cache_regions" in storage_options:
143
+ connection_options["cache_regions"] = storage_options["cache_regions"]
128
144
  else:
129
145
  connection_options["cache_regions"] = (
130
- path.storage_options.get("endpoint_url", None) is None
146
+ storage_options.get("endpoint_url", None) is None
131
147
  )
132
148
  # we use cache to avoid some unneeded downloads or credential problems
133
149
  # see in upload_from
134
- connection_options["use_listings_cache"] = path.storage_options.get(
150
+ connection_options["use_listings_cache"] = storage_options.get(
135
151
  "use_listings_cache", True
136
152
  )
137
153
  # normally we want to ignore object versions in a versioned bucket
138
- connection_options["version_aware"] = path.storage_options.get(
154
+ connection_options["version_aware"] = storage_options.get(
139
155
  "version_aware", False
140
156
  )
157
+ # this is for better concurrency as the default batch_size is 128
158
+ # read https://github.com/laminlabs/lamindb-setup/pull/1146
159
+ if "config_kwargs" not in storage_options:
160
+ connection_options["config_kwargs"] = {"max_pool_connections": 64}
161
+ elif "max_pool_connections" not in (
162
+ config_kwargs := storage_options["config_kwargs"]
163
+ ):
164
+ config_kwargs["max_pool_connections"] = 64
165
+ connection_options["config_kwargs"] = config_kwargs
141
166
 
142
167
  if extra_parameters:
143
168
  connection_options.update(extra_parameters)
@@ -152,6 +177,7 @@ class AWSOptionsManager:
152
177
  if "r2.cloudflarestorage.com" in endpoint_url:
153
178
  # fixed_upload_size should always be True for R2
154
179
  # this option is needed for correct uploads to R2
180
+ # TODO: maybe set max_pool_connections=64 here also
155
181
  path = UPath(path, fixed_upload_size=True)
156
182
  return path
157
183
  # trailing slash is needed to avoid returning incorrect results with .startswith
@@ -0,0 +1,93 @@
1
+ """Utilities to copy, clone and load Postgres instances as local SQLite databases.
2
+
3
+ .. autosummary::
4
+ :toctree:
5
+
6
+ init_local_sqlite
7
+ connect_local_sqlite
8
+ """
9
+
10
+ import os
11
+
12
+ from lamindb_setup.core._settings_instance import InstanceSettings
13
+ from lamindb_setup.core._settings_load import load_instance_settings
14
+ from lamindb_setup.core._settings_store import instance_settings_file
15
+ from lamindb_setup.core.django import reset_django
16
+
17
+
18
+ def init_local_sqlite(
19
+ instance: str | None = None, copy_suffix: str | None = None
20
+ ) -> None:
21
+ """Initialize SQLite copy of an existing Postgres instance.
22
+
23
+ Creates a SQLite database with the same schema as the source Postgres instance.
24
+ The copy shares the same storage location as the original instance.
25
+
26
+ The copy is intended for read-only access to instance data without requiring a Postgres connection.
27
+ Data synchronization to complete the clone happens via a separate Lambda function.
28
+
29
+ Note that essential user, branch and storage tables are missing.
30
+ Therefore, it is not possible to store Artifacts without having replayed these records first.
31
+
32
+ Args:
33
+ instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
34
+ If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
35
+ If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
36
+ copy_suffix: Optional suffix to append to the local clone name.
37
+ """
38
+ import lamindb_setup as ln_setup
39
+
40
+ if instance is None: # pragma: no cover
41
+ instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
42
+
43
+ if instance is None:
44
+ raise ValueError(
45
+ "No instance identifier provided and LAMIN_CURRENT_INSTANCE is not set"
46
+ )
47
+
48
+ if ln_setup.settings.instance is None: # pragma: no cover
49
+ ln_setup.connect(instance)
50
+
51
+ name = (
52
+ f"{ln_setup.settings.instance.name}{copy_suffix}"
53
+ if copy_suffix is not None
54
+ else ln_setup.settings.instance.name
55
+ )
56
+ isettings = InstanceSettings(
57
+ id=ln_setup.settings.instance._id,
58
+ owner=ln_setup.settings.instance.owner, # type: ignore
59
+ name=name,
60
+ storage=ln_setup.settings.storage,
61
+ db=None,
62
+ modules=",".join(ln_setup.settings.instance.modules),
63
+ is_on_hub=False,
64
+ )
65
+
66
+ isettings._persist(write_to_disk=True)
67
+
68
+ if not isettings._sqlite_file_local.exists():
69
+ # Reset Django configuration before _init_db() because Django was already configured for the original Postgres instance.
70
+ # Without this reset, the if not settings.configured check in setup_django() would skip reconfiguration,
71
+ # causing migrations to run against the old Postgres database instead of the new SQLite clone database.
72
+ reset_django()
73
+ isettings._init_db()
74
+
75
+
76
+ def connect_local_sqlite(instance: str) -> None:
77
+ """Load a SQLite instance of which a remote hub Postgres instance exists.
78
+
79
+ This function bypasses the hub lookup that `lamin connect` performs, loading the SQLite clone directly from local settings files.
80
+ The clone must first be created via `init_local_sqlite()`.
81
+
82
+ Args:
83
+ instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
84
+ """
85
+ owner, name = instance.split("/")
86
+ settings_file = instance_settings_file(name=name, owner=owner)
87
+
88
+ if not settings_file.exists():
89
+ raise ValueError("SQLite clone not found. Run init_local_sqlite() first.")
90
+
91
+ isettings = load_instance_settings(settings_file)
92
+ isettings._persist(write_to_disk=False)
93
+ isettings._load_db()
@@ -13,6 +13,8 @@ from postgrest.exceptions import APIError
13
13
 
14
14
  from lamindb_setup._migrate import check_whether_migrations_in_sync
15
15
 
16
+ from ._aws_options import HOSTED_REGIONS
17
+ from ._aws_storage import find_closest_aws_region
16
18
  from ._hub_client import (
17
19
  call_with_fallback,
18
20
  call_with_fallback_auth,
@@ -420,6 +422,54 @@ def _init_instance_hub(
420
422
  logger.important(f"go to: https://lamin.ai/{slug}")
421
423
 
422
424
 
425
+ def _get_default_bucket_for_instance(
426
+ instance_id: UUID | None, region: str | None, client: Client
427
+ ):
428
+ if instance_id is not None:
429
+ bucket_base = (
430
+ client.rpc(
431
+ "get_api_server_default_bucket_by_instance_id",
432
+ {"p_instance_id": instance_id.hex},
433
+ )
434
+ .execute()
435
+ .data
436
+ )
437
+ if bucket_base is not None:
438
+ return f"s3://{bucket_base}"
439
+
440
+ if os.getenv("LAMIN_ENV") in {None, "prod"}:
441
+ if region is None:
442
+ region = find_closest_aws_region()
443
+ elif region not in HOSTED_REGIONS:
444
+ raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
445
+ root = f"s3://lamin-{region}"
446
+ else:
447
+ root = "s3://lamin-hosted-test"
448
+
449
+ return root
450
+
451
+
452
+ # pass None if initializing an instance
453
+ # this can be from the api server attached to the instance or the default bucket
454
+ # that we use for instances with no api servers attached
455
+ def get_default_bucket_for_instance(
456
+ instance_id: UUID | None, region: str | None = None, access_token: str | None = None
457
+ ):
458
+ if settings.user.handle != "anonymous" or access_token is not None:
459
+ return call_with_fallback_auth(
460
+ _get_default_bucket_for_instance,
461
+ instance_id=instance_id,
462
+ region=region,
463
+ access_token=access_token,
464
+ )
465
+ else:
466
+ return call_with_fallback(
467
+ _get_default_bucket_for_instance,
468
+ region=region,
469
+ instance_id=instance_id,
470
+ )
471
+
472
+
423
473
  def _connect_instance_hub(
424
474
  owner: str, # account_handle
425
475
  name: str, # instance_name
@@ -321,25 +321,28 @@ class SetupSettings:
321
321
 
322
322
  def __repr__(self) -> str:
323
323
  """Rich string representation."""
324
+ from lamin_utils import colors
325
+
324
326
  # do not show current setting representation when building docs
325
327
  if "sphinx" in sys.modules:
326
328
  return object.__repr__(self)
329
+
327
330
  repr = ""
328
331
  if self._instance_exists:
329
- repr += "Current branch & space:\n"
332
+ instance_rep = self.instance.__repr__().split("\n")
333
+ repr += f"{colors.cyan('Instance:')} {instance_rep[0].replace('Instance: ', '')}\n"
334
+ repr += f" - work-dir: {self.work_dir}\n"
330
335
  repr += f" - branch: {self._read_branch_idlike_name()[1]}\n"
331
- repr += f" - space: {self._read_space_idlike_name()[1]}\n"
332
- repr += self.instance.__repr__()
336
+ repr += f" - space: {self._read_space_idlike_name()[1]}"
337
+ repr += f"\n{colors.yellow('Details:')}\n"
338
+ repr += "\n".join(instance_rep[1:])
333
339
  else:
334
- repr += "Current instance: None"
335
- repr += "\nConfig:\n"
336
- repr += f" - private Django API: {self.private_django_api}\n"
337
- repr += "Local directories:\n"
338
- repr += f" - working directory: {self.work_dir}\n"
340
+ repr += f"{colors.cyan('Instance:')} None"
341
+ repr += f"\n{colors.blue('Cache & settings:')}\n"
339
342
  repr += f" - cache: {self.cache_dir.as_posix()}\n"
340
343
  repr += f" - user settings: {settings_dir.as_posix()}\n"
341
- repr += f" - system settings: {system_settings_dir.as_posix()}\n"
342
- repr += self.user.__repr__()
344
+ repr += f" - system settings: {system_settings_dir.as_posix()}"
345
+ repr += f"\n{colors.green('User:')} {self.user.handle}"
343
346
  return repr
344
347
 
345
348
 
@@ -112,8 +112,8 @@ class InstanceSettings:
112
112
 
113
113
  def __repr__(self):
114
114
  """Rich string representation."""
115
- representation = "Current instance:"
116
- attrs = ["slug", "storage", "db", "modules", "git_repo"]
115
+ representation = f"Instance: {self.slug}"
116
+ attrs = ["storage", "db", "modules", "git_repo"]
117
117
  for attr in attrs:
118
118
  value = getattr(self, attr)
119
119
  if attr == "storage":
@@ -145,8 +145,8 @@ class InstanceSettings:
145
145
  else:
146
146
  db_print = value
147
147
  representation += f"\n - {attr}: {db_print}"
148
- elif attr == "modules":
149
- representation += f"\n - {attr}: {value if value else '{}'}"
148
+ elif attr == "modules" and value:
149
+ representation += f"\n - {attr}: {', '.join(value)}"
150
150
  else:
151
151
  representation += f"\n - {attr}: {value}"
152
152
  return representation
@@ -342,7 +342,12 @@ class InstanceSettings:
342
342
 
343
343
  Provide the full git repo URL.
344
344
  """
345
- return self._git_repo
345
+ if self._git_repo is not None:
346
+ return self._git_repo
347
+ elif os.environ.get("LAMINDB_SYNC_GIT_REPO") is not None:
348
+ return sanitize_git_repo_url(os.environ["LAMINDB_SYNC_GIT_REPO"])
349
+ else:
350
+ return None
346
351
 
347
352
  @property
348
353
  def api_url(self) -> str | None:
@@ -101,7 +101,11 @@ def init_storage(
101
101
  StorageSettings,
102
102
  Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
103
103
  ]:
104
- from ._hub_core import delete_storage_record, init_storage_hub
104
+ from ._hub_core import (
105
+ delete_storage_record,
106
+ get_default_bucket_for_instance,
107
+ init_storage_hub,
108
+ )
105
109
 
106
110
  assert root is not None, "`root` argument can't be `None`"
107
111
 
@@ -117,20 +121,14 @@ def init_storage(
117
121
  # this means we constructed a hosted location of shape s3://bucket-name/uid
118
122
  # within LaminHub
119
123
  assert root_str.endswith(uid)
120
- lamin_env = os.getenv("LAMIN_ENV")
121
124
  if root_str.startswith("create-s3"):
122
125
  if root_str != "create-s3":
123
126
  assert "--" in root_str, "example: `create-s3--eu-central-1`"
124
127
  region = root_str.replace("create-s3--", "")
125
- if region is None:
126
- region = find_closest_aws_region()
127
- else:
128
- if region not in HOSTED_REGIONS:
129
- raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
130
- if lamin_env is None or lamin_env == "prod":
131
- root = f"s3://lamin-{region}/{uid}"
132
- else:
133
- root = f"s3://lamin-hosted-test/{uid}"
128
+ bucket = get_default_bucket_for_instance(
129
+ None if init_instance else instance_id, region
130
+ )
131
+ root = f"{bucket}/{uid}"
134
132
  elif (input_protocol := fsspec.utils.get_protocol(root_str)) not in VALID_PROTOCOLS:
135
133
  valid_protocols = ("local",) + VALID_PROTOCOLS[1:] # show local instead of file
136
134
  raise ValueError(
@@ -7,8 +7,9 @@ import sys
7
7
  import importlib as il
8
8
  import jwt
9
9
  import time
10
+ import threading
10
11
  from pathlib import Path
11
- import time
12
+ from packaging import version
12
13
  from ._settings_instance import InstanceSettings, is_local_db_url
13
14
 
14
15
  from lamin_utils import logger
@@ -59,21 +60,21 @@ class DBTokenManager:
59
60
  from django.db.transaction import Atomic
60
61
 
61
62
  self.original_atomic_enter = Atomic.__enter__
63
+ self.atomic_is_patched = False
62
64
 
63
65
  self.tokens: dict[str, DBToken] = {}
64
66
 
65
67
  def get_connection(self, connection_name: str):
66
68
  from django.db import connections
67
69
 
68
- connection = connections[connection_name]
69
- assert connection.vendor == "postgresql"
70
-
71
- return connection
70
+ return connections[connection_name]
72
71
 
73
72
  def set(self, token: DBToken, connection_name: str = "default"):
74
- from django.db.transaction import Atomic
73
+ if connection_name in self.tokens:
74
+ return
75
75
 
76
- connection = self.get_connection(connection_name)
76
+ from django.db.transaction import Atomic
77
+ from django.db.backends.signals import connection_created
77
78
 
78
79
  def set_token_wrapper(execute, sql, params, many, context):
79
80
  not_in_atomic_block = (
@@ -97,28 +98,50 @@ class DBTokenManager:
97
98
  result.nextset()
98
99
  return result
99
100
 
100
- connection.execute_wrappers.append(set_token_wrapper)
101
+ self.get_connection(connection_name).execute_wrappers.append(set_token_wrapper)
102
+
103
+ def connection_callback(sender, connection, **kwargs):
104
+ if (
105
+ connection.alias == connection_name
106
+ and set_token_wrapper not in connection.execute_wrappers
107
+ ):
108
+ connection.execute_wrappers.append(set_token_wrapper)
109
+
110
+ dispatch_uid = f"dbtokenmanager:{id(self)}:{connection_name}"
111
+ # emitted when a database connection is established
112
+ # not when a database wrapper is created
113
+ connection_created.connect(
114
+ connection_callback, dispatch_uid=dispatch_uid, weak=False
115
+ )
101
116
 
102
117
  self.tokens[connection_name] = token
103
118
 
104
- # ensure we set the token only once for an outer atomic block
105
- def __enter__(atomic):
106
- self.original_atomic_enter(atomic)
107
- connection_name = "default" if atomic.using is None else atomic.using
108
- if connection_name in self.tokens:
109
- # here we don't use the connection from the closure
110
- # because Atomic is a single class to manage transactions for all connections
111
- connection = self.get_connection(connection_name)
112
- if len(connection.atomic_blocks) == 1:
113
- token = self.tokens[connection_name]
114
- # use raw psycopg2 connection here
115
- # atomic block ensures connection
116
- connection.connection.cursor().execute(token.token_query)
117
-
118
- Atomic.__enter__ = __enter__
119
- logger.debug("django.db.transaction.Atomic.__enter__ has been patched")
119
+ if not self.atomic_is_patched:
120
+ # ensure we set the token only once for an outer atomic block
121
+ def __enter__(atomic):
122
+ self.original_atomic_enter(atomic)
123
+ connection_name = "default" if atomic.using is None else atomic.using
124
+ if connection_name in self.tokens:
125
+ # here we don't use the connection from the closure
126
+ # because Atomic is a single class to manage transactions for all connections
127
+ connection = self.get_connection(connection_name)
128
+ if len(connection.atomic_blocks) == 1:
129
+ token = self.tokens[connection_name]
130
+ # use raw psycopg2 connection here
131
+ # atomic block ensures connection
132
+ connection.connection.cursor().execute(token.token_query)
133
+
134
+ Atomic.__enter__ = __enter__
135
+
136
+ self.atomic_is_patched = True
137
+ logger.debug("django.db.transaction.Atomic.__enter__ has been patched")
120
138
 
121
139
  def reset(self, connection_name: str = "default"):
140
+ if connection_name not in self.tokens:
141
+ return
142
+
143
+ from django.db.backends.signals import connection_created
144
+
122
145
  connection = self.get_connection(connection_name)
123
146
 
124
147
  connection.execute_wrappers = [
@@ -127,8 +150,17 @@ class DBTokenManager:
127
150
  if getattr(w, "__name__", None) != "set_token_wrapper"
128
151
  ]
129
152
 
153
+ dispatch_uid = f"dbtokenmanager:{id(self)}:{connection_name}"
154
+ connection_created.disconnect(dispatch_uid=dispatch_uid)
155
+
130
156
  self.tokens.pop(connection_name, None)
131
157
 
158
+ if not self.tokens:
159
+ from django.db.transaction import Atomic
160
+
161
+ Atomic.__enter__ = self.original_atomic_enter
162
+ self.atomic_is_patched = False
163
+
132
164
 
133
165
  db_token_manager = DBTokenManager()
134
166
 
@@ -244,6 +276,19 @@ def setup_django(
244
276
  "django.db.backends.base.base.BaseDatabaseWrapper.close_if_health_check_failed has been patched"
245
277
  )
246
278
 
279
+ disable_context: bool = False
280
+ if (
281
+ env_disable_context := os.getenv("LAMINDB_DISABLE_CONNECTION_CONTEXT")
282
+ ) is not None:
283
+ disable_context = env_disable_context == "true"
284
+ elif IS_RUN_FROM_IPYTHON:
285
+ from ipykernel import __version__ as ipykernel_version
286
+
287
+ disable_context = version.parse(ipykernel_version) >= version.parse("7.0.0")
288
+ if disable_context:
289
+ django.db.connections._connections = threading.local()
290
+ logger.debug("django.db.connections._connections has been patched")
291
+
247
292
  if isettings._fine_grained_access and isettings._db_permissions == "jwt":
248
293
  db_token = DBToken(isettings)
249
294
  db_token_manager.set(db_token) # sets for the default connection
@@ -289,6 +334,10 @@ def reset_django():
289
334
 
290
335
  connections.close_all()
291
336
 
337
+ global db_token_manager
338
+
339
+ db_token_manager.reset()
340
+
292
341
  if getattr(settings, "_wrapped", None) is not None:
293
342
  settings._wrapped = None
294
343
 
@@ -307,7 +356,6 @@ def reset_django():
307
356
 
308
357
  il.invalidate_caches()
309
358
 
310
- global db_token_manager
311
359
  db_token_manager = DBTokenManager()
312
360
 
313
361
  global IS_SETUP
@@ -334,27 +334,27 @@ def upload_from(
334
334
  callback = ProgressCallback(local_path.name, "uploading")
335
335
  kwargs["callback"] = callback
336
336
 
337
+ protocol = self.protocol
338
+ cleanup_cache = False
337
339
  source: str | list[str] = local_path.as_posix()
338
340
  destination: str | list[str] = self.as_posix()
339
341
  if local_path_is_dir:
340
- size: int = 0
341
- files: list[str] = []
342
- for file in (path for path in local_path.rglob("*") if path.is_file()):
343
- size += file.stat().st_size
344
- files.append(file.as_posix())
345
- # see https://github.com/fsspec/s3fs/issues/897
346
- # here we reduce batch_size for folders bigger than 8 GiB
347
- # to avoid the problem in the issue
348
- # the default batch size for this case is 128
349
- if "batch_size" not in kwargs and size >= 8 * 2**30:
350
- kwargs["batch_size"] = 64
351
-
352
342
  if not create_folder:
353
- source = files
343
+ source = [
344
+ path.as_posix() for path in local_path.rglob("*") if path.is_file()
345
+ ]
354
346
  destination = fsspec.utils.other_paths(
355
- files, self.as_posix(), exists=False, flatten=False
347
+ source, self.as_posix(), exists=False, flatten=False
356
348
  )
357
- elif self.protocol == "s3" and "chunksize" not in kwargs:
349
+ elif protocol == "s3" and (bucket := self.drive) not in self.fs.dircache:
350
+ # the below lines are to avoid s3fs triggering create_bucket in upload if
351
+ # dirs are present, it allows to avoid the permission error
352
+ self.fs.dircache[bucket] = [{}]
353
+ assert isinstance(destination, str)
354
+ if not destination.endswith(TRAILING_SEP):
355
+ destination += "/"
356
+ cleanup_cache = True
357
+ elif protocol == "s3" and "chunksize" not in kwargs:
358
358
  size = local_path.stat().st_size
359
359
  MiB = 1024**2
360
360
  DEFAULT_CHUNKSIZE = 50 * MiB # so in s3fs
@@ -364,21 +364,6 @@ def upload_from(
364
364
  rounded = math.ceil(raw / step) * step
365
365
  kwargs["chunksize"] = rounded
366
366
 
367
- # the below lines are to avoid s3fs triggering create_bucket in upload if
368
- # dirs are present, it allows to avoid the permission error
369
- if self.protocol == "s3" and local_path_is_dir and create_folder:
370
- bucket = self.drive
371
- if bucket not in self.fs.dircache:
372
- self.fs.dircache[bucket] = [{}]
373
- assert isinstance(destination, str)
374
- if not destination.endswith(TRAILING_SEP): # type: ignore
375
- destination += "/"
376
- cleanup_cache = True
377
- else:
378
- cleanup_cache = False
379
- else:
380
- cleanup_cache = False
381
-
382
367
  self.fs.upload(source, destination, recursive=create_folder, **kwargs)
383
368
 
384
369
  if cleanup_cache:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lamindb_setup
3
- Version: 1.12.2
3
+ Version: 1.13.0
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.10
@@ -21,7 +21,7 @@ Requires-Dist: pyjwt<3.0.0
21
21
  Requires-Dist: psutil
22
22
  Requires-Dist: packaging
23
23
  Requires-Dist: aiobotocore[boto3]>=2.12.4,<3.0.0 ; extra == "aws"
24
- Requires-Dist: s3fs>=2023.12.2,<=2025.7.0,!=2024.10.0 ; extra == "aws"
24
+ Requires-Dist: s3fs>=2023.12.2,<=2025.9.0,!=2024.10.0 ; extra == "aws"
25
25
  Requires-Dist: line_profiler ; extra == "dev"
26
26
  Requires-Dist: psycopg2-binary ; extra == "dev"
27
27
  Requires-Dist: python-dotenv ; extra == "dev"
@@ -32,7 +32,7 @@ Requires-Dist: pytest-xdist ; extra == "dev"
32
32
  Requires-Dist: nbproject-test>=0.4.3 ; extra == "dev"
33
33
  Requires-Dist: pandas ; extra == "dev"
34
34
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
35
- Requires-Dist: gcsfs>=2023.12.2,<=2025.7.0 ; extra == "gcp"
35
+ Requires-Dist: gcsfs>=2023.12.2,<=2025.9.0 ; extra == "gcp"
36
36
  Project-URL: Home, https://github.com/laminlabs/lamindb-setup
37
37
  Provides-Extra: aws
38
38
  Provides-Extra: dev
@@ -1,8 +1,8 @@
1
- lamindb_setup/__init__.py,sha256=3WL1Tb4ewlV8h1mrJAlhlYkKmBJJDDkVDTFLMeLC9hE,3112
1
+ lamindb_setup/__init__.py,sha256=SV2P-RJAPRv_l-GGuqpUGnfTTGxuutqh1UgzIgNjQTM,3112
2
2
  lamindb_setup/_cache.py,sha256=pGvDNVHGx4HWr_6w5ajqEJOdysmaGc6F221qFnXkT-k,2747
3
3
  lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
4
4
  lamindb_setup/_check_setup.py,sha256=ToKMxsUq8dQBQh8baOrNVlSb1iC8h4zTg5dV8wMu0W4,6760
5
- lamindb_setup/_connect_instance.py,sha256=OSOGZEj_9JsLvZT9JsXGvbC96DY4akmbAOuqUKyjjiE,17717
5
+ lamindb_setup/_connect_instance.py,sha256=QMlPxupGpTR0j1_akh4lJHOFND7izrFnGbzUTXIX2F8,17703
6
6
  lamindb_setup/_delete.py,sha256=KS3r-xGFuDmAbzPUy-9JR-YnPShYdaHjDRQrAmXQ0qM,5863
7
7
  lamindb_setup/_disconnect.py,sha256=FT8EpCm5XXDdhDH7QtAnkO3KPatq2HqT9VXGNjgJDbk,1232
8
8
  lamindb_setup/_django.py,sha256=uIQflpkp8l3axyPaKURlk3kacgpElVP5KOKmFxYSMGk,1454
@@ -10,41 +10,42 @@ lamindb_setup/_entry_points.py,sha256=sKwXPX9xjOotoAjvgkU5LBwjjHLWVkh0ZGdiSsrch9
10
10
  lamindb_setup/_exportdb.py,sha256=QLjoH4dEwqa01A12naKaDPglCCzl2_VLKWFfJRE_uSg,2113
11
11
  lamindb_setup/_importdb.py,sha256=fKv9ev5OOj_-bmzC8XZ1GxOcjIjI486yrHSHDWQrJeI,1874
12
12
  lamindb_setup/_init_instance.py,sha256=8ejD6zjV0eF7KR-DvnmDAVJb9Ty0hjaPtIkFbyLDvA0,14806
13
- lamindb_setup/_migrate.py,sha256=aOWE13LJOW55mC4QiYeCS5bJGSTRsRZPpUYz6e_xoFs,10773
13
+ lamindb_setup/_migrate.py,sha256=nkTzzNlUMqCAPujIgYK-A6Eg08r6LQAhm3rtbNtKZnU,11212
14
14
  lamindb_setup/_register_instance.py,sha256=RdUZxZWHLdbvdNZWpF8e0UWROb_T0cStWbzc5yUw34I,1047
15
15
  lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
16
- lamindb_setup/_schema_metadata.py,sha256=At_EAE9mMzMJIJ1mfiOZYXVgBaXRkWUW6a3fLz5Z_lY,15132
16
+ lamindb_setup/_schema_metadata.py,sha256=af1Es7qFKGPRdNmk48384HiB2r-cDTdBPu0wB9qrga4,15526
17
17
  lamindb_setup/_set_managed_storage.py,sha256=y5YQASsWNYVWUYeLgh3N2YBETYP7mBtbpxe3X_Vgb5I,2699
18
18
  lamindb_setup/_setup_user.py,sha256=DapdzT3u0f5LN5W9W9A6PWw-n8ejcJciQtHN9b5lidA,5889
19
19
  lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
20
20
  lamindb_setup/errors.py,sha256=qZTfSL0rpbY8AIG-Z4-3-_EbLW5zyo2CFEJrVU02-3A,1863
21
21
  lamindb_setup/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  lamindb_setup/types.py,sha256=XlXLb4nmbc68uBj5Hp3xpDRezYGJIBZv6jAAqqN0p10,614
23
- lamindb_setup/core/__init__.py,sha256=5M4A6CVHBO_T5Rr9MeLaPW3WTk4-y00cgRYEgUJVU5U,410
24
- lamindb_setup/core/_aws_options.py,sha256=SadUhcLCRtvsy3Qvx6799Iv4_CJkb1rwWB6d5GIixHc,8080
23
+ lamindb_setup/core/__init__.py,sha256=qPSHqJoRKoG9542Au7EfTRFUHBPrKaCoXmNNwQEonMY,470
24
+ lamindb_setup/core/_aws_options.py,sha256=NtDLfR2BIz3MiR4rGrBu4uW70MFy2p3hjxCnN1sGDB8,9414
25
25
  lamindb_setup/core/_aws_storage.py,sha256=QEtV-riQrwfivcwqHnXBbkJ-9YyNEXL4fLoCmOHZ1BI,2003
26
+ lamindb_setup/core/_clone.py,sha256=2NlXV04yykqg_k7z59C_kD1F1Hi4H-55H-JtNjhenQ0,3691
26
27
  lamindb_setup/core/_deprecated.py,sha256=M3vpM4fZPOncxY2qsXQAPeaEph28xWdv7tYaueaUyAA,2554
27
28
  lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
28
29
  lamindb_setup/core/_hub_client.py,sha256=J0x43at0zb0yWP-RoT2lyqaHV66ewUP3OiYVYQCjxe8,9974
29
- lamindb_setup/core/_hub_core.py,sha256=axnNugfAehXIB_GLJoJE2zQfsAfPNEw5UELnUvTLWHY,27279
30
+ lamindb_setup/core/_hub_core.py,sha256=ii7IZ_CVw_-GRgFgfvnXhVL6KI3UiuGKnMeIwTgiMd4,28907
30
31
  lamindb_setup/core/_hub_crud.py,sha256=j6516H82kLjFUNPqFGUINbDw9YbofMgjxadGzYb0OS4,6362
31
32
  lamindb_setup/core/_hub_utils.py,sha256=6dyDGyzYFgVfR_lE3VN3CP1jGp98gxPtr-T91PAP05U,2687
32
33
  lamindb_setup/core/_private_django_api.py,sha256=By63l3vIEtK1pq246FhHq3tslxsaTJGKm5VakYluWp4,2656
33
- lamindb_setup/core/_settings.py,sha256=0nz3HKnBuXdDY4R2UJQts3ZVC7vROpsBAxWIgZNz800,14017
34
- lamindb_setup/core/_settings_instance.py,sha256=oLRH_BlQ6EshZtpXZ9v4qk_2bUZDG5Km4qjqpII3EPA,23675
34
+ lamindb_setup/core/_settings.py,sha256=QrxSClRK2GgP1-xgNSi0I0hp-wCTUsCs40CnFsncBts,14175
35
+ lamindb_setup/core/_settings_instance.py,sha256=7VXd1W88fgqEnAfzFQKUlDnTr3pmA_e8aIag7FqPrJI,23899
35
36
  lamindb_setup/core/_settings_load.py,sha256=j20cy3J56ZBHLDfB2A8oKjekNetMNsy0_W3eWD36pWI,5161
36
37
  lamindb_setup/core/_settings_save.py,sha256=jh412jXIAbIYvnSoW9riBFePRAa4vmPm-ScYD0smlnw,3292
37
- lamindb_setup/core/_settings_storage.py,sha256=pyU25hP5rQYjVe0tFPR8P6TzAYzu1NpT-PIbXoxfV18,15348
38
+ lamindb_setup/core/_settings_storage.py,sha256=yFz0w7yB7_3UWdrndVHJ_BZyV4ONj2lPRaQn_cksFQw,15150
38
39
  lamindb_setup/core/_settings_store.py,sha256=ykJeBA9IODK4G_jrfBE9pb0c1xkfePkARPpb306DT08,2687
39
40
  lamindb_setup/core/_settings_user.py,sha256=gFfyMf-738onbh1Mf4wsmLlenQJPtjQfpUgKnOlqc2o,1453
40
41
  lamindb_setup/core/_setup_bionty_sources.py,sha256=ox3X-SHiHa2lNPSWjwZhINypbLacX6kGwH6hVVrSFZc,1505
41
42
  lamindb_setup/core/cloud_sqlite_locker.py,sha256=H_CTUCjURFXwD1cCtV_Jn0_60iztZTkaesLLXIBgIxc,7204
42
- lamindb_setup/core/django.py,sha256=kV8W3WZy5Rkhn4rDJv2GNoq8JYvX_8dLBHhDRZdSgwE,10542
43
+ lamindb_setup/core/django.py,sha256=2HwhtfUEX4peSkczc0VSfA-CpfCGL4vNgkPe9Pwu5kw,12429
43
44
  lamindb_setup/core/exceptions.py,sha256=qjMzqy_uzPA7mCOdnoWnS_fdA6OWbdZGftz-YYplrY0,84
44
45
  lamindb_setup/core/hashing.py,sha256=Y8Uc5uSGTfU6L2R_gb5w8DdHhGRog7RnkK-e9FEMjPY,3680
45
46
  lamindb_setup/core/types.py,sha256=T7NwspfRHgIIpYsXDcApks8jkOlGeGRW-YbVLB7jNIo,67
46
- lamindb_setup/core/upath.py,sha256=uk3LpDA7Jbk1GzUb8hCsxByg5cMYTjPusIvwyXe8g3Y,36023
47
- lamindb_setup-1.12.2.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
48
- lamindb_setup-1.12.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
49
- lamindb_setup-1.12.2.dist-info/METADATA,sha256=6x5IAxLh33hQYnOh9EIIckgewprGjwTotuxNmHAlzXI,1798
50
- lamindb_setup-1.12.2.dist-info/RECORD,,
47
+ lamindb_setup/core/upath.py,sha256=J43wCLFLRxNAUFN1bAtm6y6Mgt168JKq-wnhhG048Us,35486
48
+ lamindb_setup-1.13.0.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
49
+ lamindb_setup-1.13.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
50
+ lamindb_setup-1.13.0.dist-info/METADATA,sha256=80aqYu5D3oJTyPh7GbZwC29u0m8fWNRL_BNLiRkZSJM,1798
51
+ lamindb_setup-1.13.0.dist-info/RECORD,,