lamindb_setup 1.16.0__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ lamin_env = os.getenv("LAMIN_ENV")
20
20
  if lamin_env is None or lamin_env == "prod":
21
21
  HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
22
22
  else:
23
- logger.warning("loaded LAMIN_ENV: staging")
23
+ logger.warning(f"loaded LAMIN_ENV: {lamin_env}")
24
24
  HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
25
25
 
26
26
 
@@ -60,6 +60,7 @@ class AWSOptionsManager:
60
60
  from aiobotocore.session import AioSession
61
61
  from s3fs import S3FileSystem
62
62
 
63
+ anon_env = os.getenv("LAMIN_S3_ANON") == "true"
63
64
  # this is cached so will be reused with the connection initialized
64
65
  # these options are set for paths in _path_inject_options
65
66
  # here we set the same options to cache the filesystem
@@ -68,19 +69,28 @@ class AWSOptionsManager:
68
69
  use_listings_cache=True,
69
70
  version_aware=False,
70
71
  config_kwargs={"max_pool_connections": 64},
72
+ anon=anon_env,
71
73
  )
72
74
 
73
75
  self._suppress_aiobotocore_traceback_logging()
74
76
 
75
- try:
76
- fs.connect()
77
- self.anon: bool = fs.session._credentials is None
78
- except Exception as e:
77
+ if anon_env:
78
+ self.anon: bool = True
79
79
  logger.warning(
80
- f"There is a problem with your default AWS Credentials: {e}\n"
81
- "`anon` mode will be used for all non-managed buckets."
80
+ "`anon` mode will be used for all non-managed buckets "
81
+ "because the environment variable LAMIN_S3_ANON was set to 'true'"
82
82
  )
83
- self.anon = True
83
+ else:
84
+ try:
85
+ fs.connect()
86
+ self.anon = fs.session._credentials is None
87
+ except Exception as e:
88
+ logger.warning(
89
+ f"There is a problem with your default AWS Credentials: {e}\n"
90
+ "`anon` mode will be used for all non-managed buckets"
91
+ )
92
+ self.anon = True
93
+
84
94
  self.anon_public: bool | None = None
85
95
  if not self.anon:
86
96
  try:
@@ -1,142 +1,18 @@
1
- """Utilities to copy, clone and load Postgres instances as local SQLite databases.
1
+ """Utilities to work with Postgres Snapshots.
2
2
 
3
3
  .. autosummary::
4
4
  :toctree:
5
5
 
6
- init_local_sqlite
7
- connect_local_sqlite
8
- connect_remote_sqlite
9
6
  upload_sqlite_clone
10
7
  """
11
8
 
12
9
  import gzip
13
- import os
14
10
  import shutil
15
11
  from pathlib import Path
16
12
 
17
- from lamindb_setup.core._settings_load import load_instance_settings
18
- from lamindb_setup.core._settings_store import instance_settings_file
19
- from lamindb_setup.core.django import reset_django
20
13
  from lamindb_setup.core.upath import create_path
21
14
 
22
15
 
23
- def init_local_sqlite(
24
- instance: str | None = None, copy_suffix: str | None = None
25
- ) -> None:
26
- """Initialize SQLite copy of an existing Postgres instance.
27
-
28
- Creates a SQLite database with the same schema as the source Postgres instance.
29
- The copy shares the same storage location as the original instance.
30
-
31
- The copy is intended for read-only access to instance data without requiring a Postgres connection.
32
- Data synchronization to complete the clone happens via a separate Lambda function.
33
-
34
- Note that essential user, branch and storage tables are missing.
35
- Therefore, it is not possible to store Artifacts without having replayed these records first.
36
-
37
- Args:
38
- instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
39
- If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
40
- If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
41
- copy_suffix: Optional suffix to append to the local clone name.
42
- """
43
- import lamindb_setup as ln_setup
44
-
45
- if instance is None: # pragma: no cover
46
- instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
47
-
48
- if instance is None:
49
- raise ValueError(
50
- "No instance identifier provided and LAMIN_CURRENT_INSTANCE is not set"
51
- )
52
-
53
- if ln_setup.settings.instance is None: # pragma: no cover
54
- ln_setup.connect(instance)
55
-
56
- name = (
57
- f"{ln_setup.settings.instance.name}{copy_suffix}"
58
- if copy_suffix is not None
59
- else ln_setup.settings.instance.name
60
- )
61
- isettings = ln_setup._connect_instance._connect_instance(
62
- owner=ln_setup.settings.instance.owner, name=name
63
- )
64
- isettings._db = None
65
- isettings._is_on_hub = False
66
- isettings._fine_grained_access = False
67
- name = (
68
- f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
69
- )
70
- isettings._name = name
71
- isettings._is_clone = True
72
- isettings._persist(write_to_disk=True)
73
-
74
- if not isettings._sqlite_file_local.exists():
75
- # Reset Django configuration before _init_db() because Django was already configured for the original Postgres instance.
76
- # Without this reset, the `if not settings.configured` check in `setup_django()` would skip reconfiguration,
77
- # causing migrations to run against the old Postgres database instead of the new SQLite clone database.
78
- reset_django()
79
- isettings._init_db()
80
-
81
-
82
- def connect_local_sqlite(
83
- instance: str,
84
- ) -> None:
85
- """Load a locally stored SQLite instance of which a remote hub Postgres instance exists.
86
-
87
- This function bypasses the hub lookup that `lamin connect` performs, loading the SQLite clone directly from local settings files.
88
- The clone must first be created via `init_local_sqlite()`.
89
-
90
- Args:
91
- instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
92
- """
93
- owner, name = instance.split("/")
94
- settings_file = instance_settings_file(name=name, owner=owner)
95
-
96
- if not settings_file.exists():
97
- raise ValueError(
98
- "SQLite clone not found."
99
- " Run `init_local_sqlite()` to create a local copy or connect to a remote copy using `connect_remote_sqlite`."
100
- )
101
-
102
- isettings = load_instance_settings(settings_file)
103
- isettings._persist(write_to_disk=False)
104
-
105
- # Using `setup_django` instead of `_load_db` to not ping AWS RDS
106
- from lamindb_setup._check_setup import disable_auto_connect
107
-
108
- from .django import setup_django
109
-
110
- disable_auto_connect(setup_django)(isettings)
111
-
112
-
113
- def connect_remote_sqlite(instance: str, *, copy_suffix: str | None = None) -> None:
114
- """Load an existing SQLite copy of a hub instance.
115
-
116
- Args:
117
- instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
118
- copy_suffix: Optional suffix of the local clone.
119
- """
120
- import lamindb_setup as ln_setup
121
-
122
- owner, name = instance.split("/")
123
-
124
- # Step 1: Create the settings file
125
- isettings = ln_setup._connect_instance._connect_instance(owner=owner, name=name)
126
- isettings._db = None
127
- isettings._is_on_hub = False
128
- isettings._fine_grained_access = False
129
- isettings._db_permissions = "read"
130
- name = (
131
- f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
132
- )
133
- isettings._name = name
134
- isettings._is_clone = True
135
- isettings._persist(write_to_disk=True)
136
-
137
- connect_local_sqlite(instance=instance + (copy_suffix or ""))
138
-
139
-
140
16
  def upload_sqlite_clone(
141
17
  local_sqlite_path: Path | str | None = None, compress: bool = True
142
18
  ) -> None:
@@ -11,8 +11,7 @@ import httpx
11
11
  from httpx_retries import Retry, RetryTransport
12
12
  from lamin_utils import logger
13
13
  from pydantic_settings import BaseSettings
14
- from supabase import Client, create_client # type: ignore
15
- from supabase.lib.client_options import ClientOptions
14
+ from supabase import Client, ClientOptions, create_client
16
15
 
17
16
  from ._settings_save import save_user_settings
18
17
 
@@ -383,7 +383,8 @@ def _init_instance_hub(
383
383
  ) -> None:
384
384
  from ._settings import settings
385
385
 
386
- account_id = settings.user._uuid if account_id is None else account_id
386
+ created_by_id = settings.user._uuid.hex if account_id is None else account_id.hex # type: ignore
387
+ owner_account_id = os.getenv("LAMINDB_ACCOUNT_ID_INIT", created_by_id)
387
388
 
388
389
  try:
389
390
  lamindb_version = metadata.version("lamindb")
@@ -391,13 +392,13 @@ def _init_instance_hub(
391
392
  lamindb_version = None
392
393
  fields = {
393
394
  "id": isettings._id.hex,
394
- "account_id": account_id.hex, # type: ignore
395
+ "account_id": owner_account_id,
395
396
  "name": isettings.name,
396
397
  "lnid": isettings.uid,
397
398
  "schema_str": isettings._schema_str,
398
399
  "lamindb_version": lamindb_version,
399
400
  "public": False,
400
- "created_by_id": account_id.hex, # type: ignore
401
+ "created_by_id": created_by_id,
401
402
  }
402
403
  if isettings.dialect != "sqlite":
403
404
  db_dsn = LaminDsnModel(db=isettings.db)
@@ -407,7 +408,7 @@ def _init_instance_hub(
407
408
  "db_port": db_dsn.db.port,
408
409
  "db_database": db_dsn.db.database,
409
410
  }
410
- fields.update(db_fields)
411
+ fields.update(db_fields) # type: ignore
411
412
  slug = isettings.slug
412
413
  # I'd like the following to be an upsert, but this seems to violate RLS
413
414
  # Similarly, if we don't specify `returning="minimal"`, we'll violate RLS
@@ -415,7 +416,9 @@ def _init_instance_hub(
415
416
  # as then init_instance is no longer idempotent
416
417
  try:
417
418
  client.table("instance").insert(fields, returning="minimal").execute()
418
- except APIError:
419
+ except APIError as e:
420
+ if "new row violates row-level security policy" in str(e):
421
+ raise e
419
422
  logger.warning(f"instance already existed at: https://lamin.ai/{slug}")
420
423
  return None
421
424
  if isettings.dialect != "sqlite" and isettings.is_remote:
@@ -713,7 +716,7 @@ def get_lamin_site_base_url():
713
716
 
714
717
 
715
718
  def sign_up_local_hub(email) -> str | tuple[str, str, str]:
716
- # raises gotrue.errors.AuthApiError: User already registered
719
+ # raises AuthApiError: User already registered
717
720
  password = base62(40) # generate new password
718
721
  sign_up_kwargs = {"email": email, "password": password}
719
722
  client = connect_hub()
@@ -34,8 +34,8 @@ def private_django_api(reverse=False):
34
34
  # the order here matters
35
35
  # changing it might break the tests
36
36
  attributes = [
37
- "MultipleObjectsReturned",
38
37
  "add_to_class",
38
+ "arefresh_from_db",
39
39
  "adelete",
40
40
  "asave",
41
41
  "clean",
@@ -51,10 +51,8 @@ def private_django_api(reverse=False):
51
51
  "validate_unique",
52
52
  ]
53
53
  if reverse:
54
- attributes.append("arefresh_from_db")
55
54
  attributes.append("full_clean")
56
55
  else:
57
- attributes.append("a_refresh_from_db")
58
56
  attributes.append("full__clean")
59
57
 
60
58
  django_path = Path(db.__file__).parent.parent
@@ -46,6 +46,12 @@ def _process_cache_path(cache_path: UPathStr | None) -> UPath | None:
46
46
  return cache_dir
47
47
 
48
48
 
49
+ # returned by settings.branch for none/none instance
50
+ class MainBranchMock:
51
+ id = 1
52
+ name = "main"
53
+
54
+
49
55
  class SetupSettings:
50
56
  """Setup settings."""
51
57
 
@@ -140,6 +146,10 @@ class SetupSettings:
140
146
  # and we never need a DB request
141
147
  def branch(self) -> Branch:
142
148
  """Default branch."""
149
+ # this is needed for .filter() with non-default connections
150
+ if not self._instance_exists:
151
+ return MainBranchMock()
152
+
143
153
  if self._branch is None:
144
154
  from lamindb import Branch
145
155
 
@@ -222,10 +232,9 @@ class SetupSettings:
222
232
  If `True`, the current instance is connected, meaning that the db and other settings
223
233
  are properly configured for use.
224
234
  """
225
- if self._instance_exists:
226
- return self.instance.slug != "none/none"
227
- else:
228
- return False
235
+ from . import django
236
+
237
+ return self._instance_exists and django.IS_SETUP
229
238
 
230
239
  @property
231
240
  def private_django_api(self) -> bool:
@@ -284,12 +293,7 @@ class SetupSettings:
284
293
 
285
294
  @property
286
295
  def _instance_exists(self):
287
- try:
288
- self.instance # noqa
289
- return True
290
- # this is implicit logic that catches if no instance is loaded
291
- except CurrentInstanceNotConfigured:
292
- return False
296
+ return self.instance.slug != "none/none"
293
297
 
294
298
  @property
295
299
  def cache_dir(self) -> UPath:
@@ -125,10 +125,11 @@ class InstanceSettings:
125
125
  if self._local_storage is not None:
126
126
  value_local = self.local_storage
127
127
  representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
128
- representation += (
129
- f"\n - cloud storage: {value.root_as_str} ({value.region})"
130
- )
131
- else:
128
+ if value is not None:
129
+ representation += (
130
+ f"\n - cloud storage: {value.root_as_str} ({value.region})"
131
+ )
132
+ elif value is not None:
132
133
  representation += (
133
134
  f"\n - storage: {value.root_as_str} ({value.region})"
134
135
  )
@@ -322,16 +323,6 @@ class InstanceSettings:
322
323
  else:
323
324
  logger.warning(f"could not set this local storage location: {local_root}")
324
325
 
325
- @property
326
- @deprecated("local_storage")
327
- def storage_local(self) -> StorageSettings:
328
- return self.local_storage
329
-
330
- @storage_local.setter
331
- @deprecated("local_storage")
332
- def storage_local(self, local_root_host: tuple[Path | str, str]):
333
- self.local_storage = local_root_host # type: ignore
334
-
335
326
  @property
336
327
  def slug(self) -> str:
337
328
  """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
@@ -417,11 +408,6 @@ class InstanceSettings:
417
408
  else:
418
409
  return {module for module in self._schema_str.split(",") if module != ""}
419
410
 
420
- @property
421
- @deprecated("modules")
422
- def schema(self) -> set[str]:
423
- return self.modules
424
-
425
411
  @property
426
412
  def _sqlite_file(self) -> UPath:
427
413
  """SQLite file."""
@@ -513,17 +499,36 @@ class InstanceSettings:
513
499
 
514
500
  @property
515
501
  def dialect(self) -> Literal["sqlite", "postgresql"]:
516
- """SQL dialect."""
502
+ """SQL dialect.
503
+
504
+ Equivalent to :attr:`vendor`.
505
+
506
+ "vendor" is the Django terminology for the type of database. "dialect" is the SQLAlchemy terminology.
507
+ """
517
508
  if self._db is None or self._db.startswith("sqlite://"):
518
509
  return "sqlite"
519
510
  else:
520
511
  assert self._db.startswith("postgresql"), f"Unexpected DB value: {self._db}"
521
512
  return "postgresql"
522
513
 
514
+ @property
515
+ def vendor(self) -> Literal["sqlite", "postgresql"]:
516
+ """Database vendor.
517
+
518
+ Equivalent to :attr:`dialect`.
519
+
520
+ "vendor" is the Django terminology for the type of database. "dialect" is the SQLAlchemy terminology.
521
+ """
522
+ return self.dialect
523
+
523
524
  @property
524
525
  def _is_cloud_sqlite(self) -> bool:
525
526
  """Is this a cloud instance with sqlite db."""
526
- return self.dialect == "sqlite" and self.storage.type_is_cloud
527
+ return (
528
+ self.dialect == "sqlite"
529
+ and self.storage is not None
530
+ and self.storage.type_is_cloud
531
+ )
527
532
 
528
533
  @property
529
534
  def _cloud_sqlite_locker(self):
@@ -543,14 +548,16 @@ class InstanceSettings:
543
548
  @property
544
549
  def is_remote(self) -> bool:
545
550
  """Boolean indicating if an instance has no local component."""
551
+ if self.storage is None and self.db == "sqlite:///:memory:":
552
+ return False
546
553
  return check_is_instance_remote(self.storage.root_as_str, self.db)
547
554
 
548
555
  @property
549
556
  def is_on_hub(self) -> bool:
550
- """Is this instance on the hub?
557
+ """Is this instance registered on the hub?
551
558
 
552
- Can only reliably establish if user has access to the instance.
553
- Will return `False` in case the instance isn't found.
559
+ Can only establish if user has access to the instance.
560
+ Will return `False` in case the user token can't find the instance.
554
561
  """
555
562
  if self._is_on_hub is None:
556
563
  from ._hub_client import call_with_fallback_auth
@@ -572,6 +579,15 @@ class InstanceSettings:
572
579
  self._is_on_hub = True
573
580
  return self._is_on_hub
574
581
 
582
+ @property
583
+ def is_managed_by_hub(self) -> bool:
584
+ """Is this instance managed by the hub?
585
+
586
+ Returns `True` if the instance is _managed_ by LaminHub, i.e.,
587
+ it was connected to LaminHub to manage access, migrations, a REST API, a UI, etc.
588
+ """
589
+ return self.api_url is not None
590
+
575
591
  def _get_settings_file(self) -> Path:
576
592
  return instance_settings_file(self.name, self.owner)
577
593
 
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
+ from importlib.util import find_spec
4
5
  from pathlib import Path
5
6
  from typing import TYPE_CHECKING
6
7
  from uuid import UUID, uuid4
@@ -46,19 +47,36 @@ def load_cache_path_from_settings(storage_settings: Path | None = None) -> Path
46
47
  return None
47
48
 
48
49
 
50
+ def find_module_candidates():
51
+ """Find all local packages that depend on lamindb."""
52
+ candidates = ["bionty", "wetlab"]
53
+ return [c for c in candidates if find_spec(c) is not None]
54
+
55
+
49
56
  def load_instance_settings(instance_settings_file: Path | None = None):
50
57
  if instance_settings_file is None:
51
- instance_settings_file = current_instance_settings_file()
52
- if not instance_settings_file.exists():
58
+ isettings_file = current_instance_settings_file()
59
+ if not isettings_file.exists():
60
+ isettings = InstanceSettings(
61
+ id=UUID("00000000-0000-0000-0000-000000000000"),
62
+ owner="none",
63
+ name="none",
64
+ storage=None,
65
+ modules=",".join(find_module_candidates()),
66
+ )
67
+ return isettings
68
+ else:
69
+ isettings_file = instance_settings_file
70
+
71
+ if not isettings_file.exists():
72
+ # this errors only if the file was explicitly provided
53
73
  raise CurrentInstanceNotConfigured
54
74
  try:
55
- settings_store = InstanceSettingsStore(_env_file=instance_settings_file)
75
+ settings_store = InstanceSettingsStore(_env_file=isettings_file)
56
76
  except (ValidationError, TypeError) as error:
57
- with open(instance_settings_file) as f:
58
- content = f.read()
59
77
  raise SettingsEnvFileOutdated(
60
- f"\n\n{error}\n\nYour instance settings file with\n\n{content}\nis invalid"
61
- f" (likely outdated), see validation error. Please delete {instance_settings_file} &"
78
+ f"\n\n{error}\n\nYour instance settings file with\n\n{isettings_file.read_text()}\nis invalid"
79
+ f" (likely outdated), see validation error. Please delete {isettings_file} &"
62
80
  " reload (remote) or re-initialize (local) the instance with the same name & storage location."
63
81
  ) from error
64
82
  isettings = setup_instance_from_store(settings_store)
@@ -255,11 +255,6 @@ class StorageSettings:
255
255
  self._has_local = False
256
256
  self._local = None
257
257
 
258
- @property
259
- @deprecated("_id")
260
- def id(self) -> int:
261
- return self._id
262
-
263
258
  @property
264
259
  def _id(self) -> int:
265
260
  """Storage id.
@@ -5,13 +5,15 @@ import builtins
5
5
  import os
6
6
  import sys
7
7
  import importlib as il
8
+ import gzip
8
9
  import jwt
9
10
  import time
10
11
  import threading
11
12
  from pathlib import Path
13
+ import shutil
12
14
  from packaging import version
13
15
  from ._settings_instance import InstanceSettings, is_local_db_url
14
-
16
+ from ..errors import CurrentInstanceNotConfigured
15
17
  from lamin_utils import logger
16
18
 
17
19
 
@@ -21,6 +23,24 @@ IS_MIGRATING = False
21
23
  CONN_MAX_AGE = 299
22
24
 
23
25
 
26
+ def get_connection(connection_name: str):
27
+ from django.db import connections
28
+
29
+ return connections[connection_name]
30
+
31
+
32
+ def error_no_instance_wrapper(execute, sql, params, many, context):
33
+ connection = context["connection"]
34
+
35
+ if (
36
+ connection.vendor == "sqlite"
37
+ and connection.settings_dict.get("NAME") == ":memory:"
38
+ ):
39
+ raise CurrentInstanceNotConfigured
40
+
41
+ return execute(sql, params, many, context)
42
+
43
+
24
44
  # db token that refreshes on access if needed
25
45
  class DBToken:
26
46
  def __init__(
@@ -64,11 +84,6 @@ class DBTokenManager:
64
84
 
65
85
  self.tokens: dict[str, DBToken] = {}
66
86
 
67
- def get_connection(self, connection_name: str):
68
- from django.db import connections
69
-
70
- return connections[connection_name]
71
-
72
87
  def set(self, token: DBToken, connection_name: str = "default"):
73
88
  if connection_name in self.tokens:
74
89
  return
@@ -77,11 +92,7 @@ class DBTokenManager:
77
92
  from django.db.backends.signals import connection_created
78
93
 
79
94
  def set_token_wrapper(execute, sql, params, many, context):
80
- not_in_atomic_block = (
81
- context is None
82
- or "connection" not in context
83
- or not context["connection"].in_atomic_block
84
- )
95
+ not_in_atomic_block = not context["connection"].in_atomic_block
85
96
  # ignore atomic blocks
86
97
  if not_in_atomic_block:
87
98
  sql = token.token_query + sql
@@ -98,7 +109,7 @@ class DBTokenManager:
98
109
  result.nextset()
99
110
  return result
100
111
 
101
- self.get_connection(connection_name).execute_wrappers.append(set_token_wrapper)
112
+ get_connection(connection_name).execute_wrappers.append(set_token_wrapper)
102
113
 
103
114
  def connection_callback(sender, connection, **kwargs):
104
115
  if (
@@ -124,7 +135,7 @@ class DBTokenManager:
124
135
  if connection_name in self.tokens:
125
136
  # here we don't use the connection from the closure
126
137
  # because Atomic is a single class to manage transactions for all connections
127
- connection = self.get_connection(connection_name)
138
+ connection = get_connection(connection_name)
128
139
  if len(connection.atomic_blocks) == 1:
129
140
  token = self.tokens[connection_name]
130
141
  # use raw psycopg2 connection here
@@ -142,7 +153,7 @@ class DBTokenManager:
142
153
 
143
154
  from django.db.backends.signals import connection_created
144
155
 
145
- connection = self.get_connection(connection_name)
156
+ connection = get_connection(connection_name)
146
157
 
147
158
  connection.execute_wrappers = [
148
159
  w
@@ -291,6 +302,9 @@ def setup_django(
291
302
  django.db.connections._connections = threading.local()
292
303
  logger.debug("django.db.connections._connections has been patched")
293
304
 
305
+ # error if trying to query with the default connection without setting up an instance
306
+ get_connection("default").execute_wrappers.insert(0, error_no_instance_wrapper)
307
+
294
308
  if isettings._fine_grained_access and isettings._db_permissions == "jwt":
295
309
  db_token = DBToken(isettings)
296
310
  db_token_manager.set(db_token) # sets for the default connection
@@ -311,6 +325,20 @@ def setup_django(
311
325
  call_command("migrate", app_name, app_number, verbosity=2)
312
326
  isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
313
327
  elif init:
328
+ modules_beyond_bionty = isettings.modules.copy()
329
+ compressed_sqlite_path = Path(__file__).parent / "lamin.db.gz"
330
+ if "bionty" in modules_beyond_bionty:
331
+ modules_beyond_bionty.remove("bionty")
332
+ # seed from compressed sqlite file
333
+ if (
334
+ isettings.dialect == "sqlite"
335
+ and os.getenv("LAMINDB_INIT_FROM_SCRATCH", "false") != "true"
336
+ and len(modules_beyond_bionty) == 0
337
+ and compressed_sqlite_path.exists()
338
+ ):
339
+ with gzip.open(compressed_sqlite_path, "rb") as f_in:
340
+ with open(isettings._sqlite_file_local, "wb") as f_out:
341
+ shutil.copyfileobj(f_in, f_out)
314
342
  global IS_MIGRATING
315
343
  IS_MIGRATING = True
316
344
  call_command("migrate", verbosity=0)
@@ -88,7 +88,7 @@ def hash_file(
88
88
  file_path: Path,
89
89
  file_size: int | None = None,
90
90
  chunk_size: int | None = 50 * 1024 * 1024,
91
- ) -> tuple[str, str]:
91
+ ) -> tuple[int, str, str]:
92
92
  with open(file_path, "rb") as fp:
93
93
  if file_size is None:
94
94
  fp.seek(0, 2)
@@ -107,15 +107,15 @@ def hash_file(
107
107
  hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
108
108
  ).digest()
109
109
  hash_type = "sha1-fl"
110
- return to_b64_str(digest)[:HASH_LENGTH], hash_type
110
+ return file_size, to_b64_str(digest)[:HASH_LENGTH], hash_type
111
111
 
112
112
 
113
- def hash_dir(path: Path):
113
+ def hash_dir(path: Path) -> tuple[int, str, str, int]:
114
114
  files = (subpath for subpath in path.rglob("*") if subpath.is_file())
115
115
 
116
116
  def hash_size(file):
117
- file_size = file.stat().st_size
118
- return hash_file(file, file_size)[0], file_size
117
+ size, hash, _ = hash_file(file)
118
+ return hash, size
119
119
 
120
120
  try:
121
121
  n_workers = len(psutil.Process().cpu_affinity())
Binary file