datachain 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.


Files changed (38)
  1. datachain/__init__.py +2 -0
  2. datachain/catalog/catalog.py +62 -228
  3. datachain/cli.py +136 -22
  4. datachain/client/fsspec.py +9 -0
  5. datachain/client/local.py +11 -32
  6. datachain/config.py +126 -51
  7. datachain/data_storage/schema.py +66 -33
  8. datachain/data_storage/sqlite.py +12 -4
  9. datachain/data_storage/warehouse.py +101 -129
  10. datachain/lib/convert/sql_to_python.py +8 -12
  11. datachain/lib/dc.py +275 -80
  12. datachain/lib/func/__init__.py +32 -0
  13. datachain/lib/func/aggregate.py +353 -0
  14. datachain/lib/func/func.py +152 -0
  15. datachain/lib/listing.py +6 -21
  16. datachain/lib/listing_info.py +4 -0
  17. datachain/lib/signal_schema.py +17 -8
  18. datachain/lib/udf.py +3 -3
  19. datachain/lib/utils.py +5 -0
  20. datachain/listing.py +22 -48
  21. datachain/query/__init__.py +1 -2
  22. datachain/query/batch.py +0 -1
  23. datachain/query/dataset.py +33 -46
  24. datachain/query/schema.py +1 -61
  25. datachain/query/session.py +33 -25
  26. datachain/remote/studio.py +63 -14
  27. datachain/sql/functions/__init__.py +1 -1
  28. datachain/sql/functions/aggregate.py +47 -0
  29. datachain/sql/functions/array.py +0 -8
  30. datachain/sql/sqlite/base.py +20 -2
  31. datachain/studio.py +129 -0
  32. datachain/utils.py +58 -0
  33. {datachain-0.6.0.dist-info → datachain-0.6.2.dist-info}/METADATA +7 -6
  34. {datachain-0.6.0.dist-info → datachain-0.6.2.dist-info}/RECORD +38 -33
  35. {datachain-0.6.0.dist-info → datachain-0.6.2.dist-info}/WHEEL +1 -1
  36. {datachain-0.6.0.dist-info → datachain-0.6.2.dist-info}/LICENSE +0 -0
  37. {datachain-0.6.0.dist-info → datachain-0.6.2.dist-info}/entry_points.txt +0 -0
  38. {datachain-0.6.0.dist-info → datachain-0.6.2.dist-info}/top_level.txt +0 -0
datachain/__init__.py CHANGED
@@ -1,3 +1,4 @@
+ from datachain.lib import func
  from datachain.lib.data_model import DataModel, DataType, is_chain_type
  from datachain.lib.dc import C, Column, DataChain, Sys
  from datachain.lib.file import (
@@ -34,6 +35,7 @@ __all__ = [
  "Sys",
  "TarVFile",
  "TextFile",
+ "func",
  "is_chain_type",
  "metrics",
  "param",
datachain/catalog/catalog.py CHANGED
@@ -1,4 +1,3 @@
- import glob
  import io
  import json
  import logging
@@ -35,7 +34,6 @@ from tqdm import tqdm

  from datachain.cache import DataChainCache
  from datachain.client import Client
- from datachain.config import get_remote_config, read_config
  from datachain.dataset import (
  DATASET_PREFIX,
  QUERY_DATASET_PREFIX,
@@ -48,12 +46,10 @@ from datachain.dataset import (
  parse_dataset_uri,
  )
  from datachain.error import (
- ClientError,
  DataChainError,
  DatasetInvalidVersionError,
  DatasetNotFoundError,
  DatasetVersionNotFoundError,
- PendingIndexingError,
  QueryScriptCancelError,
  QueryScriptRunError,
  )
@@ -61,8 +57,8 @@ from datachain.listing import Listing
  from datachain.node import DirType, Node, NodeWithPath
  from datachain.nodes_thread_pool import NodesThreadPool
  from datachain.remote.studio import StudioClient
- from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
- from datachain.storage import Storage, StorageStatus, StorageURI
+ from datachain.sql.types import DateTime, SQLType, String
+ from datachain.storage import StorageURI
  from datachain.utils import (
  DataChainDir,
  batched,
@@ -102,7 +98,7 @@ PULL_DATASET_SLEEP_INTERVAL = 0.1 # sleep time while waiting for chunk to be av
  PULL_DATASET_CHECK_STATUS_INTERVAL = 20 # interval to check export status in Studio


- def _raise_remote_error(error_message: str) -> NoReturn:
+ def raise_remote_error(error_message: str) -> NoReturn:
  raise DataChainError(f"Error from server: {error_message}")


@@ -130,7 +126,6 @@ class DatasetRowsFetcher(NodesThreadPool):
  self,
  metastore: "AbstractMetastore",
  warehouse: "AbstractWarehouse",
- remote_config: dict[str, Any],
  dataset_name: str,
  dataset_version: int,
  schema: dict[str, Union[SQLType, type[SQLType]]],
@@ -144,10 +139,7 @@ class DatasetRowsFetcher(NodesThreadPool):
  self.dataset_version = dataset_version
  self.schema = schema
  self.last_status_check: Optional[float] = None
-
- self.studio_client = StudioClient(
- remote_config["url"], remote_config["username"], remote_config["token"]
- )
+ self.studio_client = StudioClient()

  def done_task(self, done):
  for task in done:
@@ -181,14 +173,14 @@ class DatasetRowsFetcher(NodesThreadPool):
  self.dataset_name, self.dataset_version
  )
  if not export_status_response.ok:
- _raise_remote_error(export_status_response.message)
+ raise_remote_error(export_status_response.message)

  export_status = export_status_response.data["status"] # type: ignore [index]

  if export_status == "failed":
- _raise_remote_error("Dataset export failed in Studio")
+ raise_remote_error("Dataset export failed in Studio")
  if export_status == "removed":
- _raise_remote_error("Dataset export removed in Studio")
+ raise_remote_error("Dataset export removed in Studio")

  self.last_status_check = time.time()

@@ -483,17 +475,12 @@ def compute_metafile_data(node_groups) -> list[dict[str, Any]]:
  if not node_group.sources:
  continue
  listing: Listing = node_group.listing
- source_path: str = node_group.source_path
- if not node_group.is_dataset:
- assert listing.storage
- data_source = listing.storage.to_dict(source_path)
- else:
- data_source = {"uri": listing.metastore.uri}
-
- metafile_group = {"data-source": data_source, "files": []}
+ metafile_group = {"data-source": {"uri": listing.uri}, "files": []}
  for node in node_group.instantiated_nodes:
  if not node.n.is_dir:
- metafile_group["files"].append(node.get_metafile_data())
+ metafile_group["files"].append( # type: ignore [attr-defined]
+ node.get_metafile_data()
+ )
  if metafile_group["files"]:
  metafile_data.append(metafile_group)

@@ -569,6 +556,12 @@ class Catalog:

  return self._warehouse

+ @cached_property
+ def session(self):
+ from datachain.query.session import Session
+
+ return Session.get(catalog=self)
+
  def get_init_params(self) -> dict[str, Any]:
  return {
  **self._init_params,
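
The new cached `session` property lets the catalog reuse one lazily created Session across the DataChain calls introduced further down, instead of each call site building its own. A rough illustration of the behavior; the `catalog` variable is a placeholder for any Catalog instance:

s1 = catalog.session   # first access creates the Session via Session.get(catalog=catalog)
s2 = catalog.session   # cached_property returns the same object on later accesses
assert s1 is s2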
@@ -599,162 +592,29 @@ class Catalog:
  def enlist_source(
  self,
  source: str,
- ttl: int,
- force_update=False,
- skip_indexing=False,
+ update=False,
  client_config=None,
+ object_name="file",
+ skip_indexing=False,
  ) -> tuple[Listing, str]:
- if force_update and skip_indexing:
- raise ValueError(
- "Both force_update and skip_indexing flags"
- " cannot be True at the same time"
- )
-
- partial_id: Optional[int]
- partial_path: Optional[str]
+ from datachain.lib.dc import DataChain

- client_config = client_config or self.client_config
- uri, path = Client.parse_url(source)
- client = Client.get_client(source, self.cache, **client_config)
- stem = os.path.basename(os.path.normpath(path))
- prefix = (
- posixpath.dirname(path)
- if glob.has_magic(stem) or client.fs.isfile(source)
- else path
+ DataChain.from_storage(
+ source, session=self.session, update=update, object_name=object_name
  )
- storage_dataset_name = Storage.dataset_name(uri, posixpath.join(prefix, ""))
- source_metastore = self.metastore.clone(uri)
-
- columns = [
- Column("path", String),
- Column("etag", String),
- Column("version", String),
- Column("is_latest", Boolean),
- Column("last_modified", DateTime(timezone=True)),
- Column("size", Int64),
- Column("location", JSON),
- Column("source", String),
- ]
-
- if skip_indexing:
- source_metastore.create_storage_if_not_registered(uri)
- storage = source_metastore.get_storage(uri)
- source_metastore.init_partial_id(uri)
- partial_id = source_metastore.get_next_partial_id(uri)
-
- source_metastore = self.metastore.clone(uri=uri, partial_id=partial_id)
- source_metastore.init(uri)
-
- source_warehouse = self.warehouse.clone()
- dataset = self.create_dataset(
- storage_dataset_name, columns=columns, listing=True
- )
-
- return (
- Listing(storage, source_metastore, source_warehouse, client, dataset),
- path,
- )
-
- (
- storage,
- need_index,
- in_progress,
- partial_id,
- partial_path,
- ) = source_metastore.register_storage_for_indexing(uri, force_update, prefix)
- if in_progress:
- raise PendingIndexingError(f"Pending indexing operation: uri={storage.uri}")
-
- if not need_index:
- assert partial_id is not None
- assert partial_path is not None
- source_metastore = self.metastore.clone(uri=uri, partial_id=partial_id)
- source_warehouse = self.warehouse.clone()
- dataset = self.get_dataset(Storage.dataset_name(uri, partial_path))
- lst = Listing(storage, source_metastore, source_warehouse, client, dataset)
- logger.debug(
- "Using cached listing %s. Valid till: %s",
- storage.uri,
- storage.expires_to_local,
- )
- # Listing has to have correct version of data storage
- # initialized with correct Storage
-
- self.update_dataset_version_with_warehouse_info(
- dataset,
- dataset.latest_version,
- )
-
- return lst, path
-
- source_metastore.init_partial_id(uri)
- partial_id = source_metastore.get_next_partial_id(uri)
-
- source_metastore.init(uri)
- source_metastore = self.metastore.clone(uri=uri, partial_id=partial_id)

- source_warehouse = self.warehouse.clone()
-
- dataset = self.create_dataset(
- storage_dataset_name, columns=columns, listing=True
+ list_ds_name, list_uri, list_path, _ = DataChain.parse_uri(
+ source, self.session, update=update
  )

- lst = Listing(storage, source_metastore, source_warehouse, client, dataset)
-
- try:
- lst.fetch(prefix)
-
- source_metastore.mark_storage_indexed(
- storage.uri,
- StorageStatus.PARTIAL if prefix else StorageStatus.COMPLETE,
- ttl,
- prefix=prefix,
- partial_id=partial_id,
- dataset=dataset,
- )
-
- self.update_dataset_version_with_warehouse_info(
- dataset,
- dataset.latest_version,
- )
-
- except ClientError as e:
- # for handling cloud errors
- error_message = INDEX_INTERNAL_ERROR_MESSAGE
- if e.error_code in ["InvalidAccessKeyId", "SignatureDoesNotMatch"]:
- error_message = "Invalid cloud credentials"
-
- source_metastore.mark_storage_indexed(
- storage.uri,
- StorageStatus.FAILED,
- ttl,
- prefix=prefix,
- error_message=error_message,
- error_stack=traceback.format_exc(),
- dataset=dataset,
- )
- self._remove_dataset_rows_and_warehouse_info(
- dataset, dataset.latest_version
- )
- raise
- except:
- source_metastore.mark_storage_indexed(
- storage.uri,
- StorageStatus.FAILED,
- ttl,
- prefix=prefix,
- error_message=INDEX_INTERNAL_ERROR_MESSAGE,
- error_stack=traceback.format_exc(),
- dataset=dataset,
- )
- self._remove_dataset_rows_and_warehouse_info(
- dataset, dataset.latest_version
- )
- raise
-
- lst.storage = storage
+ lst = Listing(
+ self.warehouse.clone(),
+ Client.get_client(list_uri, self.cache, **self.client_config),
+ self.get_dataset(list_ds_name),
+ object_name=object_name,
+ )

- return lst, path
+ return lst, list_path

  def _remove_dataset_rows_and_warehouse_info(
  self, dataset: DatasetRecord, version: int, **kwargs
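
The rewritten enlist_source delegates indexing to the DataChain listing pipeline: from_storage materializes (or reuses) the listing dataset, parse_uri resolves the dataset name, storage URI and in-bucket path, and the slimmed-down Listing simply wraps that dataset. Condensed into straight-line form using only the calls visible in this hunk, with `catalog` standing in for self:

DataChain.from_storage(source, session=catalog.session, update=update, object_name="file")
list_ds_name, list_uri, list_path, _ = DataChain.parse_uri(source, catalog.session, update=update)
lst = Listing(
    catalog.warehouse.clone(),
    Client.get_client(list_uri, catalog.cache, **catalog.client_config),
    catalog.get_dataset(list_ds_name),
    object_name="file",
)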
@@ -770,7 +630,6 @@ class Catalog:
  def enlist_sources(
  self,
  sources: list[str],
- ttl: int,
  update: bool,
  skip_indexing=False,
  client_config=None,
@@ -780,10 +639,9 @@ class Catalog:
  for src in sources: # Opt: parallel
  listing, file_path = self.enlist_source(
  src,
- ttl,
  update,
- skip_indexing=skip_indexing,
  client_config=client_config or self.client_config,
+ skip_indexing=skip_indexing,
  )
  enlisted_sources.append((listing, file_path))

@@ -802,7 +660,6 @@ class Catalog:
  def enlist_sources_grouped(
  self,
  sources: list[str],
- ttl: int,
  update: bool,
  no_glob: bool = False,
  client_config=None,
@@ -823,7 +680,6 @@ class Catalog:
  for ds in edatachain_data:
  listing, source_path = self.enlist_source(
  ds["data-source"]["uri"],
- ttl,
  update,
  client_config=client_config,
  )
@@ -843,11 +699,13 @@ class Catalog:
  )
  indexed_sources = []
  for source in dataset_sources:
+ from datachain.lib.dc import DataChain
+
  client = self.get_client(source, **client_config)
  uri = client.uri
- ms = self.metastore.clone(uri, None)
  st = self.warehouse.clone()
- listing = Listing(None, ms, st, client, None)
+ dataset_name, _, _, _ = DataChain.parse_uri(uri, self.session)
+ listing = Listing(st, client, self.get_dataset(dataset_name))
  rows = DatasetQuery(
  name=dataset.name, version=ds_version, catalog=self
  ).to_db_records()
@@ -864,7 +722,7 @@ class Catalog:
  enlisted_sources.append((False, True, indexed_sources))
  else:
  listing, source_path = self.enlist_source(
- src, ttl, update, client_config=client_config
+ src, update, client_config=client_config
  )
  enlisted_sources.append((False, False, (listing, source_path)))

@@ -989,13 +847,6 @@ class Catalog:
  c.name: c.type.to_dict() for c in columns if isinstance(c.type, SQLType)
  }

- job_id = job_id or os.getenv("DATACHAIN_JOB_ID")
- if not job_id:
- from datachain.query.session import Session
-
- session = Session.get(catalog=self)
- job_id = session.job_id
-
  dataset = self.metastore.create_dataset_version(
  dataset,
  version,
@@ -1122,19 +973,16 @@ class Catalog:
  raise ValueError("Sources needs to be non empty list")

  from datachain.lib.dc import DataChain
- from datachain.query.session import Session
-
- session = Session.get(catalog=self, client_config=client_config)

  chains = []
  for source in sources:
  if source.startswith(DATASET_PREFIX):
  dc = DataChain.from_dataset(
- source[len(DATASET_PREFIX) :], session=session
+ source[len(DATASET_PREFIX) :], session=self.session
  )
  else:
  dc = DataChain.from_storage(
- source, session=session, recursive=recursive
+ source, session=self.session, recursive=recursive
  )

  chains.append(dc)
@@ -1218,6 +1066,7 @@ class Catalog:
  preview=dataset_version.preview,
  job_id=dataset_version.job_id,
  )
+
  # to avoid re-creating rows table, we are just renaming it for a new version
  # of target dataset
  self.warehouse.rename_dataset_table(
@@ -1245,17 +1094,12 @@ class Catalog:
  def get_dataset(self, name: str) -> DatasetRecord:
  return self.metastore.get_dataset(name)

- def get_remote_dataset(self, name: str, *, remote_config=None) -> DatasetRecord:
- remote_config = remote_config or get_remote_config(
- read_config(DataChainDir.find().root), remote=""
- )
- studio_client = StudioClient(
- remote_config["url"], remote_config["username"], remote_config["token"]
- )
+ def get_remote_dataset(self, name: str) -> DatasetRecord:
+ studio_client = StudioClient()

  info_response = studio_client.dataset_info(name)
  if not info_response.ok:
- _raise_remote_error(info_response.message)
+ raise_remote_error(info_response.message)

  dataset_info = info_response.data
  assert isinstance(dataset_info, dict)
@@ -1312,6 +1156,20 @@ class Catalog:
  for v in d.versions
  )

+ def listings(self):
+ """
+ Returns list of ListingInfo objects which are representing specific
+ storage listing datasets
+ """
+ from datachain.lib.listing import is_listing_dataset
+ from datachain.lib.listing_info import ListingInfo
+
+ return [
+ ListingInfo.from_models(d, v, j)
+ for d, v, j in self.list_datasets_versions(include_listing=True)
+ if is_listing_dataset(d.name)
+ ]
+
  def ls_dataset_rows(
  self, name: str, version: int, offset=None, limit=None
  ) -> list[dict]:
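
The new Catalog.listings() gives programmatic access to the per-storage listing datasets created by from_storage, filtering list_datasets_versions() through is_listing_dataset. A small usage sketch; printing is illustrative only and `catalog` is a placeholder Catalog instance:

for info in catalog.listings():
    # each item is a ListingInfo built from a listing dataset, its version and job
    print(info)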
@@ -1325,8 +1183,6 @@ class Catalog:
  if offset:
  q = q.offset(offset)

- q = q.order_by("sys__id")
-
  return q.to_db_records()

  def signed_url(self, source: str, path: str, client_config=None) -> str:
@@ -1438,7 +1294,6 @@ class Catalog:
  self,
  sources: list[str],
  fields: Iterable[str],
- ttl=TTL_INT,
  update=False,
  skip_indexing=False,
  *,
@@ -1446,7 +1301,6 @@ class Catalog:
  ) -> Iterator[tuple[DataSource, Iterable[tuple]]]:
  data_sources = self.enlist_sources(
  sources,
- ttl,
  update,
  skip_indexing=skip_indexing,
  client_config=client_config or self.client_config,
@@ -1465,7 +1319,6 @@ class Catalog:
  edatachain_file: Optional[str] = None,
  *,
  client_config=None,
- remote_config=None,
  ) -> None:
  # TODO add progress bar https://github.com/iterative/dvcx/issues/750
  # TODO copy correct remote dates https://github.com/iterative/dvcx/issues/new
@@ -1487,13 +1340,8 @@ class Catalog:
  raise ValueError("Please provide output directory for instantiation")

  client_config = client_config or self.client_config
- remote_config = remote_config or get_remote_config(
- read_config(DataChainDir.find().root), remote=""
- )

- studio_client = StudioClient(
- remote_config["url"], remote_config["username"], remote_config["token"]
- )
+ studio_client = StudioClient()

  try:
  remote_dataset_name, version = parse_dataset_uri(dataset_uri)
@@ -1507,9 +1355,7 @@ class Catalog:
  # we will create new one if it doesn't exist
  pass

- remote_dataset = self.get_remote_dataset(
- remote_dataset_name, remote_config=remote_config
- )
+ remote_dataset = self.get_remote_dataset(remote_dataset_name)
  # if version is not specified in uri, take the latest one
  if not version:
  version = remote_dataset.latest_version
@@ -1534,7 +1380,7 @@ class Catalog:

  stats_response = studio_client.dataset_stats(remote_dataset_name, version)
  if not stats_response.ok:
- _raise_remote_error(stats_response.message)
+ raise_remote_error(stats_response.message)
  dataset_stats = stats_response.data

  dataset_save_progress_bar = tqdm(
@@ -1566,7 +1412,7 @@ class Catalog:
  remote_dataset_name, version
  )
  if not export_response.ok:
- _raise_remote_error(export_response.message)
+ raise_remote_error(export_response.message)

  signed_urls = export_response.data

@@ -1580,7 +1426,6 @@ class Catalog:
  rows_fetcher = DatasetRowsFetcher(
  metastore,
  warehouse,
- remote_config,
  dataset.name,
  version,
  schema,
@@ -1623,7 +1468,6 @@ class Catalog:
  no_cp: bool = False,
  edatachain: bool = False,
  edatachain_file: Optional[str] = None,
- ttl: int = TTL_INT,
  *,
  client_config=None,
  ) -> None:
@@ -1645,7 +1489,6 @@ class Catalog:
  edatachain_only=no_cp,
  no_edatachain_file=not edatachain,
  edatachain_file=edatachain_file,
- ttl=ttl,
  client_config=client_config,
  )
  else:
@@ -1653,7 +1496,6 @@ class Catalog:
  # it needs to be done here
  self.enlist_sources(
  sources,
- ttl,
  update,
  client_config=client_config or self.client_config,
  )
@@ -1713,7 +1555,6 @@ class Catalog:
  edatachain_only: bool = False,
  no_edatachain_file: bool = False,
  no_glob: bool = False,
- ttl: int = TTL_INT,
  *,
  client_config=None,
  ) -> list[dict[str, Any]]:
@@ -1725,7 +1566,6 @@ class Catalog:
  client_config = client_config or self.client_config
  node_groups = self.enlist_sources_grouped(
  sources,
- ttl,
  update,
  no_glob,
  client_config=client_config,
@@ -1784,14 +1624,12 @@ class Catalog:
  self,
  sources,
  depth=0,
- ttl=TTL_INT,
  update=False,
  *,
  client_config=None,
  ) -> Iterable[tuple[str, float]]:
  sources = self.enlist_sources(
  sources,
- ttl,
  update,
  client_config=client_config or self.client_config,
  )
@@ -1812,7 +1650,6 @@ class Catalog:
  def find(
  self,
  sources,
- ttl=TTL_INT,
  update=False,
  names=None,
  inames=None,
@@ -1826,7 +1663,6 @@ class Catalog:
  ) -> Iterator[str]:
  sources = self.enlist_sources(
  sources,
- ttl,
  update,
  client_config=client_config or self.client_config,
  )
@@ -1862,7 +1698,6 @@ class Catalog:
  def index(
  self,
  sources,
- ttl=TTL_INT,
  update=False,
  *,
  client_config=None,
@@ -1888,7 +1723,6 @@ class Catalog:

  self.enlist_sources(
  non_root_sources,
- ttl,
  update,
  client_config=client_config,
  only_index=True,