esgpull 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. esgpull/__init__.py +12 -0
  2. esgpull/auth.py +181 -0
  3. esgpull/cli/__init__.py +73 -0
  4. esgpull/cli/add.py +103 -0
  5. esgpull/cli/autoremove.py +38 -0
  6. esgpull/cli/config.py +116 -0
  7. esgpull/cli/convert.py +285 -0
  8. esgpull/cli/decorators.py +342 -0
  9. esgpull/cli/download.py +74 -0
  10. esgpull/cli/facet.py +23 -0
  11. esgpull/cli/get.py +28 -0
  12. esgpull/cli/install.py +85 -0
  13. esgpull/cli/link.py +105 -0
  14. esgpull/cli/login.py +56 -0
  15. esgpull/cli/remove.py +73 -0
  16. esgpull/cli/retry.py +43 -0
  17. esgpull/cli/search.py +201 -0
  18. esgpull/cli/self.py +238 -0
  19. esgpull/cli/show.py +66 -0
  20. esgpull/cli/status.py +67 -0
  21. esgpull/cli/track.py +87 -0
  22. esgpull/cli/update.py +184 -0
  23. esgpull/cli/utils.py +247 -0
  24. esgpull/config.py +410 -0
  25. esgpull/constants.py +56 -0
  26. esgpull/context.py +724 -0
  27. esgpull/database.py +161 -0
  28. esgpull/download.py +162 -0
  29. esgpull/esgpull.py +447 -0
  30. esgpull/exceptions.py +167 -0
  31. esgpull/fs.py +253 -0
  32. esgpull/graph.py +460 -0
  33. esgpull/install_config.py +185 -0
  34. esgpull/migrations/README +1 -0
  35. esgpull/migrations/env.py +82 -0
  36. esgpull/migrations/script.py.mako +24 -0
  37. esgpull/migrations/versions/0.3.0_update_tables.py +170 -0
  38. esgpull/migrations/versions/0.3.1_update_tables.py +25 -0
  39. esgpull/migrations/versions/0.3.2_update_tables.py +26 -0
  40. esgpull/migrations/versions/0.3.3_update_tables.py +25 -0
  41. esgpull/migrations/versions/0.3.4_update_tables.py +25 -0
  42. esgpull/migrations/versions/0.3.5_update_tables.py +25 -0
  43. esgpull/migrations/versions/0.3.6_update_tables.py +26 -0
  44. esgpull/migrations/versions/0.3.7_update_tables.py +26 -0
  45. esgpull/migrations/versions/0.3.8_update_tables.py +26 -0
  46. esgpull/migrations/versions/0.4.0_update_tables.py +25 -0
  47. esgpull/migrations/versions/0.5.0_update_tables.py +26 -0
  48. esgpull/migrations/versions/0.5.1_update_tables.py +26 -0
  49. esgpull/migrations/versions/0.5.2_update_tables.py +25 -0
  50. esgpull/migrations/versions/0.5.3_update_tables.py +26 -0
  51. esgpull/migrations/versions/0.5.4_update_tables.py +25 -0
  52. esgpull/migrations/versions/0.5.5_update_tables.py +25 -0
  53. esgpull/migrations/versions/0.6.0_update_tables.py +25 -0
  54. esgpull/migrations/versions/0.6.1_update_tables.py +25 -0
  55. esgpull/migrations/versions/0.6.2_update_tables.py +25 -0
  56. esgpull/migrations/versions/0.6.3_update_tables.py +25 -0
  57. esgpull/models/__init__.py +31 -0
  58. esgpull/models/base.py +50 -0
  59. esgpull/models/dataset.py +34 -0
  60. esgpull/models/facet.py +18 -0
  61. esgpull/models/file.py +65 -0
  62. esgpull/models/options.py +164 -0
  63. esgpull/models/query.py +481 -0
  64. esgpull/models/selection.py +201 -0
  65. esgpull/models/sql.py +258 -0
  66. esgpull/models/synda_file.py +85 -0
  67. esgpull/models/tag.py +19 -0
  68. esgpull/models/utils.py +54 -0
  69. esgpull/presets.py +13 -0
  70. esgpull/processor.py +172 -0
  71. esgpull/py.typed +0 -0
  72. esgpull/result.py +53 -0
  73. esgpull/tui.py +346 -0
  74. esgpull/utils.py +54 -0
  75. esgpull/version.py +1 -0
  76. esgpull-0.6.3.dist-info/METADATA +110 -0
  77. esgpull-0.6.3.dist-info/RECORD +80 -0
  78. esgpull-0.6.3.dist-info/WHEEL +4 -0
  79. esgpull-0.6.3.dist-info/entry_points.txt +3 -0
  80. esgpull-0.6.3.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,481 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator, MutableMapping, Sequence
4
+ from typing import Any, Literal
5
+
6
+ import sqlalchemy as sa
7
+ from rich.console import Console, ConsoleOptions
8
+ from rich.table import Table
9
+ from rich.text import Text
10
+ from rich.tree import Tree
11
+ from sqlalchemy.orm import Mapped, mapped_column, object_session, relationship
12
+ from typing_extensions import NotRequired, TypedDict
13
+
14
+ from esgpull.exceptions import UntrackableQuery
15
+ from esgpull.models.base import Base, Sha
16
+ from esgpull.models.file import FileDict, FileStatus
17
+ from esgpull.models.options import Options
18
+ from esgpull.models.selection import FacetValues, Selection
19
+ from esgpull.models.tag import Tag
20
+ from esgpull.models.utils import (
21
+ find_int,
22
+ find_str,
23
+ get_local_path,
24
+ rich_measure_impl,
25
+ short_sha,
26
+ )
27
+ from esgpull.utils import format_size
28
+
29
# Association table for the many-to-many link between ``query`` and ``file``
# rows; both columns together form the primary key.
query_file_proxy = sa.Table(
    "query_file",
    Base.metadata,
    sa.Column(
        "query_sha",
        Sha,
        sa.ForeignKey("query.sha"),
        primary_key=True,
    ),
    sa.Column(
        "file_sha",
        Sha,
        sa.ForeignKey("file.sha"),
        primary_key=True,
    ),
)

# Association table for the many-to-many link between ``query`` and ``tag``
# rows; both columns together form the primary key.
query_tag_proxy = sa.Table(
    "query_tag",
    Base.metadata,
    sa.Column(
        "query_sha",
        Sha,
        sa.ForeignKey("query.sha"),
        primary_key=True,
    ),
    sa.Column(
        "tag_sha",
        Sha,
        sa.ForeignKey("tag.sha"),
        primary_key=True,
    ),
)
41
+
42
+
43
class File(Base):
    """ORM model mapped to the ``file`` table.

    Instances can be built from a ``FileDict`` (``fromdict``), from a raw
    search-result mapping (``serialize``), exported back to a ``FileDict``
    (``asdict``) and duplicated (``clone``).
    """

    __tablename__ = "file"

    file_id: Mapped[str] = mapped_column(sa.String(255), unique=True)
    dataset_id: Mapped[str] = mapped_column(sa.String(255))
    master_id: Mapped[str] = mapped_column(sa.String(255))
    url: Mapped[str] = mapped_column(sa.String(255))
    version: Mapped[str] = mapped_column(sa.String(16))
    filename: Mapped[str] = mapped_column(sa.String(255))
    local_path: Mapped[str] = mapped_column(sa.String(255))
    data_node: Mapped[str] = mapped_column(sa.String(40))
    checksum: Mapped[str] = mapped_column(sa.String(64))
    checksum_type: Mapped[str] = mapped_column(sa.String(16))
    size: Mapped[int] = mapped_column(sa.BigInteger)
    status: Mapped[FileStatus] = mapped_column(
        sa.Enum(FileStatus), default=FileStatus.New
    )
    # Other side of ``Query.files`` (linked through ``query_file_proxy``).
    queries: Mapped[list[Query]] = relationship(
        secondary=query_file_proxy,
        default_factory=list,
        back_populates="files",
        repr=False,
    )

    def _as_bytes(self) -> bytes:
        """Return the encoded ``(file_id, checksum)`` tuple.

        NOTE(review): presumably this is what ``Base.compute_sha`` hashes;
        ``Base`` is not visible from here, confirm.
        """
        self_tuple = (self.file_id, self.checksum)
        return str(self_tuple).encode()

    def compute_sha(self) -> None:
        """Delegate sha computation to ``Base.compute_sha``."""
        Base.compute_sha(self)

    @classmethod
    def fromdict(cls, source: FileDict) -> File:
        """Build a ``File`` from a ``FileDict``.

        All keys except ``"status"`` are required. When ``"status"`` is
        present it is resolved first as a ``FileStatus`` member name (the
        form written by ``asdict``), then as a ``FileStatus`` value.

        Raises:
            KeyError: if a required key is missing from ``source``.
            ValueError: if ``"status"`` matches neither a member name nor
                a member value of ``FileStatus``.
        """
        result = cls(
            file_id=source["file_id"],
            dataset_id=source["dataset_id"],
            master_id=source["master_id"],
            url=source["url"],
            version=source["version"],
            filename=source["filename"],
            local_path=source["local_path"],
            data_node=source["data_node"],
            checksum=source["checksum"],
            checksum_type=source["checksum_type"],
            size=source["size"],
        )
        if "status" in source:
            # The original read source.get("source"), a key that is never
            # part of a FileDict, so the status could not be restored.
            status = source["status"]
            if isinstance(status, FileStatus):
                result.status = status
            else:
                try:
                    # asdict() stores ``status.name``: look up by name first.
                    result.status = FileStatus[status]
                except KeyError:
                    # Fall back to lookup by enum value.
                    result.status = FileStatus(status)
        return result

    @classmethod
    def serialize(cls, source: dict) -> File:
        """Build a ``File`` (with its sha computed) from a raw result mapping.

        Reads the ``dataset_id``, ``title``, ``url``, ``data_node``,
        ``checksum``, ``checksum_type`` and ``size`` entries of ``source``
        through ``find_str`` / ``find_int``. For ``dataset_id`` and ``url``
        only the part before the first ``"|"`` is kept.
        """
        dataset_id = find_str(source["dataset_id"]).partition("|")[0]
        filename = find_str(source["title"])
        url = find_str(source["url"]).partition("|")[0]
        url = url.replace("http://", "https://")  # TODO: is this always true ?
        data_node = find_str(source["data_node"])
        checksum = find_str(source["checksum"])
        checksum_type = find_str(source["checksum_type"])
        size = find_int(source["size"])
        file_id = ".".join([dataset_id, filename])
        # The last dot-separated component of dataset_id is the version.
        dataset_master, version = dataset_id.rsplit(".", 1)  # remove version
        master_id = ".".join([dataset_master, filename])
        local_path = get_local_path(source, version)
        result = cls.fromdict(
            {
                "file_id": file_id,
                "dataset_id": dataset_id,
                "master_id": master_id,
                "url": url,
                "version": version,
                "filename": filename,
                "local_path": local_path,
                "data_node": data_node,
                "checksum": checksum,
                "checksum_type": checksum_type,
                "size": size,
            }
        )
        result.compute_sha()
        return result

    def asdict(self) -> FileDict:
        """Export this file as a ``FileDict``; ``status`` is the member name."""
        return FileDict(
            file_id=self.file_id,
            dataset_id=self.dataset_id,
            master_id=self.master_id,
            url=self.url,
            version=self.version,
            filename=self.filename,
            local_path=self.local_path,
            data_node=self.data_node,
            checksum=self.checksum,
            checksum_type=self.checksum_type,
            size=self.size,
            status=self.status.name,
        )

    def clone(self, compute_sha: bool = True) -> File:
        """Return a copy built via ``asdict`` / ``fromdict``.

        The copy's sha is computed unless ``compute_sha`` is false.
        """
        result = File.fromdict(self.asdict())
        if compute_sha:
            result.compute_sha()
        return result
146
+
147
+
148
class QueryDict(TypedDict):
    """Plain-dict form of a ``Query``; every key is optional."""

    # A single tag name, or a list of tag names.
    tags: NotRequired[str | list[str]]
    # Only ever written as ``True``; the key is omitted otherwise.
    tracked: NotRequired[Literal[True]]
    require: NotRequired[str]
    options: NotRequired[MutableMapping[str, bool | None]]
    selection: NotRequired[MutableMapping[str, FacetValues]]
    files: NotRequired[list[FileDict]]
155
+
156
+
157
class Query(Base):
    """ORM model mapped to the ``query`` table.

    A query groups tags, an ``Options`` row, a ``Selection`` row and the
    files linked to it through ``query_file_proxy``. ``require`` optionally
    references another query.
    """

    __tablename__ = "query"

    tags: Mapped[list[Tag]] = relationship(
        secondary=query_tag_proxy,
        default_factory=list,
    )
    tracked: Mapped[bool] = mapped_column(default=False)
    require: Mapped[str | None] = mapped_column(Sha, default=None)
    options_sha: Mapped[str] = mapped_column(
        Sha,
        sa.ForeignKey("options.sha"),
        init=False,
    )
    options: Mapped[Options] = relationship(default_factory=Options)
    # Annotated as ``str`` like every other ``Sha`` column of this model
    # (the original ``Mapped[int]`` did not match the column type).
    selection_sha: Mapped[str] = mapped_column(
        Sha,
        sa.ForeignKey("selection.sha"),
        init=False,
    )
    selection: Mapped[Selection] = relationship(default_factory=Selection)
    # Other side of ``File.queries`` (linked through ``query_file_proxy``).
    files: Mapped[list[File]] = relationship(
        secondary=query_file_proxy,
        default_factory=list,
        back_populates="queries",
        repr=False,
    )

    def __init__(
        self,
        *,
        tracked: bool = False,
        require: str | None = None,
        tags: Sequence[Tag | str] | Tag | str | None = None,
        options: Options | MutableMapping[str, bool | None] | None = None,
        selection: Selection | MutableMapping[str, FacetValues] | None = None,
        files: list[FileDict] | None = None,
    ) -> None:
        """Build a query from model instances or their plain-dict forms.

        Args:
            tracked: initial value of the ``tracked`` flag.
            require: value stored in ``require``.
            tags: one tag or a sequence of tags; strings are wrapped in
                ``Tag(name=...)``. Items that are neither ``str`` nor
                ``Tag`` are ignored.
            options: an ``Options`` instance, or a ``dict`` expanded into
                ``Options(**options)``; defaults to ``Options()``.
            selection: a ``Selection`` instance, or a ``dict`` expanded
                into ``Selection(**selection)``; defaults to
                ``Selection()``.
            files: file dicts converted with ``File.fromdict``.
        """
        self.tracked = tracked
        self.require = require
        self.tags = []
        if tags is not None:
            if isinstance(tags, (str, Tag)):
                tags = [tags]
            for tag in tags:
                if isinstance(tag, str):
                    self.tags.append(Tag(name=tag))
                elif isinstance(tag, Tag):
                    self.tags.append(tag)
        if selection is None:
            self.selection = Selection()
        elif isinstance(selection, dict):
            self.selection = Selection(**selection)
        elif isinstance(selection, Selection):
            self.selection = selection
        if options is None:
            self.options = Options()
        elif isinstance(options, dict):
            self.options = Options(**options)
        elif isinstance(options, Options):
            self.options = options
        self.files = []
        if files is not None:
            for file in files:
                self.files.append(File.fromdict(file))

    @property
    def has_files(self) -> bool:
        """Whether at least one file is linked to this query.

        Uses a COUNT statement when the instance is attached to a session,
        and the in-memory ``files`` list otherwise.
        """
        stmt: sa.Select[tuple[int]] = (
            sa.select(sa.func.count("*"))
            .join_from(query_file_proxy, File)
            .where(query_file_proxy.c.query_sha == self.sha)
        )
        session = object_session(self)
        if session is None:
            return bool(self.files)
        else:
            nb_files = session.scalar(stmt)
            return nb_files is not None and nb_files > 0

    def files_count_size(self, *status: FileStatus) -> tuple[int, int]:
        """Return ``(count, total size)`` of the linked files.

        When ``status`` values are given, only files whose status is one
        of them are counted. A missing sum is reported as ``0``.
        """
        stmt: sa.Select[tuple[int, int | None]] = (
            sa.select(sa.func.count("*"), sa.func.sum(File.size))
            .join_from(query_file_proxy, File)
            .where(query_file_proxy.c.query_sha == self.sha)
        )
        session = object_session(self)
        if session is None:
            # Detached instance: compute from the in-memory list.
            if status:
                files = [file for file in self.files if file.status in status]
            else:
                files = [file for file in self.files]
            count: int = len(files)
            size: int | None = sum([file.size for file in files])
        else:
            if status:
                stmt = stmt.where(File.status.in_(status))
            count, size = session.execute(stmt).all()[0]
        return count, size or 0

    def _as_bytes(self) -> bytes:
        """Return the encoded ``(require, options.sha, selection.sha)`` tuple."""
        self_tuple = (self.require, self.options.sha, self.selection.sha)
        return str(self_tuple).encode()

    def compute_sha(self) -> None:
        """Compute the shas of tags, options and selection, then this query's."""
        for tag in self.tags:
            tag.compute_sha()
        self.options.compute_sha()
        self.selection.compute_sha()
        super().compute_sha()

    @property
    def tag_name(self) -> str | None:
        """Name of the tag when the query has exactly one, else ``None``."""
        if len(self.tags) == 1:
            return self.tags[0].name
        else:
            return None

    @property
    def name(self) -> str:
        """Short display name derived from the sha.

        A sha containing ``":"`` yields the part before the first colon;
        any other sha is shortened with ``short_sha``. The sha is computed
        first when it is still ``None``.
        """
        # TODO: make these 2 lines useless
        if self.sha is None:
            self.compute_sha()
        elif ":" in self.sha:
            return self.sha.split(":")[0]
        return short_sha(self.sha)

    @property
    def rich_name(self) -> str:
        """``name`` wrapped in bold green rich markup."""
        return f"[b green]{self.name}[/]"

    def items(self, include_name: bool = False) -> Iterator[tuple[str, Any]]:
        """Yield ``(key, value)`` pairs for the set / truthy attributes."""
        if include_name:
            yield "name", self.name
        if self.tags:
            yield "tags", [tag.name for tag in self.tags]
        if self.tracked:
            yield "tracked", self.tracked
        if self.require is not None:
            yield "require", short_sha(self.require)
        if self.options:
            yield "options", self.options
        if self.selection:
            yield "selection", self.selection

    def asdict(self) -> QueryDict:
        """Export as a ``QueryDict``, omitting unset / falsy attributes.

        A single tag is stored as a string, several tags as a list. Files
        are not included.
        """
        result: QueryDict = {}
        if len(self.tags) > 1:
            result["tags"] = [tag.name for tag in self.tags]
        elif len(self.tags) == 1:
            result["tags"] = self.tags[0].name
        if self.tracked:
            result["tracked"] = self.tracked
        if self.require is not None:
            result["require"] = self.require
        if self.options:
            result["options"] = self.options.asdict()
        if self.selection:
            result["selection"] = self.selection.asdict()
        return result

    def clone(self, compute_sha: bool = True) -> Query:
        """Return a copy built from ``asdict`` sharing the same ``File`` objects.

        A ``"LEGACY"`` sha is always preserved; otherwise the sha is either
        recomputed (``compute_sha=True``) or copied from ``self``.
        """
        instance = Query(**self.asdict())
        instance.files = list(self.files)
        if self.sha == "LEGACY":
            instance.sha = "LEGACY"
        elif compute_sha:
            instance.compute_sha()
        else:
            instance.sha = self.sha
        return instance

    def get_tag(self, name: str) -> Tag | None:
        """Return the first tag called ``name``, or ``None``."""
        result: Tag | None = None
        for tag in self.tags:
            if tag.name == name:
                result = tag
                break
        return result

    def add_tag(
        self,
        name: str,
        description: str | None = None,
        compute_sha: bool = True,
    ) -> None:
        """Append a new tag.

        Raises:
            ValueError: if a tag with this name is already present.
        """
        if self.get_tag(name) is not None:
            raise ValueError(f"Tag {name!r} already exists.")
        tag = Tag(name=name, description=description)
        if compute_sha:
            tag.compute_sha()
        self.tags.append(tag)

    def update_tag(self, name: str, description: str | None) -> None:
        """Replace the description of an existing tag.

        Raises:
            ValueError: if no tag with this name exists.
        """
        tag = self.get_tag(name)
        if tag is None:
            raise ValueError(f"Tag {name!r} does not exist.")
        else:
            tag.description = description

    def remove_tag(self, name: str) -> bool:
        """Remove the tag called ``name``; return whether one was removed."""
        tag = self.get_tag(name)
        if tag is not None:
            self.tags.remove(tag)
        return tag is not None

    def no_require(self) -> Query:
        """Return a clone flagged so ``_rich_tree`` omits the ``require`` row."""
        cl = self.clone(compute_sha=False)
        cl._rich_no_require = True  # type: ignore [attr-defined]
        return cl

    def __lshift__(self, child: Query) -> Query:
        """Return a clone of ``self`` with ``child`` merged into it.

        Tags missing from the clone are appended, the child's options and
        selection entries are set on the clone, ``tracked`` is taken from
        the child, the sha is recomputed, and child files whose sha is not
        already present are appended.
        """
        result = self.clone(compute_sha=False)
        # if self.name != child.require:
        #     raise ValueError(f"{self.name} is not required by {child.name}")
        for tag in child.tags:
            if tag not in result.tags:
                result.tags.append(tag)
        for name, option in child.options.items():
            setattr(result.options, name, option)
        for name, values in child.selection.items():
            result.selection[name] = values
        result.tracked = child.tracked
        result.compute_sha()
        files_shas = {f.sha for f in result.files}
        for file in child.files:
            if file.sha not in files_shas:
                result.files.append(file)
        return result

    @classmethod
    def _from_detailed_dict(cls, source: dict) -> Query:
        """Build a tracked query from a facet mapping.

        Entries whose name the selection rejects with ``KeyError`` are
        skipped on purpose.
        """
        result = cls(tracked=True)
        for name, values in source.items():
            try:
                result.selection[name] = values
            except KeyError:
                ...
        result.compute_sha()
        return result

    def __rich_repr__(self) -> Iterator:
        yield from self.items(include_name=True)

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        items = [f"{k}={v}" for k, v in self.items(include_name=True)]
        return f"{cls_name}(" + ", ".join(items) + ")"

    __rich_measure__ = rich_measure_impl

    def _rich_tree(self) -> Tree:
        """Render the query as a rich ``Tree``: title plus a grid of details."""
        title = Text.from_markup(self.rich_name)
        if not self.tracked:
            title.append(" untracked", style="i red")
        contents = Table.grid(padding=(0, 1))
        if not hasattr(self, "_rich_no_require") and self.require is not None:
            # A 40-character value is displayed shortened; any other value
            # is displayed verbatim.
            if len(self.require) == 40:
                require = Text(short_sha(self.require), style="i green")
            else:
                if hasattr(self, "_unknown_require"):
                    require = Text(f"{self.require} [?]", style="red")
                else:
                    require = Text(self.require, style="magenta")
            contents.add_row("require:", require)
        if self.tags:
            text = Text()
            text.append("tags", style="magenta")
            text.append(":")
            contents.add_row(text, ", ".join([tag.name for tag in self.tags]))
        for name, option in self.options.items():
            text = Text()
            text.append(name, style="yellow")
            text.append(":")
            contents.add_row(text, str(option.value[1]))
        for name, values in self.selection.items():
            text = Text()
            # The "query" facet is shown without a label.
            if name != "query":
                text.append(name, style="blue")
                text.append(":")
            if len(values) == 1:
                values_str = values[0]
            else:
                values_str = ", ".join(values)
            contents.add_row(text, values_str)
        if self.has_files:
            count_ondisk, size_ondisk = self.files_count_size(FileStatus.Done)
            count_total, size_total = self.files_count_size()
            sizes = f"{format_size(size_ondisk)} / {format_size(size_total)}"
            lens = f"{count_ondisk}/{count_total}"
            contents.add_row(
                "files:", Text(f"{sizes} [{lens}]", style="magenta")
            )
        tree = Tree("", hide_root=True, guide_style="dim").add(title)
        if contents.row_count:
            tree.add(contents)
        return tree

    def __rich_console__(
        self,
        console: Console,
        opts: ConsoleOptions,
    ) -> Iterator[Tree]:
        yield self._rich_tree()

    def trackable(self) -> bool:
        """Whether the query's options allow tracking."""
        return self.options.trackable()

    def track(
        self,
        options: Options | None = None,
        compute_sha: bool = True,
    ) -> None:
        """Mark the query as tracked.

        When ``options`` is given it is applied as defaults to the query's
        options; otherwise the current options must already be trackable.

        Raises:
            UntrackableQuery: if no ``options`` are given and the current
                options are not trackable.
        """
        if options is not None:
            self.options.apply_defaults(options)
        elif not self.options.trackable():
            raise UntrackableQuery(self.name)
        self.tracked = True
        if compute_sha:
            self.compute_sha()

    def untrack(self) -> None:
        """Clear the ``tracked`` flag."""
        self.tracked = False
476
+
477
+
478
# Module-level query whose sha is pinned to the literal "LEGACY".
LegacyQuery = Query()
# Run the real computation once so the empty selection/options/... get shas.
LegacyQuery.compute_sha()  # compute shas for empty selection/options/...
LegacyQuery.sha = "LEGACY"
# Shadow the method on this instance so later calls keep the "LEGACY" sha.
LegacyQuery.compute_sha = lambda: None  # type: ignore [assignment]
@@ -0,0 +1,201 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator, MutableMapping
4
+ from typing import ClassVar, TypeAlias
5
+
6
+ import sqlalchemy as sa
7
+ from rich.pretty import pretty_repr
8
+ from sqlalchemy.orm import Mapped, relationship
9
+
10
+ from esgpull.exceptions import AlreadySetFacet, DuplicateFacet
11
+ from esgpull.models.base import Base, Sha
12
+ from esgpull.models.facet import Facet
13
+
14
# A facet can be given as a single value or as a list of values.
FacetValues: TypeAlias = str | list[str]

# Association table for the many-to-many link between ``selection`` and
# ``facet`` rows; both columns together form the primary key.
selection_facet_proxy = sa.Table(
    "selection_facet",
    Base.metadata,
    sa.Column(
        "selection_sha",
        Sha,
        sa.ForeignKey("selection.sha"),
        primary_key=True,
    ),
    sa.Column(
        "facet_sha",
        Sha,
        sa.ForeignKey("facet.sha"),
        primary_key=True,
    ),
)
24
+
25
+
26
def opposite(facet_name: str) -> str:
    """Return the negated counterpart of a facet name.

    A name starting with ``"!"`` loses that prefix; any other name gains
    it. ``facet_name`` must be non-empty.
    """
    is_negated = facet_name[0] == "!"
    return facet_name[1:] if is_negated else f"!{facet_name}"
31
+
32
+
33
class Selection(Base):
    """ORM model mapped to the ``selection`` table: a set of facet values.

    Facet names are registered on the class with ``configure``, which
    creates one property per name (and per negated ``"!name"``). Values are
    stored as ``Facet`` rows in ``_facets``; ``_facet_map_`` is a
    per-instance index mapping each facet name to the positions of its
    facets in ``_facets``.
    """

    __tablename__ = "selection"
    # Names (plain and "!"-negated) that currently have a generated property.
    _facet_names: ClassVar[set[str]] = set()

    _facets: Mapped[list[Facet]] = relationship(
        secondary=selection_facet_proxy,
        default_factory=list,
    )

    @classmethod
    def _add_property(cls, name: str) -> None:
        """Install a property called ``name`` reading/writing that facet."""

        def getter(self: Selection) -> list[str]:
            # Sorted values of every facet registered under ``name``.
            indices = self._ensure_facet_map().get(name, set())
            return sorted([self._facets[idx].value for idx in indices])

        def setter(self: Selection, values: FacetValues) -> None:
            facet_map = self._ensure_facet_map()
            other = opposite(name)
            # A facet can be set only once, and not together with its
            # negated counterpart.
            if name in facet_map:
                raise AlreadySetFacet(name, ", ".join(self[name]))
            elif other in facet_map:
                raise AlreadySetFacet(other, ", ".join(self[other]))
            facet_map_name = set()
            if isinstance(values, str):
                iter_values = enumerate([values])
            else:
                iter_values = enumerate(values)
            # New facets are appended, so their indices start at the
            # current length of ``_facets``.
            offset = len(self._facets)
            for i, value in iter_values:
                facet = Facet(name=name, value=value)
                if facet in self._facets:
                    raise DuplicateFacet(
                        facet.name,
                        facet.value,
                        pretty_repr(self),
                    )
                self._facets.append(facet)
                facet_map_name.add(offset + i)
            facet_map[name] = facet_map_name

        setattr(cls, name, property(getter, setter))

    @classmethod
    def configure(cls, *names: str, replace: bool = True) -> None:
        """Register facet names, each along with its ``"!name"`` negation.

        With ``replace=True`` every previously generated property is
        removed first; otherwise only names not yet registered are added.
        """
        nameset = set(names) | {f"!{name}" for name in names}
        if replace:
            for name in cls._facet_names:
                delattr(cls, name)
            new_names = nameset
            cls._facet_names = nameset
        else:
            new_names = nameset - cls._facet_names
            cls._facet_names |= new_names
        for name in new_names:
            cls._add_property(name)

    def __init__(
        self,
        facets: list[Facet] | None = None,
        **kwargs: FacetValues,
    ):
        """Build a selection from existing facets and/or ``name=values`` pairs.

        Raises:
            KeyError: if a keyword is not a registered facet name.
        """
        if facets is None:
            self._facets = []
        else:
            self._facets = facets
        self._init_facet_map()
        for name, values in kwargs.items():
            self[name] = values

    def _init_facet_map(self) -> None:
        """(Re)build ``_facet_map_`` from the current ``_facets`` list."""
        self._facet_map_: dict[str, set[int]] = {}
        for i, facet in enumerate(self._facets):
            self._facet_map_.setdefault(facet.name, set())
            self._facet_map_[facet.name].add(i)

    def _ensure_facet_map(self) -> dict[str, set[int]]:
        """Return ``_facet_map_``, building it first when it is missing.

        The map only exists once ``_init_facet_map`` has run, which is not
        the case for an instance created without ``__init__``. Every access
        goes through this helper so item access works on such instances
        the same way ``items()`` does.
        """
        if not hasattr(self, "_facet_map_"):
            self._init_facet_map()
        return self._facet_map_

    def __getitem__(self, name: str) -> list[str]:
        if name in self._facet_names:
            return getattr(self, name)
        else:
            raise KeyError(name)

    def __setitem__(self, name: str, value: FacetValues):
        if name in self._facet_names:
            setattr(self, name, value)
        else:
            raise KeyError(name)

    def items(self) -> Iterator[tuple[str, list[str]]]:
        """Yield ``(name, values)`` for every set facet, sorted by name."""
        for name in sorted(self._ensure_facet_map().keys()):
            yield name, self[name]

    def __bool__(self) -> bool:
        return bool(self._facets)

    def _as_bytes(self) -> bytes:
        """Return the encoded tuple of ``items()``."""
        return str(tuple(self.items())).encode()

    def compute_sha(self) -> None:
        """Compute the sha of facets that lack one, then this selection's."""
        for facet in self._facets:
            if facet.sha is None:
                facet.compute_sha()
        super().compute_sha()

    def asdict(self) -> MutableMapping[str, FacetValues]:
        """Export as a dict; single-valued facets are stored as a string."""
        result: dict[str, FacetValues] = {}
        for name, facet in self.items():
            if len(facet) == 1:
                result[name] = facet[0]
            else:
                result[name] = list(facet)
        return result

    def __rich_repr__(self) -> Iterator[tuple[str, FacetValues]]:
        for name, facet in self.items():
            if len(facet) == 1:
                yield name, facet[0]
            else:
                yield name, facet

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        items = [f"{k}={v}" for k, v in self.__rich_repr__()]
        return f"{cls_name}(" + ", ".join(items) + ")"
157
+
158
+
159
# First group of facet names registered on ``Selection`` below.
DefaultFacets = [
    "query",
    "start",
    "end",
    "facets",
    "url",
    "data_node",
    "index_node",
    "master_id",
    "instance_id",  # search does not work with instance_id
    "title",
    "variable_long_name",
    "experiment_family",
]

# Second group of facet names registered on ``Selection`` below.
BaseFacets = [
    "project",
    "mip_era",
    "experiment",
    "experiment_id",
    "institute",
    "institution_id",
    "model",
    "table_id",
    "activity_id",
    "ensemble",
    "variant_label",
    "realm",
    "frequency",
    "time_frequency",
    "variable",
    "variable_id",
    "dataset_id",
    "source_id",
    "domain",
    "driving_model",
    "rcm_name",
    "member_id",
    "cmor_table",
    "grid_label",
]


# Install the facet properties at import time, replacing any earlier set.
Selection.configure(*DefaultFacets, *BaseFacets, replace=True)