esgpull 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. esgpull/__init__.py +12 -0
  2. esgpull/auth.py +181 -0
  3. esgpull/cli/__init__.py +73 -0
  4. esgpull/cli/add.py +103 -0
  5. esgpull/cli/autoremove.py +38 -0
  6. esgpull/cli/config.py +116 -0
  7. esgpull/cli/convert.py +285 -0
  8. esgpull/cli/decorators.py +342 -0
  9. esgpull/cli/download.py +74 -0
  10. esgpull/cli/facet.py +23 -0
  11. esgpull/cli/get.py +28 -0
  12. esgpull/cli/install.py +85 -0
  13. esgpull/cli/link.py +105 -0
  14. esgpull/cli/login.py +56 -0
  15. esgpull/cli/remove.py +73 -0
  16. esgpull/cli/retry.py +43 -0
  17. esgpull/cli/search.py +201 -0
  18. esgpull/cli/self.py +238 -0
  19. esgpull/cli/show.py +66 -0
  20. esgpull/cli/status.py +67 -0
  21. esgpull/cli/track.py +87 -0
  22. esgpull/cli/update.py +184 -0
  23. esgpull/cli/utils.py +247 -0
  24. esgpull/config.py +410 -0
  25. esgpull/constants.py +56 -0
  26. esgpull/context.py +724 -0
  27. esgpull/database.py +161 -0
  28. esgpull/download.py +162 -0
  29. esgpull/esgpull.py +447 -0
  30. esgpull/exceptions.py +167 -0
  31. esgpull/fs.py +253 -0
  32. esgpull/graph.py +460 -0
  33. esgpull/install_config.py +185 -0
  34. esgpull/migrations/README +1 -0
  35. esgpull/migrations/env.py +82 -0
  36. esgpull/migrations/script.py.mako +24 -0
  37. esgpull/migrations/versions/0.3.0_update_tables.py +170 -0
  38. esgpull/migrations/versions/0.3.1_update_tables.py +25 -0
  39. esgpull/migrations/versions/0.3.2_update_tables.py +26 -0
  40. esgpull/migrations/versions/0.3.3_update_tables.py +25 -0
  41. esgpull/migrations/versions/0.3.4_update_tables.py +25 -0
  42. esgpull/migrations/versions/0.3.5_update_tables.py +25 -0
  43. esgpull/migrations/versions/0.3.6_update_tables.py +26 -0
  44. esgpull/migrations/versions/0.3.7_update_tables.py +26 -0
  45. esgpull/migrations/versions/0.3.8_update_tables.py +26 -0
  46. esgpull/migrations/versions/0.4.0_update_tables.py +25 -0
  47. esgpull/migrations/versions/0.5.0_update_tables.py +26 -0
  48. esgpull/migrations/versions/0.5.1_update_tables.py +26 -0
  49. esgpull/migrations/versions/0.5.2_update_tables.py +25 -0
  50. esgpull/migrations/versions/0.5.3_update_tables.py +26 -0
  51. esgpull/migrations/versions/0.5.4_update_tables.py +25 -0
  52. esgpull/migrations/versions/0.5.5_update_tables.py +25 -0
  53. esgpull/migrations/versions/0.6.0_update_tables.py +25 -0
  54. esgpull/migrations/versions/0.6.1_update_tables.py +25 -0
  55. esgpull/migrations/versions/0.6.2_update_tables.py +25 -0
  56. esgpull/migrations/versions/0.6.3_update_tables.py +25 -0
  57. esgpull/models/__init__.py +31 -0
  58. esgpull/models/base.py +50 -0
  59. esgpull/models/dataset.py +34 -0
  60. esgpull/models/facet.py +18 -0
  61. esgpull/models/file.py +65 -0
  62. esgpull/models/options.py +164 -0
  63. esgpull/models/query.py +481 -0
  64. esgpull/models/selection.py +201 -0
  65. esgpull/models/sql.py +258 -0
  66. esgpull/models/synda_file.py +85 -0
  67. esgpull/models/tag.py +19 -0
  68. esgpull/models/utils.py +54 -0
  69. esgpull/presets.py +13 -0
  70. esgpull/processor.py +172 -0
  71. esgpull/py.typed +0 -0
  72. esgpull/result.py +53 -0
  73. esgpull/tui.py +346 -0
  74. esgpull/utils.py +54 -0
  75. esgpull/version.py +1 -0
  76. esgpull-0.6.3.dist-info/METADATA +110 -0
  77. esgpull-0.6.3.dist-info/RECORD +80 -0
  78. esgpull-0.6.3.dist-info/WHEEL +4 -0
  79. esgpull-0.6.3.dist-info/entry_points.txt +3 -0
  80. esgpull-0.6.3.dist-info/licenses/LICENSE +28 -0
esgpull/database.py ADDED
@@ -0,0 +1,161 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator, Sequence
4
+ from contextlib import contextmanager
5
+ from dataclasses import InitVar, dataclass, field
6
+ from pathlib import Path
7
+ from typing import TypeVar
8
+
9
+ import alembic.command
10
+ import sqlalchemy as sa
11
+ import sqlalchemy.orm
12
+ from alembic.config import Config as AlembicConfig
13
+ from alembic.migration import MigrationContext
14
+ from alembic.script import ScriptDirectory
15
+ from sqlalchemy.orm import Session, joinedload, make_transient
16
+
17
+ from esgpull import __file__
18
+ from esgpull.config import Config
19
+ from esgpull.models import File, Table, sql
20
+ from esgpull.version import __version__
21
+
22
+ # from esgpull.exceptions import NoClauseError
23
+ # from esgpull.models import Query
24
+
25
+ T = TypeVar("T")
26
+
27
+
28
@dataclass
class Database:
    """
    Main class to interact with esgpull's sqlite db.

    Holds one SQLAlchemy engine and one long-lived ``Session``.  On
    construction (unless ``run_migrations=False``) the schema is brought up
    to date via alembic, and a new autogenerated revision is created when
    the installed esgpull version has no matching revision yet.
    """

    # SQLAlchemy database URL, e.g. "sqlite:////path/to/esgpull.db"
    url: str
    # init-only flag: when True (default), run alembic migrations in __post_init__
    run_migrations: InitVar[bool] = True
    _engine: sa.Engine = field(init=False)
    session: Session = field(init=False)
    # current alembic revision id; None until _update() has run
    version: str | None = field(init=False, default=None)

    @staticmethod
    def from_config(config: Config, run_migrations: bool = True) -> Database:
        """Build a Database from the configured db directory and filename."""
        url = f"sqlite:///{config.paths.db / config.db.filename}"
        return Database(url, run_migrations=run_migrations)

    def __post_init__(self, run_migrations: bool) -> None:
        # Create the engine and a single session bound to it; the session
        # lives for the whole lifetime of this Database instance.
        self._engine = sa.create_engine(self.url)
        self.session = Session(self._engine)
        if run_migrations:
            self._update()

    def _update(self) -> None:
        """
        Synchronise the db schema with the installed esgpull version.

        Reads the current revision from the ``version`` table, upgrades to
        the scripts' head if behind, and — if no revision id matches
        ``__version__`` — autogenerates a new "update tables" revision named
        after the installed version.  ``self.version`` tracks the result.
        """
        alembic_config = AlembicConfig()
        # Migration scripts live next to this module, under esgpull/migrations.
        migrations_path = Path(__file__).parent / "migrations"
        alembic_config.set_main_option("script_location", str(migrations_path))
        # Hand the engine to env.py via config attributes (picked up there).
        alembic_config.attributes["connection"] = self._engine
        script = ScriptDirectory.from_config(alembic_config)
        head = script.get_current_head()
        with self._engine.begin() as conn:
            # Revision ids are stored in a custom "version" table rather than
            # alembic's default "alembic_version".
            opts = {"version_table": "version"}
            ctx = MigrationContext.configure(conn, opts=opts)
            self.version = ctx.get_current_revision()
            if self.version != head:
                # Behind the latest script: upgrade to the revision named
                # after the installed esgpull version.
                alembic.command.upgrade(alembic_config, __version__)
                self.version = head
            if self.version != __version__:
                # No script exists for this esgpull version yet: autogenerate
                # one (rev_id == package version, matching the files under
                # migrations/versions/).
                alembic.command.revision(
                    alembic_config,
                    message="update tables",
                    autogenerate=True,
                    rev_id=__version__,
                )
                self.version = __version__

    @property
    @contextmanager
    def safe(self) -> Iterator[None]:
        """
        Context manager that rolls back the session on any SQLAlchemy error
        or on Ctrl-C, then re-raises.  Usage: ``with self.safe: ...``.
        """
        try:
            yield
        except (sa.exc.SQLAlchemyError, KeyboardInterrupt):
            self.session.rollback()
            raise

    def get(
        self,
        table: type[Table],
        sha: str,
        lazy: bool = True,
        detached: bool = False,
    ) -> Table | None:
        """
        Fetch a single row of ``table`` by primary key ``sha``.

        With ``lazy=True`` this is a plain identity-map lookup; otherwise
        relationships are eagerly loaded via ``joinedload("*")``.  With
        ``detached=True`` the result is copied into a fresh, session-free
        instance (built from ``asdict()``).  Returns None when not found.
        """
        if lazy:
            result = self.session.get(table, sha)
        else:
            stmt = sa.select(table).filter_by(sha=sha)
            match self.scalars(stmt.options(joinedload("*")), unique=True):
                case [result]:
                    ...
                case []:
                    result = None
                case [*many]:
                    # sha is expected to be unique; more than one row is a bug.
                    raise ValueError(f"{len(many)} found, expected 1.")
        if detached and result is not None:
            result = table(**result.asdict())
        return result

    def scalars(
        self, statement: sa.Select[tuple[T]], unique: bool = False
    ) -> Sequence[T]:
        """
        Execute ``statement`` and return all scalar results.

        ``unique=True`` deduplicates rows — required when joined eager loads
        would otherwise produce duplicates.
        """
        with self.safe:
            result = self.session.scalars(statement)
            if unique:
                result = result.unique()
            return result.all()

    # Class-level TypeVar so `rows` can tie the Select's tuple type to the
    # returned Row type.
    SomeTuple = TypeVar("SomeTuple", bound=tuple)

    def rows(self, statement: sa.Select[SomeTuple]) -> list[sa.Row[SomeTuple]]:
        """Execute ``statement`` and return the full (non-scalar) rows."""
        with self.safe:
            return list(self.session.execute(statement).all())

    def add(self, *items: Table) -> None:
        """Add ``items`` to the session, commit, and refresh each from the db."""
        with self.safe:
            self.session.add_all(items)
            self.session.commit()
            for item in items:
                # Re-read server-side state (defaults, etc.) after commit.
                self.session.refresh(item)

    def delete(self, *items: Table) -> None:
        """Delete ``items`` and commit, then detach them from the session."""
        with self.safe:
            for item in items:
                self.session.delete(item)
            self.session.commit()
        for item in items:
            # Make the (now deleted) objects reusable as plain instances.
            make_transient(item)

    def __contains__(self, item: Table) -> bool:
        """True when a row with ``item``'s identity exists in the db."""
        return self.scalars(sql.count(item))[0] > 0

    def has_file_id(self, file: File) -> bool:
        """True when exactly one stored file shares ``file``'s file_id."""
        return len(self.scalars(sql.file.with_file_id(file.file_id))) == 1

    def merge(self, item: Table, commit: bool = False) -> Table:
        """
        Merge ``item`` into the session (insert-or-update semantics) and
        return the session-attached instance; commit only if asked.
        """
        with self.safe:
            result = self.session.merge(item)
            if commit:
                self.session.commit()
        return result

    def get_deprecated_files(self) -> list[File]:
        """
        Return files superseded by a newer version of the same master_id.

        Groups duplicate files by ``master_id`` and keeps only those whose
        version is not the highest in their group.
        """
        duplicates = self.scalars(sql.file.duplicates())
        duplicates_dict: dict[str, list[File]] = {}
        for file in duplicates:
            duplicates_dict.setdefault(file.master_id, [])
            duplicates_dict[file.master_id].append(file)
        deprecated: list[File] = []
        for files in duplicates_dict.values():
            # assumes version strings look like "v<digits>" (e.g. "v20200101")
            # — TODO confirm against the File model.
            versions = [int(f.version[1:]) for f in files]
            latest_version = "v" + str(max(versions))
            for file in files:
                if file.version != latest_version:
                    deprecated.append(file)
        return deprecated
esgpull/download.py ADDED
@@ -0,0 +1,162 @@
1
+ # from math import ceil
2
+ from collections.abc import AsyncGenerator
3
+ from dataclasses import dataclass
4
+
5
+ from httpx import AsyncClient
6
+
7
+ from esgpull.fs import Digest
8
+ from esgpull.models import File
9
+
10
+ # import asyncio
11
+ # from urllib.parse import urlsplit
12
+ # from esgpull.auth import Auth
13
+ # from esgpull.context import Context
14
+
15
+
16
@dataclass
class DownloadCtx:
    """
    Mutable state threaded through one file download.

    The downloader mutates this object in place: `completed` accumulates the
    byte count, `chunk` holds the most recently received chunk, and `digest`
    (when present) is fed each chunk via `update_digest`.
    """

    file: File
    completed: int = 0
    chunk: bytes | None = None
    digest: Digest | None = None

    @property
    def finished(self) -> bool:
        """True once exactly `file.size` bytes have been received."""
        expected = self.file.size
        return self.completed == expected

    @property
    def error(self) -> bool:
        """True when more bytes arrived than the file is supposed to have."""
        return not self.completed <= self.file.size

    def update_digest(self) -> None:
        """Feed the current chunk into the running digest; no-op otherwise."""
        if self.digest is None or self.chunk is None:
            return
        self.digest.update(self.chunk)
34
+
35
+
36
class BaseDownloader:
    """
    Interface for chunked async downloaders.

    Subclasses implement `stream` as an async generator that yields the
    given `DownloadCtx` after each received chunk.
    """

    def stream(
        self,
        client: AsyncClient,
        ctx: DownloadCtx,
        chunk_size: int,
    ) -> AsyncGenerator[DownloadCtx, None]:
        """Download `ctx.file` via `client`, yielding `ctx` per chunk."""
        raise NotImplementedError
44
+
45
+
46
class Simple(BaseDownloader):
    """
    Minimal chunked async downloader: a single GET request, streamed.
    """

    async def stream(
        self,
        client: AsyncClient,
        ctx: DownloadCtx,
        chunk_size: int,
    ) -> AsyncGenerator[DownloadCtx, None]:
        """
        Fetch `ctx.file.url` with `client`, mutating `ctx` in place and
        yielding it after every received chunk.  Raises on non-2xx status.
        """
        async with client.stream("GET", ctx.file.url) as response:
            response.raise_for_status()
            chunks = response.aiter_bytes(chunk_size=chunk_size)
            async for piece in chunks:
                ctx.chunk = piece
                ctx.completed += len(piece)
                ctx.update_digest()
                yield ctx
64
+
65
+
66
+ # class Distributed(BaseDownloader):
67
+ # """
68
+ # Distributed chunked async downloader.
69
+ # Fetches chunks from multiple URLs pointing to the same file.
70
+ # """
71
+
72
+ # def __init__(
73
+ # self,
74
+ # auth: Auth,
75
+ # *,
76
+ # file: File | None = None,
77
+ # url: str | None = None,
78
+ # config: Config | None = None,
79
+ # max_ping: float = 5.0,
80
+ # ) -> None:
81
+ # super().__init__(auth, file=file, url=url, config=config)
82
+ # self.max_ping = max_ping
83
+
84
+ # async def try_url(self, url: str, client: AsyncClient) -> str | None:
85
+ # result = None
86
+ # node = urlsplit(url).netloc
87
+ # print(f"trying url on '{node}'")
88
+ # try:
89
+ # resp = await client.head(url)
90
+ # print(f"got response on '{node}'")
91
+ # resp.raise_for_status()
92
+ # accept_ranges = resp.headers.get("Accept-Ranges")
93
+ # content_length = resp.headers.get("Content-Length")
94
+ # if (
95
+ # accept_ranges == "bytes"
96
+ # and int(content_length) == self.file.size
97
+ # ):
98
+ # result = str(resp.url)
99
+ # else:
100
+ # print(dict(resp.headers))
101
+ # except HTTPError as err:
102
+ # print(type(err))
103
+ # print(err.request.headers)
104
+ # return result
105
+
106
+ # async def process_queue(
107
+ # self, url: str, queue: asyncio.Queue
108
+ # ) -> tuple[list[tuple[int, bytes]], str]:
109
+ # node = urlsplit(url).netloc
110
+ # print(f"starting process on '{node}'")
111
+ # chunks: list[tuple[int, bytes]] = []
112
+ # async with self.make_client() as client:
113
+ # final_url = await self.try_url(url, client)
114
+ # if final_url is None:
115
+ # print(f"no url found for '{node}'")
116
+ # return chunks, url
117
+ # else:
118
+ # url = final_url
119
+ # while not queue.empty():
120
+ # chunk_idx = await queue.get()
121
+ # print(f"processing chunk {chunk_idx} on '{node}'")
122
+ # start = chunk_idx * self.config.download.chunk_size
123
+ # end = min(
124
+ # self.file.size,
125
+ # (chunk_idx + 1) * self.config.download.chunk_size - 1,
126
+ # )
127
+ # headers = {"Range": f"bytes={start}-{end}"}
128
+ # resp = await client.get(url, headers=headers)
129
+ # queue.task_done()
130
+ # if resp.status_code == 206:
131
+ # chunks.append((chunk_idx, resp.content))
132
+ # else:
133
+ # await queue.put(chunk_idx)
134
+ # print(f"error status {resp.status_code} on '{node}'")
135
+ # break
136
+ # return chunks, url
137
+
138
+ # async def fetch_urls(self) -> list[str]:
139
+ # ctx = Context(distrib=True)
140
+ # ctx.query.instance_id = self.file.file_id
141
+ # results = await ctx._search(file=True)
142
+ # files = [File.from_dict(item) for item in results]
143
+ # return [file.url for file in files]
144
+
145
+ # async def aget(self) -> bytes:
146
+ # nb_chunks = ceil(self.file.size / self.config.download.chunk_size)
147
+ # queue: asyncio.Queue[int] = asyncio.Queue(nb_chunks)
148
+ # for chunk_idx in range(nb_chunks):
149
+ # queue.put_nowait(chunk_idx)
150
+ # completed: list[bool] = [False for _ in range(nb_chunks)]
151
+ # chunks: list[bytes] = [bytes() for _ in range(nb_chunks)]
152
+ # urls = await self.fetch_urls()
153
+ # workers = [self.process_queue(url, queue) for url in urls]
154
+ # for future in asyncio.as_completed(workers):
155
+ # some_chunks, url = await future
156
+ # print(f"got {len(some_chunks)} chunks from {url}")
157
+ # for chunk_idx, chunk in some_chunks:
158
+ # completed[chunk_idx] = True
159
+ # chunks[chunk_idx] = chunk
160
+ # if not all(completed):
161
+ # raise ValueError("TODO: progressive write (with .part file)")
162
+ # return b"".join(chunks)