pyspiral 0.4.0__pp310-pypy310_pp73-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. pyspiral-0.4.0.dist-info/METADATA +46 -0
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. pyspiral-0.4.0.dist-info/WHEEL +4 -0
  4. pyspiral-0.4.0.dist-info/entry_points.txt +2 -0
  5. spiral/__init__.py +10 -0
  6. spiral/_lib.pypy310-pp73-darwin.so +0 -0
  7. spiral/adbc.py +393 -0
  8. spiral/api/__init__.py +64 -0
  9. spiral/api/admin.py +15 -0
  10. spiral/api/client.py +160 -0
  11. spiral/api/filesystems.py +153 -0
  12. spiral/api/organizations.py +77 -0
  13. spiral/api/projects.py +197 -0
  14. spiral/api/telemetry.py +19 -0
  15. spiral/api/types.py +20 -0
  16. spiral/api/workloads.py +52 -0
  17. spiral/arrow_.py +221 -0
  18. spiral/cli/__init__.py +79 -0
  19. spiral/cli/__main__.py +4 -0
  20. spiral/cli/admin.py +16 -0
  21. spiral/cli/app.py +65 -0
  22. spiral/cli/console.py +95 -0
  23. spiral/cli/fs.py +112 -0
  24. spiral/cli/iceberg/__init__.py +7 -0
  25. spiral/cli/iceberg/namespaces.py +47 -0
  26. spiral/cli/iceberg/tables.py +60 -0
  27. spiral/cli/indexes/__init__.py +19 -0
  28. spiral/cli/login.py +22 -0
  29. spiral/cli/orgs.py +90 -0
  30. spiral/cli/printer.py +53 -0
  31. spiral/cli/projects.py +136 -0
  32. spiral/cli/state.py +5 -0
  33. spiral/cli/tables/__init__.py +121 -0
  34. spiral/cli/telemetry.py +18 -0
  35. spiral/cli/types.py +51 -0
  36. spiral/cli/workloads.py +59 -0
  37. spiral/client.py +79 -0
  38. spiral/core/__init__.pyi +0 -0
  39. spiral/core/client/__init__.pyi +117 -0
  40. spiral/core/index/__init__.pyi +15 -0
  41. spiral/core/table/__init__.pyi +108 -0
  42. spiral/core/table/manifests/__init__.pyi +35 -0
  43. spiral/core/table/metastore/__init__.pyi +62 -0
  44. spiral/core/table/spec/__init__.pyi +214 -0
  45. spiral/datetime_.py +27 -0
  46. spiral/expressions/__init__.py +245 -0
  47. spiral/expressions/base.py +149 -0
  48. spiral/expressions/http.py +86 -0
  49. spiral/expressions/io.py +100 -0
  50. spiral/expressions/list_.py +68 -0
  51. spiral/expressions/mp4.py +62 -0
  52. spiral/expressions/png.py +18 -0
  53. spiral/expressions/qoi.py +18 -0
  54. spiral/expressions/refs.py +58 -0
  55. spiral/expressions/str_.py +39 -0
  56. spiral/expressions/struct.py +59 -0
  57. spiral/expressions/text.py +62 -0
  58. spiral/expressions/tiff.py +223 -0
  59. spiral/expressions/udf.py +46 -0
  60. spiral/grpc_.py +32 -0
  61. spiral/iceberg/__init__.py +3 -0
  62. spiral/iceberg/client.py +33 -0
  63. spiral/indexes/__init__.py +5 -0
  64. spiral/indexes/client.py +137 -0
  65. spiral/indexes/index.py +34 -0
  66. spiral/indexes/scan.py +22 -0
  67. spiral/project.py +46 -0
  68. spiral/protogen/_/__init__.py +0 -0
  69. spiral/protogen/_/arrow/__init__.py +0 -0
  70. spiral/protogen/_/arrow/flight/__init__.py +0 -0
  71. spiral/protogen/_/arrow/flight/protocol/__init__.py +0 -0
  72. spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1990 -0
  73. spiral/protogen/_/scandal/__init__.py +178 -0
  74. spiral/protogen/_/spiral/__init__.py +0 -0
  75. spiral/protogen/_/spiral/table/__init__.py +22 -0
  76. spiral/protogen/_/substrait/__init__.py +3399 -0
  77. spiral/protogen/_/substrait/extensions/__init__.py +115 -0
  78. spiral/protogen/__init__.py +0 -0
  79. spiral/protogen/substrait/__init__.py +3399 -0
  80. spiral/protogen/substrait/extensions/__init__.py +115 -0
  81. spiral/protogen/util.py +41 -0
  82. spiral/py.typed +0 -0
  83. spiral/server.py +17 -0
  84. spiral/settings.py +101 -0
  85. spiral/substrait_.py +279 -0
  86. spiral/tables/__init__.py +12 -0
  87. spiral/tables/client.py +130 -0
  88. spiral/tables/dataset.py +250 -0
  89. spiral/tables/debug/__init__.py +0 -0
  90. spiral/tables/debug/manifests.py +70 -0
  91. spiral/tables/debug/metrics.py +56 -0
  92. spiral/tables/debug/scan.py +248 -0
  93. spiral/tables/maintenance.py +12 -0
  94. spiral/tables/scan.py +193 -0
  95. spiral/tables/snapshot.py +78 -0
  96. spiral/tables/table.py +157 -0
  97. spiral/tables/transaction.py +52 -0
  98. spiral/types_.py +6 -0
spiral/arrow_.py ADDED
@@ -0,0 +1,221 @@
1
+ from collections import defaultdict
2
+ from collections.abc import Callable, Iterable
3
+ from functools import reduce
4
+ from typing import TypeVar
5
+
6
+ import numpy as np
7
+ import pyarrow as pa
8
+ from pyarrow import compute as pc
9
+
10
+ T = TypeVar("T")
11
+
12
+
13
def arange(*args, **kwargs) -> pa.Array:
    """Build an int32 Arrow array from ``numpy.arange``; all arguments are forwarded unchanged."""
    values = np.arange(*args, **kwargs)
    return pa.array(values, type=pa.int32())
15
+
16
+
17
def zip_tables(tables: Iterable[pa.Table]) -> pa.Table:
    """Place the columns of every input table side by side in one table.

    Columns appear in the order their tables are supplied; column names are
    carried over as-is, so repeated names are kept.
    """
    columns = []
    column_names = []
    for tbl in tables:
        columns += tbl.columns
        column_names += tbl.column_names
    return pa.Table.from_arrays(columns, names=column_names)
24
+
25
+
26
def merge_arrays(*arrays: pa.StructArray) -> pa.StructArray:
    """Recursively merge arrays into nested struct arrays.

    A single input is returned untouched. When none of the inputs is a struct,
    the last input wins. Otherwise every input must be a struct: children are
    grouped by field name and each group is merged recursively.

    Raises:
        ValueError: if struct and non-struct arrays are mixed.
    """
    if len(arrays) == 1:
        return arrays[0]

    struct_count = len([a for a in arrays if pa.types.is_struct(a.type)])
    if struct_count == 0:
        # Conflicting non-struct arrays: the last one takes precedence.
        return arrays[-1]
    if struct_count != len(arrays):
        raise ValueError("Cannot merge structs with non-structs.")

    children = defaultdict(list)
    for arr in arrays:
        # Chunked input is combined first so that `.field` can be used.
        combined = arr.combine_chunks() if isinstance(arr, pa.ChunkedArray) else arr
        for child in combined.type:
            children[child.name].append(combined.field(child.name))

    merged = [merge_arrays(*group) for group in children.values()]
    return pa.StructArray.from_arrays(merged, names=list(children.keys()))
47
+
48
+
49
def merge_scalars(*scalars: pa.StructScalar) -> pa.StructScalar:
    """Recursively merge scalars into nested struct scalars.

    A single input is returned untouched. When none of the inputs is a struct,
    the last input wins. Otherwise every input must be a struct: fields are
    grouped by name and each group is merged recursively.

    Raises:
        ValueError: if struct and non-struct scalars are mixed.
    """
    if len(scalars) == 1:
        return scalars[0]

    nstructs = sum(pa.types.is_struct(s.type) for s in scalars)
    if nstructs == 0:
        # Then we have conflicting scalars and we choose the last.
        return scalars[-1]

    if nstructs != len(scalars):
        # The conflict is struct vs. non-struct (every input is a scalar), so
        # report it the same way merge_arrays does.
        raise ValueError("Cannot merge structs with non-structs.")

    data = defaultdict(list)
    for scalar in scalars:
        for field in scalar.type:
            data[field.name].append(scalar[field.name])

    return pa.scalar({k: merge_scalars(*v) for k, v in data.items()})
68
+
69
+
70
def null_table(schema: pa.Schema, length: int = 0) -> pa.Table:
    """Return a table with the fields of ``schema`` and ``length`` all-null rows."""
    placeholder = "__"
    null_columns = [pa.nulls(length, type=f.type) for f in schema]
    # We add an extra nulls column to ensure the length is correctly applied;
    # it is dropped again before returning.
    null_columns.append(pa.nulls(length))
    padded_schema = pa.schema([*schema, pa.field(placeholder, type=pa.null())])
    padded = pa.table(null_columns, schema=padded_schema)
    return padded.drop([placeholder])
76
+
77
+
78
def coalesce_all(table: pa.Table) -> pa.Table:
    """Collapse columns that share a name into one column via ``pc.coalesce``.

    Columns with a unique name are passed through unchanged. Output order
    follows the first occurrence of each name.
    """
    grouped: dict[str, list[pa.Array]] = defaultdict(list)
    for position, name in enumerate(table.column_names):
        grouped[name].append(table[position])

    merged = [
        group[0] if len(group) == 1 else pc.coalesce(*group)
        for group in grouped.values()
    ]
    return pa.Table.from_arrays(merged, names=list(grouped))
94
+
95
+
96
def join(left: pa.Table, right: pa.Table, keys: list[str], join_type: str) -> pa.Table:
    """Join two tables on ``keys`` without passing non-key columns through Arrow's join.

    Arrow's builtin join doesn't support struct columns. So only the key columns
    plus a per-side row-index column are joined; the resulting indices are then
    used to ``take`` rows from each input, and the pieces are zipped together.
    The joined rows are sorted ascending by the keys.
    """
    # TODO(ngates): if join_type == inner, we may have better luck performing two index_in operations since this
    #  also preserves sort order.
    left_index = left.select(keys).add_column(0, "__lhs", arange(len(left)))
    right_index = right.select(keys).add_column(0, "__rhs", arange(len(right)))
    sort_order = [(key, "ascending") for key in keys]
    matched = left_index.join(right_index, keys=keys, join_type=join_type).sort_by(sort_order)

    key_columns = matched.select(keys)
    left_rest = left.take(matched["__lhs"]).drop(keys)
    right_rest = right.take(matched["__rhs"]).drop(keys)
    return zip_tables([key_columns, left_rest, right_rest])
106
+
107
+
108
def nest_structs(array: pa.StructArray | pa.StructScalar | dict) -> dict:
    """Turn a struct-like value with dot-separated column names into a nested dictionary.

    A name such as ``"a.b.c"`` ends up at ``result["a"]["b"]["c"]``; names
    without a dot stay at the top level.
    """
    nested: dict = {}

    if isinstance(array, pa.StructArray | pa.StructScalar):
        array = {f.name: field(array, f.name) for f in array.type}

    for name in array.keys():
        # Everything before the last dot is the path of intermediate dicts.
        *parents, leaf = name.split(".")
        cursor = nested
        for parent in parents:
            cursor = cursor.setdefault(parent, {})
        cursor[leaf] = array[name]

    return nested
129
+
130
+
131
def flatten_struct_table(table: pa.Table, separator=".") -> pa.Table:
    """Turn a nested struct table into a flat table.

    Leaf columns are named by joining the path of field names with
    ``separator`` (a dot by default) and keep depth-first field order.
    """

    def _leaves(column: pa.Array, path: str):
        # Yield (flattened name, array) pairs for every non-struct descendant.
        if not pa.types.is_struct(column.type):
            yield path, column
            return
        if isinstance(column, pa.ChunkedArray):
            column = column.combine_chunks()
        for child in column.type:
            yield from _leaves(field(column, child.name), f"{path}{separator}{child.name}")

    flat_names = []
    flat_columns = []
    for top_level in table.column_names:
        for path, leaf in _leaves(table[top_level], top_level):
            flat_columns.append(leaf)
            flat_names.append(path)

    return pa.Table.from_arrays(flat_columns, names=flat_names)
150
+
151
+
152
def struct_array(fields: list[tuple[str, bool, pa.Array]], /, mask: list[bool] | None = None) -> pa.StructArray:
    """Build a struct array from ``(name, nullable, array)`` triples.

    Args:
        fields: One triple per child: field name, whether the field is
            nullable, and the child array (its type becomes the field type).
        mask: Optional list of booleans forwarded to
            ``pa.StructArray.from_arrays`` as its ``mask`` argument.

    Returns:
        The struct array assembled from the given children.
    """
    return pa.StructArray.from_arrays(
        arrays=[entry[2] for entry in fields],
        fields=[pa.field(entry[0], type=entry[2].type, nullable=entry[1]) for entry in fields],
        # Test against None rather than truthiness so an empty mask is still
        # converted, and pin the type so an empty list does not infer `null`.
        mask=None if mask is None else pa.array(mask, type=pa.bool_()),
    )
158
+
159
+
160
def table(fields: list[tuple[str, bool, pa.Array]], /) -> pa.Table:
    """Build a table from ``(name, nullable, array)`` triples via ``struct_array``."""
    as_struct = struct_array(fields)
    return pa.Table.from_struct_array(as_struct)
162
+
163
+
164
def dict_to_table(data) -> pa.Table:
    """Convert ``data`` with ``dict_to_struct_array`` and expose the result as a table."""
    as_struct = dict_to_struct_array(data)
    return pa.Table.from_struct_array(as_struct)
166
+
167
+
168
def dict_to_struct_array(data, propagate_nulls: bool = False) -> pa.StructArray:
    """Convert a nested dictionary of arrays to a struct array with nested structs.

    Arrow arrays are returned as-is (chunked arrays are combined first). Any
    other input is treated as a mapping and becomes a struct with one child
    per key. With ``propagate_nulls`` set, the struct is built with a mask
    that is true where every direct child is null; the flag is not forwarded
    to nested dictionaries.
    """
    if isinstance(data, pa.ChunkedArray):
        return data.combine_chunks()
    if isinstance(data, pa.Array):
        return data

    children = [dict_to_struct_array(child) for child in data.values()]
    null_mask = None
    if propagate_nulls:
        null_mask = reduce(pc.and_, [pc.is_null(child) for child in children])
    return pa.StructArray.from_arrays(children, names=list(data.keys()), mask=null_mask)
180
+
181
+
182
def struct_array_to_dict(array: pa.StructArray, array_fn: Callable[[pa.Array], T] = lambda a: a) -> dict | T:
    """Convert a struct array to a nested dictionary.

    Non-struct input is passed through ``array_fn`` and returned directly;
    struct input becomes a dict keyed by field name, converted recursively.
    """
    if not pa.types.is_struct(array.type):
        return array_fn(array)
    if isinstance(array, pa.ChunkedArray):
        array = array.combine_chunks()

    result = {}
    for index, child in enumerate(array.type):
        result[child.name] = struct_array_to_dict(array.field(index), array_fn=array_fn)
    return result
189
+
190
+
191
def table_to_struct_array(table: pa.Table) -> pa.StructArray:
    """Convert a table to a single struct array.

    A table without rows yields an empty struct array typed from the table's
    schema; a chunked conversion result is combined into one array.
    """
    if table.num_rows:
        structs = table.to_struct_array()
        return structs.combine_chunks() if isinstance(structs, pa.ChunkedArray) else structs
    return pa.array([], type=pa.struct(table.schema))
198
+
199
+
200
def table_from_struct_array(array: pa.StructArray | pa.ChunkedArray):
    """Convert a struct array to a table; empty input goes through ``null_table``."""
    is_empty = len(array) == 0
    return null_table(pa.schema(array.type)) if is_empty else pa.Table.from_struct_array(array)
204
+
205
+
206
def field(value: pa.StructArray | pa.StructScalar, name: str) -> pa.Array | pa.Scalar:
    """Get a field from a struct-like value.

    Struct scalars are indexed by name; anything else is asked via ``.field``.
    """
    return value[name] if isinstance(value, pa.StructScalar) else value.field(name)
211
+
212
+
213
def concat_tables(tables: list[pa.Table]) -> pa.Table:
    """
    Concatenate pyarrow.Table objects, filling "missing" data with appropriate null arrays
    and casting arrays to the most common denominator type that fits all fields.

    A list holding exactly one table returns that table untouched.
    """
    if len(tables) != 1:
        return pa.concat_tables(tables, promote_options="permissive")
    return tables[0]
spiral/cli/__init__.py ADDED
@@ -0,0 +1,79 @@
1
+ import asyncio
2
+ import functools
3
+ import inspect
4
+ from typing import IO
5
+
6
+ import rich
7
+ import typer
8
+ from click import ClickException
9
+ from grpclib import GRPCError
10
+ from httpx import HTTPStatusError
11
+
12
+
13
class AsyncTyper(typer.Typer):
    """Wrapper to allow async functions to be used as commands.

    We also pre-bake some configuration.

    Per https://github.com/tiangolo/typer/issues/88#issuecomment-1732469681
    """

    def __init__(self, **kwargs):
        super().__init__(no_args_is_help=True, pretty_exceptions_enable=False, **kwargs)

    def callback(self, *args, **kwargs):
        # The function is handed to _maybe_run_async first, which passes it
        # (or a sync runner) to _wrap_exceptions, which registers with Typer.
        register = super().callback(*args, **kwargs)
        with_exceptions = functools.partial(_wrap_exceptions, register)
        return functools.partial(_maybe_run_async, with_exceptions)

    def command(self, *args, **kwargs):
        # Same wrapping chain as `callback`.
        register = super().command(*args, **kwargs)
        with_exceptions = functools.partial(_wrap_exceptions, register)
        return functools.partial(_maybe_run_async, with_exceptions)
39
+
40
+
41
class _ClickGRPCException(ClickException):
    """Click exception wrapping a ``GRPCError``; exits with code 1 and prints via rich."""

    def __init__(self, err: GRPCError):
        message = err.message or "GRPCError message was None."
        super().__init__(message)
        self.err = err
        self.exit_code = 1

    def format_message(self) -> str:
        # Append the gRPC details to the message when there are any.
        details = self.err.details
        return f"{self.message}: {details}" if details else self.message

    def show(self, file: IO[str] | None = None) -> None:
        text = self.format_message()
        rich.print(f"Error: {text}", file=file)
54
+
55
+
56
+ def _maybe_run_async(decorator, f):
57
+ if inspect.iscoroutinefunction(f):
58
+
59
+ @functools.wraps(f)
60
+ def runner(*args, **kwargs):
61
+ return asyncio.run(f(*args, **kwargs))
62
+
63
+ decorator(runner)
64
+ else:
65
+ decorator(f)
66
+ return f
67
+
68
+
69
def _wrap_exceptions(decorator, f):
    """Register ``f`` with ``decorator`` behind a shim that converts API errors to Click errors.

    ``HTTPStatusError`` becomes a ``ClickException`` carrying the error text
    and ``GRPCError`` becomes a ``_ClickGRPCException``. The original error is
    attached as the explicit cause. Returns whatever ``decorator`` returns.
    """

    @functools.wraps(f)
    def runner(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except HTTPStatusError as e:
            raise ClickException(str(e)) from e
        except GRPCError as e:
            raise _ClickGRPCException(e) from e

    return decorator(runner)
spiral/cli/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from spiral.cli.app import main
2
+
3
# Run the CLI entry point when this module is executed as a script.
if __name__ == "__main__":
    main()
spiral/cli/admin.py ADDED
@@ -0,0 +1,16 @@
1
+ from rich import print
2
+
3
+ from spiral.api.types import OrgId
4
+ from spiral.cli import AsyncTyper, state
5
+
6
+ app = AsyncTyper()
7
+
8
+
9
@app.command()
def sync(
    org_id: OrgId | None = None,
):
    # Organizations are synced first; memberships are then synced for
    # `org_id` and each returned membership is printed.
    state.settings.api._admin.sync_orgs()

    memberships = state.settings.api._admin.sync_memberships(org_id)
    for entry in memberships:
        print(entry)
spiral/cli/app.py ADDED
@@ -0,0 +1,65 @@
1
+ import logging
2
+ import os
3
+ from logging.handlers import RotatingFileHandler
4
+
5
+ from spiral.cli import (
6
+ AsyncTyper,
7
+ admin,
8
+ console,
9
+ fs,
10
+ iceberg,
11
+ indexes,
12
+ login,
13
+ orgs,
14
+ projects,
15
+ state,
16
+ tables,
17
+ telemetry,
18
+ workloads,
19
+ )
20
+ from spiral.settings import LOG_DIR, Settings
21
+
22
+ app = AsyncTyper(name="spiral")
23
+
24
+
25
@app.callback()
def _callback(verbose: bool = False):
    # --verbose raises the root logger to INFO.
    if verbose:
        root_logger = logging.getLogger()
        root_logger.setLevel(level=logging.INFO)

    # Load the settings (we reload in the callback to support testing under different env vars)
    fresh_settings = Settings()
    state.settings = fresh_settings
32
+
33
+
34
# Public sub-command groups, each mounted under its own name.
app.add_typer(fs.app, name="fs")
app.add_typer(orgs.app, name="orgs")
app.add_typer(projects.app, name="projects")
app.add_typer(iceberg.app, name="iceberg")
app.add_typer(tables.app, name="tables")
app.add_typer(indexes.app, name="indexes")
app.add_typer(telemetry.app, name="telemetry")
# Public top-level commands.
app.command("console")(console.command)
app.command("login")(login.command)
app.command("whoami")(login.whoami)

# Register unless we're building docs. Because Typer docs command does not skip hidden commands...
# NOTE(review): any non-empty SPIRAL_DOCS value (including "0" or "false") is truthy here.
if not bool(os.environ.get("SPIRAL_DOCS", False)):
    app.add_typer(workloads.app, name="workloads", hidden=True)
    app.add_typer(admin.app, name="admin", hidden=True)
    app.command("logout", hidden=True)(login.logout)
50
+
51
+
52
def main():
    """Configure rotating file logging under ``LOG_DIR`` and run the CLI app."""
    # Setup rotating CLI logging.
    # NOTE(ngates): we should do the same for the Spiral client? Maybe move this logic elsewhere?
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    log_handler = RotatingFileHandler(LOG_DIR / "cli.log", maxBytes=2**20, backupCount=10)
    logging.basicConfig(level=logging.DEBUG, handlers=[log_handler])

    app()
62
+
63
+
64
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
spiral/cli/console.py ADDED
@@ -0,0 +1,95 @@
1
+ import os
2
+ import subprocess
3
+
4
+ from spiral import Spiral
5
+ from spiral.adbc import ADBCFlightServer, SpiralADBCServer
6
+ from spiral.server import wait_for_port
7
+
8
+
9
def command():
    """Launch a SQL console to query Spiral tables."""

    # To avoid taking a dependency on Harlequin, we install it on-demand using
    # either uvx or pipx. uvx is tried first; pipx is the fallback.
    harlequin_args = _uvx()
    if harlequin_args is None:
        harlequin_args = _pipx()
    if harlequin_args is None:
        raise ValueError("Please install pipx to continue\n\tSee https://github.com/pypa/pipx")

    # Set up a pipe to send the server port to the child process.
    r, w = os.pipe()

    pid = os.fork()
    if pid == 0:  # In the child
        os.close(w)
        # The parent writes the port as 4 big-endian bytes.
        port = int.from_bytes(os.read(r, 4), "big")

        # Wait for the server to be up.
        wait_for_port(port)

        # Replace this child process with the harlequin launcher, pointed at
        # the local Flight SQL endpoint.
        os.execv(
            harlequin_args[0],
            harlequin_args
            + [
                "-a",
                "adbc",
                "--driver-type",
                "flightsql",
                f"grpc://localhost:{port}",
            ],
        )
    else:
        os.close(r)

        # I can't get the Flight server to stop writing to stdout. So we need to spawn a new process I think and
        # then hope we can kill it?
        # File descriptors 1 and 2 (stdout/stderr) are pointed at /dev/null.
        fd = os.open("/dev/null", os.O_WRONLY)
        os.dup2(fd, 1)
        os.dup2(fd, 2)

        # In the parent, we launch the Flight SQL server and send the port to the child
        server = ADBCFlightServer(SpiralADBCServer(Spiral()))
        os.write(w, server.port.to_bytes(4, "big"))

        # Then wait for the console app to exit
        os.waitpid(pid, 0)
57
+
58
+
59
+ def _pipx() -> list[str] | None:
60
+ """Run harlequin via pipx."""
61
+ res = subprocess.run(["which", "pipx"], stdout=subprocess.PIPE)
62
+ if res.returncode != 0:
63
+ return None
64
+ # raise ValueError("Please install pipx to continue\n\tSee https://github.com/pypa/pipx")
65
+ pipx = res.stdout.strip()
66
+
67
+ return [
68
+ pipx,
69
+ "run",
70
+ "--pip-args",
71
+ "adbc_driver_flightsql",
72
+ "--pip-args",
73
+ # for now, we pin rich
74
+ "rich<=13.9.1",
75
+ "harlequin[adbc]",
76
+ ]
77
+
78
+
79
+ def _uvx() -> list[str] | None:
80
+ """Run harlequin via uvx."""
81
+ res = subprocess.run(["which", "uvx"], stdout=subprocess.PIPE)
82
+ if res.returncode != 0:
83
+ return None
84
+ uvx = res.stdout.strip()
85
+
86
+ return [
87
+ uvx,
88
+ "--with",
89
+ "adbc_driver_flightsql",
90
+ "--with",
91
+ "rich<=13.9.1",
92
+ "--from",
93
+ "harlequin[adbc]",
94
+ "harlequin",
95
+ ]
spiral/cli/fs.py ADDED
@@ -0,0 +1,112 @@
1
+ from typing import Annotated
2
+
3
+ import questionary
4
+ import rich
5
+ from pydantic import SecretStr
6
+ from typer import Option
7
+
8
+ from spiral.api.filesystems import (
9
+ AWSSecretAccessKey,
10
+ BuiltinFileSystem,
11
+ GCPServiceAccount,
12
+ UpdateGCSFileSystem,
13
+ UpdateS3FileSystem,
14
+ UpstreamFileSystem,
15
+ )
16
+ from spiral.cli import AsyncTyper, state
17
+ from spiral.cli.types import ProjectArg, ask_project
18
+
19
+ app = AsyncTyper(short_help="File Systems.")
20
+
21
+
22
@app.command(help="Show the file system configured for project.")
def show(project: ProjectArg):
    # Built-in file systems are summarised by provider; anything else is
    # printed as returned by the API.
    configured = state.settings.api.file_system.get_file_system(project)
    if isinstance(configured, BuiltinFileSystem):
        rich.print(f"provider: {configured.provider}")
    else:
        rich.print(configured)
30
+
31
+
32
def ask_provider():
    """Prompt the user to choose among the providers returned by the API."""
    providers = state.settings.api.file_system.list_providers()
    prompt = questionary.select("Select a file system provider", choices=providers)
    return prompt.ask()
35
+
36
+
37
# Typer option type for a built-in provider name. Its default comes from
# calling ask_provider (default_factory), and no default is shown in help.
BuiltinProviderOpt = Annotated[
    str,
    Option(help="Built-in provider to use for the file system.", show_default=False, default_factory=ask_provider),
]
41
+
42
+
43
@app.command(help="Update a project's default file system.")
def update(
    project: ProjectArg,
    builtin: bool = Option(False, help="Use a built-in file system provider."),
    upstream: bool = Option(
        False, help="Use another project as default file system. Only if another project is an external provider."
    ),
    s3: bool = Option(False, help="Use S3 compatible provider."),
    gcs: bool = Option(False, help="Use GCS provider."),
    provider: str = Option(None, help="Built-in provider to use for the file system."),
    endpoint: str = Option(None, help="Endpoint for S3 provider."),
    region: str = Option(None, help="Region for S3 or GCS provider. Required for GCS."),
    bucket: str = Option(None, help="Bucket name for S3 or GCS provider."),
    directory: str = Option(None, help="Directory for S3 or GCS provider."),
    access_key_id: str = Option(None, help="Access key ID for S3 provider. Required for S3."),
    secret_access_key: str = Option(None, help="Secret access key for S3 provider. Required for S3."),
    credentials_path: str = Option(
        None, help="Path to service account credentials file for GCS provider. Required for GCS."
    ),
):
    # At least one kind flag is required. If several are given, the first of
    # --builtin, --upstream, --s3, --gcs (in that order) wins.
    if not any([builtin, s3, gcs, upstream]):
        raise ValueError("Must specify one of --builtin, --upstream, --s3, or --gcs.")

    if builtin:
        # Prompt interactively when no provider was passed on the command line.
        provider = provider or ask_provider()
        file_system = BuiltinFileSystem(provider=provider)

    elif upstream:
        upstream_project = ask_project(title="Select a project to use as file system.")
        file_system = UpstreamFileSystem(project_id=upstream_project)

    elif s3:
        if access_key_id is None or secret_access_key is None:
            raise ValueError("--access-key-id and --secret-access-key are required for S3 provider.")
        credentials = AWSSecretAccessKey(access_key_id=access_key_id, secret_access_key=secret_access_key)

        if bucket is None:
            raise ValueError("--bucket is required for S3 provider.")
        file_system = UpdateS3FileSystem(bucket=bucket, credentials=credentials)
        # Optional settings are only assigned when provided.
        if endpoint:
            file_system.endpoint = endpoint
        if region:
            file_system.region = region
        if directory:
            file_system.directory = directory

    else:
        # Only --gcs can be set here: the guard above ensures at least one
        # flag is set and the other three were ruled out by earlier branches.
        if credentials_path is None:
            raise ValueError("--credentials-path is required for GCS provider.")
        # Read with an explicit encoding so the result does not depend on the
        # platform's default locale.
        with open(credentials_path, encoding="utf-8") as f:
            service_account = f.read()
        credentials = GCPServiceAccount(credentials=SecretStr(service_account))

        if region is None or bucket is None:
            raise ValueError("--region and --bucket is required for GCS provider.")
        file_system = UpdateGCSFileSystem(bucket=bucket, region=region, credentials=credentials)
        if directory:
            file_system.directory = directory

    res = state.settings.api.file_system.update_file_system(project, file_system)
    rich.print(res.file_system)
107
+
108
+
109
@app.command(help="Lists the available built-in file system providers.")
def list_providers():
    # Print each provider returned by the API on its own line.
    providers = state.settings.api.file_system.list_providers()
    for name in providers:
        rich.print(name)
@@ -0,0 +1,7 @@
1
+ from spiral.cli import AsyncTyper
2
+
3
+ from . import namespaces, tables
4
+
5
# Iceberg command group; mounts the `tables` and `namespaces` sub-apps.
app = AsyncTyper(short_help="Apache Iceberg Catalog.")
app.add_typer(tables.app, name="tables")
app.add_typer(namespaces.app, name="namespaces")
@@ -0,0 +1,47 @@
1
+ import sys
2
+ from typing import Annotated
3
+
4
+ import pyiceberg.exceptions
5
+ import rich
6
+ import typer
7
+ from typer import Argument
8
+
9
+ from spiral.cli import AsyncTyper, state
10
+ from spiral.cli.types import ProjectArg
11
+
12
+ app = AsyncTyper(short_help="Apache Iceberg Namespaces.")
13
+
14
+
15
@app.command(help="List namespaces.")
def ls(
    project: ProjectArg,
    namespace: Annotated[str | None, Argument(help="List only namespaces under this namespace.")] = None,
):
    """List Iceberg namespaces."""
    # Imported explicitly: `import rich` on its own does not guarantee that
    # the `rich.table` submodule has been loaded.
    from rich.table import Table

    catalog = state.spiral.iceberg.catalog()

    # Pick the parent to list under and the matching error text up front so
    # the catalog call and its error handling are written only once.
    if namespace is None:
        parent = project
        subject = f"The project, {repr(project)}, does not exist or you lack the "
    else:
        parent = (project, namespace)
        subject = f"The namespace, {repr(project)}.{repr(namespace)}, does not exist or you lack the "

    try:
        namespaces = catalog.list_namespaces(parent)
    except pyiceberg.exceptions.ForbiddenError:
        print(
            subject + "`iceberg:view` permission to list namespaces in it.",
            file=sys.stderr,
        )
        raise typer.Exit(code=1)

    table = Table("Namespace ID", title="Iceberg namespaces")
    for ns in namespaces:
        # Each namespace is a sequence of parts, rendered dot-separated.
        table.add_row(".".join(ns))
    rich.print(table)