pyspiral 0.6.2__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.6.4__cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.6.2.dist-info → pyspiral-0.6.4.dist-info}/METADATA +3 -3
- {pyspiral-0.6.2.dist-info → pyspiral-0.6.4.dist-info}/RECORD +30 -28
- {pyspiral-0.6.2.dist-info → pyspiral-0.6.4.dist-info}/WHEEL +1 -1
- spiral/_lib.abi3.so +0 -0
- spiral/api/client.py +1 -1
- spiral/api/filesystems.py +9 -40
- spiral/cli/app.py +42 -6
- spiral/cli/fs.py +25 -60
- spiral/cli/login.py +3 -2
- spiral/core/_tools/__init__.pyi +5 -0
- spiral/core/client/__init__.pyi +12 -1
- spiral/core/table/__init__.pyi +3 -0
- spiral/debug/manifests.py +26 -18
- spiral/debug/scan.py +21 -3
- spiral/expressions/__init__.py +2 -2
- spiral/expressions/base.py +9 -3
- spiral/iterable_dataset.py +106 -0
- spiral/protogen/_/arrow/flight/protocol/sql/__init__.py +1 -1
- spiral/protogen/_/google/protobuf/__init__.py +121 -1
- spiral/protogen/_/scandal/__init__.py +1 -1
- spiral/protogen/_/spfs/__init__.py +1 -1
- spiral/protogen/_/spql/__init__.py +1 -1
- spiral/protogen/_/substrait/__init__.py +1 -1
- spiral/protogen/_/substrait/extensions/__init__.py +1 -1
- spiral/scan.py +22 -34
- spiral/settings.py +2 -0
- spiral/snapshot.py +16 -0
- spiral/streaming_/stream.py +7 -3
- spiral/table.py +48 -91
- {pyspiral-0.6.2.dist-info → pyspiral-0.6.4.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pyspiral
|
3
|
-
Version: 0.6.2
|
3
|
+
Version: 0.6.4
|
4
4
|
Classifier: Intended Audience :: Science/Research
|
5
5
|
Classifier: Operating System :: OS Independent
|
6
6
|
Classifier: Programming Language :: Python
|
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.13
|
13
13
|
Classifier: Programming Language :: Rust
|
14
14
|
Classifier: License :: Other/Proprietary License
|
15
|
-
Requires-Dist: betterproto2>=0.
|
15
|
+
Requires-Dist: betterproto2>=0.9.0
|
16
16
|
Requires-Dist: google-re2>=1.1.20240702
|
17
17
|
Requires-Dist: grpclib>=0.4.7
|
18
18
|
Requires-Dist: hishel>=0.0.30
|
@@ -21,7 +21,7 @@ Requires-Dist: nanoid>=2.0.0
|
|
21
21
|
Requires-Dist: numpy>=2
|
22
22
|
Requires-Dist: pyarrow>=21.0.0
|
23
23
|
Requires-Dist: pydantic-settings>=2.3.4
|
24
|
-
Requires-Dist: pydantic[email]>=2.5.3
|
24
|
+
Requires-Dist: pydantic[email]>=2.5.3,<2.12
|
25
25
|
Requires-Dist: pyjwt[crypto]>=2.9.0
|
26
26
|
Requires-Dist: pyperclip>=1.9.0
|
27
27
|
Requires-Dist: questionary>=2.0.1
|
@@ -1,13 +1,13 @@
|
|
1
|
-
pyspiral-0.6.
|
2
|
-
pyspiral-0.6.
|
3
|
-
pyspiral-0.6.2.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
1
|
+
pyspiral-0.6.4.dist-info/METADATA,sha256=NHgJTgogXMcDIHbb3I6ONsKdj23IfS2Vwa60m6YgGZo,1842
|
2
|
+
pyspiral-0.6.4.dist-info/WHEEL,sha256=sHl2MPySRQtLBS4t9I9tl1bAeFFBhTGABHYdwnegkVM,130
|
3
|
+
pyspiral-0.6.4.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
4
4
|
spiral/__init__.py,sha256=5c0faqg-kHZBDwriQ7LzLAMcFolIucp-IA1EzNvCZ3k,711
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
5
|
+
spiral/_lib.abi3.so,sha256=oq_VegFDXsy3-ap1Coy5wf_JHD4s6s8PZ8VguB65bmQ,50403064
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
9
|
-
spiral/api/client.py,sha256=
|
10
|
-
spiral/api/filesystems.py,sha256=
|
9
|
+
spiral/api/client.py,sha256=xGc3RKRrerDGGt3QA7Y_friEa-OkZXSQI2Yd1KeFDdw,4668
|
10
|
+
spiral/api/filesystems.py,sha256=yEHgHfo7t1_becm0UFedc3nd49_G77hHjYwtYQ6P9XU,4240
|
11
11
|
spiral/api/key_space_indexes.py,sha256=-38rZXTdkL4mLhp9h3CtqyIyutzzq88tV6bhK05MqYE,640
|
12
12
|
spiral/api/organizations.py,sha256=B-8zZ7lFJANGK7dUNbo_aU-cgI959JBP9VcWb6wdgi0,1895
|
13
13
|
spiral/api/projects.py,sha256=62Y1lqI_TpUh3WKQqrjbLWJHiZsI_X3g8u2RTbUwkoA,6162
|
@@ -20,12 +20,12 @@ spiral/arrow_.py,sha256=T1LZ7bh9aMDbXfpUsf0dR0E1roTQyAYSgZ2mL4s8J_4,7681
|
|
20
20
|
spiral/cli/__init__.py,sha256=LutjpWZu5Rvmba8C8bPa5vOCv74JuAoE1kvz0nd48dE,2476
|
21
21
|
spiral/cli/__main__.py,sha256=kNaKM2xgJo7GRogf83nYldLM-RGUR6vymdGwZxywQu0,71
|
22
22
|
spiral/cli/admin.py,sha256=-ubYqs8nKjnQStbQ68jpWx_9xh0TsaxI0wM1Hfko8_U,319
|
23
|
-
spiral/cli/app.py,sha256=
|
23
|
+
spiral/cli/app.py,sha256=lv37s8nvptxrJuloe9W603Oz5-1n5_BPzbbKdIvBkb4,2759
|
24
24
|
spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
|
25
|
-
spiral/cli/fs.py,sha256=
|
25
|
+
spiral/cli/fs.py,sha256=vaPcSc2YghhHeipxNitIdsHaBhFwlwkvPFqYsFSN9P0,2927
|
26
26
|
spiral/cli/iceberg.py,sha256=Q14tcGcn1LixbFCYP0GhfYwFFXTmmi8tqBPYwalJEyE,3248
|
27
27
|
spiral/cli/key_spaces.py,sha256=x3IFRP5d47pKiAHeWExYMOBaT2TwxbWjVM01SUqKrwI,2943
|
28
|
-
spiral/cli/login.py,sha256=
|
28
|
+
spiral/cli/login.py,sha256=iyWQR2n2cOcg2-6NMaD2uCSQfvsoz7wMeyT9s7h80Fc,698
|
29
29
|
spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
|
30
30
|
spiral/cli/printer.py,sha256=HcvSUpaMItzmhBUfIHROK1Z3SL8J8wDopS3Qo8H00uw,1781
|
31
31
|
spiral/cli/projects.py,sha256=UYrBlLcFacuXExdLX1sZByfvkz9MRtk_0oRAZvqHa0w,5105
|
@@ -37,20 +37,21 @@ spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
|
|
37
37
|
spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
|
38
38
|
spiral/client.py,sha256=Po9xgCH3NwVsCeRZMm3eJUPV77Rknyj-9MfCS1TbdTg,6623
|
39
39
|
spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
+
spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
|
40
41
|
spiral/core/authn/__init__.pyi,sha256=Jw_8ywTMDTwgAtGxMtFED63rU0jOgrv-eZtaZ5sR5t4,757
|
41
|
-
spiral/core/client/__init__.pyi,sha256=
|
42
|
-
spiral/core/table/__init__.pyi,sha256=
|
42
|
+
spiral/core/client/__init__.pyi,sha256=6D4eu78eHW9Zl5Fx3UIRbk9ywvlZJephal56H1LDjko,6095
|
43
|
+
spiral/core/table/__init__.pyi,sha256=ajxO2N92hTQ4evsl7QBWB8ivz-cDNxXnAv0jytRw0ZY,3183
|
43
44
|
spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
|
44
45
|
spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
|
45
46
|
spiral/core/table/spec/__init__.pyi,sha256=0NyGeyEhV_ebwKWVU3sqSvdF2D9v8kEVwo6wYAHF99M,5579
|
46
47
|
spiral/dataset.py,sha256=NNqG-oOrhbmNC2OMZ9AYAm4YkwwBozeRI6zXtz4cspA,8008
|
47
48
|
spiral/datetime_.py,sha256=1TA1RYIRU22qcUuipIjVhAtGnPDVn2z9WttuhkmfkwY,964
|
48
49
|
spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
|
-
spiral/debug/manifests.py,sha256=
|
50
|
+
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
50
51
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
51
|
-
spiral/debug/scan.py,sha256=
|
52
|
-
spiral/expressions/__init__.py,sha256=
|
53
|
-
spiral/expressions/base.py,sha256=
|
52
|
+
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
53
|
+
spiral/expressions/__init__.py,sha256=QQrnKrrWnDk7W5I9yhPS21ME0cUkEou30hga8MVkt1I,6396
|
54
|
+
spiral/expressions/base.py,sha256=4qlXbi4IusZi5b4QEadWhXtmuYd0ETzOB1NWMWYIsTs,5163
|
54
55
|
spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
|
55
56
|
spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
|
56
57
|
spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
|
@@ -65,35 +66,36 @@ spiral/expressions/tiff.py,sha256=fQwIn0kLFBM2Y3YYIHmTgb_EIRHKT2fNc77nioDQQw4,80
|
|
65
66
|
spiral/expressions/udf.py,sha256=yb9MIcrFftpNDxgBF228cvdv6TY-hEFikYz2fq_nzWo,1353
|
66
67
|
spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
|
67
68
|
spiral/iceberg.py,sha256=JGq62Qnf296r9_hRAoH85GQq45-uSBjwXWw_CvPi6G4,930
|
69
|
+
spiral/iterable_dataset.py,sha256=Eekg9ad8tcwXcloHWReBbvCSr5ZappRHn2ldKTvwqS0,4622
|
68
70
|
spiral/key_space_index.py,sha256=NAB_nONEjpMYbse8suz42w7Qb5OPHuKN9h9CT2NJe08,1460
|
69
71
|
spiral/project.py,sha256=CO_Pn6vPqaonNvRdCNRFcBWr4TqO2AsAUTH5xawIeCE,7283
|
70
72
|
spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
73
|
spiral/protogen/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
74
|
spiral/protogen/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
75
|
spiral/protogen/_/arrow/flight/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
-
spiral/protogen/_/arrow/flight/protocol/sql/__init__.py,sha256=
|
76
|
+
spiral/protogen/_/arrow/flight/protocol/sql/__init__.py,sha256=ooZZsDCRFpktUCH11OdxMRa_GLQYnY9w-1fBr5a7vBk,90023
|
75
77
|
spiral/protogen/_/google/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
|
-
spiral/protogen/_/google/protobuf/__init__.py,sha256=
|
78
|
+
spiral/protogen/_/google/protobuf/__init__.py,sha256=H0FVEXusqww2j5dl7Ee05tR6qMG_hQioUp1qFfDgnco,80036
|
77
79
|
spiral/protogen/_/message_pool.py,sha256=4-cRhhiM6bmfpUJZ8qxc8LEyqHBHpLCcotjbyZxl7JM,71
|
78
80
|
spiral/protogen/_/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
79
|
-
spiral/protogen/_/scandal/__init__.py,sha256
|
80
|
-
spiral/protogen/_/spfs/__init__.py,sha256=
|
81
|
-
spiral/protogen/_/spql/__init__.py,sha256=
|
82
|
-
spiral/protogen/_/substrait/__init__.py,sha256
|
83
|
-
spiral/protogen/_/substrait/extensions/__init__.py,sha256=
|
81
|
+
spiral/protogen/_/scandal/__init__.py,sha256=liUQAICLd2sPccCmqo0_c1duSbNj_m8p_IgmdnHsB3E,4965
|
82
|
+
spiral/protogen/_/spfs/__init__.py,sha256=zMMEDIfPXQNBkisLI-iMWbJABye-vK42Gf2BUQQYR_c,2028
|
83
|
+
spiral/protogen/_/spql/__init__.py,sha256=PEC4bI-PHdJ4Zd8Jb1k6Xk2iFYoYqIUbTGlL2JVGnT0,1548
|
84
|
+
spiral/protogen/_/substrait/__init__.py,sha256=eVv-5CRJv8KstANM-_U3WzCVmkmZ8_BTOFP-1f2sSX4,209839
|
85
|
+
spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJUzk5GJ6X2e-HTvyk7emGsk,5326
|
84
86
|
spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
87
|
spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
86
88
|
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
87
|
-
spiral/scan.py,sha256=
|
89
|
+
spiral/scan.py,sha256=SCDZ9UXA4g0Jq9BQ9Zt7cEK2NBq64Hqh_SttR4tF6jo,6252
|
88
90
|
spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
|
89
|
-
spiral/settings.py,sha256=
|
90
|
-
spiral/snapshot.py,sha256=
|
91
|
+
spiral/settings.py,sha256=cLD1MSyzIM1CRRB2ncYB9u849cxGAqYlycvNExU6TGo,3096
|
92
|
+
spiral/snapshot.py,sha256=tYEIKYYqS9Eusb8rsrG46VD7fiNPA9yVOR5ajMMtT_g,2018
|
91
93
|
spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
|
92
94
|
spiral/streaming_/reader.py,sha256=Kpqknv2jn12jUhHOEEDArj0JZwrWb8XjoOGs9HrdVyA,4047
|
93
|
-
spiral/streaming_/stream.py,sha256
|
95
|
+
spiral/streaming_/stream.py,sha256=-prGp73h0XDsdKW0mAEamy4AXhd1oF5fBbNbbY1k2-A,5931
|
94
96
|
spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
|
95
|
-
spiral/table.py,sha256=
|
97
|
+
spiral/table.py,sha256=ru1G7CXZGD-k4sg621qe-IEAU9kU1WujZ32AXAmdvx4,8861
|
96
98
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
97
99
|
spiral/transaction.py,sha256=nSykH4UGs9hGtWuSWK9YyT9jfEuvzfkKoUgMM5Xt4zU,1841
|
98
100
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
99
|
-
pyspiral-0.6.2.dist-info/RECORD,,
|
101
|
+
pyspiral-0.6.4.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
Binary file
|
spiral/api/client.py
CHANGED
@@ -129,7 +129,7 @@ class _Client:
|
|
129
129
|
) -> ResponseT:
|
130
130
|
req_data: dict[str, Any] = {}
|
131
131
|
if req is not None:
|
132
|
-
req_data = dict(json=TypeAdapter(req.__class__).dump_python(req, mode="json"))
|
132
|
+
req_data = dict(json=TypeAdapter(req.__class__).dump_python(req, mode="json", exclude_none=True))
|
133
133
|
|
134
134
|
token = self.authn.token()
|
135
135
|
resp = self.http.request(
|
spiral/api/filesystems.py
CHANGED
@@ -43,10 +43,13 @@ class S3FileSystem(BaseModel):
|
|
43
43
|
"""File system backed by an S3-compatible bucket."""
|
44
44
|
|
45
45
|
type: Literal["s3"] = "s3"
|
46
|
-
endpoint: str =
|
47
|
-
region: str
|
46
|
+
endpoint: str | None = None
|
47
|
+
region: str
|
48
48
|
bucket: str
|
49
|
-
directory: DirectoryPath | None
|
49
|
+
directory: DirectoryPath | None = None
|
50
|
+
# ARN of the role to assume when accessing the bucket https://docs.spiraldb.com/filesystems#aws
|
51
|
+
# role_arn: str | None = None
|
52
|
+
role_arn: str # TODO(marko): Make optional once we support third-party S3-compatible storage.
|
50
53
|
|
51
54
|
|
52
55
|
class GCSFileSystem(BaseModel):
|
@@ -55,7 +58,7 @@ class GCSFileSystem(BaseModel):
|
|
55
58
|
type: Literal["gcs"] = "gcs"
|
56
59
|
region: str
|
57
60
|
bucket: str
|
58
|
-
directory: DirectoryPath | None
|
61
|
+
directory: DirectoryPath | None = None
|
59
62
|
|
60
63
|
|
61
64
|
FileSystem = Annotated[
|
@@ -78,40 +81,6 @@ class Mount(BaseModel):
|
|
78
81
|
principal: str
|
79
82
|
|
80
83
|
|
81
|
-
class AWSSecretAccessKey(BaseModel):
|
82
|
-
"""AWS secret access key credentials to be used with an S3 file system.
|
83
|
-
The access key must have read/write access to the bucket specified in the file system.
|
84
|
-
"""
|
85
|
-
|
86
|
-
access_key_id: str
|
87
|
-
secret_access_key: str
|
88
|
-
|
89
|
-
|
90
|
-
class UpdateS3FileSystem(S3FileSystem):
|
91
|
-
credentials: AWSSecretAccessKey
|
92
|
-
|
93
|
-
|
94
|
-
class GCPServiceAccount(BaseModel):
|
95
|
-
"""Google Cloud Platform service account credentials to be used with a GCS file system.
|
96
|
-
The service account must have read/write access to the bucket specified in the file system.
|
97
|
-
"""
|
98
|
-
|
99
|
-
service_account: str
|
100
|
-
|
101
|
-
|
102
|
-
class UpdateGCSFileSystem(GCSFileSystem):
|
103
|
-
credentials: GCPServiceAccount
|
104
|
-
|
105
|
-
|
106
|
-
UpdateFileSystemRequest = Annotated[
|
107
|
-
BuiltinFileSystem | UpstreamFileSystem | UpdateS3FileSystem | UpdateGCSFileSystem, Field(discriminator="type")
|
108
|
-
]
|
109
|
-
|
110
|
-
|
111
|
-
class UpdateFileSystemResponse(BaseModel):
|
112
|
-
file_system: FileSystem
|
113
|
-
|
114
|
-
|
115
84
|
class CreateMountRequest(BaseModel):
|
116
85
|
directory: DirectoryPath
|
117
86
|
mode: Mode
|
@@ -136,9 +105,9 @@ class FileSystemService(ServiceBase):
|
|
136
105
|
response = self.client.get("/v1/file-systems/builtin-providers", dict)
|
137
106
|
return response.get("providers", [])
|
138
107
|
|
139
|
-
def update_file_system(self, project_id: ProjectId, request: UpdateFileSystemRequest) -> UpdateFileSystemResponse:
|
108
|
+
def update_file_system(self, project_id: ProjectId, request: FileSystem) -> FileSystem:
|
140
109
|
"""Update project's default file system."""
|
141
|
-
return self.client.post(f"/v1/file-systems/{project_id}", request, UpdateFileSystemResponse)
|
110
|
+
return self.client.post(f"/v1/file-systems/{project_id}", request, FileSystem)
|
142
111
|
|
143
112
|
def get_file_system(self, project_id: ProjectId) -> FileSystem:
|
144
113
|
"""Get project's default file system."""
|
spiral/cli/app.py
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
|
+
from importlib import metadata
|
3
4
|
from logging.handlers import RotatingFileHandler
|
5
|
+
from typing import Annotated
|
6
|
+
|
7
|
+
import typer
|
4
8
|
|
5
9
|
from spiral.cli import (
|
6
10
|
AsyncTyper,
|
@@ -18,16 +22,48 @@ from spiral.cli import (
|
|
18
22
|
text,
|
19
23
|
workloads,
|
20
24
|
)
|
21
|
-
from spiral.settings import LOG_DIR, Settings
|
25
|
+
from spiral.settings import LOG_DIR, PACKAGE_NAME, Settings
|
22
26
|
|
23
27
|
app = AsyncTyper(name="spiral")
|
24
28
|
|
25
29
|
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
def version_callback(ctx: typer.Context, value: bool):
|
31
|
+
"""
|
32
|
+
Display the version of the Spiral CLI.
|
33
|
+
"""
|
34
|
+
# True when generating completion, we can just return
|
35
|
+
if ctx.resilient_parsing:
|
36
|
+
return
|
37
|
+
|
38
|
+
if value:
|
39
|
+
ver = metadata.version(PACKAGE_NAME)
|
40
|
+
print(f"spiral {ver}")
|
41
|
+
raise typer.Exit()
|
42
|
+
|
43
|
+
|
44
|
+
def verbose_callback(ctx: typer.Context, value: bool):
|
45
|
+
"""
|
46
|
+
Use more verbose output.
|
47
|
+
"""
|
48
|
+
# True when generating completion, we can just return
|
49
|
+
if ctx.resilient_parsing:
|
50
|
+
return
|
51
|
+
|
52
|
+
if value:
|
29
53
|
logging.getLogger().setLevel(level=logging.INFO)
|
30
54
|
|
55
|
+
|
56
|
+
@app.callback(invoke_without_command=True)
|
57
|
+
def _callback(
|
58
|
+
ctx: typer.Context,
|
59
|
+
version: Annotated[
|
60
|
+
bool | None,
|
61
|
+
typer.Option("--version", callback=version_callback, help=version_callback.__doc__, is_eager=True),
|
62
|
+
] = None,
|
63
|
+
verbose: Annotated[
|
64
|
+
bool | None, typer.Option("--verbose", callback=verbose_callback, help=verbose_callback.__doc__)
|
65
|
+
] = None,
|
66
|
+
):
|
31
67
|
# Load the settings (we reload in the callback to support testing under different env vars)
|
32
68
|
state.settings = Settings()
|
33
69
|
|
@@ -42,12 +78,12 @@ app.add_typer(text.app, name="text")
|
|
42
78
|
app.add_typer(telemetry.app, name="telemetry")
|
43
79
|
app.command("console")(console.command)
|
44
80
|
app.command("login")(login.command)
|
45
|
-
app.command("whoami")(login.whoami)
|
46
81
|
|
47
82
|
# Register unless we're building docs. Because Typer docs command does not skip hidden commands...
|
48
83
|
if not bool(os.environ.get("SPIRAL_DOCS", False)):
|
49
|
-
app.add_typer(workloads.app, name="workloads", hidden=True)
|
50
84
|
app.add_typer(admin.app, name="admin", hidden=True)
|
85
|
+
app.add_typer(workloads.app, name="workloads", hidden=True)
|
86
|
+
app.command("whoami", hidden=True)(login.whoami)
|
51
87
|
app.command("logout", hidden=True)(login.logout)
|
52
88
|
|
53
89
|
|
spiral/cli/fs.py
CHANGED
@@ -1,15 +1,12 @@
|
|
1
|
-
from typing import Annotated
|
1
|
+
from typing import Literal
|
2
2
|
|
3
3
|
import questionary
|
4
|
-
from pydantic import SecretStr
|
5
4
|
from typer import Option
|
6
5
|
|
7
6
|
from spiral.api.filesystems import (
|
8
|
-
AWSSecretAccessKey,
|
9
7
|
BuiltinFileSystem,
|
10
|
-
|
11
|
-
|
12
|
-
UpdateS3FileSystem,
|
8
|
+
GCSFileSystem,
|
9
|
+
S3FileSystem,
|
13
10
|
UpstreamFileSystem,
|
14
11
|
)
|
15
12
|
from spiral.cli import CONSOLE, AsyncTyper, state
|
@@ -21,11 +18,7 @@ app = AsyncTyper(short_help="File Systems.")
|
|
21
18
|
@app.command(help="Show the file system configured for project.")
|
22
19
|
def show(project: ProjectArg):
|
23
20
|
file_system = state.settings.api.file_system.get_file_system(project)
|
24
|
-
|
25
|
-
case BuiltinFileSystem(provider=provider):
|
26
|
-
CONSOLE.print(f"provider: {provider}")
|
27
|
-
case _:
|
28
|
-
CONSOLE.print(file_system)
|
21
|
+
CONSOLE.print(file_system)
|
29
22
|
|
30
23
|
|
31
24
|
def ask_provider():
|
@@ -33,76 +26,48 @@ def ask_provider():
|
|
33
26
|
return questionary.select("Select a file system provider", choices=res).ask()
|
34
27
|
|
35
28
|
|
36
|
-
BuiltinProviderOpt = Annotated[
|
37
|
-
str,
|
38
|
-
Option(help="Built-in provider to use for the file system.", show_default=False, default_factory=ask_provider),
|
39
|
-
]
|
40
|
-
|
41
|
-
|
42
29
|
@app.command(help="Update a project's default file system.")
|
43
30
|
def update(
|
44
31
|
project: ProjectArg,
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
gcs: bool = Option(False, help="Use GCS provider."),
|
51
|
-
provider: str = Option(None, help="Built-in provider to use for the file system."),
|
52
|
-
endpoint: str = Option(None, help="Endpoint for S3 provider."),
|
53
|
-
region: str = Option(None, help="Region for S3 or GCS provider. Required for GCS."),
|
54
|
-
bucket: str = Option(None, help="Bucket name for S3 or GCS provider."),
|
55
|
-
directory: str = Option(None, help="Directory for S3 or GCS provider."),
|
56
|
-
access_key_id: str = Option(None, help="Access key ID for S3 provider. Required for S3."),
|
57
|
-
secret_access_key: str = Option(None, help="Secret access key for S3 provider. Required for S3."),
|
58
|
-
credentials_path: str = Option(
|
59
|
-
None, help="Path to service account credentials file for GCS provider. Required for GCS."
|
32
|
+
type_: Literal["builtin", "s3", "gcs", "upstream"] = Option(None, "--type", help="Type of the file system."),
|
33
|
+
provider: str = Option(None, help="Provider, when using `builtin` type."),
|
34
|
+
endpoint: str = Option(None, help="Endpoint, when using `s3` type."),
|
35
|
+
region: str = Option(
|
36
|
+
None, help="Region, when using `s3` or `gcs` type (defaults to `auto` for `s3` when `endpoint` is set)."
|
60
37
|
),
|
38
|
+
bucket: str = Option(None, help="Bucket, when using `s3` or `gcs` type."),
|
39
|
+
role_arn: str = Option(None, help="Role ARN to assume, when using `s3` type."),
|
61
40
|
):
|
62
|
-
if
|
63
|
-
raise ValueError("Must specify one of --builtin, --upstream, --s3, or --gcs.")
|
64
|
-
|
65
|
-
if builtin:
|
41
|
+
if type_ == "builtin":
|
66
42
|
provider = provider or ask_provider()
|
67
43
|
file_system = BuiltinFileSystem(provider=provider)
|
68
44
|
|
69
|
-
elif upstream:
|
45
|
+
elif type_ == "upstream":
|
70
46
|
upstream_project = ask_project(title="Select a project to use as file system.")
|
71
47
|
file_system = UpstreamFileSystem(project_id=upstream_project)
|
72
48
|
|
73
|
-
elif s3:
|
74
|
-
if
|
75
|
-
raise ValueError("--
|
76
|
-
|
77
|
-
|
49
|
+
elif type_ == "s3":
|
50
|
+
if role_arn is None:
|
51
|
+
raise ValueError("--role-arn is required for S3 provider.")
|
52
|
+
if not role_arn.startswith("arn:aws:iam::") or ":role/" not in role_arn:
|
53
|
+
raise ValueError("Invalid role ARN format. Expected `arn:aws:iam::<account>:role/<role_name>`")
|
78
54
|
if bucket is None:
|
79
55
|
raise ValueError("--bucket is required for S3 provider.")
|
80
|
-
|
56
|
+
region = region or ("auto" if endpoint else None)
|
57
|
+
file_system = S3FileSystem(bucket=bucket, role_arn=role_arn, region=region)
|
81
58
|
if endpoint:
|
82
59
|
file_system.endpoint = endpoint
|
83
|
-
if region:
|
84
|
-
file_system.region = region
|
85
|
-
if directory:
|
86
|
-
file_system.directory = directory
|
87
|
-
|
88
|
-
elif gcs:
|
89
|
-
if credentials_path is None:
|
90
|
-
raise ValueError("--credentials-path is required for GCS provider.")
|
91
|
-
with open(credentials_path) as f:
|
92
|
-
service_account = f.read()
|
93
|
-
credentials = GCPServiceAccount(credentials=SecretStr(service_account))
|
94
60
|
|
61
|
+
elif type_ == "gcs":
|
95
62
|
if region is None or bucket is None:
|
96
63
|
raise ValueError("--region and --bucket is required for GCS provider.")
|
97
|
-
file_system =
|
98
|
-
if directory:
|
99
|
-
file_system.directory = directory
|
64
|
+
file_system = GCSFileSystem(bucket=bucket, region=region)
|
100
65
|
|
101
66
|
else:
|
102
|
-
raise ValueError("
|
67
|
+
raise ValueError(f"Unknown file system type: {type_}")
|
103
68
|
|
104
|
-
|
105
|
-
CONSOLE.print(
|
69
|
+
fs = state.settings.api.file_system.update_file_system(project, file_system)
|
70
|
+
CONSOLE.print(fs)
|
106
71
|
|
107
72
|
|
108
73
|
@app.command(help="Lists the available built-in file system providers.")
|
spiral/cli/login.py
CHANGED
@@ -3,10 +3,11 @@ import jwt
|
|
3
3
|
from spiral.cli import CONSOLE, state
|
4
4
|
|
5
5
|
|
6
|
-
def command(org_id: str | None = None, force: bool = False):
|
6
|
+
def command(org_id: str | None = None, force: bool = False, show_token: bool = False):
|
7
7
|
token = state.settings.device_code_auth.authenticate(force=force, org_id=org_id)
|
8
8
|
CONSOLE.print("Successfully logged in.")
|
9
|
-
|
9
|
+
if show_token:
|
10
|
+
CONSOLE.print(token.expose_secret(), soft_wrap=True)
|
10
11
|
|
11
12
|
|
12
13
|
def whoami():
|
spiral/core/client/__init__.pyi
CHANGED
@@ -136,11 +136,18 @@ class ShuffleStrategy:
|
|
136
136
|
# Externally provided shards to shuffle before reading rows.
|
137
137
|
shards: list[Shard] | None
|
138
138
|
|
139
|
+
# Maximum number of rows to return in a single batch.
|
140
|
+
# If None, it is derived from the shuffle buffer size.
|
141
|
+
# IMPORTANT: The returned batch may be smaller than this size.
|
142
|
+
max_batch_size: int | None
|
143
|
+
|
139
144
|
def __init__(
|
140
145
|
self,
|
146
|
+
shuffle_buffer_size: int,
|
147
|
+
*,
|
141
148
|
seed: int | None = None,
|
142
|
-
shuffle_buffer_size: int | None = None,
|
143
149
|
shards: list[Shard] | None = None,
|
150
|
+
max_batch_size: int | None = None,
|
144
151
|
): ...
|
145
152
|
|
146
153
|
class Operations:
|
@@ -205,3 +212,7 @@ class Operations:
|
|
205
212
|
"""
|
206
213
|
...
|
207
214
|
def metrics(self) -> dict[str, Any]: ...
|
215
|
+
|
216
|
+
def flush_telemetry() -> None:
|
217
|
+
"""Flush telemetry data to the configured exporter."""
|
218
|
+
...
|
spiral/core/table/__init__.pyi
CHANGED
@@ -70,6 +70,9 @@ class Scan:
|
|
70
70
|
self,
|
71
71
|
strategy: ShuffleStrategy | None = None,
|
72
72
|
batch_readahead: int | None = None,
|
73
|
+
num_workers: int | None = None,
|
74
|
+
worker_id: int | None = None,
|
75
|
+
infinite: bool = False,
|
73
76
|
) -> pa.RecordBatchReader: ...
|
74
77
|
def metrics(self) -> dict[str, Any]: ...
|
75
78
|
def _prepare_shard(
|
spiral/debug/manifests.py
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
from rich.console import Console
|
2
|
+
from rich.table import Table
|
3
|
+
|
1
4
|
from spiral import datetime_
|
2
5
|
from spiral.core.table import Scan
|
3
6
|
from spiral.core.table.manifests import FragmentManifest
|
@@ -42,23 +45,26 @@ def _table_of_fragments(manifest: FragmentManifest, title: str):
|
|
42
45
|
avg_size = total_size / fragment_count if fragment_count > 0 else 0
|
43
46
|
|
44
47
|
# Print title and summary
|
45
|
-
|
46
|
-
print(
|
48
|
+
console = Console()
|
49
|
+
console.print(f"\n\n{title}")
|
50
|
+
console.print(
|
47
51
|
f"{fragment_count} fragments, "
|
48
52
|
f"total: {_format_bytes(total_size)}, "
|
49
53
|
f"avg: {_format_bytes(int(avg_size))}, "
|
50
54
|
f"metadata: {_format_bytes(total_metadata_size)}"
|
51
55
|
)
|
52
|
-
print("=" * 120)
|
53
56
|
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
)
|
59
|
-
|
57
|
+
# Create rich table
|
58
|
+
table = Table(title=None, show_header=True, header_style="bold")
|
59
|
+
table.add_column("ID", style="cyan", no_wrap=True)
|
60
|
+
table.add_column("Size (Metadata)", justify="right")
|
61
|
+
table.add_column("Format", justify="center")
|
62
|
+
table.add_column("Key Span", justify="center")
|
63
|
+
table.add_column("Level", justify="center")
|
64
|
+
table.add_column("Committed At", justify="center")
|
65
|
+
table.add_column("Compacted At", justify="center")
|
60
66
|
|
61
|
-
#
|
67
|
+
# Add each fragment as a row
|
62
68
|
for fragment in manifest:
|
63
69
|
committed_str = str(datetime_.from_timestamp_micros(fragment.committed_at)) if fragment.committed_at else "N/A"
|
64
70
|
compacted_str = str(datetime_.from_timestamp_micros(fragment.compacted_at)) if fragment.compacted_at else "N/A"
|
@@ -68,12 +74,14 @@ def _table_of_fragments(manifest: FragmentManifest, title: str):
|
|
68
74
|
)
|
69
75
|
key_span = f"{fragment.key_span.begin}..{fragment.key_span.end}"
|
70
76
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
77
|
+
table.add_row(
|
78
|
+
fragment.id,
|
79
|
+
size_with_metadata,
|
80
|
+
str(fragment.format),
|
81
|
+
key_span,
|
82
|
+
str(fragment.level),
|
83
|
+
committed_str,
|
84
|
+
compacted_str,
|
79
85
|
)
|
86
|
+
|
87
|
+
console.print(table)
|
spiral/debug/scan.py
CHANGED
@@ -146,14 +146,32 @@ def _get_fragment_color(manifest_file: FragmentFile, color_index, total_colors):
|
|
146
146
|
return cm.viridis(color_index / total_colors)
|
147
147
|
|
148
148
|
|
149
|
+
def _get_human_size(size_bytes: int) -> str:
|
150
|
+
# Convert bytes to a human-readable format
|
151
|
+
for unit in ["B", "KB", "MB", "GB", "TB"]:
|
152
|
+
if size_bytes < 1024:
|
153
|
+
return f"{size_bytes:.2f} {unit}"
|
154
|
+
size_bytes /= 1024
|
155
|
+
return f"{size_bytes:.2f} PB"
|
156
|
+
|
157
|
+
|
158
|
+
def _maybe_truncate(text, max_length: int = 30) -> str:
|
159
|
+
text = str(text)
|
160
|
+
if len(text) <= max_length:
|
161
|
+
return text
|
162
|
+
|
163
|
+
half_length = (max_length - 3) // 2
|
164
|
+
return text[:half_length] + "..." + text[-half_length:]
|
165
|
+
|
166
|
+
|
149
167
|
def _get_fragment_legend(manifest_file: FragmentFile):
|
150
168
|
return "\n".join(
|
151
169
|
[
|
152
170
|
f"id: {manifest_file.id}",
|
153
|
-
f"size: {manifest_file.size_bytes
|
171
|
+
f"size: {_get_human_size(manifest_file.size_bytes)} ({manifest_file.size_bytes} bytes)",
|
154
172
|
f"key_span: {manifest_file.key_span}",
|
155
|
-
f"key_min: {manifest_file.key_extent.min}",
|
156
|
-
f"key_max: {manifest_file.key_extent.max}",
|
173
|
+
f"key_min: {_maybe_truncate(manifest_file.key_extent.min)}",
|
174
|
+
f"key_max: {_maybe_truncate(manifest_file.key_extent.max)}",
|
157
175
|
f"format: {manifest_file.format}",
|
158
176
|
f"level: {manifest_file.level}",
|
159
177
|
f"committed_at: {_format_timestamp(manifest_file.committed_at)}",
|
spiral/expressions/__init__.py
CHANGED
@@ -92,7 +92,7 @@ def lift(expr: ExprLike) -> Expr:
|
|
92
92
|
return lift(pa.array(expr))
|
93
93
|
|
94
94
|
# Unpack tables and chunked arrays
|
95
|
-
if isinstance(expr, pa.Table):
|
95
|
+
if isinstance(expr, pa.Table | pa.RecordBatch):
|
96
96
|
expr = expr.to_struct_array()
|
97
97
|
if isinstance(expr, pa.ChunkedArray):
|
98
98
|
expr = expr.combine_chunks()
|
@@ -104,7 +104,7 @@ def lift(expr: ExprLike) -> Expr:
|
|
104
104
|
if isinstance(expr, pa.StructArray) and expr.null_count != 0:
|
105
105
|
# raise ValueError("lift: cannot lift a struct array with nulls.")
|
106
106
|
warnings.warn("found a struct array with nulls", stacklevel=2)
|
107
|
-
if isinstance(expr, pa.StructScalar) and not expr.is_valid
|
107
|
+
if isinstance(expr, pa.StructScalar) and not expr.is_valid:
|
108
108
|
# raise ValueError("lift: cannot lift a struct scalar with nulls.")
|
109
109
|
warnings.warn("found a struct scalar with nulls", stacklevel=2)
|
110
110
|
return lift(arrow_.nest_structs(expr))
|