pyspiral 0.6.14__cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.6.15__cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyspiral-0.6.14.dist-info → pyspiral-0.6.15.dist-info}/METADATA +1 -1
- {pyspiral-0.6.14.dist-info → pyspiral-0.6.15.dist-info}/RECORD +9 -8
- spiral/_lib.abi3.so +0 -0
- spiral/enrichment.py +17 -1
- spiral/expressions/__init__.py +2 -0
- spiral/expressions/file.py +16 -0
- spiral/expressions/s3.py +1 -1
- {pyspiral-0.6.14.dist-info → pyspiral-0.6.15.dist-info}/WHEEL +0 -0
- {pyspiral-0.6.14.dist-info → pyspiral-0.6.15.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
pyspiral-0.6.
|
|
2
|
-
pyspiral-0.6.
|
|
3
|
-
pyspiral-0.6.
|
|
1
|
+
pyspiral-0.6.15.dist-info/METADATA,sha256=1wRDpjCt2Im2qJm0yh94g-_VYl-19528b6-Vc_ad0D4,1875
|
|
2
|
+
pyspiral-0.6.15.dist-info/WHEEL,sha256=0ecHyBdkJfSXYIVmWsPh7S-4h4fSrB4FlXhlnIu9c_A,130
|
|
3
|
+
pyspiral-0.6.15.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
|
4
4
|
spiral/__init__.py,sha256=gAysTwG_oEeKVMdCOfOzDhl0bM2miiK8Ds2vvUihBWw,1153
|
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
|
5
|
+
spiral/_lib.abi3.so,sha256=5vNwYz5Yu337Ivah4ia1SwWQEgyfI0Ylxo6WthXIal8,61245800
|
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
|
@@ -60,12 +60,13 @@ spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
60
60
|
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
|
61
61
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
|
62
62
|
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
|
63
|
-
spiral/enrichment.py,sha256=
|
|
64
|
-
spiral/expressions/__init__.py,sha256=
|
|
63
|
+
spiral/enrichment.py,sha256=YDaXcJPtmJzpLrYmn2pdllVcRIkXlb578KKgkIb38Eo,6518
|
|
64
|
+
spiral/expressions/__init__.py,sha256=vMNFeeozkWph3dBpEkHPThUhZdT9ZZzxHe71HnkWlDU,8020
|
|
65
65
|
spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
|
|
66
|
+
spiral/expressions/file.py,sha256=HRzGjc3goIlUlKjysoirexDaflNdnj9OoZ6j2uTKZnA,388
|
|
66
67
|
spiral/expressions/http.py,sha256=WfHVLqz_LjBr78mN3ARBRQqgBrkao7-S73JxjC4Xwvo,356
|
|
67
68
|
spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
|
|
68
|
-
spiral/expressions/s3.py,sha256=
|
|
69
|
+
spiral/expressions/s3.py,sha256=d6Z_nnBRXopoPWnwGxlgjwyeYlrIMmonX-83PkE8LPo,375
|
|
69
70
|
spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
|
|
70
71
|
spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
|
|
71
72
|
spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
|
|
@@ -105,4 +106,4 @@ spiral/table.py,sha256=prjDBcm6Qerdq3ypXzfbXb7ngAcO0j-Z9aTeZvzKoqs,12209
|
|
|
105
106
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
|
106
107
|
spiral/transaction.py,sha256=hQm6DfCklMDpIYJ9qA2wR45cCuUPGCiJy1tHGE3AsEY,3418
|
|
107
108
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
|
108
|
-
pyspiral-0.6.
|
|
109
|
+
pyspiral-0.6.15.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
|
Binary file
|
spiral/enrichment.py
CHANGED
|
@@ -64,7 +64,12 @@ class Enrichment:
|
|
|
64
64
|
# TODO(marko): Need to figure out this sharding with key space index in places.
|
|
65
65
|
# We could compute on-demand instead of requiring a resource.
|
|
66
66
|
def apply_dask(
|
|
67
|
-
self,
|
|
67
|
+
self,
|
|
68
|
+
*,
|
|
69
|
+
index: Optional["KeySpaceIndex"] = None,
|
|
70
|
+
partition_size_bytes: int | None = None,
|
|
71
|
+
tx_dump: str | None = None,
|
|
72
|
+
**kwargs,
|
|
68
73
|
) -> None:
|
|
69
74
|
"""Use distributed Dask to apply the enrichment. Requires `dask[distributed]` to be installed.
|
|
70
75
|
|
|
@@ -79,6 +84,9 @@ class Enrichment:
|
|
|
79
84
|
Args:
|
|
80
85
|
index: Optional key space index to use for sharding the enrichment.
|
|
81
86
|
If not provided, the table's default sharding will be used.
|
|
87
|
+
partition_size_bytes: The maximum partition size in bytes.
|
|
88
|
+
If not provided, the default partition size is used.
|
|
89
|
+
tx_dump: Optional path to dump the transaction operations as a JSON file for debugging.
|
|
82
90
|
**kwargs: Additional keyword arguments to pass to `dask.distributed.Client`
|
|
83
91
|
such as `address` to connect to an existing cluster.
|
|
84
92
|
"""
|
|
@@ -92,6 +100,7 @@ class Enrichment:
|
|
|
92
100
|
|
|
93
101
|
# Start a transaction BEFORE the planning scan.
|
|
94
102
|
tx = self._table.txn()
|
|
103
|
+
backup_ops = []
|
|
95
104
|
plan_scan = self._table.spiral.scan(self._projection, where=self._where)
|
|
96
105
|
|
|
97
106
|
# Determine the "tasks". Use the index if provided.
|
|
@@ -114,11 +123,18 @@ class Enrichment:
|
|
|
114
123
|
for result in dask_client.gather(enrichments):
|
|
115
124
|
result: EnrichmentTaskResult
|
|
116
125
|
tx.include(result.ops)
|
|
126
|
+
backup_ops.extend(result.ops)
|
|
117
127
|
|
|
118
128
|
if tx.is_empty():
|
|
119
129
|
logger.warning("Transaction not committed. No rows were read for enrichment.")
|
|
120
130
|
return
|
|
121
131
|
|
|
132
|
+
# TODO(marko): We can remove this when I have more trust in very large tx commits.
|
|
133
|
+
if tx_dump is not None:
|
|
134
|
+
with open(tx_dump, "w") as f:
|
|
135
|
+
f.writelines([op.to_json() for op in backup_ops])
|
|
136
|
+
logger.info(f"Transaction dumped to {tx_dump}")
|
|
137
|
+
|
|
122
138
|
tx.commit()
|
|
123
139
|
|
|
124
140
|
|
spiral/expressions/__init__.py
CHANGED
|
@@ -8,6 +8,7 @@ import pyarrow as pa
|
|
|
8
8
|
|
|
9
9
|
from spiral import _lib, arrow_
|
|
10
10
|
|
|
11
|
+
from . import file as file
|
|
11
12
|
from . import http as http
|
|
12
13
|
from . import list_ as list
|
|
13
14
|
from . import s3 as s3
|
|
@@ -50,6 +51,7 @@ __all__ = [
|
|
|
50
51
|
"text",
|
|
51
52
|
"s3",
|
|
52
53
|
"http",
|
|
54
|
+
"file",
|
|
53
55
|
"UDF",
|
|
54
56
|
]
|
|
55
57
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from spiral import _lib
|
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get(expr: ExprLike) -> Expr:
|
|
6
|
+
"""Read data from the local filesystem by the file:// URL.
|
|
7
|
+
|
|
8
|
+
Args:
|
|
9
|
+
expr: URLs of the data that needs to be read.
|
|
10
|
+
"""
|
|
11
|
+
from spiral import expressions as se
|
|
12
|
+
|
|
13
|
+
expr = se.lift(expr)
|
|
14
|
+
|
|
15
|
+
# This just works :)
|
|
16
|
+
return Expr(_lib.expr.s3.get(expr.__expr__))
|
spiral/expressions/s3.py
CHANGED
|
@@ -3,7 +3,7 @@ from spiral.expressions.base import Expr, ExprLike
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
def get(expr: ExprLike) -> Expr:
|
|
6
|
-
"""Read data from object storage by the
|
|
6
|
+
"""Read data from object storage by the s3:// URL.
|
|
7
7
|
|
|
8
8
|
Args:
|
|
9
9
|
expr: URLs of the data that needs to be read from object storage.
|
|
File without changes
|
|
File without changes
|