relationalai 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- relationalai/clients/client.py +3 -4
- relationalai/clients/exec_txn_poller.py +62 -31
- relationalai/clients/resources/snowflake/direct_access_resources.py +6 -5
- relationalai/clients/resources/snowflake/snowflake.py +54 -51
- relationalai/clients/resources/snowflake/use_index_poller.py +1 -1
- relationalai/semantics/internal/snowflake.py +5 -1
- relationalai/semantics/lqp/algorithms.py +173 -0
- relationalai/semantics/lqp/builtins.py +199 -2
- relationalai/semantics/lqp/executor.py +90 -41
- relationalai/semantics/lqp/export_rewriter.py +40 -0
- relationalai/semantics/lqp/ir.py +28 -2
- relationalai/semantics/lqp/model2lqp.py +218 -45
- relationalai/semantics/lqp/passes.py +13 -658
- relationalai/semantics/lqp/rewrite/__init__.py +12 -0
- relationalai/semantics/lqp/rewrite/algorithm.py +385 -0
- relationalai/semantics/lqp/rewrite/annotate_constraints.py +22 -10
- relationalai/semantics/lqp/rewrite/constants_to_vars.py +70 -0
- relationalai/semantics/lqp/rewrite/deduplicate_vars.py +104 -0
- relationalai/semantics/lqp/rewrite/eliminate_data.py +108 -0
- relationalai/semantics/lqp/rewrite/functional_dependencies.py +31 -2
- relationalai/semantics/lqp/rewrite/period_math.py +77 -0
- relationalai/semantics/lqp/rewrite/quantify_vars.py +65 -31
- relationalai/semantics/lqp/rewrite/unify_definitions.py +317 -0
- relationalai/semantics/lqp/utils.py +11 -1
- relationalai/semantics/lqp/validators.py +14 -1
- relationalai/semantics/metamodel/builtins.py +2 -1
- relationalai/semantics/metamodel/compiler.py +2 -1
- relationalai/semantics/metamodel/dependency.py +12 -3
- relationalai/semantics/metamodel/executor.py +11 -1
- relationalai/semantics/metamodel/factory.py +2 -2
- relationalai/semantics/metamodel/helpers.py +7 -0
- relationalai/semantics/metamodel/ir.py +3 -2
- relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +30 -20
- relationalai/semantics/metamodel/rewrite/flatten.py +50 -13
- relationalai/semantics/metamodel/rewrite/format_outputs.py +9 -3
- relationalai/semantics/metamodel/typer/checker.py +6 -4
- relationalai/semantics/metamodel/typer/typer.py +2 -5
- relationalai/semantics/metamodel/visitor.py +4 -3
- relationalai/semantics/reasoners/optimization/solvers_dev.py +1 -1
- relationalai/semantics/reasoners/optimization/solvers_pb.py +3 -4
- relationalai/semantics/rel/compiler.py +2 -1
- relationalai/semantics/rel/executor.py +3 -2
- relationalai/semantics/tests/lqp/__init__.py +0 -0
- relationalai/semantics/tests/lqp/algorithms.py +345 -0
- relationalai/semantics/tests/test_snapshot_abstract.py +2 -1
- relationalai/tools/cli_controls.py +216 -67
- relationalai/util/format.py +5 -2
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/METADATA +2 -2
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/RECORD +52 -42
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/WHEEL +0 -0
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/entry_points.txt +0 -0
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/licenses/LICENSE +0 -0
relationalai/semantics/lqp/builtins.py
CHANGED

```diff
@@ -1,4 +1,5 @@
-from relationalai.semantics.metamodel import factory as f, ir
+from typing import TypeGuard
+from relationalai.semantics.metamodel import factory as f, ir, types
 from relationalai.semantics.metamodel.util import FrozenOrderedSet
 from relationalai.semantics.metamodel import builtins
 
```
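The new `TypeGuard` import backs the `has_*_annotation` helpers added in the next hunk: when one of them returns `True`, a type checker can narrow the inspected node to `ir.Update`. A minimal, self-contained sketch of that narrowing, using stand-in classes rather than the real IR types:

```python
from typing import TypeGuard

class Node: ...

class Update(Node):
    annotations: tuple = ()

def has_upsert_annotation(node: Node) -> TypeGuard[Update]:
    # Simplified stand-in for the annotation scan in builtins.py.
    return isinstance(node, Update)

def handle(node: Node) -> None:
    if has_upsert_annotation(node):
        # The checker now treats `node` as Update in this branch.
        print(node.annotations)
```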
```diff
@@ -8,9 +9,205 @@ adhoc = f.relation("adhoc", [])
 adhoc_annotation = f.annotation(adhoc, [])
 
 # We only want to emit attributes for a known set of annotations.
-
+supported_lqp_annotations = FrozenOrderedSet([
     adhoc.name,
     builtins.function.name,
     builtins.track.name,
     builtins.recursion_config.name,
 ])
+
+# [LoopyIR] Annotations used to mark metamodel IR elements as Loopy constructs.
+# 1. Programming structures:
+# * @script marks Sequence blocks `begin ... end`
+# * @algorithm additionally marks the top-level script
+# * @while marks a Loop as a `while(true) {...}`; its sole Task is a @script @while Sequence
+# 2. Base instructions (Updates with derive Effects)
+# * @global marks instructions that write to a global relation (only used in top-level script)
+# * @empty marks instructions that initialize relations to an empty relation
+# * @assign marks instructions that are standard assignments
+# * @upsert marks instructions that perform in-place upserts
+# * @monoid marks instructions that perform in-place monoid updates
+# * @monus marks instructions that perform in-place monus updates
+
+# These tasks require dedicated handling and are currently only supported in LQP.
+
+# Here we only provide basic inspection functions. Functions for creating these annotations
+# and more complex analysis are in the module relationalai.semantics.lqp.algorithms
+
+# Algorithm: for the top-level script of an algorithm
+_algorithm_anno_name = "algorithm"
+algorithm = f.relation(_algorithm_anno_name, [])
+
+def algorithm_annotation():
+    return f.annotation(algorithm, [])
+
+def has_algorithm_annotation(node: ir.Node) -> bool:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _algorithm_anno_name:
+            return True
+    return False
+
+# Script: for Sequence blocks (algorithm or while loop)
+_script_anno_name = "script"
+script = f.relation(_script_anno_name, [])
+
+def script_annotation():
+    return f.annotation(script, [])
+
+def has_script_annotation(node: ir.Node) -> bool:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _script_anno_name:
+            return True
+    return False
+
+# While: for a while Loop or its script body (Sequence)
+_while_anno_name = "while"
+while_ = f.relation(_while_anno_name, [])
+
+def while_annotation():
+    return f.annotation(while_, [])
+
+def has_while_annotation(node: ir.Node) -> bool:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _while_anno_name:
+            return True
+    return False
+
+# Global: marks instructions that write to a relation that is the result of an algorithm
+_global_anno_name = "global"
+global_ = f.relation(_global_anno_name, [])
+
+def global_annotation():
+    return f.annotation(global_, [])
+
+def has_global_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _global_anno_name:
+            return True
+    return False
+
+# Empty: initializes a relation to an empty relation
+_empty_anno_name = "empty"
+empty = f.relation(_empty_anno_name, [])
+
+def empty_annotation():
+    return f.annotation(empty, [])
+
+def has_empty_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _empty_anno_name:
+            return True
+    return False
+
+# Assign: overwrites the target relation
+_assign_anno_name = "assign"
+assign = f.relation(_assign_anno_name, [])
+
+def assign_annotation():
+    return f.annotation(assign, [])
+
+def has_assign_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _assign_anno_name:
+            return True
+    return False
+
+# Upsert: in-place update of a relation
+_upsert_anno_name = "upsert"
+upsert = f.relation(_upsert_anno_name, [])
+
+def upsert_annotation(arity: int):
+    return f.annotation(upsert, [f.literal(arity, type=types.Int64)])
+
+def has_upsert_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _upsert_anno_name:
+            return True
+    return False
+
+def get_upsert_annotation(i: ir.Update):
+    for anno in i.annotations:
+        if anno.relation.name == _upsert_anno_name:
+            return anno
+    return None
+
+# Monoid: in-place update of a relation by another via a monoid operation (e.g. integer addition)
+_monoid_anno_name = "monoid"
+monoid = f.relation(_monoid_anno_name, [])
+
+def monoid_annotation(monoid_type: ir.ScalarType, monoid_op: str, arity: int):
+    return f.annotation(monoid, [f.literal(arity, type=types.Int64), monoid_type, f.literal(monoid_op, type=types.String)])
+
+def has_monoid_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _monoid_anno_name:
+            return True
+    return False
+
+def get_monoid_annotation(i: ir.Update):
+    for anno in i.annotations:
+        if anno.relation.name == _monoid_anno_name:
+            return anno
+    return None
+
+# Monus: in-place update of a relation by another via a "subtraction" operation, if it exists (e.g. integer subtraction)
+_monus_anno_name = "monus"
+monus = f.relation(_monus_anno_name, [])
+
+def monus_annotation(monoid_type: ir.ScalarType, monoid_op: str, arity: int):
+    return f.annotation(monus, [f.literal(arity, type=types.Int64), monoid_type, f.literal(monoid_op, type=types.String)])
+
+def has_monus_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _monus_anno_name:
+            return True
+    return False
+
+def get_monus_annotation(i: ir.Update):
+    for anno in i.annotations:
+        if anno.relation.name == _monus_anno_name:
+            return anno
+    return None
+
+# Get the arity from an annotation (for @upsert, @monoid, and @monus)
+def get_arity(i: ir.Annotation):
+    for arg in i.args:
+        if isinstance(arg, ir.Literal) and (arg.type == types.Int64 or arg.type == types.Int128 or arg.type == types.Number):
+            return arg.value
+    assert False, "Failed to get arity"
+
+# All Loopy instructions
+loopy_instructions = [
+    empty,
+    assign,
+    upsert,
+    monoid,
+    monus
+]
```
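A short sketch of how these helpers compose. The module paths follow this diff; the `"+"` operator string is an illustrative assumption (the real operator encoding is defined where the annotations are created, in relationalai.semantics.lqp.algorithms):

```python
from relationalai.semantics.lqp import builtins as lqp_builtins
from relationalai.semantics.metamodel import types

# Build a @monoid annotation for an in-place integer-sum update of arity 2.
anno = lqp_builtins.monoid_annotation(types.Int64, "+", arity=2)

# get_arity scans the annotation's args for an integer literal and returns it.
assert lqp_builtins.get_arity(anno) == 2
```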
relationalai/semantics/lqp/executor.py
CHANGED

```diff
@@ -10,8 +10,12 @@ from snowflake.snowpark import Session
 
 from relationalai import debugging
 from relationalai.errors import NonDefaultLQPSemanticsVersionWarning
-from relationalai.semantics.lqp import result_helpers
+from relationalai.semantics.lqp import result_helpers, export_rewriter
 from relationalai.semantics.metamodel import ir, factory as f, executor as e
+from relationalai.semantics.metamodel.visitor import collect_by_type
+
+if TYPE_CHECKING:
+    from relationalai.semantics.internal.internal import Model as InternalModel
 from relationalai.semantics.lqp.compiler import Compiler
 from relationalai.semantics.lqp.intrinsics import mk_intrinsic_datetime_now
 from relationalai.semantics.lqp.constructors import mk_transaction
```
```diff
@@ -280,10 +284,11 @@ class LQPExecutor(e.Executor):
                 fields.append(f"NULL as \"{name}\"")
                 continue
 
-
+            # Get the actual physical column name from column_fields
+            colname = column_fields[ix][0]
             ix += 1
 
-            if colname in sample_keys:
+            if colname.lower() in sample_keys:
                 # Actual column exists in sample
                 fields.append(f"{colname} as \"{name}\"")
             else:
```
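The `.lower()` call makes the sample-key lookup case-insensitive. A tiny sketch of why that matters when the physical names come back from Snowflake, which uppercases unquoted identifiers (the assumption here is that `sample_keys` holds lowercase names):

```python
sample_keys = {"my_col", "other_col"}   # assumed lowercase
colname = "MY_COL"                      # physical name as Snowflake reports it

assert colname not in sample_keys       # the old exact match misses
assert colname.lower() in sample_keys   # the new normalized match hits
```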
```diff
@@ -364,7 +369,7 @@ class LQPExecutor(e.Executor):
             meta=None,
         )
 
-    def compile_lqp(self, model: ir.Model, task: ir.Task):
+    def compile_lqp(self, model: ir.Model, task: ir.Task, format: Optional[Literal["pandas", "snowpark", "csv"]] = "pandas"):
         configure = self._construct_configure()
         # Merge the epochs into a single transaction. Long term the query bits should all
         # go into a WhatIf action and the intrinsics could be fused with either of them. But
```
```diff
@@ -390,6 +395,21 @@ class LQPExecutor(e.Executor):
         result, final_model = self.compiler.compile_inner(query, options)
         export_info, query_epoch = result
 
+        if format == "csv":
+            # Extract original column names from Output
+            outputs = collect_by_type(ir.Output, task)
+            assert outputs, "No Output found in the task"
+            assert len(outputs) == 1, "Multiple Outputs found in the task"
+            output = outputs[0]
+            original_cols = []
+            for alias, _ in output.aliases:
+                if not alias:
+                    continue
+                original_cols.append(alias)
+            # Use rewriter to filter data_columns
+            column_filter = export_rewriter.ExtraColumnsFilter(original_cols)
+            query_epoch = column_filter.filter_epoch(query_epoch)
+
 epochs.append(query_epoch)
 epochs.append(self._compile_undefine_query(query_epoch))
 
```
```diff
@@ -402,47 +422,23 @@ class LQPExecutor(e.Executor):
         txn_proto = convert_transaction(txn)
         return final_model, export_info, txn_proto
 
-
-
-
-
-
-
-
-
-
-            df.columns = cols[: len(df.columns)]
-
-        # Process exports
-        if export_to and not self.dry_run:
-            assert cols, "No columns found in the output"
-            assert isinstance(raw_results, TransactionAsyncResponse) and raw_results.transaction, "Invalid transaction result"
-
-            result_cols = export_to._col_names
-
-            if result_cols is not None:
-                assert all(col in result_cols or col in extra_cols for col in cols)
-            else:
-                result_cols = [col for col in cols if col not in extra_cols]
-                assert result_cols
-
-            assert export_info, "Export info should be populated if we are exporting results"
-            self._export(raw_results.transaction['id'], export_info, export_to, cols, result_cols, update)
-
-        return self._postprocess_df(self.config, df, extra_cols)
-
-    def execute(self, model: ir.Model, task: ir.Task, format: Literal["pandas", "snowpark"] = "pandas",
-                export_to: Optional[Table] = None,
-                update: bool = False, meta: dict[str, Any] | None = None) -> DataFrame:
+    def execute(
+        self,
+        model: ir.Model,
+        task: ir.Task,
+        format: Literal["pandas", "snowpark", "csv"] = "pandas",
+        export_to: Optional[Table] = None,
+        update: bool = False,
+        meta: dict[str, Any] | None = None,
+    ) -> DataFrame:
         self.prepare_data()
         previous_model = self._last_model
-
-        final_model, export_info, txn_proto = self.compile_lqp(model, task)
+        final_model, export_info, txn_proto = self.compile_lqp(model, task, format=format)
 
         if self.dry_run:
             return DataFrame()
 
-        if format
+        if format == "snowpark":
             raise ValueError(f"Unsupported format: {format}")
 
         # Format meta as headers
```
```diff
@@ -459,11 +455,64 @@ class LQPExecutor(e.Executor):
             nowait_durable=True,
             headers=headers,
         )
-        assert isinstance(raw_results, TransactionAsyncResponse)
+        assert isinstance(raw_results, TransactionAsyncResponse), "Expected TransactionAsyncResponse from LQP execution"
+        assert raw_results.transaction is not None, "Transaction result is missing"
+        txid = raw_results.transaction['id']
 
         try:
-
+            cols, extra_cols = self._compute_cols(task, final_model)
+            df, errs = result_helpers.format_results(raw_results, cols)
+            self.report_errors(errs)
+
+            # Rename columns if wide outputs is enabled
+            if self.wide_outputs and len(cols) - len(extra_cols) == len(df.columns):
+                df.columns = cols[: len(df.columns)]
+
+            if export_to:
+                assert cols, "No columns found in the output"
+                assert export_info, "Export info should be populated if we are exporting results"
+                result_cols = export_to._col_names
+                if result_cols is not None:
+                    assert all(col in result_cols or col in extra_cols for col in cols)
+                else:
+                    result_cols = [col for col in cols if col not in extra_cols]
+                    assert result_cols
+                self._export(txid, export_info, export_to, cols, result_cols, update)
+
+            if format == "csv":
+                if export_info is not None and isinstance(export_info, tuple) and isinstance(export_info[0], str):
+                    # The full CSV path has two parts. The first part is chosen by the frontend, while
+                    # the second part is chosen by the backend to avoid collisions. We need to ensure
+                    # the second part is synchronized with the future changes in the backend.
+                    full_path = export_info[0] + f"/data_{txid}.gz"
+                    return DataFrame([full_path], columns=["path"])
+                else:
+                    raise ValueError("The CSV export was not successful!")
+
+            return self._postprocess_df(self.config, df, extra_cols)
+
         except Exception as e:
             # If processing the results failed, revert to the previous model.
             self._last_model = previous_model
             raise e
+
+    def export_to_csv(self, model: "InternalModel", query) -> str:
+        ### Exports the result of the given query fragment to a CSV file in
+        ### the Snowflake stage area and returns the path to the CSV file.
+
+        from relationalai.semantics.internal.internal import Fragment, with_source
+        from relationalai.environments import runtime_env
+
+        if not query._select:
+            raise ValueError("Cannot export empty selection to CSV")
+
+        clone = Fragment(parent=query)
+        clone._is_export = True
+        clone._source = runtime_env.get_source_pos()
+        ir_model = model._to_ir()
+        with debugging.span("query", dsl=str(clone), **with_source(clone), meta=clone._meta):
+            query_task = model._compiler.fragment(clone)
+            csv_info = self.execute(ir_model, query_task, format="csv", meta=clone._meta)
+        path = csv_info.at[0, "path"]
+        assert isinstance(path, str)
+        return path
```
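A hypothetical usage sketch of the new entry point; `executor`, `model`, and `query` are illustrative names, and `query` is an internal Fragment with a non-empty selection:

```python
# export_to_csv compiles the fragment with format="csv" and unwraps the
# one-row ["path"] DataFrame returned by execute() into a bare stage path.
path = executor.export_to_csv(model, query)
print(path)  # e.g. "<stage-prefix>/data_<transaction-id>.gz"
```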
relationalai/semantics/lqp/export_rewriter.py
ADDED

```diff
@@ -0,0 +1,40 @@
+#----------------------------------------------------------------------------------------------
+# This is a custom LQP rewriter that filters extra columns from CSV export. It is used in the
+# LQP executor, when the format="csv", to ensure only intended columns are being exported.
+#----------------------------------------------------------------------------------------------
+
+from dataclasses import replace
+from lqp import ir as lqp_ir
+
+class ExtraColumnsFilter:
+
+    def __init__(self, original_cols: list[str]):
+        self.original_cols = set(original_cols)
+
+    def filter_epoch(self, query_epoch: lqp_ir.Epoch) -> lqp_ir.Epoch:
+
+        # Only process epochs with a single read which is dedicated to Export
+        if not (query_epoch.reads and len(query_epoch.reads) == 1):
+            return query_epoch
+
+        old_read = query_epoch.reads[0]
+        if not isinstance(old_read.read_type, lqp_ir.Export):
+            return query_epoch
+
+        config = old_read.read_type.config
+        assert isinstance(config, lqp_ir.ExportCSVConfig) and config.data_columns is not None, \
+            "Expected ExportCSVConfig with data_columns in the read type"
+
+        data_columns = config.data_columns
+
+        # Filter data_columns to only include columns in original_cols
+        new_data_columns = [col for col in data_columns if col.column_name in self.original_cols]
+
+        # Reconstruct the nested structure with filtered data_columns
+        new_config = replace(old_read.read_type.config, data_columns=new_data_columns)
+        new_read_type = replace(old_read.read_type, config=new_config)
+        new_read = replace(old_read, read_type=new_read_type)
+
+        # Return new epoch with updated read
+        remaining_reads = list(query_epoch.reads[1:])
+        return replace(query_epoch, reads=[new_read] + remaining_reads)
```
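The filter never mutates the LQP IR in place; it rebuilds each nested layer with `dataclasses.replace`, from the innermost config outward. A self-contained sketch of that pattern, with hypothetical stand-in dataclasses instead of the real `lqp.ir` types:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class CSVConfig:          # stand-in for lqp_ir.ExportCSVConfig
    data_columns: tuple[str, ...]

@dataclass(frozen=True)
class ExportRead:         # stand-in for a read whose read_type is an Export
    config: CSVConfig

old = ExportRead(CSVConfig(("a", "b", "_extra_col")))
wanted = {"a", "b"}

# Rebuild inner-to-outer rather than mutating frozen instances.
new_config = replace(old.config,
                     data_columns=tuple(c for c in old.config.data_columns if c in wanted))
new = replace(old, config=new_config)

assert new.config.data_columns == ("a", "b")
assert old.config.data_columns == ("a", "b", "_extra_col")  # original untouched
```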
relationalai/semantics/lqp/ir.py
CHANGED

```diff
@@ -6,7 +6,6 @@ __all__ = [
     "Declaration",
     "FunctionalDependency",
     "Def",
-    "Loop",
     "Abstraction",
     "Formula",
     "Exists",
@@ -55,6 +54,20 @@ __all__ = [
     "convert_transaction",
     "validate_lqp",
     "construct_configure",
+    "Algorithm",
+    "Script",
+    "Construct",
+    "Loop",
+    "Instruction",
+    "Assign",
+    "Break",
+    "Upsert",
+    "MonoidDef",
+    "MonusDef",
+    "OrMonoid",
+    "MinMonoid",
+    "MaxMonoid",
+    "SumMonoid",
 ]
 
 from lqp.ir import (
@@ -63,7 +76,6 @@ from lqp.ir import (
     Declaration,
     FunctionalDependency,
     Def,
-    Loop,
     Abstraction,
     Formula,
     Exists,
@@ -108,6 +120,20 @@ from lqp.ir import (
     Configure,
     IVMConfig,
     MaintenanceLevel,
+    Algorithm,
+    Script,
+    Construct,
+    Loop,
+    Instruction,
+    Assign,
+    Break,
+    Upsert,
+    MonoidDef,
+    MonusDef,
+    OrMonoid,
+    MinMonoid,
+    MaxMonoid,
+    SumMonoid,
 )
 
 from lqp.emit import (
```