relationalai 0.12.7__py3-none-any.whl → 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -851,7 +851,17 @@ Otherwise, remove it from your '{profile}' configuration profile.
             self.generation
         )
         # If cache is valid (data freshness has not expired), skip polling
-        if not poller.cache.is_valid():
+        if poller.cache.is_valid():
+            cached_sources = len(poller.cache.sources)
+            total_sources = len(sources_list)
+            cached_timestamp = poller.cache._metadata.get("cachedIndices", {}).get(poller.cache.key, {}).get("last_use_index_update_on", "")
+
+            message = f"Using cached data for {cached_sources}/{total_sources} data streams"
+            if cached_timestamp:
+                print(f"\n{message} (cached at {cached_timestamp})\n")
+            else:
+                print(f"\n{message}\n")
+        else:
             return poller.poll()
 
     #--------------------------------------------------
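
Note: the guard above is inverted, so a still-valid cache now short-circuits polling and only reports what was reused. A minimal standalone sketch of the new control flow, using a hypothetical stand-in for the poller's cache rather than the package's real classes:

    from dataclasses import dataclass, field

    @dataclass
    class StubCache:  # hypothetical stand-in, not the package's cache type
        sources: list = field(default_factory=list)
        fresh: bool = True
        cached_at: str = ""

        def is_valid(self) -> bool:
            return self.fresh

    def poll_or_reuse(cache: StubCache, sources_list: list) -> str:
        if cache.is_valid():
            # Fresh cache: skip polling and just report what was reused.
            message = f"Using cached data for {len(cache.sources)}/{len(sources_list)} data streams"
            return f"{message} (cached at {cache.cached_at})" if cache.cached_at else message
        # Stale cache: the real code falls through to poller.poll() here.
        return "polling..."

    print(poll_or_reuse(StubCache(sources=["a", "b"], cached_at="2024-01-01T00:00:00Z"), ["a", "b", "c"]))
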
@@ -3284,12 +3294,24 @@ class DirectAccessResources(Resources):
         try:
             response = _send_request()
             if response.status_code != 200:
+                # For 404 responses with skip_auto_create=True, return immediately to let caller handle it
+                # (e.g., get_engine needs to check 404 and return None for auto_create_engine)
+                # For skip_auto_create=False, continue to auto-creation logic below
+                if response.status_code == 404 and skip_auto_create:
+                    return response
+
                 try:
                     message = response.json().get("message", "")
                 except requests.exceptions.JSONDecodeError:
-                    raise ResponseStatusException(
-                        f"Failed to parse error response from endpoint {endpoint}.", response
-                    )
+                    # Can't parse JSON response. For skip_auto_create=True (e.g., get_engine),
+                    # this should have been caught by the 404 check above, so this is an error.
+                    # For skip_auto_create=False, we explicitly check status_code below,
+                    # so we don't need to parse the message.
+                    if skip_auto_create:
+                        raise ResponseStatusException(
+                            f"Failed to parse error response from endpoint {endpoint}.", response
+                        )
+                    message = "" # Not used when we check status_code directly
 
                 # fix engine on engine error and retry
                 # Skip auto-retry if skip_auto_create is True to avoid recursion
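
Note: the early 404 return means a caller that passes skip_auto_create=True sees the raw response instead of triggering engine auto-creation, as the comments describe for get_engine. A hypothetical caller sketch with stand-in types (FakeResponse and request_engine are illustrative, not the package's API):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class FakeResponse:  # stand-in for requests.Response
        status_code: int
        payload: Optional[dict] = None

        def json(self) -> dict:
            return self.payload or {}

    def request_engine(name: str, skip_auto_create: bool) -> FakeResponse:
        # Pretend the engine does not exist yet.
        return FakeResponse(status_code=404)

    def get_engine_or_none(name: str) -> Optional[dict]:
        # With skip_auto_create=True the request helper returns the raw 404
        # instead of raising or auto-creating, so "not found" can map to None.
        response = request_engine(name, skip_auto_create=True)
        if response.status_code == 404:
            return None
        return response.json()

    assert get_engine_or_none("my_engine") is None
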
@@ -3482,7 +3504,17 @@ class DirectAccessResources(Resources):
             generation=self.generation,
         )
         # If cache is valid (data freshness has not expired), skip polling
-        if not poller.cache.is_valid():
+        if poller.cache.is_valid():
+            cached_sources = len(poller.cache.sources)
+            total_sources = len(sources_list)
+            cached_timestamp = poller.cache._metadata.get("cachedIndices", {}).get(poller.cache.key, {}).get("last_use_index_update_on", "")
+
+            message = f"Using cached data for {cached_sources}/{total_sources} data streams"
+            if cached_timestamp:
+                print(f"\n{message} (cached at {cached_timestamp})\n")
+            else:
+                print(f"\n{message}\n")
+        else:
             return poller.poll()
 
     def _check_exec_async_status(self, txn_id: str, headers: Dict[str, str] | None = None) -> bool:
@@ -250,7 +250,17 @@ class UseIndexPoller:
             # Cache was used - show how many sources were cached
             total_sources = len(self.cache.sources)
             cached_sources = total_sources - len(self.sources)
-            progress.add_sub_task(f"Using cached data for {cached_sources}/{total_sources} data streams", task_id="cache_usage", category=TASK_CATEGORY_CACHE)
+
+            # Get the timestamp when sources were cached
+            entry = self.cache._metadata.get("cachedIndices", {}).get(self.cache.key, {})
+            cached_timestamp = entry.get("last_use_index_update_on", "")
+
+            message = f"Using cached data for {cached_sources}/{total_sources} data streams"
+            # Format the message with timestamp
+            if cached_timestamp:
+                message += f" (cached at {cached_timestamp})"
+
+            progress.add_sub_task(message, task_id="cache_usage", category=TASK_CATEGORY_CACHE)
             # Complete the subtask immediately since it's just informational
             progress.complete_sub_task("cache_usage")
 
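
Note: the timestamp is read from the poller cache's metadata dictionary, keyed under "cachedIndices" by the cache key. A standalone sketch of the lookup and message formatting (the metadata values here are made up for illustration):

    _metadata = {
        "cachedIndices": {
            "model_a": {"last_use_index_update_on": "2024-05-01T12:34:56Z"},
        },
    }

    def cache_message(key: str, cached_sources: int, total_sources: int) -> str:
        entry = _metadata.get("cachedIndices", {}).get(key, {})
        cached_timestamp = entry.get("last_use_index_update_on", "")
        message = f"Using cached data for {cached_sources}/{total_sources} data streams"
        if cached_timestamp:
            message += f" (cached at {cached_timestamp})"
        return message

    print(cache_message("model_a", 3, 5))
    # Using cached data for 3/5 data streams (cached at 2024-05-01T12:34:56Z)
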
@@ -6,7 +6,7 @@ from relationalai.semantics.metamodel.util import FrozenOrderedSet
 
 from relationalai.semantics.metamodel.rewrite import Flatten
 
-from ..metamodel.rewrite import DischargeConstraints, DNFUnionSplitter, ExtractNestedLogicals
+from ..metamodel.rewrite import DischargeConstraints, DNFUnionSplitter, ExtractNestedLogicals, FormatOutputs
 from .rewrite import CDC, ExtractCommon, ExtractKeys, FunctionAnnotations, QuantifyVars, Splinter
 
 from relationalai.semantics.lqp.utils import output_names
@@ -27,6 +27,7 @@ def lqp_passes() -> list[Pass]:
         DNFUnionSplitter(),
         ExtractKeys(),
         ExtractCommon(),
+        FormatOutputs(),
         Flatten(),
         Splinter(), # Splits multi-headed rules into multiple rules
         QuantifyVars(), # Adds missing existentials
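
Note: FormatOutputs() is inserted immediately before Flatten(), and passes run in list order, so outputs are already split per column (or widened) by the time rules are flattened. A minimal sketch of how such a list is typically driven, assuming each pass exposes the rewrite(model, options) entry point seen in format_outputs.py below (apply_passes itself is an illustrative helper, not the package's API):

    def apply_passes(model, passes, options=None):
        # Each pass consumes the model produced by the previous one.
        options = options or {}
        for p in passes:
            model = p.rewrite(model, options)
        return model
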
@@ -654,6 +654,7 @@ rel_primitive_solverlib_ho_appl = aggregation("rel_primitive_solverlib_ho_appl",
 ])
 implies = f.relation("implies", [f.input_field("a", types.Bool), f.input_field("b", types.Bool)])
 all_different = aggregation("all_different", [f.input_field("over", types.Any)])
+special_ordered_set_type_2 = aggregation("special_ordered_set_type_2", [f.input_field("rank", types.Any)])
 
 # graph primitive algorithm helpers
 infomap = aggregation("infomap", [
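
Note: special_ordered_set_type_2 is registered the same way as all_different above; the name matches the solver notion of an SOS2 constraint (an ordered set in which at most two adjacent members may be nonzero, commonly used for piecewise-linear models), with the "rank" field presumably carrying the ordering.
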
@@ -2,5 +2,6 @@ from .discharge_constraints import DischargeConstraints
 from .dnf_union_splitter import DNFUnionSplitter
 from .extract_nested_logicals import ExtractNestedLogicals
 from .flatten import Flatten
+from .format_outputs import FormatOutputs
 
-__all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten"]
+__all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten", "FormatOutputs"]
@@ -150,7 +150,7 @@ class DNFExtractor(Visitor):
 
         replacement_tasks: list[ir.Task] = []
         for body in replacement_bodies:
-            new_task = f.logical(body)
+            new_task = f.logical(body, node.hoisted)
            replacement_tasks.append(new_task)
         self.replaced_by[node] = replacement_tasks
 
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from relationalai.semantics.metamodel import ir, factory as f, helpers
-from relationalai.semantics.metamodel.visitor import Rewriter
+from relationalai.semantics.metamodel.visitor import Rewriter, collect_by_type
 from relationalai.semantics.metamodel.compiler import Pass
 from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
 from relationalai.semantics.metamodel import dependency
@@ -61,11 +61,10 @@ class LogicalExtractor(Rewriter):
 
         # if there are aggregations, make sure we don't expose the projected and input vars,
         # but expose groupbys
-        for child in node.body:
-            if isinstance(child, ir.Aggregate):
-                exposed_vars.difference_update(child.projection)
-                exposed_vars.difference_update(helpers.aggregate_inputs(child))
-                exposed_vars.update(child.group)
+        for agg in collect_by_type(ir.Aggregate, node):
+            exposed_vars.difference_update(agg.projection)
+            exposed_vars.difference_update(helpers.aggregate_inputs(agg))
+            exposed_vars.update(agg.group)
         # add the values (hoisted)
         exposed_vars.update(helpers.hoisted_vars(logical.hoisted))
 
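
Note: the old loop only looked at the logical's direct children, so an Aggregate nested inside an inner task was missed; collect_by_type walks the whole subtree. A toy sketch of the difference (a simplified node type, not the package's visitor machinery):

    from dataclasses import dataclass, field

    @dataclass
    class Node:  # toy stand-in for an IR task
        kind: str
        body: list = field(default_factory=list)

    def collect_by_kind(kind: str, node: Node) -> list:
        # Recursive collection: finds matches at any depth, not just in node.body.
        found = [node] if node.kind == kind else []
        for child in node.body:
            found.extend(collect_by_kind(kind, child))
        return found

    tree = Node("logical", [Node("logical", [Node("aggregate")])])
    print(len([c for c in tree.body if c.kind == "aggregate"]))  # 0 - direct children only
    print(len(collect_by_kind("aggregate", tree)))               # 1 - whole subtree
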
@@ -3,12 +3,11 @@ from dataclasses import dataclass
 from typing import cast, Optional, TypeVar
 from typing import Tuple
 
-from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers, types
+from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers
 from relationalai.semantics.metamodel.compiler import Pass, group_tasks
 from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
 from relationalai.semantics.metamodel import dependency
-from relationalai.semantics.metamodel.util import FrozenOrderedSet, filter_by_type
-from relationalai.semantics.metamodel.typer.typer import to_type, is_primitive
+from relationalai.semantics.metamodel.typer.typer import to_type
 
 class Flatten(Pass):
     """
@@ -225,15 +224,26 @@ class Flatten(Pass):
             "ranks": ir.Rank,
         })
 
-        # if there are outputs, adjust them (depending on the config for wide vs gnf)
+        # If there are outputs, flatten each into its own top-level rule, along with its
+        # dependencies.
         if groups["outputs"]:
-            if self._handle_outputs:
-                return self.adjust_outputs(task, body, groups, ctx)
-            else:
-                # When we do not handle outputs. For example, in SQL compiler. We need to leave output as a top-level element.
+            if not self._handle_outputs:
                 ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
                 return Flatten.HandleResult(None)
 
+            # Analyze the dependencies in the newly rewritten body
+            new_logical = ir.Logical(task.engine, task.hoisted, tuple(body))
+            info = dependency.analyze(new_logical)
+
+            for output in groups["outputs"]:
+                assert(isinstance(output, ir.Output))
+                new_body = info.task_dependencies(output)
+                new_body.update(ctx.extra_tasks)
+                new_body.add(output)
+                ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(new_body), task.annotations))
+
+            return Flatten.HandleResult(None)
+
         # if there are updates, extract as a new top level rule
         if groups["updates"]:
             # add task dependencies to the body
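
Note: instead of delegating to adjust_outputs, Flatten now emits one top-level rule per output, containing just that output's dependencies plus ctx.extra_tasks. A toy illustration of the regrouping, with plain strings standing in for IR tasks and a lookup table standing in for dependency.analyze:

    body = ["filter_a", "filter_b", "output_1", "output_2"]
    deps = {"output_1": {"filter_a"}, "output_2": {"filter_a", "filter_b"}}
    extra_tasks = []

    top_level = []
    for task in body:
        if task.startswith("output"):
            # Dependencies of this output, plus any extra tasks, plus the output itself.
            top_level.append(sorted(deps[task]) + list(extra_tasks) + [task])

    print(top_level)
    # [['filter_a', 'output_1'], ['filter_a', 'filter_b', 'output_2']]
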
@@ -455,147 +465,6 @@ class Flatten(Pass):
             task.annotations
         ))
 
-    #--------------------------------------------------
-    # GNF vs wide output support
-    #--------------------------------------------------
-    def adjust_outputs(self, task: ir.Logical, body: OrderedSet[ir.Task], groups: dict[str, OrderedSet[ir.Task]], ctx: Context):
-
-        # for wide outputs, only adjust the output task to include the keys.
-        if ctx.options.get("wide_outputs", False):
-            for output in groups["outputs"]:
-                assert(isinstance(output, ir.Output))
-                if output.keys:
-                    body.remove(output)
-                    body.add(self.rewrite_wide_output(output))
-            # self.remove_subsumptions(body, ctx)
-            return Flatten.HandleResult(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
-
-        # for GNF outputs we need to generate a rule for each "column" in the output
-        else:
-            # first split outputs in potentially multiple outputs, one for each "column"
-            for output in groups["outputs"]:
-                assert(isinstance(output, ir.Output))
-                if output.keys:
-                    # we will replace the output bellow,
-                    body.remove(output)
-
-                    is_export = builtins.export_annotation in output.annotations
-
-                    # generate an output for each "column"
-                    # output looks like def output(:cols, :col000, key0, key1, value):
-                    original_cols = OrderedSet()
-                    for idx, alias in enumerate(output.aliases):
-                        # skip None values which are used as a placeholder for missing values
-                        if alias[1] is None:
-                            continue
-                        original_cols.add(alias[1])
-                        self._generate_output_column(body, output, idx, alias, is_export)
-
-                    idx = len(output.aliases)
-                    for key in output.keys:
-                        if key not in original_cols:
-                            self._generate_output_column(body, output, idx, (key.name, key), is_export)
-                            idx += 1
-
-            # analyse the resulting logical to be able to pull dependencies
-            logical = ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
-            info = dependency.analyze(logical)
-
-            # now extract a logical for each output, bringing together its dependencies
-            for output in filter_by_type(body, ir.Output):
-                deps = info.task_dependencies(output)
-                # TODO: verify safety of doing this
-                # self.remove_subsumptions(deps, ctx)
-
-                deps.add(output)
-                ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, tuple(), tuple(deps)))
-
-            return Flatten.HandleResult(None)
-
-    def _generate_output_column(self, body: OrderedSet[ir.Task], output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
-        if not output.keys:
-            return output
-
-        aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
-        aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
-
-        for k in output.keys:
-            aliases.append((f"key_{k.name}_{idx}", k))
-
-        if (is_export and
-            isinstance(alias[1], ir.Var) and
-            (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
-
-            uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
-            body.add(f.lookup(builtins.uuid_to_string, [alias[1], uuid]))
-            aliases.append((uuid.name, uuid))
-        else:
-            aliases.append(alias)
-
-        body.add(ir.Output(
-            output.engine,
-            FrozenOrderedSet.from_iterable(aliases),
-            output.keys,
-            output.annotations
-        ))
-
-
-    def remove_subsumptions(self, body:OrderedSet[ir.Task], ctx: Context):
-        # remove from the body all the tasks that are subsumed by some other task in the set;
-        # this can be done because some tasks are references to extracted nested logical that
-        # contain filters they dependend on, so we don't need those filters here if the
-        # reference is present.
-        for logical in filter_by_type(body, ir.Logical):
-            if logical.id in ctx.included:
-                # if the logical id is included, it means it's a reference to an extracted
-                # rule, so remove all other items in the body that are already included in
-                # the body referenced by it
-                for item in body:
-                    if item in ctx.included[logical.id]:
-                        body.remove(item)
-
-
-    def rewrite_wide_output(self, output: ir.Output):
-        assert(output.keys)
-
-        # only append keys that are not already in the output
-        suffix_keys = []
-        for key in output.keys:
-            if all([val is not key for _, val in output.aliases]):
-                suffix_keys.append(key)
-
-        aliases: OrderedSet[Tuple[str, ir.Value]] = ordered_set()
-
-        # add the remaining args, unless it is already a key
-        for name, val in output.aliases:
-            if not isinstance(val, ir.Var) or val not in suffix_keys:
-                aliases.add((name, val))
-
-        # add the keys to the output
-        for key in suffix_keys:
-            aliases.add((key.name, key))
-
-        # TODO - we are assuming that the Rel compiler will translate nullable lookups
-        # properly, returning a `Missing` if necessary, like this:
-        # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
-        return ir.Output(
-            output.engine,
-            aliases.frozen(),
-            output.keys,
-            output.annotations
-        )
-
-    # TODO: in the rel compiler, see if we can do this outer join
-    # 1. number of keys
-    # 2. each relation
-    # 3. each variable, starting with the keys
-    # 4. tag output with @arrow
-
-    # @arrow def output(_book, _book_title, _author_name):
-    # rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
-    # def output(p, n, c):
-    # rel_primitive_outer_join(#1, name, coolness, p, n, c)
-
 #--------------------------------------------------
 # Helpers
 #--------------------------------------------------
@@ -0,0 +1,165 @@
+from __future__ import annotations
+from typing import Tuple
+
+from relationalai.semantics.metamodel import builtins, ir, factory as f, types, visitor
+from relationalai.semantics.metamodel.compiler import Pass, group_tasks
+from relationalai.semantics.metamodel.util import OrderedSet
+from relationalai.semantics.metamodel.util import FrozenOrderedSet
+from relationalai.semantics.metamodel.typer.typer import is_primitive
+
+class FormatOutputs(Pass):
+    def __init__(self, handle_outputs: bool=True):
+        super().__init__()
+        self._handle_outputs = handle_outputs
+
+    #--------------------------------------------------
+    # Public API
+    #--------------------------------------------------
+    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+        wide_outputs = options.get("wide_outputs", False)
+        return self.OutputRewriter(wide_outputs).walk(model)
+
+    class OutputRewriter(visitor.Rewriter):
+        def __init__(self, wide_outputs: bool = False):
+            super().__init__()
+            self.wide_outputs = wide_outputs
+
+        def handle_logical(self, node: ir.Logical, parent: ir.Node):
+            # Rewrite children first
+            node = super().handle_logical(node, parent)
+
+            groups = group_tasks(node.body, {
+                "outputs": ir.Output,
+            })
+
+            # If no outputs, return as is
+            if not groups["outputs"]:
+                return node
+
+            return adjust_outputs(node, groups["outputs"], self.wide_outputs)
+
+#--------------------------------------------------
+# GNF vs wide output support
+#--------------------------------------------------
+def adjust_outputs(task: ir.Logical, outputs: OrderedSet[ir.Task], wide_outputs: bool = False):
+
+    body = list(task.body)
+
+    # For wide outputs, only adjust the output task to include the keys.
+    if wide_outputs:
+        for output in outputs:
+            assert(isinstance(output, ir.Output))
+            if output.keys:
+                body.remove(output)
+                body.append(rewrite_wide_output(output))
+        return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
+
+    # For GNF outputs we need to generate a rule for each "column" in the output
+    else:
+        # First split outputs in potentially multiple outputs, one for each "column"
+        for output in outputs:
+            assert(isinstance(output, ir.Output))
+            if output.keys:
+                # Remove the original output. This is replaced by per-column outputs below
+                body.remove(output)
+
+                is_export = builtins.export_annotation in output.annotations
+
+                # Generate an output for each "column"
+                # output looks like def output(:cols, :col000, key0, key1, value):
+                original_cols = OrderedSet()
+                for idx, alias in enumerate(output.aliases):
+                    # Skip None values which are used as a placeholder for missing values
+                    if alias[1] is None:
+                        continue
+                    original_cols.add(alias[1])
+                    body.extend(_generate_output_column(output, idx, alias, is_export))
+
+                idx = len(output.aliases)
+                for key in output.keys:
+                    if key not in original_cols:
+                        body.extend(_generate_output_column(output, idx, (key.name, key), is_export))
+                        idx += 1
+
+        return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
+
+# TODO: return non list?
+def _generate_output_column(output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
+    if not output.keys:
+        return [output]
+
+    aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
+    aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
+
+    # Append all keys at the start
+    for k in output.keys:
+        aliases.append((f"key_{k.name}_{idx}", k))
+
+    if (is_export and
+        isinstance(alias[1], ir.Var) and
+        (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
+
+        uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
+        aliases.append((uuid.name, uuid))
+
+        return [
+            ir.Lookup(None, builtins.uuid_to_string, (alias[1], uuid)),
+            ir.Output(
+                output.engine,
+                FrozenOrderedSet.from_iterable(aliases),
+                output.keys,
+                output.annotations
+            )
+        ]
+    else:
+        aliases.append(alias)
+
+        return [
+            ir.Output(
+                output.engine,
+                FrozenOrderedSet.from_iterable(aliases),
+                output.keys,
+                output.annotations
+            )
+        ]
+
+def rewrite_wide_output(output: ir.Output):
+    assert(output.keys)
+
+    # Only append keys that are not already in the output
+    suffix_keys = []
+    for key in output.keys:
+        if all([val is not key for _, val in output.aliases]):
+            suffix_keys.append(key)
+
+    aliases: OrderedSet[Tuple[str, ir.Value]] = OrderedSet()
+
+    # Add the remaining args, unless it is already a key
+    for name, val in output.aliases:
+        if not isinstance(val, ir.Var) or val not in suffix_keys:
+            aliases.add((name, val))
+
+    # Add the keys to the output
+    for key in suffix_keys:
+        aliases.add((key.name, key))
+
+    # TODO - we are assuming that the Rel compiler will translate nullable lookups
+    # properly, returning a `Missing` if necessary, like this:
+    # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
+    return ir.Output(
+        output.engine,
+        aliases.frozen(),
+        output.keys,
+        output.annotations
+    )
+
+# TODO: in the rel compiler, see if we can do this outer join
+# 1. number of keys
+# 2. each relation
+# 3. each variable, starting with the keys
+# 4. tag output with @arrow
+
+# @arrow def output(_book, _book_title, _author_name):
+# rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
+# def output(p, n, c):
+# rel_primitive_outer_join(#1, name, coolness, p, n, c)
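
Note: a minimal usage sketch for the new pass, assuming FormatOutputs is exported from the same rewrite package as Flatten (per the __init__ change above) and that an ir.Model has already been built elsewhere; model construction is elided:

    from relationalai.semantics.metamodel.rewrite import FormatOutputs

    def format_for_backend(model, wide: bool = False):
        # GNF by default (one rule per output column); wide outputs on request.
        return FormatOutputs().rewrite(model, {"wide_outputs": wide})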