relationalai 1.0.0a1__py3-none-any.whl → 1.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. relationalai/semantics/frontend/base.py +3 -0
  2. relationalai/semantics/frontend/front_compiler.py +5 -2
  3. relationalai/semantics/metamodel/builtins.py +2 -1
  4. relationalai/semantics/metamodel/metamodel.py +32 -4
  5. relationalai/semantics/metamodel/pprint.py +5 -3
  6. relationalai/semantics/metamodel/typer.py +324 -297
  7. relationalai/semantics/std/aggregates.py +0 -1
  8. relationalai/semantics/std/datetime.py +4 -1
  9. relationalai/shims/executor.py +26 -5
  10. relationalai/shims/mm2v0.py +119 -44
  11. {relationalai-1.0.0a1.dist-info → relationalai-1.0.0a3.dist-info}/METADATA +1 -1
  12. {relationalai-1.0.0a1.dist-info → relationalai-1.0.0a3.dist-info}/RECORD +57 -48
  13. v0/relationalai/__init__.py +69 -22
  14. v0/relationalai/clients/__init__.py +15 -2
  15. v0/relationalai/clients/client.py +4 -4
  16. v0/relationalai/clients/local.py +5 -5
  17. v0/relationalai/clients/resources/__init__.py +8 -0
  18. v0/relationalai/clients/{azure.py → resources/azure/azure.py} +12 -12
  19. v0/relationalai/clients/resources/snowflake/__init__.py +20 -0
  20. v0/relationalai/clients/resources/snowflake/cli_resources.py +87 -0
  21. v0/relationalai/clients/resources/snowflake/direct_access_resources.py +711 -0
  22. v0/relationalai/clients/resources/snowflake/engine_state_handlers.py +309 -0
  23. v0/relationalai/clients/resources/snowflake/error_handlers.py +199 -0
  24. v0/relationalai/clients/resources/snowflake/resources_factory.py +99 -0
  25. v0/relationalai/clients/{snowflake.py → resources/snowflake/snowflake.py} +606 -1392
  26. v0/relationalai/clients/{use_index_poller.py → resources/snowflake/use_index_poller.py} +43 -12
  27. v0/relationalai/clients/resources/snowflake/use_index_resources.py +188 -0
  28. v0/relationalai/clients/resources/snowflake/util.py +387 -0
  29. v0/relationalai/early_access/dsl/ir/executor.py +4 -4
  30. v0/relationalai/early_access/dsl/snow/api.py +2 -1
  31. v0/relationalai/errors.py +23 -0
  32. v0/relationalai/experimental/solvers.py +7 -7
  33. v0/relationalai/semantics/devtools/benchmark_lqp.py +4 -5
  34. v0/relationalai/semantics/devtools/extract_lqp.py +1 -1
  35. v0/relationalai/semantics/internal/internal.py +4 -4
  36. v0/relationalai/semantics/internal/snowflake.py +3 -2
  37. v0/relationalai/semantics/lqp/executor.py +20 -22
  38. v0/relationalai/semantics/lqp/model2lqp.py +42 -4
  39. v0/relationalai/semantics/lqp/passes.py +1 -1
  40. v0/relationalai/semantics/lqp/rewrite/cdc.py +1 -1
  41. v0/relationalai/semantics/lqp/rewrite/extract_keys.py +53 -12
  42. v0/relationalai/semantics/metamodel/builtins.py +8 -6
  43. v0/relationalai/semantics/metamodel/rewrite/flatten.py +9 -4
  44. v0/relationalai/semantics/metamodel/util.py +6 -5
  45. v0/relationalai/semantics/reasoners/graph/core.py +8 -9
  46. v0/relationalai/semantics/rel/executor.py +14 -11
  47. v0/relationalai/semantics/sql/compiler.py +2 -2
  48. v0/relationalai/semantics/sql/executor/snowflake.py +9 -5
  49. v0/relationalai/semantics/tests/test_snapshot_abstract.py +1 -1
  50. v0/relationalai/tools/cli.py +26 -30
  51. v0/relationalai/tools/cli_helpers.py +10 -2
  52. v0/relationalai/util/otel_configuration.py +2 -1
  53. v0/relationalai/util/otel_handler.py +1 -1
  54. {relationalai-1.0.0a1.dist-info → relationalai-1.0.0a3.dist-info}/WHEEL +0 -0
  55. {relationalai-1.0.0a1.dist-info → relationalai-1.0.0a3.dist-info}/entry_points.txt +0 -0
  56. {relationalai-1.0.0a1.dist-info → relationalai-1.0.0a3.dist-info}/top_level.txt +0 -0
  57. /v0/relationalai/clients/{cache_store.py → resources/snowflake/cache_store.py} +0 -0
@@ -81,17 +81,21 @@ class Typer:
  # Propagation Network
  #--------------------------------------------------

- # The core idea of the typer is to build a propagation network where nodes
- # are vars, fields, or overloaded lookups/updates/aggregates. The intuition
- # is that _all_ types in the IR ultimately flow from relation fields, so if
- # we figure those out we just need to propagate their types to unknown vars, which
- # may then flow into other fields and so on.
-
- # This means the network only needs to contain nodes that either directly flow into
- # an abstract node or are themselves abstract. We need to track overloads because
- # their arguments effectively act like abstract vars until we've resolved the final types.
-
- Node = Union[mm.Var, mm.Field, mm.Literal, mm.Lookup, mm.Update, mm.Aggregate]
+ # The core idea of the typer is to build a propagation network that represents data
+ # dependencies between nodes. Nodes can be variables, literals, fields and tasks that refer
+ # to relations (lookups, aggregates, updates). An edge from node A to node B means that
+ # in order to resolve references and types for node B we need to know the type of node A.
+ #
+ # After building the network, we start with roots that have known types (literals, fields
+ # loaded from previous analysis and vars with concrete types) and propagate their types via
+ # the edges to other nodes. It is possible that the network contains cycles, so we iterate
+ # on the work list until fixpoint is reached, i.e. when we cannot propagate any new type.
+ #
+ # During the propagation, we are only gathering information about the types and references
+ # of nodes. Once we reach a fixpoint, we do a final pass to rewrite the model with the new
+ # type information, which may include adding casts to convert between types where needed.
+ #
+ Node = Union[mm.Var, mm.Field, mm.Literal, mm.Lookup, mm.Aggregate, mm.Update]

  class PropagationNetwork():
  def __init__(self, model: mm.Model):
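The comment block added in the hunk above describes the new worklist-driven, fixpoint-based propagation. As a rough illustration only (simplified stand-in node names and string "types", not the actual metamodel classes or typer API), the propagation idea looks roughly like this:

# Minimal sketch of worklist/fixpoint type propagation over a dependency graph.
def propagate(edges: dict[str, set[str]], types: dict[str, str | None]) -> dict[str, str | None]:
    # seed the worklist with the roots: nodes whose types are already known
    work = [n for n, t in types.items() if t is not None]
    while work:  # keep going until no propagation changes anything (fixpoint)
        node = work.pop(0)
        for succ in edges.get(node, set()):
            if types.get(succ) is None:
                types[succ] = types[node]  # the successor learns its type
                work.append(succ)          # and may propagate it further
    return types

# toy network: a literal flows into a var, which flows into a relation field
edges = {"lit_1": {"var_x"}, "var_x": {"field_age"}}
types = {"lit_1": "Int", "var_x": None, "field_age": None}
print(propagate(edges, types))  # {'lit_1': 'Int', 'var_x': 'Int', 'field_age': 'Int'}

The real network also has to handle cycles and references that cannot yet be resolved, which is why typer.py bounds the number of iterations and keeps re-queueing nodes until no new type can be propagated.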
@@ -104,22 +108,12 @@ class PropagationNetwork():
  # map from unresolved placeholder relations to their potential target replacements
  self.potential_targets: dict[mm.Relation, list[mm.Relation]] = {}

- # track the set of nodes that represent entry points into the network
- self.roots = OrderedSet()
- # we separately want to track nodes that were loaded from a previous run
- # so that even if we have edges to them, we _still_ consider them roots
- # and properly propagate types from them at the beginning
+ # nodes loaded from previous analysis, which we use as roots to start propagation
  self.loaded_roots = set()

  # edges in the propagation network, from one node to potentially many
  self.edges:dict[Node, OrderedSet[Node]] = defaultdict(lambda: OrderedSet())
  self.back_edges:dict[Node, OrderedSet[Node]] = defaultdict(lambda: OrderedSet())
- # all nodes that are the target of an edge (to find roots)
- self.has_incoming = set()
-
- # type requirements: for a var with abstract declared type, the set of fields that
- # it must match the type of because it flows into them
- self.type_requirements:dict[mm.Var, OrderedSet[mm.Field]] = defaultdict(lambda: OrderedSet())

  # all errors collected during inference
  self.errors:list[TyperError] = []
@@ -127,22 +121,22 @@ class PropagationNetwork():
  # overloads resolved for a lookup/update/aggregate, by node id. This is only for
  # relations that declare overloads
  self.resolved_overload:dict[int, mm.Overload] = {}
+
  # placeholders resolved for a lookup, by node id. This is only for relations that
  # are placeholders (i.e. only Any fields) and will be replaced by references to
  # these concrete relations. E.g. a query for "name(Any, Any)" may be replaced by
  # the union of "name(Dog, String)" and name(Cat, String)".
  self.resolved_placeholder:dict[int, list[mm.Relation]] = {}
+
  # for a given lookup/update/aggregate that involves numbers, the specific number
  # type resolved for it.
  self.resolved_number:dict[int, mm.NumberType] = {}
- # keep track of nodes already resolved to avoid re-resolving
- self.resolved_nodes:set[int] = set()

  #--------------------------------------------------
  # Error reporting
  #--------------------------------------------------

- def type_mismatch(self, node: Node, expected: mm.Type, actual: mm.Type):
+ def type_mismatch(self, node: Node|mm.Update, expected: mm.Type, actual: mm.Type):
  self.errors.append(TypeMismatch(node, expected, actual))

  def invalid_type(self, node: Node, type: mm.Type):
@@ -153,6 +147,8 @@ class PropagationNetwork():
  self.errors.append(UnresolvedOverload(node, [self.resolve(a) for a in node.args]))

  def unresolved_type(self, node: Node):
+ # TODO - this is not being used yet, we need a pass at the end to check for any node
+ # that we could not resolve
  self.errors.append(UnresolvedType(node))

  def has_errors(self, node: Node) -> bool:
@@ -166,15 +162,8 @@ class PropagationNetwork():
  #--------------------------------------------------

  def add_edge(self, source: Node, target: Node):
- # manage roots
- if target in self.roots and target not in self.loaded_roots:
- self.roots.remove(target)
- if source not in self.has_incoming:
- self.roots.add(source)
- # register edge
  self.edges[source].add(target)
  self.back_edges[target].add(source)
- self.has_incoming.add(target)

  def add_resolved_type(self, node: Node, type: mm.Type):
  """ Register that this node was resolved to have this type. """
@@ -183,13 +172,8 @@ class PropagationNetwork():
  else:
  self.resolved_types[node] = type

- def add_type_requirement(self, source: mm.Var, field: mm.Field):
- """ Register that this var, which has an abstract declared type, must match the type
- of this field as it flows into it.. """
- self.type_requirements[source].add(field)
-
  #--------------------------------------------------
- # Load previous types
+ # Load types from a previous analysis
  #--------------------------------------------------

  def load_types(self, type_dict: dict[Node, mm.Type]):
@@ -197,22 +181,39 @@ class PropagationNetwork():
  if isinstance(node, (mm.Field)):
  self.add_resolved_type(node, type)
  self.loaded_roots.add(node)
- self.roots.add(node)

  #--------------------------------------------------
  # Resolve Values
  #--------------------------------------------------

- def resolve(self, value: Node|mm.Value) -> mm.Type:
- if isinstance(value, (mm.Var, mm.Field, mm.Literal)):
+ def resolve(self, value: mm.Value) -> mm.Type:
+ if isinstance(value, (mm.Var, mm.Literal)):
  return self.resolved_types.get(value) or to_type(value)
- assert not isinstance(value, (mm.Lookup, mm.Update, mm.Aggregate)), "Should never try to resolve a task"
+ if isinstance(value, mm.Field):
+ return self.resolved_types.get(value) or value.type
+ assert not isinstance(value, (mm.Task)), "Should never try to resolve a task"
  return to_type(value)

  #--------------------------------------------------
  # Resolve References
  #--------------------------------------------------

+ def all_dependencies_resolved(self, op:mm.Lookup|mm.Aggregate):
+ """ True iff all dependencies required to resolve this reference are met. """
+ rel = get_relation(op)
+ # if this is a placeholder, eq or cast, we assume all possible args were resolved
+ if bt.is_placeholder(rel) or rel == b.core.eq or rel == b.core.cast:
+ return True
+
+ # else, find whether all back-edges were resolved
+ for node in self.back_edges[op]:
+ # cannot resolve if a required var, literal or input field is still abstract
+ if isinstance(node, (mm.Var, mm.Literal)) or (isinstance(node, mm.Field) and node.input):
+ node_type = self.resolve(node)
+ if bt.is_abstract(node_type):
+ return False
+ return True
+
  def resolve_reference(self, op: mm.Lookup|mm.Aggregate) -> Optional[mm.Overload|list[mm.Relation]]:
  # check if all dependencies required to resolve this reference are met
  if not self.all_dependencies_resolved(op):
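The new all_dependencies_resolved helper above only lets a lookup or aggregate be resolved once the nodes it depends on (vars, literals and input fields reachable through back-edges) have concrete types. A hedged, self-contained sketch of that rule, using plain dicts and strings rather than the metamodel types:

# Simplified version of the dependency test: an overloaded lookup such as
# add(x, y, z) can only be resolved once every *input* argument has a concrete
# type; the output may still be unknown ("Any") at that point.
def all_dependencies_resolved(arg_types: dict[str, str], inputs: list[str]) -> bool:
    # every input argument must already have a concrete (non-"Any") type
    return all(arg_types.get(name, "Any") != "Any" for name in inputs)

print(all_dependencies_resolved({"x": "Int", "y": "Any", "z": "Any"}, ["x", "y"]))  # False
print(all_dependencies_resolved({"x": "Int", "y": "Int", "z": "Any"}, ["x", "y"]))  # True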
@@ -228,7 +229,6 @@ class PropagationNetwork():
  if all(type_matches(arg, self.resolve(field))
  for arg, field in zip(resolved_args, fields)):
  matches.append(target)
-
  return matches

  elif relation.overloads:
@@ -243,24 +243,11 @@ class PropagationNetwork():
  self.resolved_overload[op.id] = overload
  return overload
  return [] # no matches found
+
  else:
  # this is a relation with type vars or numbers that needs to be specialized
  return [relation]

-
- def all_dependencies_resolved(self, op:mm.Lookup|mm.Aggregate):
- # if this is a placeholder, we need assume all possible args were resolved
- if bt.is_placeholder(get_relation(op)):
- return True
- # else, find whether all back-edges were resolved
- for node in self.back_edges[op]:
- if isinstance(node, (mm.Var, mm.Field, mm.Literal)):
- node_type = self.resolve(node)
- if bt.is_abstract(node_type):
- return False
- return True
-
-
  #--------------------------------------------------
  # Propagation
  #--------------------------------------------------
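resolve_reference, shown in the two hunks above, picks the overloads or placeholder targets whose field types are compatible with the argument types resolved so far. A toy illustration of that filtering (hypothetical relation names, not the real API):

# Toy overload filtering: keep the candidate readings whose field types are
# compatible with the argument types resolved so far ("Any" is still unknown
# and matches anything).
overloads = {
    "add(Int, Int, Int)": ("Int", "Int", "Int"),
    "add(Float, Float, Float)": ("Float", "Float", "Float"),
}

def matching_overloads(arg_types, overloads):
    def compatible(arg, field):
        return arg == "Any" or arg == field
    return [name for name, fields in overloads.items()
            if all(compatible(a, f) for a, f in zip(arg_types, fields))]

print(matching_overloads(("Int", "Int", "Any"), overloads))  # ['add(Int, Int, Int)']
print(matching_overloads(("Any", "Any", "Any"), overloads))  # both remain: more info needed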
@@ -269,73 +256,112 @@ class PropagationNetwork():
  edges = self.edges
  work_list = []

- # go through all the roots and find any that are not abstract, they'll be the first
- # nodes to push types through the network
- unhandled_roots = OrderedSet()
- for node in self.roots:
- if not isinstance(node, (mm.Var, mm.Field, mm.Literal)):
- continue
- node_type = self.resolve(node)
- if not bt.is_abstract(node_type):
- work_list.append(node)
- else:
- unhandled_roots.add(node)
+ # start with the loaded roots + all literals + sources of edges without back edges
+ work_list.extend(self.loaded_roots)
+ for source in self.edges.keys():
+ if isinstance(source, (mm.Literal)) or not source in self.back_edges:
+ work_list.append(source)

- # push known type nodes through the edges
- while work_list:
- source = work_list.pop(0)
- self.resolved_nodes.add(source.id)
- if source in unhandled_roots:
- unhandled_roots.remove(source)
- source_type = self.resolve(source)
- # check to see if the source has ended up with a set of types that
- # aren't valid, e.g. a union of primitives
- if invalid_type(source_type):
- self.invalid_type(source, source_type)
-
- # propagate our type to each outgoing edge
- for out in edges.get(source, []):
- # if this is an overload then we need to try and resolve it
- if isinstance(out, (mm.Lookup, mm.Aggregate)):
- if not out.id in self.resolved_nodes:
- found = self.resolve_reference(out)
- if found is not None:
- self.resolved_nodes.add(out.id)
- self.propagate_reference(out, found)
- for arg in out.args:
- if arg not in work_list:
- work_list.append(arg)
- # otherwise, we just add to the outgoing node's type and if it
- # changes we add it to the work list
- elif start := self.resolve(out):
- self.add_resolved_type(out, source_type)
- if out not in work_list and (start != self.resolve(out) or not out.id in self.resolved_nodes):
- work_list.append(out)
-
- for source in unhandled_roots:
- self.unresolved_type(source)
-
- # now that we've pushed all the types through the network, we need to validate
- # that all type requirements of those nodes are met
- for node, fields in self.type_requirements.items():
- node_type = self.resolve(node)
- for field in fields:
- field_type = self.resolve(field)
- if not type_matches(node_type, field_type) and not conversion_allowed(node_type, field_type):
- self.type_mismatch(node, field_type, node_type)
+ # limit the number of iterations to avoid infinite loops
+ i = 0
+ max_iterations = 100 * len(self.edges)

+ # propagate types until we reach a fixed point
+ while work_list:
+ i += 1
+ if i > max_iterations:
+ err("Infinite Loop", "Infinite loop detected in the typer. Please, report this as a bug.")
+ break

- def propagate_reference(self, task:mm.Lookup|mm.Aggregate, references:mm.Overload|list[mm.Relation]):
+ node = work_list.pop(0)
+ next = None
+ if isinstance(node, mm.Field):
+ # this is a field loaded from a previous analysis, so all we need to do is
+ # propagate its type to its output edges
+ next = edges.get(node, [])
+
+ elif isinstance(node, (mm.Lookup, mm.Aggregate)):
+ # this is a lookup/aggregate that may be overloaded or a placeholder; try
+ # to resolve its reference, i.e. determine which specific relation or
+ # overload it refers to
+ found = self.resolve_reference(node)
+ if found is not None:
+ # if found is None it means that we need more info to resolve the
+ # reference; otherwise it was possible to resolve it (even if to no matches)
+
+ # keep the resolved args before propagation to see if they will change
+ resolved_args = [self.resolve(arg) for arg in node.args]
+ # propagate the reference resolution
+ self.propagate_reference(node, resolved_args, found)
+ if found:
+ # the next nodes to process are all the outgoing edges plus any arg that
+ # changed during propagation
+ next = OrderedSet()
+ next.update(edges.get(node, []))
+ next.update([arg for idx, arg in enumerate(node.args) if isinstance(arg, mm.Var) and not (self.resolve(arg) == resolved_args[idx])])
+ else:
+ # the reference is unresolved, so remove from the worklist the args
+ # because they depend on this being resolved
+ for arg in node.args:
+ if arg in work_list:
+ work_list.remove(arg)
+ else:
+ assert isinstance(node, (mm.Var, mm.Literal))
+ resolved = self.resolve(node)
+ # if we ended up with an invalid type, report it on a next edge (which is
+ # where the var or literal is going to be used)
+ if invalid_type(resolved):
+ self.invalid_type(edges.get(node, [node])[0], resolved)
+
+ # if the var is still abstract, add it to the work list to try again later
+ if isinstance(node, mm.Var) and bt.is_abstract(resolved):
+ # but if everything that is left are vars, we cannot make progress
+ if not all(isinstance(x, mm.Var) for x in work_list):
+ next = [node]
+ else:
+ # otherwise, check all outgoing edges
+ next = []
+ for out in edges.get(node, []):
+ if isinstance(out, mm.Update):
+ # this is an update to a field. We have to check that the type is
+ # valid for the field and if the field is abstract we can refine the
+ # type. But if the field is concrete we have to check that the type
+ # matche or can be converted.
+ is_population_lookup = isinstance(out.relation, mm.TypeNode)
+ for field in get_update_fields(node, out):
+ if not type_matches(resolved, field.type, accept_expected_super_types=is_population_lookup) and not conversion_allowed(resolved, field.type):
+ self.type_mismatch(out, field.type, resolved)
+ elif bt.is_abstract(field.type):
+ # if the field type changed, propagate further
+ if resolved != self.resolve(field):
+ next.append(field)
+ self.add_resolved_type(field, resolved)
+ else:
+ # this is an arg flowing into a task, so resolve the task next
+ next.append(out)
+
+ # add the new nodes to the work list
+ if next is not None:
+ for n in next:
+ if n not in work_list:
+ work_list.append(n)
+
+
+ def propagate_reference(self, task:mm.Lookup|mm.Aggregate, resolved_args:list[mm.Type], references:mm.Overload|list[mm.Relation]):
  # TODO: distinguish between overloads and placeholders better when raising errors
  if not references:
  return self.unresolved_overload(task)

- resolved_args = [self.resolve(arg) for arg in task.args]
-
- # we need to determine the final types of our args by taking all the references
- # and adding the type of their fields back to the args.
  relation = get_relation(task)

+ if relation == b.core.cast:
+ # cast is special cased: we just propagate the type to the target
+ cast_type, target = task.args[0], task.args[2]
+ assert isinstance(cast_type, mm.Type)
+ assert isinstance(target, mm.Var)
+ self.add_resolved_type(target, cast_type)
+ return
+
  if bt.is_placeholder(relation):
  assert(references and isinstance(references, list))
  # we've resolved the placeholder, so store that
@@ -357,22 +383,25 @@ class PropagationNetwork():
  # number specialization, so use that relation's field types
  types = list([self.resolve(f) for f in relation.fields])

- # if our overload preserves types, we check to see if there's a preserved
- # output type given the inputs and if so, shadow the field's type with the
- # preserved type
+ # if our overload preserves types, we check to see if there's a preserved output
+ # type given the inputs and if so, shadow the field type with the preserved type.
+ # this is only attempted if all input types match the field types, i.e. no
+ # conversions are needed
  resolved_fields = types
- if bt.is_function(relation) and len(set(resolved_fields)) == 1:
+ if bt.is_function(relation) and len(set(resolved_fields)) == 1 and not relation in self.NON_TYPE_PRESERVERS and\
+ all(type_matches(arg_type, field_type) for arg_type, field_type, field in zip(resolved_args, types, relation.fields) if field.input):
+
  input_types = set([arg_type for field, arg_type
  in zip(relation.fields, resolved_args) if field.input])
  if out_type := self.try_preserve_type(input_types):
  resolved_fields = [field_type if field.input else out_type
  for field, field_type in zip(relation.fields, types)]

- # TODO - we also need to make sure the type vars are constently resolved here
- # i.e. if types contain typevars, check that the args that are bound to those
- # typevars are consistent
- if b.core.Number in types or (b.core.TypeVar in types and any(bt.is_number(t) for t in resolved_args)):
- # this overload contains generic numbers or typevars bound to numbers, so
+
+ # eq is special cased because we don't want to specialize a number for it, as it
+ # can be just comparing numbers of different types.
+ if relation != b.core.eq and (b.core.Number in types or (b.core.TypeVar in types and any(bt.is_number(t) for t in resolved_args))):
+ # this relation contains generic numbers or typevars bound to numbers, so
  # find which specific type of number to use given the arguments being passed
  number, resolved_fields = self.specialize_number(relation, resolved_fields, resolved_args)
  self.resolved_number[task.id] = number
@@ -380,16 +409,62 @@ class PropagationNetwork():
  for field, field_type, arg in zip(relation.fields, resolved_fields, task.args):
  if not field.input and isinstance(arg, mm.Var):
  self.add_resolved_type(arg, field_type)
+
+ elif b.core.TypeVar in types:
+ # this relation contains type vars, so we have to make sure that all args
+ # bound to the same type var are consistent
+
+ # find which arg is bound to the type var and check that they are all consistent
+ typevar_type = None
+ for arg_type, field_type in zip(resolved_args, types):
+ if field_type == b.core.TypeVar:
+ if typevar_type is None:
+ typevar_type = arg_type
+ else:
+ typevar_type = merge_types(typevar_type, arg_type)
+ assert typevar_type is not None
+
+ # compute the final arg types by replacing type vars with the typevar_type
+ computed_arg_types = []
+ for arg_type, field_type in zip(resolved_args, types):
+ # check that the arg type matches the type var type
+ if not type_matches(typevar_type, arg_type) and not conversion_allowed(arg_type, typevar_type):
+ self.type_mismatch(task, typevar_type, arg_type)
+ if field_type == b.core.TypeVar:
+ computed_arg_types.append(typevar_type)
+ else:
+ computed_arg_types.append(arg_type)
+
+ # if no mismatches were found, propagate the computed arg types back to the args
+ if len(computed_arg_types) == len(task.args):
+ for computed_type, arg, arg_type in zip(computed_arg_types, task.args, resolved_args):
+ # TODO: we could allow for non-numeric/string literals because this means
+ # the literal is being used as a value type, but the backend emitters
+ # would have to deal with that.
+ if isinstance(arg, mm.Var) or (isinstance(arg, mm.Literal) and (bt.is_numeric(computed_type) or computed_type == b.core.String)):
+ self.add_resolved_type(arg, computed_type)
+
  else:
- for field_type, arg, arg_type in zip(resolved_fields, task.args, resolved_args):
- if bt.is_abstract(arg_type) and isinstance(arg, mm.Var):
+ # no typevar or number specialization shenanigans, just propagate field types to args
+ for field, field_type, arg, arg_type in zip(relation.fields, resolved_fields, task.args, resolved_args):
+ # add a resolved type if we learned more about the arg's type
+ if isinstance(arg, mm.Var) and (bt.is_abstract(arg_type) or bt.extends(field_type, arg_type)):
  self.add_resolved_type(arg, field_type)
+ elif not type_matches(arg_type, field_type) and not conversion_allowed(arg_type, field_type):
+ self.type_mismatch(task, field_type, arg_type)
+

+ # we try to preserve types for relations that are functions (i.e. potentially multiple
+ # input but a single output) and where all types are the same. However, there are some
+ # exceptions to this rule, e.g. range() always returns int regardless of whether the
+ # input type is an int or a value type that extends int.
+ NON_TYPE_PRESERVERS = [
+ b.common.range
+ ]

  def try_preserve_type(self, types:set[mm.Type]) -> Optional[mm.Type]:
- # we keep the input type as the output type if either all inputs
- # are the exact same type or there's one nominal and its base primitive
- # type, e.g. USD + Decimal
+ # we keep the input type as the output type if either all inputs are the exact same
+ # type or there's one nominal and its base primitive type, e.g. USD + Decimal
  if len(types) == 1:
  return next(iter(types))
  if len(types) == 2:
@@ -411,13 +486,15 @@ class PropagationNetwork():

  def specialize_number(self, op, field_types:list[mm.Type], arg_types:list[mm.Type]) -> Tuple[mm.NumberType, list[mm.Type]]:
  """
- Find the number type to use for an overload that has Number in its field_types,
- and which is being referred to with these arg_types.
+ Find the number type to use for an overload that has Number in its field_types, and
+ which is being referred to with these arg_types.

  Return a tuple where the first element is the specialized number type, and the second
  element is a new list that contains the same types as field_types but with
  Number replaced by this specialized number.
  """
+ # special case a few operators according to Snowflake's rules in
+ # https://docs.snowflake.com/en/sql-reference/operators-arithmetic#scale-and-precision-in-arithmetic-operations
  if op == b.core.div:
  # see https://docs.snowflake.com/en/sql-reference/operators-arithmetic#division
  numerator, denominator = get_number_type(arg_types[0]), get_number_type(arg_types[1])
@@ -436,29 +513,21 @@ class PropagationNetwork():
  # TODO!! - implement proper avg specialization
  pass

+ # fall back to the the current specialization policy, which is to select the number
+ # with largest scale and, if there multiple with the largest scale, the one with the
+ # largest precision. This is safe because when converting a number to the
+ # specialized number, we never truncate fractional digits (because we selected the
+ # largest scale) and, if the non-fractional digits are too large to fit the
+ # specialized number, we will have a runtime overflow, which should alert the user
+ # of the problem.
  number = None
  for arg_type in arg_types:
  x = bt.get_number_supertype(arg_type)
  if isinstance(x, mm.NumberType):
- # the current specialization policy is to select the number with largest
- # scale and, if there multiple with the largest scale, the one with the
- # largest precision. This is safe because when converting a number to the
- # specialized number, we never truncate fractional digits (because we
- # selected the largest scale) and, if the non-fractional digits are too
- # large to fit the specialized number, we will have a runtime overflow,
- # which should alert the user of the problem.
- #
- # In the future we can implement more complex policies. For example,
- # snowflake has well documented behavior for how the output of operations
- # behave in face of different number types, and we may use that:
- # https://docs.snowflake.com/en/sql-reference/operators-arithmetic#scale-and-precision-in-arithmetic-operations
  if number is None or x.scale > number.scale or (x.scale == number.scale and x.precision > number.precision):
  number = x
- if number is None:
- number = b.core.DefaultNumber
- # assert(isinstance(number, mm.NumberType))
- return number, [number if bt.is_number(field_type) else field_type
- for field_type in field_types]
+ assert(number is not None)
+ return number, [number if bt.is_number(t) else t for t in field_types]


  #--------------------------------------------------
@@ -467,7 +536,6 @@ class PropagationNetwork():

  # draw the network as a mermaid graph for the debugger
  def to_mermaid(self, max_edges=500) -> str:
-
  # add links for edges while collecting nodes
  nodes = OrderedSet()
  link_strs = []
@@ -481,22 +549,12 @@ class PropagationNetwork():
  link_strs.append(f"n{src.id} --> n{dst.id}")
  if len(link_strs) > max_edges:
  break
- # type requirements
- for src, dsts in self.type_requirements.items():
- nodes.add(src)
- for dst in dsts:
- if len(link_strs) > max_edges:
- break
- nodes.add(dst)
- link_strs.append(f"n{src.id} -.-> n{dst.id}")
- if len(link_strs) > max_edges:
- break

  def type_span(t:mm.Type) -> str:
  type_str = t.name if isinstance(t, mm.ScalarType) else str(t)
  return f"<span style='color:cyan;'>{type_str.strip()}</span>"

- def reference_span(rel:mm.Relation, arg_types:list[mm.Type], root:str) -> str:
+ def reference_span(rel:mm.Relation, arg_types:list[mm.Type]) -> str:
  args = []
  for field, arg_type in zip(rel.fields, arg_types):
  field_type = self.resolve(field)
@@ -506,22 +564,21 @@ class PropagationNetwork():
  args.append(type_span(field_type))
  else:
  args.append(type_span(arg_type))
- return f'{rel.name}{root}({", ".join(args)})'
+ return f'{rel.name}({", ".join(args)})'

  resolved = self.resolved_types
  node_strs = []
  for node in nodes:
  klass = ""
- root = "(*)" if node in self.roots else ""
  if isinstance(node, mm.Var):
  ir_type = resolved.get(node) or self.resolve(node)
  type_str = type_span(ir_type)
- label = f'(["{node.name}{root}:{type_str}"])'
+ label = f'(["{node.name}:{type_str}"])'
  elif isinstance(node, mm.Literal):
  ir_type = resolved.get(node) or self.resolve(node)
  type_str = type_span(ir_type)
  klass = ":::literal"
- label = f'[/"{node.value}{root}: {type_str}"\\]'
+ label = f'(["{node.value}: {type_str}"])'
  elif isinstance(node, mm.Field):
  ir_type = resolved.get(node) or self.resolve(node)
  type_str = type_span(ir_type)
@@ -529,19 +586,22 @@ class PropagationNetwork():
  rel = node._relation
  if rel is not None:
  rel = str(node._relation)
- label = f'{{{{"{node.name}{root}:{type_str}\nfrom {rel}"}}}}'
+ label = f'[/"{node.name}:{type_str}\nfrom {rel}"\\]'
  else:
- label = f'{{{{"{node.name}{root}:\n{type_str}"}}}}'
- elif isinstance(node, (mm.Lookup, mm.Update, mm.Aggregate)):
+ label = f'[/"{node.name}:\n{type_str}"\\]'
+ elif isinstance(node, (mm.Lookup, mm.Aggregate, mm.Update)):
  arg_types = [self.resolve(arg) for arg in node.args]
  if node.id in self.resolved_placeholder:
  overloads = self.resolved_placeholder[node.id]
- content = "<br/>".join([reference_span(o, arg_types, root) for o in overloads])
+ content = "<br/>".join([reference_span(o, arg_types) for o in overloads])
+ else:
+ content = reference_span(get_relation(node), arg_types)
+ if isinstance(node, mm.Update):
+ klass = ":::update"
+ label = f'{{{{"{content}"}}}}'
  else:
- content = reference_span(get_relation(node), arg_types, root)
- label = f'[/"{content}"/]'
- # elif isinstance(node, mm.Relation):
- # label = f'[("{node}")]'
+ klass = ":::reference"
+ label = f'[/"{content}"/]'
  else:
  raise NotImplementedError(f"Unknown node type: {type(node)}")
  if self.has_errors(node):
@@ -555,6 +615,7 @@ class PropagationNetwork():
  flowchart TD
  linkStyle default stroke:#666
  classDef field fill:#245,stroke:#478
+ classDef update fill:#245,stroke:#478
  classDef literal fill:#452,stroke:#784
  classDef error fill:#624,stroke:#945,color:#f9a
  classDef default stroke:#444,stroke-width:2px, font-size:12px
@@ -598,121 +659,42 @@ class Analyzer(Walker):
  rel = node.relation
  self.compute_potential_targets(rel)

- # if this is a type relation, the update is asserting that the argument is of that
- # type; so, it's fine to pass a super-type in to the population e.g. Employee(Person)
- # should be a valid way to populate that a particular Person is also an Employee.
- is_type_relation = isinstance(rel, mm.TypeNode)
+ # arg is flowing into a field
  for arg, field in zip(node.args, rel.fields):
- field_type = field.type
- arg_type = self.net.resolve(arg)
-
- # if the arg is abstract, but the field isn't, then we need to make sure that
- # once the arg is resolved we check that it matches the field type
- if isinstance(arg, mm.Var) and bt.is_abstract(arg_type) and bt.is_concrete(field_type):
- self.net.add_type_requirement(arg, field)
-
- if bt.is_abstract(field_type) and isinstance(arg, (mm.Var, mm.Literal)):
- # if the field is abstract, then eventually this arg will help determine
- # the field's type, so add an edge from the arg to the field
- self.net.add_edge(arg, field)
- elif not type_matches(arg_type, field_type, accept_expected_super_types=is_type_relation):
- if not conversion_allowed(arg_type, field_type):
- self.net.type_mismatch(node, field_type, arg_type)
+ if isinstance(arg, (mm.Var, mm.Literal)):
+ self.net.add_edge(arg, node)

  #--------------------------------------------------
  # Walk Lookups + Aggregates
  #--------------------------------------------------

- def lookup(self, node: mm.Lookup):
- self.compute_potential_targets(node.relation)
- self.visit_rel_op(node)
-
- def aggregate(self, node: mm.Aggregate):
- self.visit_rel_op(node)
-
- def visit_rel_op(self, node: mm.Lookup|mm.Aggregate):
- rel = get_relation(node)
-
- # special case eq lookups
- if isinstance(node, mm.Lookup) and rel == b.core.eq:
- # if both args for an eq are abstract, link them, otherwise do normal processing
- (left, right) = node.args
- left_type = self.net.resolve(left)
- right_type = self.net.resolve(right)
- if bt.is_abstract(left_type) and bt.is_abstract(right_type):
- assert isinstance(left, mm.Var) and isinstance(right, mm.Var)
- # if both sides are abstract, then whatever we find out about
- # either should propagate to the other
- self.net.add_edge(left, right)
- self.net.add_edge(right, left)
- return
-
- # special case when the relation needs to be resolved as there are overloads, placeholders,
- # type vars or it needs number specialization
- if self.requires_resolution(rel):
- return self.visit_unresolved_reference(node)
-
- # if this is a population check, then it's fine to pass a subtype in to do the check
- # e.g. Employee(Person) is a valid way to check if a person is an employee
- is_population_lookup = isinstance(rel, mm.TypeNode)
- for arg, field in zip(node.args, rel.fields):
- field_type = self.net.resolve(field)
- arg_type = self.net.resolve(arg)
- if not type_matches(arg_type, field_type, is_population_lookup):
- # Do not complain if we can convert the arg to the field type.
- if not conversion_allowed(arg_type, field_type):
- # if the arg is a var and it matches when allowing for super types of
- # the expected we can expect to refine it later; but we add a type
- # requirement to check at the end
- if isinstance(arg, mm.Var) and type_matches(arg_type, field_type, True):
- self.net.add_type_requirement(arg, field)
- else:
- self.net.type_mismatch(node, field_type, arg_type)
- # if we have an abstract var then this field will ultimately propagate to that
- # var's type; also, if this is a population lookup, the type of the population
- # being looked up will flow back to the var
- if isinstance(arg, mm.Var):
- if not field.input:
- self.net.add_edge(field, arg)
- else:
- self.net.add_type_requirement(arg, field)
+ def lookup(self, task: mm.Lookup):
+ self.compute_potential_targets(task.relation)
+ self.visit_rel_op(task)

+ def aggregate(self, task: mm.Aggregate):
+ self.visit_rel_op(task)

- def requires_resolution(self, rel: mm.Relation) -> bool:
- # has overloads or is a placeholder relation that needs replacement
- if rel.overloads or bt.is_placeholder(rel):
- return True
- # there are type vars or numbers in the fields that need specialization
- for field in rel.fields:
- t = self.net.resolve(field)
- if bt.is_type_var(t) or t == b.core.Number:
- return True
- return False
-
+ def visit_rel_op(self, task: mm.Lookup|mm.Aggregate):
+ relation = get_relation(task)

- def visit_unresolved_reference(self, node: mm.Lookup|mm.Aggregate):
- relation = get_relation(node)
- # functions have their outputs determined by their inputs
- is_function = bt.is_function(relation)
  is_placeholder = bt.is_placeholder(relation)
- # add edges between args and the relation based on input/output
- for field, arg in zip(relation.fields, node.args):
+ for field, arg in zip(relation.fields, task.args):
  if isinstance(arg, (mm.Var, mm.Literal)):
  if field.input:
- # the arg type will flow into the input
- self.net.add_edge(arg, node)
+ # we need to resolve all inputs before resolving the relation
+ self.net.add_edge(arg, task)
  else:
- if is_function:
- # this is an output of a function, so the field type will flow to the arg
- self.net.add_edge(node, arg)
+ # placeholders also need the output to be resolved
+ if is_placeholder:
+ self.net.add_edge(arg, task)
  else:
- if is_placeholder:
- self.net.add_edge(arg, node)
- if bt.is_abstract(field.type) and not is_placeholder:
- self.net.add_edge(field, node)
-
- if bt.is_abstract(self.net.resolve(arg)):
- self.net.add_edge(node, arg)
+ # args bound to outputs can be resolved after
+ self.net.add_edge(task, arg)
+ # if the field is abstract, it needs to be resolved before we can
+ # resolve the task.
+ if bt.is_abstract(self.net.resolve(field)):
+ self.net.add_edge(field, task)


  #--------------------------------------------------
@@ -750,7 +732,7 @@ class Replacer(Rewriter):
  # TODO - this is only modifying the relation in the model, but then we have a new
  # relation there, which is different than the object referenced by tasks.
  if node in self.net.resolved_types:
- return mm.Field(node.name, self.net.resolved_types[node], node.input)
+ return mm.Field(node.name, self.net.resolved_types[node], node.input, _relation = node._relation)
  return node

  def var(self, node: mm.Var):
@@ -773,31 +755,40 @@

  args = types = None
  if node.id in self.net.resolved_placeholder:
+ # placeholder resolved to multiple relations
  resolved_relations = self.net.resolved_placeholder[node.id]
  args = get_lookup_args(node, resolved_relations[0])
  types = [f.type for f in resolved_relations[0].fields]
  elif node.id in self.net.resolved_overload:
+ # overload resolved to a specific relation
  resolved_relations = [node.relation]
  types = self.net.resolved_overload[node.id].types
  else:
+ # single relation
  resolved_relations = [node.relation]

  if len(resolved_relations) == 1:
+ # single relation, just convert arguments
  x = self.convert_arguments(node, resolved_relations[0], args, types)
  if isinstance(x, mm.Logical) and len(x.body) == 1:
  return x.body[0]
  else:
  return x

+ # multiple relations, create a union
  branches:list = []
  for target in resolved_relations:
  args = get_lookup_args(node, target)
  types = [f.type for f in get_relation_fields(resolved_relations[0], node.relation.name)]
  # adding this logical to avoid issues in the old backend
- branches.append(mm.Logical((self.convert_arguments(node, target, args, types=types),)))
+ branches.append(mm.Logical((self.convert_arguments(node, target, args, types=types, force_copy=True),)))
  return mm.Union(tuple(branches))

- def convert_arguments(self, node: mm.Lookup|mm.Update, relation: mm.Relation, args: Iterable[mm.Value]|None=None, types: Iterable[mm.Type]|None=None) -> mm.Logical|mm.Lookup|mm.Update:
+ def convert_arguments(self, node: mm.Lookup|mm.Update, relation: mm.Relation, args: Iterable[mm.Value]|None=None, types: Iterable[mm.Type]|None=None, force_copy=False) -> mm.Logical|mm.Lookup|mm.Update:
+ """ This node was resolved to target this relation using these args, which should
+ have these types. Convert any arguments as needed and return a new node with the
+ proper relation and converted args. If multiple conversions are needed, return a
+ logical that contains all the conversion tasks plus the final node. """
  args = args or node.args
  types = types or [self.net.resolve(f) for f in relation.fields]
  number_type = self.net.resolved_number.get(node.id)
@@ -817,15 +808,20 @@
  final_args.append(arg)
  else:
  final_args.append(arg)
- if isinstance(node, mm.Lookup):
- tasks.append(node.mut(relation = relation, args = tuple(final_args)))
+ # add the original node with the proper target relation and converted args;
+ # here we want to use mut because we keep information about nodes based on their ids
+ # (e.g. we store resolved types based on ids), so we want to keep the same id. But
+ # when a lookup is being converted as part of a union (i.e. multiple targets for a
+ # placeholder), we need to create nodes with new ids to avoid conflicts.
+ if force_copy:
+ tasks.append(node.replace(relation = relation, args = tuple(final_args)))
  else:
  tasks.append(node.mut(relation = relation, args = tuple(final_args)))
+ # if we need conversion tasks, wrap in a logical
  if len(tasks) == 1:
  return tasks[0]
  return mm.Logical(tuple(tasks))

-
  def visit_eq_lookup(self, node: mm.Lookup):
  (left, right) = node.args
  left_type = to_type(left)
@@ -842,7 +838,7 @@
  elif conversion_allowed(right_type, left_type):
  final_args = [left, convert(right, left_type, tasks)]
  else:
- self.net.type_mismatch(node, left_type, right_type)
+ # this type mismatch was reported during propagation, so just return the node
  return node

  tasks.append(mm.Lookup(b.core.eq, tuple(final_args)))
@@ -873,6 +869,15 @@ def get_name(type: mm.Type) -> str:
  # Type and Relation helpers
  #--------------------------------------------------

+ def get_update_fields(arg, update: mm.Update) -> Iterable[mm.Field]:
+ """ Get the fields of the relation being updated by this arg. Note that an arg can be
+ bound to multiple fields at the same time. """
+ if arg in update.args:
+ for x, field in zip(update.args, update.relation.fields):
+ if arg == x:
+ yield field
+ return []
+
  def get_relation_fields(relation: mm.Relation, name: str) -> Iterable[mm.Field]:
  """ Get the fields of this relation, potentially reordered to match the reading with the given name."""
  if name == relation.name:
@@ -915,18 +920,20 @@ def get_potential_targets(model: mm.Model, placeholder: mm.Relation) -> list[mm.
  return list(filter(lambda r: is_potential_target(placeholder, r), model.relations))

  def to_type(value: mm.Value|mm.Field|mm.Literal) -> mm.Type:
- if isinstance(value, (mm.Var, mm.Field, mm.Literal)):
+ if isinstance(value, (mm.Var, mm.Literal)):
  return value.type

  if isinstance(value, mm.Type):
  return b.core.Type

+ if isinstance(value, mm.Field):
+ return b.core.Field
+
  if isinstance(value, tuple):
  return mm.TupleType(element_types=tuple(to_type(v) for v in value))

  raise TypeError(f"Cannot determine IR type for value: {value} of type {type(value).__name__}")

-
  def convert(value: mm.Var|mm.Literal, to_type: mm.Type, tasks: list[mm.Task]) -> mm.Value:
  # if the arg is a literal, we can just change its type
  # TODO - we may want to check that the value is actually convertible
@@ -940,8 +947,10 @@ def convert(value: mm.Var|mm.Literal, to_type: mm.Type, tasks: list[mm.Task]) ->
  tasks.append(mm.Lookup(b.core.cast, (to_type_base, value, new_value)))
  return new_value

-
  def conversion_allowed(from_type: mm.Type, to_type: mm.Type) -> bool:
+ # value type conversion is allowed only if the value types are related by inheritance
+ if bt.is_value_type(from_type) and bt.is_value_type(to_type) and not bt.extends(from_type, to_type):
+ return False
  # value type conversion is allowed
  x = bt.get_primitive_supertype(from_type)
  y = bt.get_primitive_supertype(to_type)
@@ -954,7 +963,7 @@

  # a number can be converted to another number of larger scale
  if isinstance(from_type, mm.NumberType) and isinstance(to_type, mm.NumberType):
- if to_type.scale > from_type.scale:
+ if to_type.scale >= from_type.scale:
  return True

  if from_type == b.core.Number and isinstance(to_type, mm.NumberType):
@@ -990,11 +999,6 @@ def type_matches(actual: mm.Type, expected: mm.Type, accept_expected_super_types
  if expected == b.core.TypeVar:
  return True

- # TODO - remove this once we make them singletons per precision/scale
- if isinstance(actual, mm.NumberType) and isinstance(expected, mm.NumberType):
- if actual.precision == expected.precision and actual.scale == expected.scale:
- return True
-
  # if an entity type var or any entity is expected, it matches any actual entity type
  if (expected == b.core.EntityTypeVar or bt.extends(expected, b.core.AnyEntity)) and not bt.is_primitive(actual):
  return True
@@ -1006,6 +1010,10 @@
  if (expected == b.core.Numeric) and bt.is_numeric(actual):
  return True

+ # different value types never match
+ if bt.is_value_type(actual) and bt.is_value_type(expected) and not bt.extends(actual, expected):
+ return False
+
  # if actual is scalar, any of its parents may match the expected type
  if isinstance(actual, mm.ScalarType) and any([type_matches(parent, expected) for parent in actual.super_types]):
  return True
@@ -1049,18 +1057,12 @@

  # accept tuples with a single element type to match a list with that type
  if isinstance(actual, mm.TupleType) and isinstance(expected, mm.ListType):
- if len(set(actual.element_types)) == 1:
- return type_matches(actual.element_types[0], expected.element_type)
+ return type_matches(actual.element_types[0], expected.element_type)

  # otherwise no match
  return False

-
- def merge_types(type1: mm.Type, type2: mm.Type) -> mm.Type:
- if type1 == type2:
- return type1
- types_to_process = [type1, type2]
-
+ def merge_numeric_types(type1: mm.Type, type2: mm.Type) -> Optional[mm.Type]:
  # if one of them is the abstract Number type, pick the other
  if type1 == b.core.Number and isinstance(type2, mm.NumberType):
  return type2
@@ -1077,6 +1079,24 @@
  # if we are overriding a number with a float, pick float
  if isinstance(type1, mm.NumberType) and type2 == b.core.Float:
  return type2
+ if isinstance(type2, mm.NumberType) and type1 == b.core.Float:
+ return type1
+
+ return None
+
+ def merge_types(type1: mm.Type, type2: mm.Type) -> mm.Type:
+ if type1 == type2:
+ return type1
+ if bt.is_type_var(type1):
+ return type2
+ if bt.is_type_var(type2):
+ return type1
+
+ types_to_process = [type1, type2]
+
+ numeric_merge = merge_numeric_types(type1, type2)
+ if numeric_merge is not None:
+ return numeric_merge

  # if one extends the other, pick the most specific one
  if bt.extends(type1, type2):
@@ -1093,6 +1113,13 @@
  elif bt.is_primitive(type2):
  return type1

+ if base_primitive_type1 and base_primitive_type2:
+ numeric_merge = merge_numeric_types(base_primitive_type1, base_primitive_type2)
+ if numeric_merge == base_primitive_type1:
+ return type1
+ elif numeric_merge == base_primitive_type2:
+ return type2
+
  combined = OrderedSet()
  # Iterative flattening of union types
  while types_to_process:
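The merge_numeric_types/merge_types changes in the last hunks, together with the specialize_number policy earlier in the diff (pick the number type with the largest scale, breaking ties by the largest precision), determine which concrete number type wins when several flow into the same relation. A standalone sketch of that selection rule, with a stand-in NumberType dataclass rather than the metamodel class:

# Standalone sketch of the "largest scale, then largest precision" selection
# described in specialize_number. NumberType here is an illustrative stand-in.
from dataclasses import dataclass

@dataclass(frozen=True)
class NumberType:
    precision: int
    scale: int

def pick_specialized_number(arg_types: list[NumberType]) -> NumberType:
    number = None
    for x in arg_types:
        if number is None or x.scale > number.scale or \
           (x.scale == number.scale and x.precision > number.precision):
            number = x
    assert number is not None
    return number

# Decimal(10, 2) and Decimal(38, 4): the (38, 4) type wins on scale
print(pick_specialized_number([NumberType(10, 2), NumberType(38, 4)]))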