PyPI - datajoint - Versions diffs - 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl - Mend

datajoint 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datajoint might be problematic. Click here for more details.

Files changed (31)

datajoint/__init__.py +5 -2
datajoint/admin.py +10 -2
datajoint/autopopulate.py +113 -84
datajoint/blob.py +6 -4
datajoint/cli.py +77 -0
datajoint/condition.py +31 -0
datajoint/connection.py +4 -1
datajoint/declare.py +22 -6
datajoint/dependencies.py +64 -32
datajoint/diagram.py +62 -48
datajoint/expression.py +98 -41
datajoint/external.py +7 -2
datajoint/fetch.py +31 -51
datajoint/heading.py +14 -8
datajoint/preview.py +8 -6
datajoint/s3.py +2 -1
datajoint/schemas.py +8 -10
datajoint/settings.py +9 -0
datajoint/table.py +54 -12
datajoint/user_tables.py +27 -0
datajoint/utils.py +14 -1
datajoint/version.py +1 -1
datajoint-0.14.3.dist-info/METADATA +592 -0
datajoint-0.14.3.dist-info/RECORD +34 -0
{datajoint-0.14.1.dist-info → datajoint-0.14.3.dist-info}/WHEEL +1 -1
datajoint-0.14.3.dist-info/entry_points.txt +3 -0
datajoint-0.14.1.dist-info/METADATA +0 -26
datajoint-0.14.1.dist-info/RECORD +0 -33
datajoint-0.14.1.dist-info/datajoint.pub +0 -6
{datajoint-0.14.1.dist-info → datajoint-0.14.3.dist-info}/LICENSE.txt +0 -0
{datajoint-0.14.1.dist-info → datajoint-0.14.3.dist-info}/top_level.txt +0 -0

datajoint/dependencies.py CHANGED Viewed

@@ -5,28 +5,64 @@ from collections import defaultdict
 from .errors import DataJointError
-def unite_master_parts(lst):
+def extract_master(part_table):
     """
-    re-order a list of table names so that part tables immediately follow their master tables without breaking
-    the topological order.
-    Without this correction, a simple topological sort may insert other descendants between master and parts.
-    The input list must be topologically sorted.
-    :example:
-    unite_master_parts(
-        ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) ->
-        ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`']
+    given a part table name, return master part. None if not a part table
     """
-    for i in range(2, len(lst)):
-        name = lst[i]
-        match = re.match(r"(?P<master>`\w+`.`#?\w+)__\w+`", name)
-        if match:  # name is a part table
-            master = match.group("master")
-            for j in range(i - 1, -1, -1):
-                if lst[j] == master + "`" or lst[j].startswith(master + "__"):
-                    # move from the ith position to the (j+1)th position
-                    lst[j + 1 : i + 1] = [name] + lst[j + 1 : i]
-                    break
-    return lst
+    match = re.match(r"(?P<master>`\w+`.`#?\w+)__\w+`", part_table)
+    return match["master"] + "`" if match else None
+def topo_sort(graph):
+    """
+    topological sort of a dependency graph that keeps part tables together with their masters
+    :return: list of table names in topological order
+    """
+    graph = nx.DiGraph(graph)  # make a copy
+    # collapse alias nodes
+    alias_nodes = [node for node in graph if node.isdigit()]
+    for node in alias_nodes:
+        try:
+            direct_edge = (
+                next(x for x in graph.in_edges(node))[0],
+                next(x for x in graph.out_edges(node))[1],
+            )
+        except StopIteration:
+            pass  # a disconnected alias node
+        else:
+            graph.add_edge(*direct_edge)
+    graph.remove_nodes_from(alias_nodes)
+    # Add parts' dependencies to their masters' dependencies
+    # to ensure correct topological ordering of the masters.
+    for part in graph:
+        # find the part's master
+        if (master := extract_master(part)) in graph:
+            for edge in graph.in_edges(part):
+                parent = edge[0]
+                if master not in (parent, extract_master(parent)):
+                    # if parent is neither master nor part of master
+                    graph.add_edge(parent, master)
+    sorted_nodes = list(nx.topological_sort(graph))
+    # bring parts up to their masters
+    pos = len(sorted_nodes) - 1
+    placed = set()
+    while pos > 1:
+        part = sorted_nodes[pos]
+        if (master := extract_master(part)) not in graph or part in placed:
+            pos -= 1
+        else:
+            placed.add(part)
+            insert_pos = sorted_nodes.index(master) + 1
+            if pos > insert_pos:
+                # move the part to the position immediately after its master
+                del sorted_nodes[pos]
+                sorted_nodes.insert(insert_pos, part)
+    return sorted_nodes
 class Dependencies(nx.DiGraph):
@@ -131,6 +167,10 @@ class Dependencies(nx.DiGraph):
             raise DataJointError("DataJoint can only work with acyclic dependencies")
         self._loaded = True
+    def topo_sort(self):
+        """:return: list of tables names in topological order"""
+        return topo_sort(self)
     def parents(self, table_name, primary=None):
         """
         :param table_name: `schema`.`table`
@@ -167,10 +207,8 @@ class Dependencies(nx.DiGraph):
         :return: all dependent tables sorted in topological order.  Self is included.
         """
         self.load(force=False)
-        nodes = self.subgraph(nx.algorithms.dag.descendants(self, full_table_name))
-        return unite_master_parts(
-            [full_table_name] + list(nx.algorithms.dag.topological_sort(nodes))
-        )
+        nodes = self.subgraph(nx.descendants(self, full_table_name))
+        return [full_table_name] + nodes.topo_sort()
     def ancestors(self, full_table_name):
         """
@@ -178,11 +216,5 @@ class Dependencies(nx.DiGraph):
         :return: all dependent tables sorted in topological order.  Self is included.
         """
         self.load(force=False)
-        nodes = self.subgraph(nx.algorithms.dag.ancestors(self, full_table_name))
-        return list(
-            reversed(
-                unite_master_parts(
-                    list(nx.algorithms.dag.topological_sort(nodes)) + [full_table_name]
-                )
-            )
-        )
+        nodes = self.subgraph(nx.ancestors(self, full_table_name))
+        return reversed(nodes.topo_sort() + [full_table_name])

datajoint/diagram.py CHANGED Viewed

@@ -1,12 +1,11 @@
 import networkx as nx
-import re
 import functools
 import io
 import logging
 import inspect
 from .table import Table
-from .dependencies import unite_master_parts
-from .user_tables import Manual, Imported, Computed, Lookup, Part
+from .dependencies import topo_sort
+from .user_tables import Manual, Imported, Computed, Lookup, Part, _get_tier, _AliasNode
 from .errors import DataJointError
 from .table import lookup_class_name
@@ -27,29 +26,6 @@ except:
 logger = logging.getLogger(__name__.split(".")[0])
-user_table_classes = (Manual, Lookup, Computed, Imported, Part)
-class _AliasNode:
-    """
-    special class to indicate aliased foreign keys
-    """
-    pass
-def _get_tier(table_name):
-    if not table_name.startswith("`"):
-        return _AliasNode
-    else:
-        try:
-            return next(
-                tier
-                for tier in user_table_classes
-                if re.fullmatch(tier.tier_regexp, table_name.split("`")[-2])
-            )
-        except StopIteration:
-            return None
 if not diagram_active:
@@ -59,8 +35,7 @@ if not diagram_active:
         Entity relationship diagram, currently disabled due to the lack of required packages: matplotlib and pygraphviz.
         To enable Diagram feature, please install both matplotlib and pygraphviz. For instructions on how to install
-        these two packages, refer to http://docs.datajoint.io/setup/Install-and-connect.html#python and
-        http://tutorials.datajoint.io/setting-up/datajoint-python.html
+        these two packages, refer to https://datajoint.com/docs/core/datajoint-python/0.14/client/install/
         """
         def __init__(self, *args, **kwargs):
@@ -72,19 +47,22 @@ else:
     class Diagram(nx.DiGraph):
         """
-        Entity relationship diagram.
+        Schema diagram showing tables and foreign keys between in the form of a directed
+        acyclic graph (DAG).  The diagram is derived from the connection.dependencies object.
         Usage:
         >>>  diag = Diagram(source)
-        source can be a base table object, a base table class, a schema, or a module that has a schema.
+        source can be a table object, a table class, a schema, or a module that has a schema.
         >>> diag.draw()
         draws the diagram using pyplot
         diag1 + diag2  - combines the two diagrams.
+        diag1 - diag2  - difference between diagrams
+        diag1 * diag2  - intersection of diagrams
         diag + n   - expands n levels of successors
         diag - n   - expands n levels of predecessors
         Thus dj.Diagram(schema.Table)+1-1 defines the diagram of immediate ancestors and descendants of schema.Table
@@ -94,6 +72,7 @@ else:
         """
         def __init__(self, source, context=None):
             if isinstance(source, Diagram):
                 # copy constructor
                 self.nodes_to_show = set(source.nodes_to_show)
@@ -154,7 +133,7 @@ else:
         def add_parts(self):
             """
-            Adds to the diagram the part tables of tables already included in the diagram
+            Adds to the diagram the part tables of all master tables already in the diagram
             :return:
             """
@@ -179,16 +158,6 @@ else:
             )
             return self
-        def topological_sort(self):
-            """:return:  list of nodes in topological order"""
-            return unite_master_parts(
-                list(
-                    nx.algorithms.dag.topological_sort(
-                        nx.DiGraph(self).subgraph(self.nodes_to_show)
-                    )
-                )
-            )
         def __add__(self, arg):
             """
             :param arg: either another Diagram or a positive integer.
@@ -256,6 +225,10 @@ else:
             self.nodes_to_show.intersection_update(arg.nodes_to_show)
             return self
+        def topo_sort(self):
+            """return nodes in lexicographical topological order"""
+            return topo_sort(self)
         def _make_graph(self):
             """
             Make the self.graph - a graph object ready for drawing
@@ -300,6 +273,36 @@ else:
             nx.relabel_nodes(graph, mapping, copy=False)
             return graph
+        @staticmethod
+        def _encapsulate_edge_attributes(graph):
+            """
+            Modifies the `nx.Graph`'s edge attribute `attr_map` to be a string representation
+            of the attribute map, and encapsulates the string in double quotes.
+            Changes the graph in place.
+            Implements workaround described in
+            https://github.com/pydot/pydot/issues/258#issuecomment-795798099
+            """
+            for u, v, *_, edgedata in graph.edges(data=True):
+                if "attr_map" in edgedata:
+                    graph.edges[u, v]["attr_map"] = '"{0}"'.format(edgedata["attr_map"])
+        @staticmethod
+        def _encapsulate_node_names(graph):
+            """
+            Modifies the `nx.Graph`'s node names string representations encapsulated in
+            double quotes.
+            Changes the graph in place.
+            Implements workaround described in
+            https://github.com/datajoint/datajoint-python/pull/1176
+            """
+            nx.relabel_nodes(
+                graph,
+                {node: '"{0}"'.format(node) for node in graph.nodes()},
+                copy=False,
+            )
         def make_dot(self):
             graph = self._make_graph()
             graph.nodes()
@@ -368,6 +371,8 @@ else:
                 for node, d in dict(graph.nodes(data=True)).items()
             }
+            self._encapsulate_node_names(graph)
+            self._encapsulate_edge_attributes(graph)
             dot = nx.drawing.nx_pydot.to_pydot(graph)
             for node in dot.get_nodes():
                 node.set_shape("circle")
@@ -385,11 +390,15 @@ else:
                     assert issubclass(cls, Table)
                     description = cls().describe(context=self.context).split("\n")
                     description = (
-                        "-" * 30
-                        if q.startswith("---")
-                        else q.replace("->", "&#8594;")
-                        if "->" in q
-                        else q.split(":")[0]
+                        (
+                            "-" * 30
+                            if q.startswith("---")
+                            else (
+                                q.replace("->", "&#8594;")
+                                if "->" in q
+                                else q.split(":")[0]
+                            )
+                        )
                         for q in description
                         if not q.startswith("#")
                     )
@@ -404,9 +413,14 @@ else:
             for edge in dot.get_edges():
                 # see https://graphviz.org/doc/info/attrs.html
-                src = edge.get_source().strip('"')
-                dest = edge.get_destination().strip('"')
+                src = edge.get_source()
+                dest = edge.get_destination()
                 props = graph.get_edge_data(src, dest)
+                if props is None:
+                    raise DataJointError(
+                        "Could not find edge with source "
+                        "'{}' and destination '{}'".format(src, dest)
+                    )
                 edge.set_color("#00000040")
                 edge.set_style("solid" if props["primary"] else "dashed")
                 master_part = graph.nodes[dest][

datajoint/expression.py CHANGED Viewed

@@ -9,6 +9,7 @@ from .fetch import Fetch, Fetch1
 from .preview import preview, repr_html
 from .condition import (
     AndList,
+    Top,
     Not,
     make_condition,
     assert_join_compatibility,
@@ -52,6 +53,7 @@ class QueryExpression:
     _connection = None
     _heading = None
     _support = None
+    _top = None
     # If the query will be using distinct
     _distinct = False
@@ -100,9 +102,11 @@ class QueryExpression:
     def from_clause(self):
         support = (
-            "(" + src.make_sql() + ") as `$%x`" % next(self._subquery_alias_count)
-            if isinstance(src, QueryExpression)
-            else src
+            (
+                "(" + src.make_sql() + ") as `$%x`" % next(self._subquery_alias_count)
+                if isinstance(src, QueryExpression)
+                else src
+            )
             for src in self.support
         )
         clause = next(support)
@@ -119,17 +123,33 @@ class QueryExpression:
             else " WHERE (%s)" % ")AND(".join(str(s) for s in self.restriction)
         )
+    def sorting_clauses(self):
+        if not self._top:
+            return ""
+        clause = ", ".join(
+            _wrap_attributes(
+                _flatten_attribute_list(self.primary_key, self._top.order_by)
+            )
+        )
+        if clause:
+            clause = f" ORDER BY {clause}"
+        if self._top.limit is not None:
+            clause += f" LIMIT {self._top.limit}{f' OFFSET {self._top.offset}' if self._top.offset else ''}"
+        return clause
     def make_sql(self, fields=None):
         """
         Make the SQL SELECT statement.
         :param fields: used to explicitly set the select attributes
         """
-        return "SELECT {distinct}{fields} FROM {from_}{where}".format(
+        return "SELECT {distinct}{fields} FROM {from_}{where}{sorting}".format(
             distinct="DISTINCT " if self._distinct else "",
             fields=self.heading.as_sql(fields or self.heading.names),
             from_=self.from_clause(),
             where=self.where_clause(),
+            sorting=self.sorting_clauses(),
         )
     # --------- query operators -----------
@@ -187,6 +207,14 @@ class QueryExpression:
         string, or an AndList.
         """
         attributes = set()
+        if isinstance(restriction, Top):
+            result = (
+                self.make_subquery()
+                if self._top and not self._top.__eq__(restriction)
+                else copy.copy(self)
+            )  # make subquery to avoid overwriting existing Top
+            result._top = restriction
+            return result
         new_condition = make_condition(self, restriction, attributes)
         if new_condition is True:
             return self  # restriction has no effect, return the same object
@@ -200,8 +228,10 @@ class QueryExpression:
             pass  # all ok
         # If the new condition uses any new attributes, a subquery is required.
         # However, Aggregation's HAVING statement works fine with aliased attributes.
-        need_subquery = isinstance(self, Union) or (
-            not isinstance(self, Aggregation) and self.heading.new_attributes
+        need_subquery = (
+            isinstance(self, Union)
+            or (not isinstance(self, Aggregation) and self.heading.new_attributes)
+            or self._top
         )
         if need_subquery:
             result = self.make_subquery()
@@ -537,19 +567,20 @@ class QueryExpression:
     def __len__(self):
         """:return: number of elements in the result set e.g. ``len(q1)``."""
-        return self.connection.query(
+        result = self.make_subquery() if self._top else copy.copy(self)
+        return result.connection.query(
             "SELECT {select_} FROM {from_}{where}".format(
                 select_=(
                     "count(*)"
-                    if any(self._left)
+                    if any(result._left)
                     else "count(DISTINCT {fields})".format(
-                        fields=self.heading.as_sql(
-                            self.primary_key, include_aliases=False
+                        fields=result.heading.as_sql(
+                            result.primary_key, include_aliases=False
                         )
                     )
                 ),
-                from_=self.from_clause(),
-                where=self.where_clause(),
+                from_=result.from_clause(),
+                where=result.where_clause(),
             )
         ).fetchone()[0]
@@ -617,18 +648,12 @@ class QueryExpression:
                     # -- move on to next entry.
                     return next(self)
-    def cursor(self, offset=0, limit=None, order_by=None, as_dict=False):
+    def cursor(self, as_dict=False):
         """
         See expression.fetch() for input description.
         :return: query cursor
         """
-        if offset and limit is None:
-            raise DataJointError("limit is required when offset is set")
         sql = self.make_sql()
-        if order_by is not None:
-            sql += " ORDER BY " + ", ".join(order_by)
-        if limit is not None:
-            sql += " LIMIT %d" % limit + (" OFFSET %d" % offset if offset else "")
         logger.debug(sql)
         return self.connection.query(sql, as_dict=as_dict)
@@ -699,21 +724,26 @@ class Aggregation(QueryExpression):
         fields = self.heading.as_sql(fields or self.heading.names)
         assert self._grouping_attributes or not self.restriction
         distinct = set(self.heading.names) == set(self.primary_key)
-        return "SELECT {distinct}{fields} FROM {from_}{where}{group_by}".format(
-            distinct="DISTINCT " if distinct else "",
-            fields=fields,
-            from_=self.from_clause(),
-            where=self.where_clause(),
-            group_by=""
-            if not self.primary_key
-            else (
-                " GROUP BY `%s`" % "`,`".join(self._grouping_attributes)
-                + (
+        return (
+            "SELECT {distinct}{fields} FROM {from_}{where}{group_by}{sorting}".format(
+                distinct="DISTINCT " if distinct else "",
+                fields=fields,
+                from_=self.from_clause(),
+                where=self.where_clause(),
+                group_by=(
                     ""
-                    if not self.restriction
-                    else " HAVING (%s)" % ")AND(".join(self.restriction)
-                )
-            ),
+                    if not self.primary_key
+                    else (
+                        " GROUP BY `%s`" % "`,`".join(self._grouping_attributes)
+                        + (
+                            ""
+                            if not self.restriction
+                            else " HAVING (%s)" % ")AND(".join(self.restriction)
+                        )
+                    )
+                ),
+                sorting=self.sorting_clauses(),
+            )
         )
     def __len__(self):
@@ -772,14 +802,19 @@ class Union(QueryExpression):
         ):
             # no secondary attributes: use UNION DISTINCT
             fields = arg1.primary_key
-            return "SELECT * FROM (({sql1}) UNION ({sql2})) as `_u{alias}`".format(
-                sql1=arg1.make_sql()
-                if isinstance(arg1, Union)
-                else arg1.make_sql(fields),
-                sql2=arg2.make_sql()
-                if isinstance(arg2, Union)
-                else arg2.make_sql(fields),
+            return "SELECT * FROM (({sql1}) UNION ({sql2})) as `_u{alias}{sorting}`".format(
+                sql1=(
+                    arg1.make_sql()
+                    if isinstance(arg1, Union)
+                    else arg1.make_sql(fields)
+                ),
+                sql2=(
+                    arg2.make_sql()
+                    if isinstance(arg2, Union)
+                    else arg2.make_sql(fields)
+                ),
                 alias=next(self.__count),
+                sorting=self.sorting_clauses(),
             )
         # with secondary attributes, use union of left join with antijoin
         fields = self.heading.names
@@ -839,7 +874,7 @@ class U:
     >>> dj.U().aggr(expr, n='count(*)')
     The following expressions both yield one element containing the number `n` of distinct values of attribute `attr` in
-    query expressio `expr`.
+    query expression `expr`.
     >>> dj.U().aggr(expr, n='count(distinct attr)')
     >>> dj.U().aggr(dj.U('attr').aggr(expr), 'n=count(*)')
@@ -931,3 +966,25 @@ class U:
         )
     aggregate = aggr  # alias for aggr
+def _flatten_attribute_list(primary_key, attrs):
+    """
+    :param primary_key: list of attributes in primary key
+    :param attrs: list of attribute names, which may include "KEY", "KEY DESC" or "KEY ASC"
+    :return: generator of attributes where "KEY" is replaced with its component attributes
+    """
+    for a in attrs:
+        if re.match(r"^\s*KEY(\s+[aA][Ss][Cc])?\s*$", a):
+            if primary_key:
+                yield from primary_key
+        elif re.match(r"^\s*KEY\s+[Dd][Ee][Ss][Cc]\s*$", a):
+            if primary_key:
+                yield from (q + " DESC" for q in primary_key)
+        else:
+            yield a
+def _wrap_attributes(attr):
+    for entry in attr:  # wrap attribute names in backquotes
+        yield re.sub(r"\b((?!asc|desc)\w+)\b", r"`\1`", entry, flags=re.IGNORECASE)

datajoint/external.py CHANGED Viewed

@@ -8,7 +8,7 @@ from .hash import uuid_from_buffer, uuid_from_file
 from .table import Table, FreeTable
 from .heading import Heading
 from .declare import EXTERNAL_TABLE_ROOT
-from . import s3
+from . import s3, errors
 from .utils import safe_write, safe_copy
 logger = logging.getLogger(__name__.split(".")[0])
@@ -141,7 +141,12 @@ class ExternalTable(Table):
         if self.spec["protocol"] == "s3":
             return self.s3.get(external_path)
         if self.spec["protocol"] == "file":
-            return Path(external_path).read_bytes()
+            try:
+                return Path(external_path).read_bytes()
+            except FileNotFoundError:
+                raise errors.MissingExternalFile(
+                    f"Missing external file {external_path}"
+                ) from None
         assert False
     def _remove_external_file(self, external_path):

datajoint 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl

Potentially problematic release.

datajoint 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl