datajoint 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datajoint might be problematic.
- datajoint/__init__.py +16 -14
- datajoint/admin.py +4 -2
- datajoint/attribute_adapter.py +1 -0
- datajoint/autopopulate.py +62 -20
- datajoint/blob.py +6 -5
- datajoint/cli.py +78 -0
- datajoint/condition.py +38 -5
- datajoint/connection.py +17 -10
- datajoint/declare.py +25 -6
- datajoint/dependencies.py +67 -33
- datajoint/diagram.py +58 -48
- datajoint/expression.py +92 -42
- datajoint/external.py +17 -10
- datajoint/fetch.py +18 -42
- datajoint/hash.py +1 -1
- datajoint/heading.py +14 -11
- datajoint/jobs.py +4 -3
- datajoint/plugin.py +5 -3
- datajoint/s3.py +6 -4
- datajoint/schemas.py +18 -19
- datajoint/settings.py +25 -11
- datajoint/table.py +27 -22
- datajoint/user_tables.py +30 -2
- datajoint/utils.py +2 -1
- datajoint/version.py +4 -1
- datajoint-0.14.4.dist-info/METADATA +703 -0
- datajoint-0.14.4.dist-info/RECORD +34 -0
- {datajoint-0.14.2.dist-info → datajoint-0.14.4.dist-info}/WHEEL +1 -1
- datajoint-0.14.4.dist-info/entry_points.txt +3 -0
- datajoint-0.14.2.dist-info/METADATA +0 -26
- datajoint-0.14.2.dist-info/RECORD +0 -33
- datajoint-0.14.2.dist-info/datajoint.pub +0 -6
- {datajoint-0.14.2.dist-info → datajoint-0.14.4.dist-info/licenses}/LICENSE.txt +0 -0
- {datajoint-0.14.2.dist-info → datajoint-0.14.4.dist-info}/top_level.txt +0 -0
datajoint/diagram.py
CHANGED
@@ -1,15 +1,14 @@
-import networkx as nx
-import re
 import functools
+import inspect
 import io
 import logging
-import inspect
-from .table import Table
-from .dependencies import unite_master_parts
-from .user_tables import Manual, Imported, Computed, Lookup, Part
-from .errors import DataJointError
-from .table import lookup_class_name
 
+import networkx as nx
+
+from .dependencies import topo_sort
+from .errors import DataJointError
+from .table import Table, lookup_class_name
+from .user_tables import Computed, Imported, Lookup, Manual, Part, _AliasNode, _get_tier
 
 try:
     from matplotlib import pyplot as plt
@@ -27,29 +26,6 @@ except:
 
 
 logger = logging.getLogger(__name__.split(".")[0])
-user_table_classes = (Manual, Lookup, Computed, Imported, Part)
-
-
-class _AliasNode:
-    """
-    special class to indicate aliased foreign keys
-    """
-
-    pass
-
-
-def _get_tier(table_name):
-    if not table_name.startswith("`"):
-        return _AliasNode
-    else:
-        try:
-            return next(
-                tier
-                for tier in user_table_classes
-                if re.fullmatch(tier.tier_regexp, table_name.split("`")[-2])
-            )
-        except StopIteration:
-            return None
 
 
 if not diagram_active:
@@ -59,8 +35,7 @@ if not diagram_active:
         Entity relationship diagram, currently disabled due to the lack of required packages: matplotlib and pygraphviz.
 
         To enable Diagram feature, please install both matplotlib and pygraphviz. For instructions on how to install
-        these two packages, refer to
-        http://tutorials.datajoint.io/setting-up/datajoint-python.html
+        these two packages, refer to https://datajoint.com/docs/core/datajoint-python/0.14/client/install/
         """
 
         def __init__(self, *args, **kwargs):
@@ -72,19 +47,22 @@ else:
 
     class Diagram(nx.DiGraph):
         """
-
+        Schema diagram showing tables and foreign keys between them in the form of a directed
+        acyclic graph (DAG). The diagram is derived from the connection.dependencies object.
 
         Usage:
 
         >>> diag = Diagram(source)
 
-        source can be a
+        source can be a table object, a table class, a schema, or a module that has a schema.
 
         >>> diag.draw()
 
         draws the diagram using pyplot
 
         diag1 + diag2 - combines the two diagrams.
+        diag1 - diag2 - difference between diagrams
+        diag1 * diag2 - intersection of diagrams
         diag + n - expands n levels of successors
         diag - n - expands n levels of predecessors
         Thus dj.Diagram(schema.Table)+1-1 defines the diagram of immediate ancestors and descendants of schema.Table
@@ -94,6 +72,7 @@ else:
         """
 
        def __init__(self, source, context=None):
+
            if isinstance(source, Diagram):
                # copy constructor
                self.nodes_to_show = set(source.nodes_to_show)
@@ -154,7 +133,7 @@ else:
 
         def add_parts(self):
             """
-            Adds to the diagram the part tables of tables already
+            Adds to the diagram the part tables of all master tables already in the diagram
             :return:
             """
 
@@ -179,16 +158,6 @@ else:
             )
             return self
 
-        def topological_sort(self):
-            """:return: list of nodes in topological order"""
-            return unite_master_parts(
-                list(
-                    nx.algorithms.dag.topological_sort(
-                        nx.DiGraph(self).subgraph(self.nodes_to_show)
-                    )
-                )
-            )
-
         def __add__(self, arg):
             """
             :param arg: either another Diagram or a positive integer.
@@ -256,6 +225,10 @@ else:
             self.nodes_to_show.intersection_update(arg.nodes_to_show)
             return self
 
+        def topo_sort(self):
+            """return nodes in lexicographical topological order"""
+            return topo_sort(self)
+
         def _make_graph(self):
             """
             Make the self.graph - a graph object ready for drawing
@@ -300,6 +273,36 @@ else:
             nx.relabel_nodes(graph, mapping, copy=False)
             return graph
 
+        @staticmethod
+        def _encapsulate_edge_attributes(graph):
+            """
+            Modifies the `nx.Graph`'s edge attribute `attr_map` to be a string representation
+            of the attribute map, and encapsulates the string in double quotes.
+            Changes the graph in place.
+
+            Implements workaround described in
+            https://github.com/pydot/pydot/issues/258#issuecomment-795798099
+            """
+            for u, v, *_, edgedata in graph.edges(data=True):
+                if "attr_map" in edgedata:
+                    graph.edges[u, v]["attr_map"] = '"{0}"'.format(edgedata["attr_map"])
+
+        @staticmethod
+        def _encapsulate_node_names(graph):
+            """
+            Modifies the `nx.Graph`'s node names string representations encapsulated in
+            double quotes.
+            Changes the graph in place.
+
+            Implements workaround described in
+            https://github.com/datajoint/datajoint-python/pull/1176
+            """
+            nx.relabel_nodes(
+                graph,
+                {node: '"{0}"'.format(node) for node in graph.nodes()},
+                copy=False,
+            )
+
         def make_dot(self):
             graph = self._make_graph()
             graph.nodes()
@@ -368,6 +371,8 @@
                 for node, d in dict(graph.nodes(data=True)).items()
             }
 
+            self._encapsulate_node_names(graph)
+            self._encapsulate_edge_attributes(graph)
             dot = nx.drawing.nx_pydot.to_pydot(graph)
             for node in dot.get_nodes():
                 node.set_shape("circle")
@@ -408,9 +413,14 @@
 
             for edge in dot.get_edges():
                 # see https://graphviz.org/doc/info/attrs.html
-                src = edge.get_source()
-                dest = edge.get_destination()
+                src = edge.get_source()
+                dest = edge.get_destination()
                 props = graph.get_edge_data(src, dest)
+                if props is None:
+                    raise DataJointError(
+                        "Could not find edge with source "
+                        "'{}' and destination '{}'".format(src, dest)
+                    )
                 edge.set_color("#00000040")
                 edge.set_style("solid" if props["primary"] else "dashed")
                 master_part = graph.nodes[dest][
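
Taken together, these diagram.py hunks move `_AliasNode` and `_get_tier` into `user_tables`, swap the removed `topological_sort()` for a `topo_sort()` method backed by `dependencies.topo_sort`, and document diagram set arithmetic (`-` difference and `*` intersection alongside the existing `+`). A hedged usage sketch under those docstrings follows; the `pipeline` module and its tables are hypothetical stand-ins, not part of this diff:

    import datajoint as dj

    import pipeline  # hypothetical module defining a dj.Schema and tables

    # immediate ancestors and descendants of one table, per the docstring
    local = dj.Diagram(pipeline.Session) + 1 - 1

    # set arithmetic between diagrams, newly documented in 0.14.4
    combined = dj.Diagram(pipeline.Session) + dj.Diagram(pipeline.Analysis)
    shared = dj.Diagram(pipeline) * combined  # intersection of the two diagrams

    # lexicographical topological order, replacing the removed topological_sort()
    ordered_tables = dj.Diagram(pipeline).topo_sort()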
datajoint/expression.py
CHANGED
@@ -1,22 +1,24 @@
-from itertools import count
-import logging
-import inspect
 import copy
+import inspect
+import logging
 import re
-from .settings import config
-from .errors import DataJointError
-from .fetch import Fetch, Fetch1
-from .preview import preview, repr_html
+from itertools import count
+
 from .condition import (
     AndList,
     Not,
-    make_condition,
+    PromiscuousOperand,
+    Top,
     assert_join_compatibility,
     extract_column_names,
-    PromiscuousOperand,
+    make_condition,
     translate_attribute,
 )
 from .declare import CONSTANT_LITERALS
+from .errors import DataJointError
+from .fetch import Fetch, Fetch1
+from .preview import preview, repr_html
+from .settings import config
 
 logger = logging.getLogger(__name__.split(".")[0])
 
@@ -52,6 +54,7 @@ class QueryExpression:
     _connection = None
     _heading = None
     _support = None
+    _top = None
 
     # If the query will be using distinct
     _distinct = False
@@ -121,17 +124,33 @@ class QueryExpression:
             else " WHERE (%s)" % ")AND(".join(str(s) for s in self.restriction)
         )
 
+    def sorting_clauses(self):
+        if not self._top:
+            return ""
+        clause = ", ".join(
+            _wrap_attributes(
+                _flatten_attribute_list(self.primary_key, self._top.order_by)
+            )
+        )
+        if clause:
+            clause = f" ORDER BY {clause}"
+        if self._top.limit is not None:
+            clause += f" LIMIT {self._top.limit}{f' OFFSET {self._top.offset}' if self._top.offset else ''}"
+
+        return clause
+
     def make_sql(self, fields=None):
         """
         Make the SQL SELECT statement.
 
         :param fields: used to explicitly set the select attributes
         """
-        return "SELECT {distinct}{fields} FROM {from_}{where}".format(
+        return "SELECT {distinct}{fields} FROM {from_}{where}{sorting}".format(
             distinct="DISTINCT " if self._distinct else "",
             fields=self.heading.as_sql(fields or self.heading.names),
             from_=self.from_clause(),
             where=self.where_clause(),
+            sorting=self.sorting_clauses(),
         )
 
     # --------- query operators -----------
@@ -189,6 +208,14 @@ class QueryExpression:
         string, or an AndList.
         """
         attributes = set()
+        if isinstance(restriction, Top):
+            result = (
+                self.make_subquery()
+                if self._top and not self._top.__eq__(restriction)
+                else copy.copy(self)
+            )  # make subquery to avoid overwriting existing Top
+            result._top = restriction
+            return result
         new_condition = make_condition(self, restriction, attributes)
         if new_condition is True:
             return self  # restriction has no effect, return the same object
@@ -202,8 +229,10 @@ class QueryExpression:
             pass  # all ok
         # If the new condition uses any new attributes, a subquery is required.
         # However, Aggregation's HAVING statement works fine with aliased attributes.
-        need_subquery = isinstance(self, Union) or (
-            not isinstance(self, Aggregation) and self.heading.new_attributes
+        need_subquery = (
+            isinstance(self, Union)
+            or (not isinstance(self, Aggregation) and self.heading.new_attributes)
+            or self._top
         )
         if need_subquery:
             result = self.make_subquery()
@@ -539,19 +568,20 @@ class QueryExpression:
 
     def __len__(self):
         """:return: number of elements in the result set e.g. ``len(q1)``."""
-        return self.connection.query(
+        result = self.make_subquery() if self._top else copy.copy(self)
+        return result.connection.query(
             "SELECT {select_} FROM {from_}{where}".format(
                 select_=(
                     "count(*)"
-                    if any(self._left)
+                    if any(result._left)
                     else "count(DISTINCT {fields})".format(
-                        fields=self.heading.as_sql(
-                            self.primary_key, include_aliases=False
+                        fields=result.heading.as_sql(
+                            result.primary_key, include_aliases=False
                         )
                     )
                 ),
-                from_=self.from_clause(),
-                where=self.where_clause(),
+                from_=result.from_clause(),
+                where=result.where_clause(),
             )
         ).fetchone()[0]
 
@@ -619,18 +649,12 @@ class QueryExpression:
             # -- move on to next entry.
             return next(self)
 
-    def cursor(self, offset=0, limit=None, order_by=None, as_dict=False):
+    def cursor(self, as_dict=False):
         """
         See expression.fetch() for input description.
         :return: query cursor
         """
-        if offset and limit is None:
-            raise DataJointError("limit is required when offset is set")
         sql = self.make_sql()
-        if order_by is not None:
-            sql += " ORDER BY " + ", ".join(order_by)
-        if limit is not None:
-            sql += " LIMIT %d" % limit + (" OFFSET %d" % offset if offset else "")
         logger.debug(sql)
         return self.connection.query(sql, as_dict=as_dict)
 
@@ -701,23 +725,26 @@ class Aggregation(QueryExpression):
         fields = self.heading.as_sql(fields or self.heading.names)
         assert self._grouping_attributes or not self.restriction
         distinct = set(self.heading.names) == set(self.primary_key)
-        return (
-            "SELECT {distinct}{fields} FROM {from_}{where}{group_by}".format(
-                distinct="DISTINCT " if distinct else "",
-                fields=fields,
-                from_=self.from_clause(),
-                where=self.where_clause(),
-                group_by=""
-                if not self.primary_key
-                else (
-                    " GROUP BY `%s`" % "`,`".join(self._grouping_attributes)
-                    + (
-                        ""
-                        if not self.restriction
-                        else " HAVING (%s)" % ")AND(".join(self.restriction)
+        return (
+            "SELECT {distinct}{fields} FROM {from_}{where}{group_by}{sorting}".format(
+                distinct="DISTINCT " if distinct else "",
+                fields=fields,
+                from_=self.from_clause(),
+                where=self.where_clause(),
+                group_by=(
+                    ""
+                    if not self.primary_key
+                    else (
+                        " GROUP BY `%s`" % "`,`".join(self._grouping_attributes)
+                        + (
+                            ""
+                            if not self.restriction
+                            else " HAVING (%s)" % ")AND(".join(self.restriction)
+                        )
                     )
-                ),
-            )
+                ),
+                sorting=self.sorting_clauses(),
+            )
         )
 
     def __len__(self):
@@ -776,7 +803,7 @@ class Union(QueryExpression):
         ):
             # no secondary attributes: use UNION DISTINCT
             fields = arg1.primary_key
-            return "SELECT * FROM (({sql1}) UNION ({sql2})) as `_u{alias}`".format(
+            return "SELECT * FROM (({sql1}) UNION ({sql2})) as `_u{alias}{sorting}`".format(
                 sql1=(
                     arg1.make_sql()
                     if isinstance(arg1, Union)
@@ -788,6 +815,7 @@ class Union(QueryExpression):
                     else arg2.make_sql(fields)
                 ),
                 alias=next(self.__count),
+                sorting=self.sorting_clauses(),
             )
         # with secondary attributes, use union of left join with antijoin
         fields = self.heading.names
@@ -939,3 +967,25 @@ class U:
         )
 
     aggregate = aggr  # alias for aggr
+
+
+def _flatten_attribute_list(primary_key, attrs):
+    """
+    :param primary_key: list of attributes in primary key
+    :param attrs: list of attribute names, which may include "KEY", "KEY DESC" or "KEY ASC"
+    :return: generator of attributes where "KEY" is replaced with its component attributes
+    """
+    for a in attrs:
+        if re.match(r"^\s*KEY(\s+[aA][Ss][Cc])?\s*$", a):
+            if primary_key:
+                yield from primary_key
+        elif re.match(r"^\s*KEY\s+[Dd][Ee][Ss][Cc]\s*$", a):
+            if primary_key:
+                yield from (q + " DESC" for q in primary_key)
+        else:
+            yield a
+
+
+def _wrap_attributes(attr):
+    for entry in attr:  # wrap attribute names in backquotes
+        yield re.sub(r"\b((?!asc|desc)\w+)\b", r"`\1`", entry, flags=re.IGNORECASE)
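
The common thread in these expression.py hunks is the new `Top` condition: ORDER BY, LIMIT, and OFFSET are no longer appended by `cursor()` at fetch time but are stored on the expression as `_top` when a `Top` is passed to `restrict`, then rendered by `sorting_clauses()` into every `make_sql()` variant (plain, aggregation, union). A hedged sketch of what this enables; `session_table` and `session_date` are hypothetical names, and `Top`'s keyword arguments are assumed to follow the `limit`/`order_by`/`offset` attributes read in `sorting_clauses()`:

    from datajoint.condition import Top

    # restricting with Top folds sorting and limiting into the expression itself,
    # so the result composes like any other query and is only rendered into SQL
    recent = session_table.restrict(Top(limit=5, order_by=["session_date DESC"]))
    keys = recent.fetch("KEY")  # emitted SQL gains: ORDER BY `session_date` DESC LIMIT 5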
datajoint/external.py
CHANGED
@@ -1,15 +1,17 @@
-from pathlib import Path, PurePosixPath, PureWindowsPath
+import logging
 from collections.abc import Mapping
+from pathlib import Path, PurePosixPath, PureWindowsPath
+
 from tqdm import tqdm
-import logging
-from .settings import config
+
+from . import errors, s3
+from .declare import EXTERNAL_TABLE_ROOT
 from .errors import DataJointError, MissingExternalFile
 from .hash import uuid_from_buffer, uuid_from_file
-from .table import Table, FreeTable
 from .heading import Heading
-from .declare import EXTERNAL_TABLE_ROOT
-from . import s3, errors
-from .utils import safe_write, safe_copy
+from .settings import config
+from .table import FreeTable, Table
+from .utils import safe_copy, safe_write
 
 logger = logging.getLogger(__name__.split(".")[0])
 
@@ -22,7 +24,7 @@ SUPPORT_MIGRATED_BLOBS = True  # support blobs migrated from datajoint 0.11.*
 
 def subfold(name, folds):
     """
-    subfolding for external storage:
+    subfolding for external storage: e.g. subfold('aBCdefg', (2, 3)) --> ['ab','cde']
     """
     return (
         (name[: folds[0]].lower(),) + subfold(name[folds[0] :], folds[1:])
@@ -141,7 +143,12 @@ class ExternalTable(Table):
         if self.spec["protocol"] == "s3":
             return self.s3.get(external_path)
         if self.spec["protocol"] == "file":
-            return Path(external_path).read_bytes()
+            try:
+                return Path(external_path).read_bytes()
+            except FileNotFoundError:
+                raise errors.MissingExternalFile(
+                    f"Missing external file {external_path}"
+                ) from None
         assert False
 
     def _remove_external_file(self, external_path):
@@ -273,7 +280,7 @@ class ExternalTable(Table):
 
         # check if the remote file already exists and verify that it matches
         check_hash = (self & {"hash": uuid}).fetch("contents_hash")
-        if check_hash:
+        if check_hash.size:
            # the tracking entry exists, check that it's the same file as before
            if contents_hash != check_hash[0]:
                raise DataJointError(
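
The updated `subfold` docstring now carries its own example: the hash-like name is sliced into lowercase folder names so external files fan out across nested subdirectories. A minimal standalone sketch of the recursion shown in the hunk; the terminating branch for empty `folds` is assumed, since the diff shows only the recursive case:

    def subfold(name, folds):
        """e.g. subfold('aBCdefg', (2, 3)) -> ('ab', 'cde')"""
        return (
            (name[: folds[0]].lower(),) + subfold(name[folds[0]:], folds[1:])
            if folds
            else ()
        )

    assert subfold("aBCdefg", (2, 3)) == ("ab", "cde")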
datajoint/fetch.py
CHANGED
@@ -1,20 +1,20 @@
-from functools import partial
-from pathlib import Path
-import logging
-import pandas
 import itertools
-import re
 import json
-import numpy as np
-import uuid
 import numbers
+import uuid
+from functools import partial
+from pathlib import Path
+
+import numpy as np
+import pandas
+
+from datajoint.condition import Top
+
 from . import blob, hash
 from .errors import DataJointError
 from .settings import config
 from .utils import safe_write
 
-logger = logging.getLogger(__name__.split(".")[0])
-
 
 class key:
     """
@@ -119,21 +119,6 @@ def _get(connection, attr, data, squeeze, download_path):
     )
 
 
-def _flatten_attribute_list(primary_key, attrs):
-    """
-    :param primary_key: list of attributes in primary key
-    :param attrs: list of attribute names, which may include "KEY", "KEY DESC" or "KEY ASC"
-    :return: generator of attributes where "KEY" is replaces with its component attributes
-    """
-    for a in attrs:
-        if re.match(r"^\s*KEY(\s+[aA][Ss][Cc])?\s*$", a):
-            yield from primary_key
-        elif re.match(r"^\s*KEY\s+[Dd][Ee][Ss][Cc]\s*$", a):
-            yield from (q + " DESC" for q in primary_key)
-        else:
-            yield a
-
-
 class Fetch:
     """
     A fetch object that handles retrieving elements from the table expression.
@@ -153,7 +138,7 @@ class Fetch:
         format=None,
         as_dict=None,
         squeeze=False,
-        download_path="."
+        download_path=".",
     ):
         """
         Fetches the expression results from the database into an np.array or list of dictionaries and
@@ -174,13 +159,13 @@ class Fetch:
         :param download_path: for fetches that download data, e.g. attachments
         :return: the contents of the table in the form of a structured numpy.array or a dict list
         """
-        if order_by is not None:
-            # if 'order_by' passed in a string, make into list
-            if isinstance(order_by, str):
-                order_by = [order_by]
-            # expand "KEY" or "KEY DESC"
-            order_by = list(
-                _flatten_attribute_list(self._expression.primary_key, order_by)
+        if offset or order_by or limit:
+            self._expression = self._expression.restrict(
+                Top(
+                    limit,
+                    order_by,
+                    offset,
+                )
             )
 
         attrs_as_dict = as_dict and attrs
@@ -212,13 +197,6 @@ class Fetch:
                 'use "array" or "frame"'.format(format)
             )
 
-        if limit is None and offset is not None:
-            logger.warning(
-                "Offset set, but no limit. Setting limit to a large number. "
-                "Consider setting a limit explicitly."
-            )
-            limit = 8000000000  # just a very large number to effect no limit
-
         get = partial(
             _get,
             self._expression.connection,
@@ -257,9 +235,7 @@ class Fetch:
             ]
             ret = return_values[0] if len(attrs) == 1 else return_values
         else:  # fetch all attributes as a numpy.record_array or pandas.DataFrame
-            cur = self._expression.cursor(
-                as_dict=as_dict, limit=limit, offset=offset, order_by=order_by
-            )
+            cur = self._expression.cursor(as_dict=as_dict)
             heading = self._expression.heading
             if as_dict:
                 ret = [
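
Net effect of the fetch.py hunks: `fetch()` keeps its `order_by`, `limit`, and `offset` arguments, but instead of threading them into `cursor()` (or faking "no limit" with a very large number when only `offset` was given), it rewrites them into a `Top` restriction before opening the cursor. Roughly, and with a hypothetical `session_table`, the two spellings below should compile to the same ORDER BY/LIMIT/OFFSET clause; the positional `Top(limit, order_by, offset)` order follows the call in the hunk above:

    from datajoint.condition import Top

    # fetch-time arguments, unchanged for callers
    rows = session_table.fetch(order_by=["session_date DESC"], limit=10, offset=20)

    # approximately what fetch() now does internally
    rows = session_table.restrict(Top(10, ["session_date DESC"], 20)).fetch()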
datajoint/hash.py
CHANGED
datajoint/heading.py
CHANGED
@@ -1,18 +1,19 @@
-import numpy as np
-from collections import namedtuple, defaultdict
-from itertools import chain
-import re
 import logging
-
+import re
+from collections import defaultdict, namedtuple
+from itertools import chain
+
+import numpy as np
+
+from .attribute_adapter import AttributeAdapter, get_adapter
 from .declare import (
-    UUID_DATA_TYPE,
-    SPECIAL_TYPES,
-    TYPE_PATTERN,
     EXTERNAL_TYPES,
     NATIVE_TYPES,
+    SPECIAL_TYPES,
+    TYPE_PATTERN,
+    UUID_DATA_TYPE,
 )
-from .errors import DataJointError, FILEPATH_FEATURE_SWITCH, _support_filepath_types
-from .attribute_adapter import AttributeAdapter, get_adapter
+from .errors import FILEPATH_FEATURE_SWITCH, DataJointError, _support_filepath_types
 
 logger = logging.getLogger(__name__.split(".")[0])
 
@@ -33,6 +34,7 @@ default_attribute_properties = (
     is_attachment=False,
     is_filepath=False,
     is_external=False,
+    is_hidden=False,
     adapter=None,
     store=None,
     unsupported=False,
@@ -120,7 +122,7 @@ class Heading:
     def attributes(self):
         if self._attributes is None:
             self._init_from_database()  # lazy loading from database
-        return self._attributes
+        return {k: v for k, v in self._attributes.items() if not v.is_hidden}
 
     @property
     def names(self):
@@ -300,6 +302,7 @@ class Heading:
                 store=None,
                 is_external=False,
                 attribute_expression=None,
+                is_hidden=attr["name"].startswith("_"),
             )
 
             if any(TYPE_PATTERN[t].match(attr["type"]) for t in ("INTEGER", "FLOAT")):
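
The heading.py hunks introduce hidden attributes: `is_hidden` joins the default attribute properties, is set for any database column whose name starts with an underscore, and the `attributes` property filters those columns out, so they disappear from headings and fetches. A self-contained sketch of that filter; the attribute names are made up:

    from types import SimpleNamespace

    attrs = {
        "subject_id": SimpleNamespace(is_hidden=False),
        "_hidden_meta": SimpleNamespace(is_hidden=True),  # leading "_" marks it hidden
    }
    # mirrors: {k: v for k, v in self._attributes.items() if not v.is_hidden}
    visible = {k: v for k, v in attrs.items() if not v.is_hidden}
    assert list(visible) == ["subject_id"]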
datajoint/jobs.py
CHANGED
@@ -1,10 +1,11 @@
 import os
-from .hash import key_hash
 import platform
-
-from .settings import config
+
 from .errors import DuplicateError
+from .hash import key_hash
 from .heading import Heading
+from .settings import config
+from .table import Table
 
 ERROR_MESSAGE_LENGTH = 2047
 TRUNCATION_APPENDIX = "...truncated"
datajoint/plugin.py
CHANGED
@@ -1,9 +1,11 @@
-import logging
-import pkg_resources
+import logging
 from pathlib import Path
+
+import pkg_resources
 from cryptography.exceptions import InvalidSignature
 from otumat import hash_pkg, verify
-from .settings import config
+
+from .settings import config
 
 logger = logging.getLogger(__name__.split(".")[0])
 