datajoint 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of datajoint might be problematic.
- datajoint/__init__.py +16 -14
- datajoint/admin.py +4 -2
- datajoint/attribute_adapter.py +1 -0
- datajoint/autopopulate.py +62 -20
- datajoint/blob.py +6 -5
- datajoint/cli.py +78 -0
- datajoint/condition.py +38 -5
- datajoint/connection.py +17 -10
- datajoint/declare.py +25 -6
- datajoint/dependencies.py +67 -33
- datajoint/diagram.py +58 -48
- datajoint/expression.py +92 -42
- datajoint/external.py +17 -10
- datajoint/fetch.py +18 -42
- datajoint/hash.py +1 -1
- datajoint/heading.py +14 -11
- datajoint/jobs.py +4 -3
- datajoint/plugin.py +5 -3
- datajoint/s3.py +6 -4
- datajoint/schemas.py +18 -19
- datajoint/settings.py +25 -11
- datajoint/table.py +27 -22
- datajoint/user_tables.py +30 -2
- datajoint/utils.py +2 -1
- datajoint/version.py +4 -1
- datajoint-0.14.4.dist-info/METADATA +703 -0
- datajoint-0.14.4.dist-info/RECORD +34 -0
- {datajoint-0.14.2.dist-info → datajoint-0.14.4.dist-info}/WHEEL +1 -1
- datajoint-0.14.4.dist-info/entry_points.txt +3 -0
- datajoint-0.14.2.dist-info/METADATA +0 -26
- datajoint-0.14.2.dist-info/RECORD +0 -33
- datajoint-0.14.2.dist-info/datajoint.pub +0 -6
- {datajoint-0.14.2.dist-info → datajoint-0.14.4.dist-info/licenses}/LICENSE.txt +0 -0
- {datajoint-0.14.2.dist-info → datajoint-0.14.4.dist-info}/top_level.txt +0 -0
datajoint/__init__.py
CHANGED
@@ -37,6 +37,7 @@ __all__ = [
     "Part",
     "Not",
     "AndList",
+    "Top",
     "U",
     "Diagram",
     "Di",
@@ -51,25 +52,26 @@ __all__ = [
     "key",
     "key_hash",
     "logger",
+    "cli",
 ]
 
-from .
-from .
-from .
-from .connection import conn, Connection
-from .schemas import Schema
-from .schemas import VirtualModule, list_schemas
-from .table import Table, FreeTable
-from .user_tables import Manual, Lookup, Imported, Computed, Part
-from .expression import Not, AndList, U
-from .diagram import Diagram
-from .admin import set_password, kill
+from . import errors
+from .admin import kill, set_password
+from .attribute_adapter import AttributeAdapter
 from .blob import MatCell, MatStruct
+from .cli import cli
+from .connection import Connection, conn
+from .diagram import Diagram
+from .errors import DataJointError
+from .expression import AndList, Not, Top, U
 from .fetch import key
 from .hash import key_hash
-from .
-from . import
-from .
+from .logging import logger
+from .schemas import Schema, VirtualModule, list_schemas
+from .settings import config
+from .table import FreeTable, Table
+from .user_tables import Computed, Imported, Lookup, Manual, Part
+from .version import __version__
 
 ERD = Di = Diagram  # Aliases for Diagram
 schema = Schema  # Aliases for Schema
datajoint/admin.py
CHANGED
@@ -1,10 +1,12 @@
-import pymysql
+import logging
 from getpass import getpass
+
+import pymysql
 from packaging import version
+
 from .connection import conn
 from .settings import config
 from .utils import user_choice
-import logging
 
 logger = logging.getLogger(__name__.split(".")[0])
 
datajoint/attribute_adapter.py
CHANGED
datajoint/autopopulate.py
CHANGED
@@ -1,17 +1,20 @@
 """This module defines class dj.AutoPopulate"""
 
-import logging
+import contextlib
 import datetime
-import traceback
-import random
 import inspect
+import logging
+import multiprocessing as mp
+import random
+import signal
+import traceback
+
+import deepdiff
 from tqdm import tqdm
-
-from .expression import QueryExpression, AndList
+
 from .errors import DataJointError, LostConnectionError
-import signal
-import multiprocessing as mp
-import contextlib
+from .expression import AndList, QueryExpression
+from .hash import key_hash
 
 # noinspection PyExceptionInherit,PyCallingNonCallable
 
@@ -23,7 +26,7 @@ logger = logging.getLogger(__name__.split(".")[0])
 
 def _initialize_populate(table, jobs, populate_kwargs):
     """
-    Initialize the process for
+    Initialize the process for multiprocessing.
     Saves the unpickled copy of the table to the current process and reconnects.
     """
     process = mp.current_process()
@@ -153,6 +156,7 @@ class AutoPopulate:
     def populate(
         self,
         *restrictions,
+        keys=None,
         suppress_errors=False,
         return_exception_objects=False,
         reserve_jobs=False,
@@ -169,6 +173,8 @@ class AutoPopulate:
 
         :param restrictions: a list of restrictions each restrict
             (table.key_source - target.proj())
+        :param keys: The list of keys (dicts) to send to self.make().
+            If None (default), then use self.key_source to query they keys.
         :param suppress_errors: if True, do not terminate execution.
         :param return_exception_objects: return error objects instead of just error messages
         :param reserve_jobs: if True, reserve jobs to populate in asynchronous fashion
@@ -206,7 +212,10 @@ class AutoPopulate:
 
         old_handler = signal.signal(signal.SIGTERM, handler)
 
-        keys = (self._jobs_to_do(restrictions) - self.target).fetch("KEY", limit=limit)
+        if keys is None:
+            keys = (self._jobs_to_do(restrictions) - self.target).fetch(
+                "KEY", limit=limit
+            )
 
         # exclude "error", "ignore" or "reserved" jobs
         if reserve_jobs:
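
The new `keys` argument lets a caller supply an explicit list of keys instead of having populate query `key_source`. A minimal sketch of how it might be called (the table and key names are hypothetical):

    # populate two specific keys rather than everything in (key_source - target)
    Analysis.populate(keys=[{"session_id": 1}, {"session_id": 2}], display_progress=True)
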
@@ -256,13 +265,16 @@ class AutoPopulate:
             # spawn multiple processes
             self.connection.close()  # disconnect parent process from MySQL server
             del self.connection._conn.ctx  # SSLContext is not pickleable
-            with mp.Pool(
-                processes, _initialize_populate, (self, jobs, populate_kwargs)
-            ) as pool, (
-                tqdm(desc="Processes: ", total=nkeys)
-                if display_progress
-                else contextlib.nullcontext()
-            ) as progress_bar:
+            with (
+                mp.Pool(
+                    processes, _initialize_populate, (self, jobs, populate_kwargs)
+                ) as pool,
+                (
+                    tqdm(desc="Processes: ", total=nkeys)
+                    if display_progress
+                    else contextlib.nullcontext()
+                ) as progress_bar,
+            ):
                 for status in pool.imap(_call_populate1, keys, chunksize=1):
                     if status is True:
                         success_list.append(1)
@@ -295,6 +307,7 @@ class AutoPopulate:
        :return: (key, error) when suppress_errors=True,
            True if successfully invoke one `make()` call, otherwise False
        """
+        # use the legacy `_make_tuples` callback.
        make = self._make_tuples if hasattr(self, "_make_tuples") else self.make
 
        if jobs is not None and not jobs.reserve(
@@ -302,17 +315,46 @@ class AutoPopulate:
        ):
            return False
 
-        self.connection.start_transaction()
+        # if make is a generator, it transaction can be delayed until the final stage
+        is_generator = inspect.isgeneratorfunction(make)
+        if not is_generator:
+            self.connection.start_transaction()
+
        if key in self.target:  # already populated
-            self.connection.cancel_transaction()
+            if not is_generator:
+                self.connection.cancel_transaction()
            if jobs is not None:
                jobs.complete(self.target.table_name, self._job_key(key))
            return False
 
        logger.debug(f"Making {key} -> {self.target.full_table_name}")
        self.__class__._allow_insert = True
+
        try:
-            make(dict(key), **(make_kwargs or {}))
+            if not is_generator:
+                make(dict(key), **(make_kwargs or {}))
+            else:
+                # tripartite make - transaction is delayed until the final stage
+                gen = make(dict(key), **(make_kwargs or {}))
+                fetched_data = next(gen)
+                fetch_hash = deepdiff.DeepHash(
+                    fetched_data, ignore_iterable_order=False
+                )[fetched_data]
+                computed_result = next(gen)  # perform the computation
+                # fetch and insert inside a transaction
+                self.connection.start_transaction()
+                gen = make(dict(key), **(make_kwargs or {}))  # restart make
+                fetched_data = next(gen)
+                if (
+                    fetch_hash
+                    != deepdiff.DeepHash(fetched_data, ignore_iterable_order=False)[
+                        fetched_data
+                    ]
+                ):  # rollback due to referential integrity fail
+                    self.connection.cancel_transaction()
+                    return False
+                gen.send(computed_result)  # insert
+
        except (KeyboardInterrupt, SystemExit, Exception) as error:
            try:
                self.connection.cancel_transaction()
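
The generator branch above splits `make` into fetch, compute, and insert phases, deferring the transaction to the final phase and using a `deepdiff` hash to detect upstream data changing in between. A sketch of a `make` written against this protocol, inferred from the `populate1` logic above (table and attribute names are hypothetical):

    import datajoint as dj

    class Analysis(dj.Computed):
        definition = """
        -> Recording   # hypothetical upstream table
        ---
        mean_signal : float
        """

        def make(self, key):
            # phase 1: fetch (runs outside the transaction)
            signal = (Recording & key).fetch1("signal")
            computed = yield (signal,)  # populate1 hashes this yielded value
            if computed is None:
                # phase 2: compute (also outside the transaction)
                computed = dict(key, mean_signal=float(sum(signal)) / len(signal))
                yield computed
            # phase 3: insert (inside the transaction; populate1 re-fetches,
            # verifies the hash, then sends the cached result back in)
            self.insert1(computed)
            yield
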
datajoint/blob.py
CHANGED
@@ -3,17 +3,18 @@
 compatibility with Matlab-based serialization implemented by mYm.
 """
 
-import zlib
-from itertools import repeat
 import collections
-from decimal import Decimal
 import datetime
 import uuid
+import zlib
+from decimal import Decimal
+from itertools import repeat
+
 import numpy as np
+
 from .errors import DataJointError
 from .settings import config
 
-
 deserialize_lookup = {
     0: {"dtype": None, "scalar_type": "UNKNOWN"},
     1: {"dtype": None, "scalar_type": "CELL"},
@@ -204,7 +205,7 @@ class Blob:
             return self.pack_dict(obj)
         if isinstance(obj, str):
             return self.pack_string(obj)
-        if isinstance(obj, bytes):
+        if isinstance(obj, (bytes, bytearray)):
             return self.pack_bytes(obj)
         if isinstance(obj, collections.abc.MutableSequence):
            return self.pack_list(obj)
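
The `pack_bytes` branch now also accepts `bytearray`; a small sketch of the loosened check:

    from datajoint.blob import pack, unpack

    buf = pack(bytearray(b"\x00\x01\x02"))  # 0.14.2 accepted only bytes here
    restored = unpack(buf)                  # round-trips through the mYm-compatible format
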
datajoint/cli.py
ADDED
@@ -0,0 +1,78 @@
+import argparse
+from code import interact
+from collections import ChainMap
+
+import datajoint as dj
+
+
+def cli(args: list = None):
+    """
+    Console interface for DataJoint Python
+
+    :param args: List of arguments to be passed in, defaults to reading stdin
+    :type args: list, optional
+    """
+    parser = argparse.ArgumentParser(
+        prog="datajoint",
+        description="DataJoint console interface.",
+        conflict_handler="resolve",
+    )
+    parser.add_argument(
+        "-V", "--version", action="version", version=f"{dj.__name__} {dj.__version__}"
+    )
+    parser.add_argument(
+        "-u",
+        "--user",
+        type=str,
+        default=dj.config["database.user"],
+        required=False,
+        help="Datajoint username",
+    )
+    parser.add_argument(
+        "-p",
+        "--password",
+        type=str,
+        default=dj.config["database.password"],
+        required=False,
+        help="Datajoint password",
+    )
+    parser.add_argument(
+        "-h",
+        "--host",
+        type=str,
+        default=dj.config["database.host"],
+        required=False,
+        help="Datajoint host",
+    )
+    parser.add_argument(
+        "-s",
+        "--schemas",
+        nargs="+",
+        type=str,
+        required=False,
+        help="A list of virtual module mappings in `db:schema ...` format",
+    )
+    kwargs = vars(parser.parse_args(args))
+    mods = {}
+    if kwargs["user"]:
+        dj.config["database.user"] = kwargs["user"]
+    if kwargs["password"]:
+        dj.config["database.password"] = kwargs["password"]
+    if kwargs["host"]:
+        dj.config["database.host"] = kwargs["host"]
+    if kwargs["schemas"]:
+        for vm in kwargs["schemas"]:
+            d, m = vm.split(":")
+            mods[m] = dj.create_virtual_module(m, d)
+
+    banner = "dj repl\n"
+    if mods:
+        modstr = "\n".join(" - {}".format(m) for m in mods)
+        banner += "\nschema modules:\n\n" + modstr + "\n"
+    interact(banner, local=dict(ChainMap(mods, locals(), globals())))
+
+    raise SystemExit
+
+
+if __name__ == "__main__":
+    cli()
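
A sketch of how the new console interface might be invoked; the host and `db:schema` mapping are made up, and the three-line entry_points.txt added by this release presumably wires `cli` up as a console script:

    # from Python (passing an argument list instead of reading stdin):
    from datajoint.cli import cli
    cli(["--host", "localhost", "-s", "my_database:mydb"])
    # drops into a REPL with the virtual module `mydb` bound to schema `my_database`
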
datajoint/condition.py
CHANGED
@@ -1,14 +1,18 @@
 """ methods for generating SQL WHERE clauses from datajoint restriction conditions """
 
-import inspect
 import collections
-import re
-import uuid
 import datetime
 import decimal
+import inspect
+import json
+import re
+import uuid
+from dataclasses import dataclass
+from typing import List, Union
+
 import numpy
 import pandas
-
+
 from .errors import DataJointError
 
 JSON_PATTERN = re.compile(
@@ -61,6 +65,35 @@ class AndList(list):
         super().append(restriction)
 
 
+@dataclass
+class Top:
+    """
+    A restriction to the top entities of a query.
+    In SQL, this corresponds to ORDER BY ... LIMIT ... OFFSET
+    """
+
+    limit: Union[int, None] = 1
+    order_by: Union[str, List[str]] = "KEY"
+    offset: int = 0
+
+    def __post_init__(self):
+        self.order_by = self.order_by or ["KEY"]
+        self.offset = self.offset or 0
+
+        if self.limit is not None and not isinstance(self.limit, int):
+            raise TypeError("Top limit must be an integer")
+        if not isinstance(self.order_by, (str, collections.abc.Sequence)) or not all(
+            isinstance(r, str) for r in self.order_by
+        ):
+            raise TypeError("Top order_by attributes must all be strings")
+        if not isinstance(self.offset, int):
+            raise TypeError("The offset argument must be an integer")
+        if self.offset and self.limit is None:
+            self.limit = 999999999999  # arbitrary large number to allow query
+        if isinstance(self.order_by, str):
+            self.order_by = [self.order_by]
+
+
 class Not:
     """invert restriction"""
 
@@ -112,7 +145,7 @@ def make_condition(query_expression, condition, columns):
         condition.
     :return: an SQL condition string or a boolean value.
     """
-    from .expression import
+    from .expression import Aggregation, QueryExpression, U
 
    def prep_value(k, v):
        """prepare SQL condition"""
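
`Top` is applied like any other restriction operand; a hedged sketch of its use (the `Session` table and `session_date` attribute are hypothetical):

    import datajoint as dj

    # the five most recent sessions; compiles to ORDER BY ... LIMIT 5 OFFSET 0
    recent = Session & dj.Top(limit=5, order_by="session_date DESC")
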
datajoint/connection.py
CHANGED
@@ -3,20 +3,22 @@ This module contains the Connection class that manages the connection to the database, and
 the ``conn`` function that provides access to a persistent connection in datajoint.
 """
 
+import logging
+import pathlib
+import re
 import warnings
 from contextlib import contextmanager
-import pymysql as client
-import logging
 from getpass import getpass
-import re
-import pathlib
 
-
+import pymysql as client
+
 from . import errors
-from .dependencies import Dependencies
 from .blob import pack, unpack
+from .dependencies import Dependencies
 from .hash import uuid_from_buffer
 from .plugin import connection_plugins
+from .settings import config
+from .version import __version__
 
 logger = logging.getLogger(__name__.split(".")[0])
 query_log_max_length = 300
@@ -190,15 +192,20 @@ class Connection:
         self.conn_info["ssl_input"] = use_tls
         self.conn_info["host_input"] = host_input
         self.init_fun = init_fun
-        logger.info("Connecting {user}@{host}:{port}".format(**self.conn_info))
         self._conn = None
         self._query_cache = None
         connect_host_hook(self)
         if self.is_connected:
-            logger.info(
+            logger.info(
+                "DataJoint {version} connected to {user}@{host}:{port}".format(
+                    version=__version__, **self.conn_info
+                )
+            )
             self.connection_id = self.query("SELECT connection_id()").fetchone()[0]
         else:
-            raise errors.LostConnectionError(
+            raise errors.LostConnectionError(
+                "Connection failed {user}@{host}:{port}".format(**self.conn_info)
+            )
         self._in_transaction = False
         self.schemas = dict()
         self.dependencies = Dependencies(self)
@@ -344,7 +351,7 @@ class Connection:
         except errors.LostConnectionError:
             if not reconnect:
                 raise
-            logger.warning("
+            logger.warning("Reconnecting to MySQL server.")
             connect_host_hook(self)
             if self._in_transaction:
                 self.cancel_transaction()
datajoint/declare.py
CHANGED
@@ -3,12 +3,16 @@ This module hosts functions to convert DataJoint table definitions into mysql table definitions, and to
 declare the corresponding mysql tables.
 """
 
+import logging
 import re
+from hashlib import sha1
+
 import pyparsing as pp
-
-from .errors import DataJointError, _support_filepath_types, FILEPATH_FEATURE_SWITCH
+
 from .attribute_adapter import get_adapter
 from .condition import translate_attribute
+from .errors import FILEPATH_FEATURE_SWITCH, DataJointError, _support_filepath_types
+from .settings import config
 
 UUID_DATA_TYPE = "binary(16)"
 MAX_TABLE_NAME_LENGTH = 64
@@ -161,8 +165,8 @@ def compile_foreign_key(
     :param index_sql: list of INDEX declaration statements, duplicate or redundant indexes are ok.
     """
     # Parse and validate
-    from .table import Table
     from .expression import QueryExpression
+    from .table import Table
 
     try:
         result = foreign_key_parser.parseString(line)
@@ -310,6 +314,19 @@ def declare(full_table_name, definition, context):
         external_stores,
     ) = prepare_declare(definition, context)
 
+    if config.get("add_hidden_timestamp", False):
+        metadata_attr_sql = [
+            "`_{full_table_name}_timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP"
+        ]
+        attribute_sql.extend(
+            attr.format(
+                full_table_name=sha1(
+                    full_table_name.replace("`", "").encode("utf-8")
+                ).hexdigest()
+            )
+            for attr in metadata_attr_sql
+        )
+
     if not primary_key:
         raise DataJointError("Table must have a primary key")
 
@@ -442,9 +459,11 @@ def compile_index(line, index_sql):
             return f"`{attr}`"
         return f"({attr})"
 
-    match = re.match(
-        r"(?P<unique>unique\s+)?index\s*\(\s*(?P<args>.*)\)", line, re.I
-    ).groupdict()
+    match = re.match(r"(?P<unique>unique\s+)?index\s*\(\s*(?P<args>.*)\)", line, re.I)
+    if match is None:
+        raise DataJointError(f'Table definition syntax error in line "{line}"')
+    match = match.groupdict()
+
    attr_list = re.findall(r"(?:[^,(]|\([^)]*\))+", match["args"])
    index_sql.append(
        "{unique}index ({attrs})".format(
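
With the off-by-default `add_hidden_timestamp` setting enabled, each newly declared table gains a hidden creation-timestamp column whose name embeds a SHA-1 of the backquote-stripped table name. A sketch of computing that name for a hypothetical table:

    import hashlib
    import datajoint as dj

    dj.config["add_hidden_timestamp"] = True  # opt in before declaring tables

    full_table_name = "`my_schema`.`session`"  # hypothetical
    digest = hashlib.sha1(full_table_name.replace("`", "").encode("utf-8")).hexdigest()
    hidden_column = f"_{digest}_timestamp"  # 40 hex digits between the underscores
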
datajoint/dependencies.py
CHANGED
@@ -1,32 +1,70 @@
-import networkx as nx
 import itertools
 import re
 from collections import defaultdict
+
+import networkx as nx
+
 from .errors import DataJointError
 
 
-def unite_master_parts(lst):
+def extract_master(part_table):
+    """
+    given a part table name, return master part. None if not a part table
+    """
+    match = re.match(r"(?P<master>`\w+`.`#?\w+)__\w+`", part_table)
+    return match["master"] + "`" if match else None
+
+
+def topo_sort(graph):
     """
-    Without this correction, a simple topological sort may insert other descendants between master and parts.
-    The input list must be topologically sorted.
-    :example:
-    unite_master_parts(
-        ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) ->
-        ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`']
+    topological sort of a dependency graph that keeps part tables together with their masters
+    :return: list of table names in topological order
     """
+
+    graph = nx.DiGraph(graph)  # make a copy
+
+    # collapse alias nodes
+    alias_nodes = [node for node in graph if node.isdigit()]
+    for node in alias_nodes:
+        try:
+            direct_edge = (
+                next(x for x in graph.in_edges(node))[0],
+                next(x for x in graph.out_edges(node))[1],
+            )
+        except StopIteration:
+            pass  # a disconnected alias node
+        else:
+            graph.add_edge(*direct_edge)
+    graph.remove_nodes_from(alias_nodes)
+
+    # Add parts' dependencies to their masters' dependencies
+    # to ensure correct topological ordering of the masters.
+    for part in graph:
+        # find the part's master
+        if (master := extract_master(part)) in graph:
+            for edge in graph.in_edges(part):
+                parent = edge[0]
+                if master not in (parent, extract_master(parent)):
+                    # if parent is neither master nor part of master
+                    graph.add_edge(parent, master)
+    sorted_nodes = list(nx.topological_sort(graph))
+
+    # bring parts up to their masters
+    pos = len(sorted_nodes) - 1
+    placed = set()
+    while pos > 1:
+        part = sorted_nodes[pos]
+        if (master := extract_master(part)) not in graph or part in placed:
+            pos -= 1
+        else:
+            placed.add(part)
+            insert_pos = sorted_nodes.index(master) + 1
+            if pos > insert_pos:
+                # move the part to the position immediately after its master
+                del sorted_nodes[pos]
+                sorted_nodes.insert(insert_pos, part)
+
+    return sorted_nodes
 
 
 class Dependencies(nx.DiGraph):
@@ -131,6 +169,10 @@ class Dependencies(nx.DiGraph):
             raise DataJointError("DataJoint can only work with acyclic dependencies")
         self._loaded = True
 
+    def topo_sort(self):
+        """:return: list of tables names in topological order"""
+        return topo_sort(self)
+
     def parents(self, table_name, primary=None):
         """
         :param table_name: `schema`.`table`
@@ -167,10 +209,8 @@ class Dependencies(nx.DiGraph):
         :return: all dependent tables sorted in topological order. Self is included.
         """
         self.load(force=False)
-        nodes = self.subgraph(nx.descendants(self, full_table_name))
-        return unite_master_parts(
-            [full_table_name] + list(nx.algorithms.dag.topological_sort(nodes))
-        )
+        nodes = self.subgraph(nx.descendants(self, full_table_name))
+        return [full_table_name] + nodes.topo_sort()
 
     def ancestors(self, full_table_name):
         """
@@ -178,11 +218,5 @@ class Dependencies(nx.DiGraph):
         :return: all dependent tables sorted in topological order. Self is included.
         """
         self.load(force=False)
-        nodes = self.subgraph(nx.ancestors(self, full_table_name))
-        return list(
-            reversed(
-                unite_master_parts(
-                    list(nx.algorithms.dag.topological_sort(nodes)) + [full_table_name]
-                )
-            )
-        )
+        nodes = self.subgraph(nx.ancestors(self, full_table_name))
+        return reversed(nodes.topo_sort() + [full_table_name])