datajoint 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of datajoint has been flagged as possibly problematic.

datajoint/__init__.py CHANGED
@@ -37,6 +37,7 @@ __all__ = [
     "Part",
     "Not",
     "AndList",
+    "Top",
     "U",
     "Diagram",
     "Di",
@@ -51,6 +52,7 @@ __all__ = [
     "key",
     "key_hash",
     "logger",
+    "cli",
 ]
 
 from .logging import logger
@@ -61,7 +63,7 @@ from .schemas import Schema
 from .schemas import VirtualModule, list_schemas
 from .table import Table, FreeTable
 from .user_tables import Manual, Lookup, Imported, Computed, Part
-from .expression import Not, AndList, U
+from .expression import Not, AndList, U, Top
 from .diagram import Diagram
 from .admin import set_password, kill
 from .blob import MatCell, MatStruct
@@ -70,6 +72,7 @@ from .hash import key_hash
 from .attribute_adapter import AttributeAdapter
 from . import errors
 from .errors import DataJointError
+from .cli import cli
 
 ERD = Di = Diagram  # Aliases for Diagram
 schema = Schema  # Aliases for Schema
datajoint/autopopulate.py CHANGED
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__.split(".")[0])
 
 def _initialize_populate(table, jobs, populate_kwargs):
     """
-    Initialize the process for mulitprocessing.
+    Initialize the process for multiprocessing.
     Saves the unpickled copy of the table to the current process and reconnects.
     """
     process = mp.current_process()
@@ -153,6 +153,7 @@ class AutoPopulate:
     def populate(
         self,
         *restrictions,
+        keys=None,
         suppress_errors=False,
         return_exception_objects=False,
         reserve_jobs=False,
@@ -169,6 +170,8 @@
 
         :param restrictions: a list of restrictions each restrict
             (table.key_source - target.proj())
+        :param keys: The list of keys (dicts) to send to self.make().
+            If None (default), then use self.key_source to query they keys.
         :param suppress_errors: if True, do not terminate execution.
         :param return_exception_objects: return error objects instead of just error messages
        :param reserve_jobs: if True, reserve jobs to populate in asynchronous fashion
@@ -206,7 +209,10 @@
 
        old_handler = signal.signal(signal.SIGTERM, handler)
 
-        keys = (self._jobs_to_do(restrictions) - self.target).fetch("KEY", limit=limit)
+        if keys is None:
+            keys = (self._jobs_to_do(restrictions) - self.target).fetch(
+                "KEY", limit=limit
+            )
 
        # exclude "error", "ignore" or "reserved" jobs
        if reserve_jobs:
@@ -295,6 +301,7 @@
        :return: (key, error) when suppress_errors=True,
            True if successfully invoke one `make()` call, otherwise False
        """
+        # use the legacy `_make_tuples` callback.
        make = self._make_tuples if hasattr(self, "_make_tuples") else self.make
 
        if jobs is not None and not jobs.reserve(
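
Note on the populate() change above: the new keys argument lets a caller pass an explicit list of primary keys instead of letting populate query self.key_source. A minimal sketch of the intended use; the Analysis table is hypothetical:

    # hypothetical auto-populated table `Analysis`
    # pick ten pending keys explicitly, then populate only those
    keys = (Analysis.key_source - Analysis).fetch("KEY", limit=10)
    Analysis.populate(keys=keys, reserve_jobs=True, display_progress=True)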
datajoint/cli.py ADDED
@@ -0,0 +1,77 @@
+import argparse
+from code import interact
+from collections import ChainMap
+import datajoint as dj
+
+
+def cli(args: list = None):
+    """
+    Console interface for DataJoint Python
+
+    :param args: List of arguments to be passed in, defaults to reading stdin
+    :type args: list, optional
+    """
+    parser = argparse.ArgumentParser(
+        prog="datajoint",
+        description="DataJoint console interface.",
+        conflict_handler="resolve",
+    )
+    parser.add_argument(
+        "-V", "--version", action="version", version=f"{dj.__name__} {dj.__version__}"
+    )
+    parser.add_argument(
+        "-u",
+        "--user",
+        type=str,
+        default=dj.config["database.user"],
+        required=False,
+        help="Datajoint username",
+    )
+    parser.add_argument(
+        "-p",
+        "--password",
+        type=str,
+        default=dj.config["database.password"],
+        required=False,
+        help="Datajoint password",
+    )
+    parser.add_argument(
+        "-h",
+        "--host",
+        type=str,
+        default=dj.config["database.host"],
+        required=False,
+        help="Datajoint host",
+    )
+    parser.add_argument(
+        "-s",
+        "--schemas",
+        nargs="+",
+        type=str,
+        required=False,
+        help="A list of virtual module mappings in `db:schema ...` format",
+    )
+    kwargs = vars(parser.parse_args(args))
+    mods = {}
+    if kwargs["user"]:
+        dj.config["database.user"] = kwargs["user"]
+    if kwargs["password"]:
+        dj.config["database.password"] = kwargs["password"]
+    if kwargs["host"]:
+        dj.config["database.host"] = kwargs["host"]
+    if kwargs["schemas"]:
+        for vm in kwargs["schemas"]:
+            d, m = vm.split(":")
+            mods[m] = dj.create_virtual_module(m, d)
+
+    banner = "dj repl\n"
+    if mods:
+        modstr = "\n".join(" - {}".format(m) for m in mods)
+        banner += "\nschema modules:\n\n" + modstr + "\n"
+    interact(banner, local=dict(ChainMap(mods, locals(), globals())))
+
+    raise SystemExit
+
+
+if __name__ == "__main__":
+    cli()
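
Note on the new datajoint/cli.py module: cli() parses the connection options, writes them into dj.config, builds a virtual module for each db:schema mapping, and drops into an interactive code.interact session (raising SystemExit on exit). A sketch of invoking it programmatically; the user, host, and schema/module names are made up:

    from datajoint.cli import cli

    # roughly equivalent to running the module as a script with the same arguments
    cli(["-u", "alice", "--host", "db.example.com", "-s", "my_db:lab"])
    # opens a REPL in which the virtual module `lab` (built from schema `my_db`) is available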
datajoint/condition.py CHANGED
@@ -10,6 +10,8 @@ import numpy
 import pandas
 import json
 from .errors import DataJointError
+from typing import Union, List
+from dataclasses import dataclass
 
 JSON_PATTERN = re.compile(
     r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>[\w(,\s)]+))?$"
@@ -61,6 +63,35 @@ class AndList(list):
         super().append(restriction)
 
 
+@dataclass
+class Top:
+    """
+    A restriction to the top entities of a query.
+    In SQL, this corresponds to ORDER BY ... LIMIT ... OFFSET
+    """
+
+    limit: Union[int, None] = 1
+    order_by: Union[str, List[str]] = "KEY"
+    offset: int = 0
+
+    def __post_init__(self):
+        self.order_by = self.order_by or ["KEY"]
+        self.offset = self.offset or 0
+
+        if self.limit is not None and not isinstance(self.limit, int):
+            raise TypeError("Top limit must be an integer")
+        if not isinstance(self.order_by, (str, collections.abc.Sequence)) or not all(
+            isinstance(r, str) for r in self.order_by
+        ):
+            raise TypeError("Top order_by attributes must all be strings")
+        if not isinstance(self.offset, int):
+            raise TypeError("The offset argument must be an integer")
+        if self.offset and self.limit is None:
+            self.limit = 999999999999  # arbitrary large number to allow query
+        if isinstance(self.order_by, str):
+            self.order_by = [self.order_by]
+
+
 class Not:
     """invert restriction"""
 
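Note on the new Top restriction above: it is re-exported as dj.Top (see the __init__.py diff) and limits a query to its top entities, which corresponds to ORDER BY ... LIMIT ... OFFSET in SQL. A minimal usage sketch; the Session table and its session_date attribute are hypothetical:

    # keep only the five most recent sessions (hypothetical table/attribute)
    recent_sessions = Session & dj.Top(limit=5, order_by="session_date DESC")
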
datajoint/declare.py CHANGED
@@ -6,9 +6,11 @@ declare the corresponding mysql tables.
 import re
 import pyparsing as pp
 import logging
+from hashlib import sha1
 from .errors import DataJointError, _support_filepath_types, FILEPATH_FEATURE_SWITCH
 from .attribute_adapter import get_adapter
 from .condition import translate_attribute
+from .settings import config
 
 UUID_DATA_TYPE = "binary(16)"
 MAX_TABLE_NAME_LENGTH = 64
@@ -310,6 +312,19 @@ def declare(full_table_name, definition, context):
         external_stores,
     ) = prepare_declare(definition, context)
 
+    if config.get("add_hidden_timestamp", False):
+        metadata_attr_sql = [
+            "`_{full_table_name}_timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP"
+        ]
+        attribute_sql.extend(
+            attr.format(
+                full_table_name=sha1(
+                    full_table_name.replace("`", "").encode("utf-8")
+                ).hexdigest()
+            )
+            for attr in metadata_attr_sql
+        )
+
     if not primary_key:
         raise DataJointError("Table must have a primary key")
 
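Note on the declare() change above: the hidden timestamp attribute is appended only when the add_hidden_timestamp setting is enabled at declaration time, and its column name embeds a SHA-1 hash of the table name. A brief sketch of opting in; the schema and table are hypothetical:

    import datajoint as dj

    dj.config["add_hidden_timestamp"] = True  # opt in before declaring tables
    schema = dj.Schema("my_schema")  # hypothetical schema name

    @schema
    class Event(dj.Manual):  # hypothetical table
        definition = """
        event_id : int
        """
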
@@ -442,9 +457,11 @@ def compile_index(line, index_sql):
             return f"`{attr}`"
         return f"({attr})"
 
-    match = re.match(
-        r"(?P<unique>unique\s+)?index\s*\(\s*(?P<args>.*)\)", line, re.I
-    ).groupdict()
+    match = re.match(r"(?P<unique>unique\s+)?index\s*\(\s*(?P<args>.*)\)", line, re.I)
+    if match is None:
+        raise DataJointError(f'Table definition syntax error in line "{line}"')
+    match = match.groupdict()
+
     attr_list = re.findall(r"(?:[^,(]|\([^)]*\))+", match["args"])
     index_sql.append(
         "{unique}index ({attrs})".format(
datajoint/dependencies.py CHANGED
@@ -5,28 +5,64 @@ from collections import defaultdict
 from .errors import DataJointError
 
 
-def unite_master_parts(lst):
+def extract_master(part_table):
     """
-    re-order a list of table names so that part tables immediately follow their master tables without breaking
-    the topological order.
-    Without this correction, a simple topological sort may insert other descendants between master and parts.
-    The input list must be topologically sorted.
-    :example:
-    unite_master_parts(
-        ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) ->
-    ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`']
+    given a part table name, return master part. None if not a part table
     """
-    for i in range(2, len(lst)):
-        name = lst[i]
-        match = re.match(r"(?P<master>`\w+`.`#?\w+)__\w+`", name)
-        if match:  # name is a part table
-            master = match.group("master")
-            for j in range(i - 1, -1, -1):
-                if lst[j] == master + "`" or lst[j].startswith(master + "__"):
-                    # move from the ith position to the (j+1)th position
-                    lst[j + 1 : i + 1] = [name] + lst[j + 1 : i]
-                    break
-    return lst
+    match = re.match(r"(?P<master>`\w+`.`#?\w+)__\w+`", part_table)
+    return match["master"] + "`" if match else None
+
+
+def topo_sort(graph):
+    """
+    topological sort of a dependency graph that keeps part tables together with their masters
+    :return: list of table names in topological order
+    """
+
+    graph = nx.DiGraph(graph)  # make a copy
+
+    # collapse alias nodes
+    alias_nodes = [node for node in graph if node.isdigit()]
+    for node in alias_nodes:
+        try:
+            direct_edge = (
+                next(x for x in graph.in_edges(node))[0],
+                next(x for x in graph.out_edges(node))[1],
+            )
+        except StopIteration:
+            pass  # a disconnected alias node
+        else:
+            graph.add_edge(*direct_edge)
+    graph.remove_nodes_from(alias_nodes)
+
+    # Add parts' dependencies to their masters' dependencies
+    # to ensure correct topological ordering of the masters.
+    for part in graph:
+        # find the part's master
+        if (master := extract_master(part)) in graph:
+            for edge in graph.in_edges(part):
+                parent = edge[0]
+                if master not in (parent, extract_master(parent)):
+                    # if parent is neither master nor part of master
+                    graph.add_edge(parent, master)
+    sorted_nodes = list(nx.topological_sort(graph))
+
+    # bring parts up to their masters
+    pos = len(sorted_nodes) - 1
+    placed = set()
+    while pos > 1:
+        part = sorted_nodes[pos]
+        if (master := extract_master(part)) not in graph or part in placed:
+            pos -= 1
+        else:
+            placed.add(part)
+            insert_pos = sorted_nodes.index(master) + 1
+            if pos > insert_pos:
+                # move the part to the position immediately after its master
+                del sorted_nodes[pos]
+                sorted_nodes.insert(insert_pos, part)
+
+    return sorted_nodes
 
 
 class Dependencies(nx.DiGraph):
@@ -131,6 +167,10 @@ class Dependencies(nx.DiGraph):
             raise DataJointError("DataJoint can only work with acyclic dependencies")
         self._loaded = True
 
+    def topo_sort(self):
+        """:return: list of tables names in topological order"""
+        return topo_sort(self)
+
     def parents(self, table_name, primary=None):
         """
         :param table_name: `schema`.`table`
@@ -167,10 +207,8 @@ class Dependencies(nx.DiGraph):
         :return: all dependent tables sorted in topological order. Self is included.
         """
         self.load(force=False)
-        nodes = self.subgraph(nx.algorithms.dag.descendants(self, full_table_name))
-        return unite_master_parts(
-            [full_table_name] + list(nx.algorithms.dag.topological_sort(nodes))
-        )
+        nodes = self.subgraph(nx.descendants(self, full_table_name))
+        return [full_table_name] + nodes.topo_sort()
 
     def ancestors(self, full_table_name):
         """
@@ -178,11 +216,5 @@ class Dependencies(nx.DiGraph):
         :return: all dependent tables sorted in topological order. Self is included.
         """
         self.load(force=False)
-        nodes = self.subgraph(nx.algorithms.dag.ancestors(self, full_table_name))
-        return list(
-            reversed(
-                unite_master_parts(
-                    list(nx.algorithms.dag.topological_sort(nodes)) + [full_table_name]
-                )
-            )
-        )
+        nodes = self.subgraph(nx.ancestors(self, full_table_name))
+        return reversed(nodes.topo_sort() + [full_table_name])
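
Note on the dependencies.py rewrite above: the reordering previously done by unite_master_parts now happens inside topo_sort, which first adds each part table's non-master parents as dependencies of its master and then moves every part up to sit directly after that master. A small sketch of the expected ordering on a toy graph; the table names are made up but follow the same backtick-quoted `schema`.`table` convention:

    import networkx as nx
    from datajoint.dependencies import topo_sort

    g = nx.DiGraph(
        [
            ("`s`.`a`", "`s`.`a__part`"),  # part table depends on its master
            ("`s`.`a`", "`s`.`b`"),
        ]
    )
    print(topo_sort(g))  # expected: ['`s`.`a`', '`s`.`a__part`', '`s`.`b`']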
datajoint/diagram.py CHANGED
@@ -1,12 +1,11 @@
 import networkx as nx
-import re
 import functools
 import io
 import logging
 import inspect
 from .table import Table
-from .dependencies import unite_master_parts
-from .user_tables import Manual, Imported, Computed, Lookup, Part
+from .dependencies import topo_sort
+from .user_tables import Manual, Imported, Computed, Lookup, Part, _get_tier, _AliasNode
 from .errors import DataJointError
 from .table import lookup_class_name
 
@@ -27,29 +26,6 @@ except:
 
 
 logger = logging.getLogger(__name__.split(".")[0])
-user_table_classes = (Manual, Lookup, Computed, Imported, Part)
-
-
-class _AliasNode:
-    """
-    special class to indicate aliased foreign keys
-    """
-
-    pass
-
-
-def _get_tier(table_name):
-    if not table_name.startswith("`"):
-        return _AliasNode
-    else:
-        try:
-            return next(
-                tier
-                for tier in user_table_classes
-                if re.fullmatch(tier.tier_regexp, table_name.split("`")[-2])
-            )
-        except StopIteration:
-            return None
 
 
 if not diagram_active:
@@ -59,8 +35,7 @@ if not diagram_active:
        Entity relationship diagram, currently disabled due to the lack of required packages: matplotlib and pygraphviz.
 
        To enable Diagram feature, please install both matplotlib and pygraphviz. For instructions on how to install
-        these two packages, refer to http://docs.datajoint.io/setup/Install-and-connect.html#python and
-        http://tutorials.datajoint.io/setting-up/datajoint-python.html
+        these two packages, refer to https://datajoint.com/docs/core/datajoint-python/0.14/client/install/
        """
 
        def __init__(self, *args, **kwargs):
@@ -72,19 +47,22 @@ else:
 
    class Diagram(nx.DiGraph):
        """
-        Entity relationship diagram.
+        Schema diagram showing tables and foreign keys between in the form of a directed
+        acyclic graph (DAG). The diagram is derived from the connection.dependencies object.
 
        Usage:
 
        >>> diag = Diagram(source)
 
-        source can be a base table object, a base table class, a schema, or a module that has a schema.
+        source can be a table object, a table class, a schema, or a module that has a schema.
 
        >>> diag.draw()
 
        draws the diagram using pyplot
 
        diag1 + diag2 - combines the two diagrams.
+        diag1 - diag2 - difference between diagrams
+        diag1 * diag2 - intersection of diagrams
        diag + n - expands n levels of successors
        diag - n - expands n levels of predecessors
        Thus dj.Diagram(schema.Table)+1-1 defines the diagram of immediate ancestors and descendants of schema.Table
@@ -94,6 +72,7 @@ else:
        """
 
        def __init__(self, source, context=None):
+
            if isinstance(source, Diagram):
                # copy constructor
                self.nodes_to_show = set(source.nodes_to_show)
@@ -154,7 +133,7 @@ else:
 
        def add_parts(self):
            """
-            Adds to the diagram the part tables of tables already included in the diagram
+            Adds to the diagram the part tables of all master tables already in the diagram
            :return:
            """
 
@@ -179,16 +158,6 @@ else:
            )
            return self
 
-        def topological_sort(self):
-            """:return: list of nodes in topological order"""
-            return unite_master_parts(
-                list(
-                    nx.algorithms.dag.topological_sort(
-                        nx.DiGraph(self).subgraph(self.nodes_to_show)
-                    )
-                )
-            )
-
        def __add__(self, arg):
            """
            :param arg: either another Diagram or a positive integer.
@@ -256,6 +225,10 @@ else:
            self.nodes_to_show.intersection_update(arg.nodes_to_show)
            return self
 
+        def topo_sort(self):
+            """return nodes in lexicographical topological order"""
+            return topo_sort(self)
+
        def _make_graph(self):
            """
            Make the self.graph - a graph object ready for drawing
@@ -300,6 +273,36 @@ else:
            nx.relabel_nodes(graph, mapping, copy=False)
            return graph
 
+        @staticmethod
+        def _encapsulate_edge_attributes(graph):
+            """
+            Modifies the `nx.Graph`'s edge attribute `attr_map` to be a string representation
+            of the attribute map, and encapsulates the string in double quotes.
+            Changes the graph in place.
+
+            Implements workaround described in
+            https://github.com/pydot/pydot/issues/258#issuecomment-795798099
+            """
+            for u, v, *_, edgedata in graph.edges(data=True):
+                if "attr_map" in edgedata:
+                    graph.edges[u, v]["attr_map"] = '"{0}"'.format(edgedata["attr_map"])
+
+        @staticmethod
+        def _encapsulate_node_names(graph):
+            """
+            Modifies the `nx.Graph`'s node names string representations encapsulated in
+            double quotes.
+            Changes the graph in place.
+
+            Implements workaround described in
+            https://github.com/datajoint/datajoint-python/pull/1176
+            """
+            nx.relabel_nodes(
+                graph,
+                {node: '"{0}"'.format(node) for node in graph.nodes()},
+                copy=False,
+            )
+
        def make_dot(self):
            graph = self._make_graph()
            graph.nodes()
@@ -368,6 +371,8 @@ else:
                for node, d in dict(graph.nodes(data=True)).items()
            }
 
+            self._encapsulate_node_names(graph)
+            self._encapsulate_edge_attributes(graph)
            dot = nx.drawing.nx_pydot.to_pydot(graph)
            for node in dot.get_nodes():
                node.set_shape("circle")
@@ -408,9 +413,14 @@ else:
 
            for edge in dot.get_edges():
                # see https://graphviz.org/doc/info/attrs.html
-                src = edge.get_source().strip('"')
-                dest = edge.get_destination().strip('"')
+                src = edge.get_source()
+                dest = edge.get_destination()
                props = graph.get_edge_data(src, dest)
+                if props is None:
+                    raise DataJointError(
+                        "Could not find edge with source "
+                        "'{}' and destination '{}'".format(src, dest)
+                    )
                edge.set_color("#00000040")
                edge.set_style("solid" if props["primary"] else "dashed")
                master_part = graph.nodes[dest][