jupyter-duckdb 1.2.0.3__py3-none-any.whl → 1.2.0.4__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
duckdb_kernel/kernel.py CHANGED
@@ -14,6 +14,7 @@ from .db.error import *
14
14
  from .magics import *
15
15
  from .parser import RAParser, DCParser
16
16
  from .util.ResultSetComparator import ResultSetComparator
17
+ from .util.TestError import TestError
17
18
  from .util.formatting import row_count, rows_table, wrap_image
18
19
  from .visualization import *
19
20
 
@@ -139,6 +140,7 @@ class DuckDBKernel(Kernel):
139
140
  return False
140
141
 
141
142
  def _execute_stmt(self, query: str, silent: bool,
143
+ column_name_mapping: Dict[str, str],
142
144
  max_rows: Optional[int]) -> Tuple[Optional[List[str]], Optional[List[List]]]:
143
145
  if self._db is None:
144
146
  raise AssertionError('load a database first')
@@ -168,7 +170,8 @@ class DuckDBKernel(Kernel):
168
170
  else:
169
171
  if columns is not None:
170
172
  # table header
171
- table_header = ''.join(f'<th>{c}</th>' for c in columns)
173
+ mapped_columns = (column_name_mapping.get(c, c) for c in columns)
174
+ table_header = ''.join(f'<th>{c}</th>' for c in mapped_columns)
172
175
 
173
176
  # table data
174
177
  if max_rows is not None and len(rows) > max_rows:
@@ -302,12 +305,23 @@ class DuckDBKernel(Kernel):
302
305
  result_columns = [col.rsplit('.', 1)[-1] for col in result_columns]
303
306
 
304
307
  # extract data for test
305
- data = self._tests[name]
308
+ test_data = self._tests[name]
306
309
 
310
+ # execute test
311
+ try:
312
+ self._execute_test(test_data, result_columns, result)
313
+ self.print_data(wrap_image(True))
314
+ except TestError as e:
315
+ self.print_data(wrap_image(False, e.message))
316
+ if os.environ.get('DUCKDB_TESTS_RAISE_EXCEPTION', 'false').lower() in ('true', '1'):
317
+ raise e
318
+
319
+ @staticmethod
320
+ def _execute_test(test_data: Dict, result_columns: List[str], result: List[List]):
307
321
  # check columns if required
308
- if isinstance(data['equals'], dict):
322
+ if isinstance(test_data['equals'], dict):
309
323
  # get column order
310
- data_columns = list(data['equals'].keys())
324
+ data_columns = list(test_data['equals'].keys())
311
325
  column_order = []
312
326
 
313
327
  for dc in data_columns:
@@ -318,39 +332,37 @@ class DuckDBKernel(Kernel):
318
332
  found += 1
319
333
 
320
334
  if found == 0:
321
- return self.print_data(wrap_image(False, f'attribute {dc} missing'))
335
+ raise TestError(f'attribute {dc} missing')
322
336
  if found >= 2:
323
- return self.print_data(wrap_image(False, f'ambiguous attribute {dc}'))
337
+ raise TestError(f'ambiguous attribute {dc}')
324
338
 
325
339
  # abort if columns from result are unnecessary
326
340
  for i, rc in enumerate(result_columns):
327
341
  if i not in column_order:
328
- return self.print_data(wrap_image(False, f'unnecessary attribute {rc}'))
342
+ raise TestError(f'unnecessary attribute {rc}')
329
343
 
330
344
  # reorder columns and transform to list of lists
331
345
  sorted_columns = [x for _, x in sorted(zip(column_order, data_columns))]
332
346
  rows = []
333
347
 
334
- for row in zip(*(data['equals'][col] for col in sorted_columns)):
348
+ for row in zip(*(test_data['equals'][col] for col in sorted_columns)):
335
349
  rows.append(row)
336
350
 
337
351
  else:
338
- rows = data['equals']
352
+ rows = test_data['equals']
339
353
 
340
354
  # ordered test
341
- if data['ordered']:
355
+ if test_data['ordered']:
342
356
  # calculate diff
343
357
  rsc = ResultSetComparator(result, rows)
344
358
 
345
359
  missing = len(rsc.ordered_right_only)
346
360
  if missing > 0:
347
- return self.print_data(wrap_image(False, f'{row_count(missing)} missing'))
361
+ raise TestError(f'{row_count(missing)} missing')
348
362
 
349
363
  missing = len(rsc.ordered_left_only)
350
364
  if missing > 0:
351
- return self.print_data(wrap_image(False, f'{row_count(missing)} more than required'))
352
-
353
- return self.print_data(wrap_image(True))
365
+ raise TestError(f'{row_count(missing)} more than required')
354
366
 
355
367
  # unordered test
356
368
  else:
@@ -362,13 +374,11 @@ class DuckDBKernel(Kernel):
362
374
 
363
375
  # print result
364
376
  if below > 0 and above > 0:
365
- self.print_data(wrap_image(False, f'{row_count(below)} missing, {row_count(above)} unnecessary'))
377
+ raise TestError(f'{row_count(below)} missing, {row_count(above)} unnecessary')
366
378
  elif below > 0:
367
- self.print_data(wrap_image(False, f'{row_count(below)} missing'))
379
+ raise TestError(f'{row_count(below)} missing')
368
380
  elif above > 0:
369
- self.print_data(wrap_image(False, f'{row_count(above)} unnecessary'))
370
- else:
371
- self.print_data(wrap_image(True))
381
+ raise TestError(f'{row_count(above)} unnecessary')
372
382
 
373
383
  def _all_magic(self, silent: bool):
374
384
  return {
@@ -503,10 +513,11 @@ class DuckDBKernel(Kernel):
503
513
  root_node = DCParser.parse_query(code)
504
514
 
505
515
  # generate sql
506
- sql = root_node.to_sql(tables)
516
+ sql, cnm = root_node.to_sql_with_renamed_columns(tables)
507
517
 
508
518
  return {
509
- 'generated_code': sql
519
+ 'generated_code': sql,
520
+ 'column_name_mapping': cnm
510
521
  }
511
522
 
512
523
  # jupyter related functions
@@ -530,6 +541,10 @@ class DuckDBKernel(Kernel):
530
541
  clean_code = execution_args['generated_code']
531
542
  del execution_args['generated_code']
532
543
 
544
+ # set default column name mapping if none provided
545
+ if 'column_name_mapping' not in execution_args:
546
+ execution_args['column_name_mapping'] = {}
547
+
533
548
  # execute statement if needed
534
549
  if clean_code.strip():
535
550
  cols, rows = self._execute_stmt(clean_code, silent, **execution_args)
@@ -42,7 +42,7 @@ class ConditionalSet:
42
42
 
43
43
  # If a constant was found, we store the value and replace it with a random attribute name.
44
44
  constant = le.names[i]
45
- new_token = Token.random()
45
+ new_token = Token.random(constant)
46
46
  new_operand = DCOperand(le.relation, le.names[:i] + (new_token,) + le.names[i + 1:], skip_comma=True)
47
47
 
48
48
  # We now need an equality comparison to ensure the introduced attribute is equal to the constant.
@@ -103,7 +103,7 @@ class ConditionalSet:
103
103
  # The default case is to return the LogicElement with no DCOperands.
104
104
  return le, []
105
105
 
106
- def to_sql(self, tables: Dict[str, Table]) -> str:
106
+ def to_sql_with_renamed_columns(self, tables: Dict[str, Table]) -> Tuple[str, Dict[str, str]]:
107
107
  # First we have to find and remove all DCOperands from the operator tree.
108
108
  condition, dc_operands = self.split_tree(self.condition)
109
109
 
@@ -339,5 +339,18 @@ class ConditionalSet:
339
339
  sql_join_filters += f' AND {join_filter}'
340
340
 
341
341
  sql_condition = condition.to_sql(joined_columns) if condition is not None else '1=1'
342
+ sql_query = f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition})'
343
+
344
+ # Create a mapping from intermediate column names to constant values.
345
+ column_name_mapping = {
346
+ p: p.constant
347
+ for o in dc_operands
348
+ for p in o.names
349
+ if p.constant is not None
350
+ }
342
351
 
343
- return f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition})'
352
+ return sql_query, column_name_mapping
353
+
354
+ def to_sql(self, tables: Dict[str, Table]) -> str:
355
+ sql, _ = self.to_sql_with_renamed_columns(tables)
356
+ return sql
@@ -1,8 +1,9 @@
1
+ from typing import Optional
1
2
  from uuid import uuid4
2
3
 
3
4
 
4
5
  class Token(str):
5
- def __new__(cls, value: str):
6
+ def __new__(cls, value: str, constant: 'Token' = None):
6
7
  while True:
7
8
  # strip whitespaces
8
9
  value = value.strip()
@@ -38,20 +39,40 @@ class Token(str):
38
39
 
39
40
  return super().__new__(cls, value)
40
41
 
42
+ def __init__(self, value: str, constant: 'Token' = None):
43
+ self.constant: Optional[Token] = constant
44
+
41
45
  @staticmethod
42
- def random() -> 'Token':
43
- return Token('__' + str(uuid4()).replace('-', '_'))
46
+ def random(constant: 'Token' = None) -> 'Token':
47
+ return Token('__' + str(uuid4()).replace('-', '_'), constant)
44
48
 
45
49
  @property
46
50
  def empty(self) -> bool:
47
51
  return len(self) == 0
48
52
 
53
+ @property
54
+ def is_temporary(self) -> bool:
55
+ return self.startswith('__')
56
+
49
57
  @property
50
58
  def is_constant(self) -> bool:
51
59
  return ((self[0] == '"' and self[-1] == '"') or
52
60
  (self[0] == "'" and self[-1] == "'") or
53
61
  self.replace('.', '', 1).isnumeric())
54
62
 
63
+ @property
64
+ def no_quotes(self) -> str:
65
+ quotes = ('"', "'")
66
+
67
+ if self[0] in quotes and self[-1] in quotes:
68
+ return self[1:-1]
69
+ if self[0] in quotes:
70
+ return self[1:]
71
+ if self[-1] in quotes:
72
+ return self[:-1]
73
+ else:
74
+ return self
75
+
55
76
  @property
56
77
  def single_quotes(self) -> str:
57
78
  # TODO Is this comparison useless because tokens are cleaned automatically?
@@ -0,0 +1,4 @@
1
+ class TestError(Exception):
2
+ @property
3
+ def message(self) -> str:
4
+ return str(self)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: jupyter-duckdb
3
- Version: 1.2.0.3
3
+ Version: 1.2.0.4
4
4
  Summary: a basic wrapper kernel for DuckDB
5
5
  Home-page: https://github.com/erictroebs/jupyter-duckdb
6
6
  Author: Eric Tröbs
@@ -32,10 +32,6 @@ This is a simple DuckDB wrapper kernel which accepts SQL as input, executes it
32
32
  using a previously loaded DuckDB instance and formats the output as a table.
33
33
  There are some magic commands that make teaching easier with this kernel.
34
34
 
35
- ## Quick Start
36
-
37
- [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Fdbgit.prakinf.tu-ilmenau.de%2Fertr8623%2Fjupyter-duckdb.git/master)
38
-
39
35
  ## Table of Contents
40
36
 
41
37
  - [Setup](#setup)
@@ -85,6 +81,12 @@ Execute the following command to pull and run a prepared image.
85
81
  docker run -p 8888:8888 troebs/jupyter-duckdb
86
82
  ```
87
83
 
84
+ There is also a second image. It contains an additional instance of PostgreSQL:
85
+
86
+ ```bash
87
+ docker run -p 8888:8888 troebs/jupyter-duckdb:postgresql
88
+ ```
89
+
88
90
  This image can also be used with JupyterHub and the
89
91
  [DockerSpawner / SwarmSpawner](https://github.com/jupyterhub/dockerspawner)
90
92
  and probably with the
@@ -138,6 +140,13 @@ Please note that `:memory:` is also a valid file path for DuckDB. The data is
138
140
  then stored exclusively in the main memory. In combination with `CREATE`
139
141
  and `OF` this makes it possible to work on a temporary copy in memory.
140
142
 
143
+ Although the name suggests otherwise, the kernel can also be used with other
144
+ databases:
145
+ - **SQLite** is automatically used as a fallback if the DuckDB dependency is
146
+ missing.
147
+ - To connect to a **PostgreSQL** instance, you need to specify a database URI
148
+ starting with `(postgresql|postgres|pgsql|psql|pg)://`.
149
+
141
150
  ### Schema Diagrams
142
151
 
143
152
  The magic command `SCHEMA` can be used to create a simple schema diagram of the
@@ -153,6 +162,10 @@ representation requires more space, but can improve readability.
153
162
  %SCHEMA TD
154
163
  ```
155
164
 
165
+ The optional argument `ONLY`, followed by one or more table names separated by a
166
+ comma, can be used to display only the named tables and all those connected with
167
+ a foreign key.
168
+
156
169
  Graphviz (`dot` in PATH) is required to render schema diagrams.
157
170
 
158
171
  ### Number of Rows
@@ -234,6 +247,11 @@ UNION
234
247
  SELECT 1, 'Name 1'
235
248
  ```
236
249
 
250
+ By default, failed tests will display an explanation, but the notebook will
251
+ continue to run. Set the `DUCKDB_TESTS_RAISE_EXCEPTION` environment variable to
252
+ `true` to raise an exception when a test fails. This can be useful for automated
253
+ testing in CI environments.
254
+
237
255
  Disclaimer: The integrated testing is work-in-progress and thus subject to
238
256
  potentially incompatible changes and enhancements.
239
257
 
@@ -259,6 +277,9 @@ The supported operations are:
259
277
  - Cross Product `×`
260
278
  - Division `÷`
261
279
 
280
+ The optional flag `ANALYZE` can be used to add an execution diagram to the
281
+ output.
282
+
262
283
  The Dockerfile also installs the Jupyter Lab plugin
263
284
  [jupyter-ra-extension](https://pypi.org/project/jupyter-ra-extension/). It adds
264
285
  the symbols mentioned above and some other supported symbols to the toolbar for
@@ -1,7 +1,7 @@
1
1
  duckdb_kernel/__init__.py,sha256=6auU6zeJrsA4fxPSr2PYamS8fG-SMXTn5YQFXF2cseo,33
2
2
  duckdb_kernel/__main__.py,sha256=Z3GwHEBWoQjNm2Y84ijnbA0Lk66L7nsFREuqhZ_ptk0,165
3
3
  duckdb_kernel/kernel.json,sha256=_7E8Ci2FSdCvnzCjsOaue8QE8AvpS5JLQuxORO5IGtA,127
4
- duckdb_kernel/kernel.py,sha256=Td6S4qWDC9uuqgAhSllXwR-JHHdnGfT14RHkfYLzXxI,19441
4
+ duckdb_kernel/kernel.py,sha256=KjuYperzwFe21x-vjKHIkVjBlQ348Jlfb7C3H5ua7Sg,20061
5
5
  duckdb_kernel/db/Column.py,sha256=GM5P6sFdlYK92hiKln5-6038gIDOTxh1AYbR4kiga_w,559
6
6
  duckdb_kernel/db/Connection.py,sha256=5pH-CwGh-r9Q2QwJKGSxvoINBU-sqmvZyG4Q1digfeE,599
7
7
  duckdb_kernel/db/Constraint.py,sha256=1YgUHk7s8mHCVedbcuJKyXDykj7_ybbwT3Dk9p2VMis,287
@@ -40,7 +40,7 @@ duckdb_kernel/parser/elements/__init__.py,sha256=4DA2M43hh9d1fZb5Z6YnTTI-IBkDyhC
40
40
  duckdb_kernel/parser/elements/binary/Add.py,sha256=XGkZMfab01huk9EaI6JUfzkd2STbV1C_-TyC2guKE8I,190
41
41
  duckdb_kernel/parser/elements/binary/And.py,sha256=0jgetTG8yo5TJSeK70Kj-PI9ERyek1eyMQXX5HBxa4Y,274
42
42
  duckdb_kernel/parser/elements/binary/ArrowLeft.py,sha256=u4fZSoyT9lfvWXBwuhUl4DdjVZAOqyVIKmMVbpElLD4,203
43
- duckdb_kernel/parser/elements/binary/ConditionalSet.py,sha256=4KzvUTls2bodJw9ejCKx8se32PR5VFJbVupDZVx2NHE,16671
43
+ duckdb_kernel/parser/elements/binary/ConditionalSet.py,sha256=L3X1o_C55ibD0tHLmJWqyR5yuGgxAb_FDT2_mgPjnqo,17154
44
44
  duckdb_kernel/parser/elements/binary/Cross.py,sha256=jVY3cvD6qDWZkJ7q74lFUPO2VdDt4aAjdk2YAfg-ZC4,687
45
45
  duckdb_kernel/parser/elements/binary/Difference.py,sha256=ZVRgJHYVMOFwnc97oPvGtKvLvHsjSCsn2Aao6ymxY8Y,742
46
46
  duckdb_kernel/parser/elements/binary/Divide.py,sha256=d7mzaOeRYSRO1F-2IHsv_C939TuYtLppbf4-5GSRJXs,265
@@ -63,20 +63,21 @@ duckdb_kernel/parser/elements/unary/Projection.py,sha256=CJ-MIf1-__1ewTjNZVy5hOz
63
63
  duckdb_kernel/parser/elements/unary/Rename.py,sha256=Zr2n9EJ3nA476lND0Djz2b6493nnsbSpJ9kkEgk5B_Y,1273
64
64
  duckdb_kernel/parser/elements/unary/Selection.py,sha256=TKykDMw0QGQcMFp0r7g6ye4CkjshBTNq14c7qtMkqs4,955
65
65
  duckdb_kernel/parser/elements/unary/__init__.py,sha256=48EDygy0pD7l3J_BlXGc-b7HYPaiHQa1-0Mcsj9Xzr0,270
66
- duckdb_kernel/parser/tokenizer/Token.py,sha256=vwN5hHg11kqzOHLeL5GO1c1BbCTZzYDTuy0QR4kDzew,1800
66
+ duckdb_kernel/parser/tokenizer/Token.py,sha256=gsCzgU_zLiA-yD0FWvd2qS9LQUXbivESYH-34Glffqs,2404
67
67
  duckdb_kernel/parser/tokenizer/Tokenizer.py,sha256=PWGgS7gYgpULiKGDho842UbaXuqmwEkccixuF10oi5g,5081
68
68
  duckdb_kernel/parser/tokenizer/__init__.py,sha256=EOSmfc2RJwtB5cE1Hhj1JAra97tckxxS8-legybPy60,58
69
69
  duckdb_kernel/parser/util/RenamableColumn.py,sha256=LxJhFDMUv_OxYYDLwKn63QGpBRfs08jVvhuJTzRtc9c,704
70
70
  duckdb_kernel/parser/util/RenamableColumnList.py,sha256=GfhdGv4KYT64Z9YA9TCn-7hhcEcc3Gu3vI2zMZ52w-8,3015
71
71
  duckdb_kernel/parser/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
72
  duckdb_kernel/util/ResultSetComparator.py,sha256=RZDIfjJyx8-eR-HIqQlEYgZd_V1ympbszpVRF4TlA7o,2262
73
+ duckdb_kernel/util/TestError.py,sha256=iwlGHr9j6pFDa2cGxqGyvJ-exrFUtPJjVm_OhHi4n3g,97
73
74
  duckdb_kernel/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
75
  duckdb_kernel/util/formatting.py,sha256=cbt0CtERnqtzd97mLrOjeJpqM2Lo6pW96BjAYqrOTD8,793
75
76
  duckdb_kernel/visualization/Drawer.py,sha256=D0LkiGMvuJ2v6cQSg_axLTGaM4VXAJEQJAynvedQ3So,296
76
77
  duckdb_kernel/visualization/RATreeDrawer.py,sha256=j-Vy1zpYMzwZ3CsphyfPW-J7ou9a9tM6aXXgAlQTgDI,2128
77
78
  duckdb_kernel/visualization/SchemaDrawer.py,sha256=9K-TUUmyeGdMYMTFQJ7evIU3p8p2KyMKeizUc7-y8co,3015
78
79
  duckdb_kernel/visualization/__init__.py,sha256=5eMJmxJ01XAXcgWDn3t70eSZF2PGaXdNo6GK-x-0H3s,78
79
- jupyter_duckdb-1.2.0.3.dist-info/METADATA,sha256=O4AGOi_-WpenZIFXf6ZGNNPFGDWAHTNOYOA2CdEB4g0,7980
80
- jupyter_duckdb-1.2.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
81
- jupyter_duckdb-1.2.0.3.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
82
- jupyter_duckdb-1.2.0.3.dist-info/RECORD,,
80
+ jupyter_duckdb-1.2.0.4.dist-info/METADATA,sha256=lS30HHPD2YAmrAJ7IKUzkGXUpKIwe00JDszpd08bVjI,8796
81
+ jupyter_duckdb-1.2.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
82
+ jupyter_duckdb-1.2.0.4.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
83
+ jupyter_duckdb-1.2.0.4.dist-info/RECORD,,