jupyter-duckdb 1.2.7__py3-none-any.whl → 1.2.101__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckdb_kernel/kernel.py +86 -0
- duckdb_kernel/magics/MagicCommandHandler.py +9 -1
- duckdb_kernel/parser/elements/binary/Difference.py +1 -1
- duckdb_kernel/parser/elements/binary/Divide.py +1 -1
- duckdb_kernel/parser/elements/binary/Division.py +0 -4
- duckdb_kernel/parser/elements/binary/FullOuterJoin.py +10 -3
- duckdb_kernel/tests/__init__.py +76 -0
- duckdb_kernel/tests/test_dc.py +454 -0
- duckdb_kernel/tests/test_ra.py +1250 -0
- duckdb_kernel/tests/test_result_comparison.py +173 -0
- duckdb_kernel/tests/test_sql.py +48 -0
- duckdb_kernel/visualization/Plotly.py +154 -0
- duckdb_kernel/visualization/__init__.py +1 -0
- {jupyter_duckdb-1.2.7.dist-info → jupyter_duckdb-1.2.101.dist-info}/METADATA +2 -2
- {jupyter_duckdb-1.2.7.dist-info → jupyter_duckdb-1.2.101.dist-info}/RECORD +17 -11
- {jupyter_duckdb-1.2.7.dist-info → jupyter_duckdb-1.2.101.dist-info}/WHEEL +1 -1
- {jupyter_duckdb-1.2.7.dist-info → jupyter_duckdb-1.2.101.dist-info}/top_level.txt +0 -0
duckdb_kernel/kernel.py
CHANGED
|
@@ -54,6 +54,8 @@ class DuckDBKernel(Kernel):
|
|
|
54
54
|
MagicCommand('all_dc').arg('value', '1').on(self._all_dc_magic),
|
|
55
55
|
MagicCommand('auto_parser').disable('sql', 'ra', 'dc').code(True).on(self._auto_parser_magic),
|
|
56
56
|
MagicCommand('guess_parser').arg('value', '1').on(self._guess_parser_magic),
|
|
57
|
+
MagicCommand('plotly').arg('type').arg('mapping').opt('title').result(True).on(self._plotly_magic),
|
|
58
|
+
MagicCommand('plotly_raw').opt('title').result(True).on(self._plotly_raw_magic)
|
|
57
59
|
)
|
|
58
60
|
|
|
59
61
|
# create placeholders for database and tests
|
|
@@ -577,6 +579,90 @@ class DuckDBKernel(Kernel):
|
|
|
577
579
|
if e.depth > 0:
|
|
578
580
|
raise e
|
|
579
581
|
|
|
582
|
+
def _plotly_magic(self, silent: bool, cols: List, rows: List[Tuple], type: str, mapping: str, title: str = None):
|
|
583
|
+
# split mapping and handle asterisks
|
|
584
|
+
mapping = [m.strip() for m in mapping.split(',')]
|
|
585
|
+
|
|
586
|
+
for i in range(len(mapping)):
|
|
587
|
+
if mapping[i] == '*':
|
|
588
|
+
mapping = mapping[:i] + cols + mapping[i+1:]
|
|
589
|
+
|
|
590
|
+
# convert all column names to lower case
|
|
591
|
+
lower_cols = [c.lower() for c in cols]
|
|
592
|
+
lower_mapping = [m.lower() for m in mapping]
|
|
593
|
+
|
|
594
|
+
# map desired columns to indices
|
|
595
|
+
mapped_indices = {}
|
|
596
|
+
for ok, lk in zip(mapping, lower_mapping):
|
|
597
|
+
for i in range(len(lower_cols)):
|
|
598
|
+
if lk == lower_cols[i]:
|
|
599
|
+
mapped_indices[ok] = i
|
|
600
|
+
break
|
|
601
|
+
else:
|
|
602
|
+
raise ValueError(f'unknown column {ok}')
|
|
603
|
+
|
|
604
|
+
# map desired columns to value lists
|
|
605
|
+
mapped_values = {
|
|
606
|
+
m: [r[i] for r in rows]
|
|
607
|
+
for m, i in mapped_indices.items()
|
|
608
|
+
}
|
|
609
|
+
mapped_keys = iter(mapped_values.keys())
|
|
610
|
+
|
|
611
|
+
# get required chart type
|
|
612
|
+
match type.lower():
|
|
613
|
+
case 'scatter':
|
|
614
|
+
if len(lower_mapping) < 2: raise ValueError('scatter requires at least x and y values')
|
|
615
|
+
html = draw_scatter_chart(title,
|
|
616
|
+
mapped_values[next(mapped_keys)],
|
|
617
|
+
**{k: mapped_values[k] for k in mapped_keys})
|
|
618
|
+
case 'line':
|
|
619
|
+
if len(lower_mapping) < 2: raise ValueError('lines requires at least x and y values')
|
|
620
|
+
html = draw_line_chart(title,
|
|
621
|
+
mapped_values[next(mapped_keys)],
|
|
622
|
+
**{k: mapped_values[k] for k in mapped_keys})
|
|
623
|
+
|
|
624
|
+
case 'bar':
|
|
625
|
+
if len(lower_mapping) < 2: raise ValueError('bar requires at least x and y values')
|
|
626
|
+
html = draw_bar_chart(title,
|
|
627
|
+
mapped_values[next(mapped_keys)],
|
|
628
|
+
**{k: mapped_values[k] for k in mapped_keys})
|
|
629
|
+
|
|
630
|
+
case 'pie':
|
|
631
|
+
if len(lower_mapping) != 2: raise ValueError('pie requires labels and values')
|
|
632
|
+
html = draw_pie_chart(title,
|
|
633
|
+
mapped_values[next(mapped_keys)],
|
|
634
|
+
mapped_values[next(mapped_keys)])
|
|
635
|
+
|
|
636
|
+
case 'bubble':
|
|
637
|
+
if len(lower_mapping) != 4: raise ValueError('bubble requires x, y, size and color')
|
|
638
|
+
html = draw_bubble_chart(title,
|
|
639
|
+
mapped_values[next(mapped_keys)],
|
|
640
|
+
mapped_values[next(mapped_keys)],
|
|
641
|
+
mapped_values[next(mapped_keys)],
|
|
642
|
+
mapped_values[next(mapped_keys)])
|
|
643
|
+
|
|
644
|
+
case 'heatmap':
|
|
645
|
+
if len(lower_mapping) != 3: raise ValueError('heatmap requires x, y and z values')
|
|
646
|
+
html = draw_heatmap_chart(title,
|
|
647
|
+
mapped_values[next(mapped_keys)],
|
|
648
|
+
mapped_values[next(mapped_keys)],
|
|
649
|
+
mapped_values[next(mapped_keys)])
|
|
650
|
+
|
|
651
|
+
case _:
|
|
652
|
+
raise ValueError(f'unknown type: {type}')
|
|
653
|
+
|
|
654
|
+
# finally print the code
|
|
655
|
+
self.print_data(html, mime='text/html')
|
|
656
|
+
|
|
657
|
+
def _plotly_raw_magic(self, silent: bool, cols: List, rows: List[Tuple], title: str = None):
    """Print a chart built from the single value of a 1x1 query result.

    The only cell of the result is handed to ``draw_chart`` together with
    ``title`` and the returned markup is printed as ``text/html``.

    :param silent: not used by this method
    :param cols: column names of the query result
    :param rows: rows of the query result
    :param title: title forwarded to ``draw_chart``
    :raises ValueError: if the result does not consist of exactly one
        column and exactly one row
    """
    # both dimensions have to be exactly one; rejecting only results where
    # both are wrong would let an empty result reach rows[0][0]
    if len(cols) != 1 or len(rows) != 1:
        raise ValueError('expected exactly one column and one row')

    self.print_data(
        draw_chart(title, rows[0][0]),
        mime='text/html'
    )
|
|
665
|
+
|
|
580
666
|
# jupyter related functions
|
|
581
667
|
def do_execute(self, code: str, silent: bool,
|
|
582
668
|
store_history: bool = True, user_expressions: dict = None, allow_stdin: bool = False,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Dict, Tuple, List
|
|
2
|
+
from typing import Dict, Tuple, List
|
|
3
3
|
|
|
4
4
|
from . import MagicCommand, MagicCommandException, MagicCommandCallback
|
|
5
5
|
from .StringWrapper import StringWrapper
|
|
@@ -62,6 +62,11 @@ class MagicCommandHandler:
|
|
|
62
62
|
args = [group if group is not None else default
|
|
63
63
|
for group, (_, default, _) in zip(match.groups(), magic.args)]
|
|
64
64
|
|
|
65
|
+
args = [arg[1:-1]
|
|
66
|
+
if arg is not None and (arg[0] == '"' and arg[-1] == '"' or arg[0] == "'" and arg[-1] == "'")
|
|
67
|
+
else arg
|
|
68
|
+
for arg in args]
|
|
69
|
+
|
|
65
70
|
if any(arg is None for arg in args):
|
|
66
71
|
raise MagicCommandException(f'could not parse parameters for command "{command}"')
|
|
67
72
|
|
|
@@ -87,6 +92,9 @@ class MagicCommandHandler:
|
|
|
87
92
|
value = match.group(i + 2)
|
|
88
93
|
i += 3
|
|
89
94
|
|
|
95
|
+
if value is not None and (value[0] == '"' and value[-1] == '"' or value[0] == "'" and value[-1] == "'"):
|
|
96
|
+
value = value[1:-1]
|
|
97
|
+
|
|
90
98
|
if name is not None:
|
|
91
99
|
optionals[name.lower()] = value
|
|
92
100
|
|
|
@@ -8,7 +8,7 @@ from ...util.RenamableColumnList import RenamableColumnList
|
|
|
8
8
|
class Difference(RABinaryOperator):
|
|
9
9
|
@staticmethod
|
|
10
10
|
def symbols() -> Tuple[str, ...]:
|
|
11
|
-
return '\\'
|
|
11
|
+
return '-', '\\'
|
|
12
12
|
|
|
13
13
|
def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
|
|
14
14
|
# execute subqueries
|
|
@@ -27,10 +27,6 @@ class Division(RABinaryOperator):
|
|
|
27
27
|
# inter_name_left = ', '.join(l.current_name for l, _ in inter_cols)
|
|
28
28
|
inter_name_right = ', '.join(r.current_name for _, r in inter_cols)
|
|
29
29
|
|
|
30
|
-
print('-', diff_name)
|
|
31
|
-
print(inter_name)
|
|
32
|
-
print(inter_name_right)
|
|
33
|
-
|
|
34
30
|
# create sql
|
|
35
31
|
return f'''
|
|
36
32
|
SELECT {diff_name}
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from typing import Tuple, Dict
|
|
2
3
|
|
|
3
4
|
from duckdb_kernel.db import Table
|
|
4
5
|
from ..RABinaryOperator import RABinaryOperator
|
|
6
|
+
from ...util.RenamableColumn import RenamableColumn
|
|
5
7
|
from ...util.RenamableColumnList import RenamableColumnList
|
|
6
8
|
|
|
7
9
|
|
|
@@ -10,6 +12,13 @@ class FullOuterJoin(RABinaryOperator):
|
|
|
10
12
|
def symbols() -> Tuple[str, ...]:
|
|
11
13
|
return chr(10199), 'fjoin', 'ojoin'
|
|
12
14
|
|
|
15
|
+
@staticmethod
def _coalesce(c1: RenamableColumn, c2: Optional[RenamableColumn]) -> str:
    """Build the select expression for one output column of the join.

    Without a partner column the current name of ``c1`` is returned as is.
    With a partner column both names are wrapped in ``COALESCE`` and the
    expression is aliased back to the current name of ``c1``.
    """
    own_name = c1.current_name

    if c2 is None:
        return own_name

    return f'COALESCE({own_name}, {c2.current_name}) AS {own_name}'
|
|
21
|
+
|
|
13
22
|
def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
|
|
14
23
|
# execute subqueries
|
|
15
24
|
lq, lcols = self.left.to_sql(tables)
|
|
@@ -19,9 +28,7 @@ class FullOuterJoin(RABinaryOperator):
|
|
|
19
28
|
join_cols, all_cols = lcols.intersect(rcols)
|
|
20
29
|
|
|
21
30
|
replacements = {c1: c2 for c1, c2 in join_cols}
|
|
22
|
-
select_cols = [
|
|
23
|
-
f'COALESCE({c.current_name}, {replacements.get(c).current_name})' if c in replacements else c.current_name
|
|
24
|
-
for c in all_cols]
|
|
31
|
+
select_cols = [self._coalesce(c, replacements.get(c)) for c in all_cols]
|
|
25
32
|
select_clause = ', '.join(select_cols)
|
|
26
33
|
|
|
27
34
|
on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from duckdb_kernel.db import Connection as DB, Table
|
|
5
|
+
from duckdb_kernel.db.error import EmptyResultError
|
|
6
|
+
from duckdb_kernel.parser.elements import RAElement
|
|
7
|
+
from duckdb_kernel.parser.elements.binary import ConditionalSet
|
|
8
|
+
|
|
9
|
+
# Load the example schema once at import time. The file content is split at
# every semicolon and fragments that contain only whitespace are dropped.
# The path is relative, so it is resolved against the current working directory.
with open('examples/tables.sql', 'r', encoding='utf-8') as file:
    EXAMPLE_STMTS = list(filter(str.strip, file.read().split(';')))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Connection:
    """Context manager around a test database filled with the example tables.

    The backend is selected with the ``DB_TYPE`` environment variable:
    ``postgres`` and ``sqlite`` select those implementations, every other
    value (including an unset variable) selects DuckDB. On entering the
    context all statements from ``EXAMPLE_STMTS`` are executed; on leaving
    it the connection is closed.
    """

    @staticmethod
    def _connect() -> 'DB':
        """Open a connection to the backend selected by ``DB_TYPE``."""
        db_type = os.environ.get('DB_TYPE')

        if db_type == 'postgres':
            host = os.environ.get('POSTGRES_HOST', 'localhost')
            port = int(os.environ.get('POSTGRES_PORT', 5432))
            username = os.environ.get('POSTGRES_USER', 'postgres')
            password = os.environ.get('POSTGRES_PASSWORD', 'postgres')

            from duckdb_kernel.db.implementation.postgres import Connection as PostgreSQL
            return PostgreSQL(host, port, username, password, None)

        if db_type == 'sqlite':
            from duckdb_kernel.db.implementation.sqlite import Connection as SQLite
            return SQLite(':memory:')

        from duckdb_kernel.db.implementation.duckdb import Connection as DuckDB
        return DuckDB(':memory:')

    def __enter__(self):
        """Open the database, execute the example statements and return ``self``."""
        self.con: DB = self._connect()

        # __exit__ is not called when __enter__ raises, so the connection
        # has to be closed here if seeding the example data fails
        try:
            for stmt in EXAMPLE_STMTS:
                try:
                    self.con.execute(stmt)
                except EmptyResultError:
                    # presumably raised for statements that return no rows
                    # (e.g. DDL) -- TODO confirm; ignored on purpose
                    pass
        except BaseException:
            self.con.close()
            raise

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection; exceptions from the ``with`` body propagate."""
        self.con.close()

    @property
    def tables(self) -> Dict[str, Table]:
        """Return the result of ``analyze()`` on the connection (queried on every access)."""
        return self.con.analyze()

    def execute_sql_return_cols(self, query: str) -> Tuple[List, List]:
        """Execute ``query`` and return the connection's result (columns, rows)."""
        return self.con.execute(query)

    def execute_sql(self, query: str) -> List:
        """Execute ``query`` and return only the rows."""
        _, rows = self.execute_sql_return_cols(query)
        return rows

    def execute_ra_return_cols(self, root: RAElement) -> Tuple[List, List]:
        """Translate a relational algebra tree to SQL, run it and return columns and sorted rows."""
        sql = root.to_sql_with_renamed_columns(self.tables)
        cols, rows = self.execute_sql_return_cols(sql)

        # None is replaced by -1 in the sort key so that rows containing
        # NULLs can be ordered.
        # NOTE(review): -1 cannot be compared with str values, so a column
        # mixing NULL and text would raise TypeError here -- confirm that
        # the example data never does this.
        return cols, sorted(rows, key=lambda t: tuple(-1 if x is None else x for x in t))

    def execute_ra(self, root: RAElement) -> List:
        """Run a relational algebra tree and return only the sorted rows."""
        _, rows = self.execute_ra_return_cols(root)
        return rows

    def execute_dc_return_cols(self, root: ConditionalSet) -> Tuple[List, List]:
        """Translate a domain calculus expression to SQL, run it and return columns and sorted rows.

        Column names found in the name mapping returned by the translation
        are replaced by their mapped value; other names are kept unchanged.
        """
        sql, cnm = root.to_sql_with_renamed_columns(self.tables)
        cols, rows = self.execute_sql_return_cols(sql)

        return [cnm.get(c, c) for c in cols], sorted(rows)

    def execute_dc(self, root: ConditionalSet) -> List:
        """Run a domain calculus expression and return only the sorted rows."""
        _, rows = self.execute_dc_return_cols(root)
        return rows
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
__all__ = ['Connection']
|