jupyter-duckdb 1.2.0.1__py3-none-any.whl → 1.4.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. duckdb_kernel/db/Connection.py +3 -0
  2. duckdb_kernel/db/Table.py +8 -0
  3. duckdb_kernel/db/implementation/duckdb/Connection.py +27 -13
  4. duckdb_kernel/db/implementation/postgres/Connection.py +27 -12
  5. duckdb_kernel/db/implementation/sqlite/Connection.py +9 -3
  6. duckdb_kernel/kernel.py +407 -200
  7. duckdb_kernel/magics/MagicCommand.py +34 -10
  8. duckdb_kernel/magics/MagicCommandCallback.py +11 -7
  9. duckdb_kernel/magics/MagicCommandHandler.py +58 -9
  10. duckdb_kernel/magics/MagicState.py +11 -0
  11. duckdb_kernel/magics/__init__.py +1 -0
  12. duckdb_kernel/parser/DCParser.py +17 -7
  13. duckdb_kernel/parser/LogicParser.py +6 -6
  14. duckdb_kernel/parser/ParserError.py +18 -0
  15. duckdb_kernel/parser/RAParser.py +29 -21
  16. duckdb_kernel/parser/__init__.py +1 -0
  17. duckdb_kernel/parser/elements/DCOperand.py +7 -4
  18. duckdb_kernel/parser/elements/LogicElement.py +0 -2
  19. duckdb_kernel/parser/elements/RAElement.py +4 -1
  20. duckdb_kernel/parser/elements/RARelationReference.py +86 -0
  21. duckdb_kernel/parser/elements/RAUnaryOperator.py +6 -0
  22. duckdb_kernel/parser/elements/__init__.py +2 -1
  23. duckdb_kernel/parser/elements/binary/And.py +1 -1
  24. duckdb_kernel/parser/elements/binary/ConditionalSet.py +37 -10
  25. duckdb_kernel/parser/elements/binary/Cross.py +2 -2
  26. duckdb_kernel/parser/elements/binary/Difference.py +1 -1
  27. duckdb_kernel/parser/elements/binary/Divide.py +1 -1
  28. duckdb_kernel/parser/elements/binary/Division.py +0 -4
  29. duckdb_kernel/parser/elements/binary/FullOuterJoin.py +40 -0
  30. duckdb_kernel/parser/elements/binary/Join.py +4 -1
  31. duckdb_kernel/parser/elements/binary/LeftOuterJoin.py +27 -0
  32. duckdb_kernel/parser/elements/binary/LeftSemiJoin.py +27 -0
  33. duckdb_kernel/parser/elements/binary/RightOuterJoin.py +27 -0
  34. duckdb_kernel/parser/elements/binary/RightSemiJoin.py +27 -0
  35. duckdb_kernel/parser/elements/binary/__init__.py +21 -6
  36. duckdb_kernel/parser/elements/unary/AttributeRename.py +39 -0
  37. duckdb_kernel/parser/elements/unary/Projection.py +1 -1
  38. duckdb_kernel/parser/elements/unary/Rename.py +68 -14
  39. duckdb_kernel/parser/elements/unary/__init__.py +2 -0
  40. duckdb_kernel/parser/tokenizer/Token.py +24 -3
  41. duckdb_kernel/parser/util/QuerySplitter.py +87 -0
  42. duckdb_kernel/parser/util/RenamableColumnList.py +10 -2
  43. duckdb_kernel/tests/__init__.py +76 -0
  44. duckdb_kernel/tests/test_dc.py +483 -0
  45. duckdb_kernel/tests/test_ra.py +1966 -0
  46. duckdb_kernel/tests/test_result_comparison.py +173 -0
  47. duckdb_kernel/tests/test_sql.py +48 -0
  48. duckdb_kernel/util/ResultSetComparator.py +22 -4
  49. duckdb_kernel/util/SQL.py +6 -0
  50. duckdb_kernel/util/TestError.py +4 -0
  51. duckdb_kernel/visualization/Plotly.py +144 -0
  52. duckdb_kernel/visualization/RATreeDrawer.py +34 -2
  53. duckdb_kernel/visualization/__init__.py +1 -0
  54. duckdb_kernel/visualization/lib/__init__.py +53 -0
  55. duckdb_kernel/visualization/lib/plotly-3.0.1.min.js +3879 -0
  56. duckdb_kernel/visualization/lib/ra.css +3 -0
  57. duckdb_kernel/visualization/lib/ra.js +55 -0
  58. {jupyter_duckdb-1.2.0.1.dist-info → jupyter_duckdb-1.4.111.dist-info}/METADATA +53 -19
  59. jupyter_duckdb-1.4.111.dist-info/RECORD +104 -0
  60. {jupyter_duckdb-1.2.0.1.dist-info → jupyter_duckdb-1.4.111.dist-info}/WHEEL +1 -1
  61. jupyter_duckdb-1.2.0.1.dist-info/RECORD +0 -82
  62. {jupyter_duckdb-1.2.0.1.dist-info → jupyter_duckdb-1.4.111.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,173 @@
1
+ from duckdb_kernel.util.ResultSetComparator import ResultSetComparator
2
+
3
+
4
+ def test_equals():
5
+ rsc = ResultSetComparator([
6
+ (1, "Alice"),
7
+ (3, "Charlie")
8
+ ], [
9
+ (1, "Alice"),
10
+ (3, "Charlie")
11
+ ])
12
+
13
+ assert rsc.left_only == []
14
+ assert rsc.right_only == []
15
+ assert rsc.ordered_left_only == []
16
+ assert rsc.ordered_right_only == []
17
+
18
+
19
+ def test_equals_only_unordered():
20
+ rsc = ResultSetComparator([
21
+ (1, "Alice"),
22
+ (3, "Charlie")
23
+ ], [
24
+ (3, "Charlie"),
25
+ (1, "Alice")
26
+ ])
27
+
28
+ assert rsc.left_only == []
29
+ assert rsc.right_only == []
30
+ assert rsc.ordered_left_only == [(3, "Charlie")]
31
+ assert rsc.ordered_right_only == [(1, "Alice")]
32
+
33
+
34
+ def test_missing():
35
+ # first missing
36
+ rsc = ResultSetComparator([
37
+ (1, "Alice"),
38
+ (2, "Bob"),
39
+ (3, "Charlie")
40
+ ], [
41
+ (2, "Bob"),
42
+ (3, "Charlie")
43
+ ])
44
+
45
+ assert rsc.left_only == [(1, "Alice")]
46
+ assert rsc.right_only == []
47
+ assert rsc.ordered_left_only == [(1, "Alice")]
48
+ assert rsc.ordered_right_only == []
49
+
50
+ # middle missing
51
+ rsc = ResultSetComparator([
52
+ (1, "Alice"),
53
+ (2, "Bob"),
54
+ (3, "Charlie")
55
+ ], [
56
+ (1, "Alice"),
57
+ (3, "Charlie")
58
+ ])
59
+
60
+ assert rsc.left_only == [(2, "Bob")]
61
+ assert rsc.right_only == []
62
+ assert rsc.ordered_left_only == [(2, "Bob")]
63
+ assert rsc.ordered_right_only == []
64
+
65
+ # last missing
66
+ rsc = ResultSetComparator([
67
+ (1, "Alice"),
68
+ (2, "Bob"),
69
+ (3, "Charlie")
70
+ ], [
71
+ (1, "Alice"),
72
+ (2, "Bob")
73
+ ])
74
+
75
+ assert rsc.left_only == [(3, "Charlie")]
76
+ assert rsc.right_only == []
77
+ assert rsc.ordered_left_only == [(3, "Charlie")]
78
+ assert rsc.ordered_right_only == []
79
+
80
+
81
+ def test_unnecessary():
82
+ # first unnecessary
83
+ rsc = ResultSetComparator([
84
+ (2, "Bob"),
85
+ (3, "Charlie")
86
+ ], [
87
+ (1, "Alice"),
88
+ (2, "Bob"),
89
+ (3, "Charlie")
90
+ ])
91
+
92
+ assert rsc.left_only == []
93
+ assert rsc.right_only == [(1, "Alice")]
94
+ assert rsc.ordered_left_only == []
95
+ assert rsc.ordered_right_only == [(1, "Alice")]
96
+
97
+ # middle unnecessary
98
+ rsc = ResultSetComparator([
99
+ (1, "Alice"),
100
+ (3, "Charlie")
101
+ ], [
102
+ (1, "Alice"),
103
+ (2, "Bob"),
104
+ (3, "Charlie")
105
+ ])
106
+
107
+ assert rsc.left_only == []
108
+ assert rsc.right_only == [(2, "Bob")]
109
+ assert rsc.ordered_left_only == []
110
+ assert rsc.ordered_right_only == [(2, "Bob")]
111
+
112
+ # last unnecessary
113
+ rsc = ResultSetComparator([
114
+ (1, "Alice"),
115
+ (2, "Bob")
116
+ ], [
117
+ (1, "Alice"),
118
+ (2, "Bob"),
119
+ (3, "Charlie")
120
+ ])
121
+
122
+ assert rsc.left_only == []
123
+ assert rsc.right_only == [(3, "Charlie")]
124
+ assert rsc.ordered_left_only == []
125
+ assert rsc.ordered_right_only == [(3, "Charlie")]
126
+
127
+
128
+ def test_repeating():
129
+ # equal
130
+ rsc = ResultSetComparator([
131
+ (2, "Bob"),
132
+ (2, "Bob"),
133
+ (3, "Charlie")
134
+ ], [
135
+ (2, "Bob"),
136
+ (2, "Bob"),
137
+ (3, "Charlie")
138
+ ])
139
+
140
+ assert rsc.left_only == []
141
+ assert rsc.right_only == []
142
+ assert rsc.ordered_left_only == []
143
+ assert rsc.ordered_right_only == []
144
+
145
+ # missing
146
+ rsc = ResultSetComparator([
147
+ (2, "Bob"),
148
+ (2, "Bob"),
149
+ (3, "Charlie")
150
+ ], [
151
+ (2, "Bob"),
152
+ (3, "Charlie")
153
+ ])
154
+
155
+ assert rsc.left_only == [(2, "Bob")]
156
+ assert rsc.right_only == []
157
+ assert rsc.ordered_left_only == [(2, "Bob")]
158
+ assert rsc.ordered_right_only == []
159
+
160
+ # unnecessary
161
+ rsc = ResultSetComparator([
162
+ (2, "Bob"),
163
+ (3, "Charlie")
164
+ ], [
165
+ (2, "Bob"),
166
+ (2, "Bob"),
167
+ (3, "Charlie")
168
+ ])
169
+
170
+ assert rsc.left_only == []
171
+ assert rsc.right_only == [(2, "Bob")]
172
+ assert rsc.ordered_left_only == []
173
+ assert rsc.ordered_right_only == [(2, "Bob")]
@@ -0,0 +1,48 @@
1
+ import pytest
2
+
3
+ from duckdb_kernel.db.error import EmptyResultError
4
+ from . import Connection
5
+
6
+
7
+ def test_simple_queries():
8
+ query = 'SELECT Username FROM Users'
9
+
10
+ with Connection() as con:
11
+ assert con.execute_sql(query) == [
12
+ ('Alice',),
13
+ ('Bob',),
14
+ ('Charlie',)
15
+ ]
16
+
17
+
18
+ def test_empty_result():
19
+ with Connection() as con:
20
+ query = "SELECT Username FROM Users WHERE Username = 'abcdef'"
21
+ assert con.execute_sql(query) == []
22
+
23
+ with Connection() as con:
24
+ query = 'CREATE TABLE foo (bar INTEGER PRIMARY KEY)'
25
+ try:
26
+ assert con.execute_sql(query) == []
27
+ except EmptyResultError:
28
+ pass
29
+
30
+
31
+ def test_empty_queries():
32
+ for query in [
33
+ '',
34
+ ' ',
35
+ '\n',
36
+ '-- this is an empty query too'
37
+ ]:
38
+ with pytest.raises(EmptyResultError):
39
+ with Connection() as con:
40
+ con.execute_sql(query)
41
+
42
+ with pytest.raises(Exception):
43
+ with Connection() as con:
44
+ query = '-- this is a query with syntax errors\nFOR foo IN bar'
45
+ try:
46
+ con.execute_sql(query)
47
+ except EmptyResultError:
48
+ pass
@@ -1,11 +1,29 @@
1
+ import datetime
2
+ import decimal
1
3
  from datetime import date
2
- from typing import Tuple, List, Optional
4
+ from typing import Tuple, List, Optional, Any
3
5
 
4
6
 
5
7
  class ResultSetComparator:
6
8
  def __init__(self, left: List[Tuple | List], right: List[Tuple | List]):
7
- self._left: List[Tuple] = [tuple(t) for t in left]
8
- self._right: List[Tuple] = [tuple(t) for t in right]
9
+ self._left: List[Tuple] = [tuple(self.format_value(t)) for t in left]
10
+ self._right: List[Tuple] = [tuple(self.format_value(t)) for t in right]
11
+
12
+ @staticmethod
13
+ def format_value(value: Any) -> Any:
14
+ if isinstance(value, tuple) or isinstance(value, list):
15
+ return tuple(ResultSetComparator.format_value(t) for t in value)
16
+
17
+ if isinstance(value, datetime.datetime):
18
+ return value.strftime("%Y-%m-%d %H:%M:%S")
19
+ if isinstance(value, datetime.date):
20
+ return value.strftime("%Y-%m-%d")
21
+ if isinstance(value, datetime.time):
22
+ return value.strftime("%H:%M:%S")
23
+ if isinstance(value, decimal.Decimal):
24
+ return float(value)
25
+
26
+ return value
9
27
 
10
28
  @property
11
29
  def left_only(self) -> List[Tuple]:
@@ -67,7 +85,7 @@ class ResultSetComparator:
67
85
 
68
86
  for le, re in zip(left, right):
69
87
  if isinstance(le, float) or isinstance(re, float):
70
- if abs(le - re) > 1e-6:
88
+ if abs(le - re) > 1e-4:
71
89
  return False
72
90
  elif isinstance(le, date) or isinstance(re, date):
73
91
  if str(le) != str(re):
@@ -0,0 +1,6 @@
1
+ # see https://www.postgresql.org/docs/current/sql-commands.html
2
+ SQL_KEYWORDS = ('ABORT', 'ALTER', 'ANALYZE', 'BEGIN', 'CALL', 'CHECKPOINT', 'CLOSE', 'CLUSTER', 'COMMENT', 'COMMIT',
3
+ 'COPY', 'CREATE', 'DEALLOCATE', 'DECLARE', 'DELETE', 'DISCARD', 'DO', 'DROP', 'END', 'EXECUTE',
4
+ 'EXPLAIN', 'FETCH', 'GRANT', 'IMPORT', 'INSERT', 'LISTEN', 'LOAD', 'LOCK', 'MERGE', 'MOVE', 'NOTIFY',
5
+ 'PREPARE', 'REASSIGN', 'REFRESH', 'REINDEX', 'RELEASE', 'RESET', 'REVOKE', 'ROLLBACK', 'SAVEPOINT',
6
+ 'SECURITY', 'SELECT', 'SET', 'SHOW', 'START', 'TRUNCATE', 'UNLISTEN', 'UPDATE', 'VACUUM', 'VALUES')
@@ -0,0 +1,4 @@
1
+ class TestError(Exception):
2
+ @property
3
+ def message(self) -> str:
4
+ return str(self)
@@ -0,0 +1,144 @@
1
+ import json
2
+ from decimal import Decimal
3
+ from typing import Dict, List, Optional
4
+ from uuid import uuid4
5
+
6
+ from .lib import init_plotly
7
+
8
+
9
+ def __div_id() -> str:
10
+ return f'div-{str(uuid4())}'
11
+
12
+
13
+ def __layout(title: Optional[str]):
14
+ layout = {
15
+ 'dragmode': False,
16
+ 'xaxis': {
17
+ 'rangeselector': {
18
+ 'visible': False
19
+ }
20
+ }
21
+ }
22
+
23
+ if title is not None:
24
+ layout['title'] = {
25
+ 'text': title,
26
+ 'font': {
27
+ 'family': 'sans-serif',
28
+ 'size': 32,
29
+ 'color': 'rgb(0, 0, 0)'
30
+ },
31
+ 'xanchor': 'center'
32
+ }
33
+
34
+ return layout
35
+
36
+
37
+ def __config():
38
+ return {
39
+ 'displayModeBar': False,
40
+ 'scrollZoom': False
41
+ }
42
+
43
+
44
+ def __fix_decimal(x: List):
45
+ return [float(x) if isinstance(x, Decimal) else x
46
+ for x in x]
47
+
48
+
49
+ def draw_chart(title: Optional[str], traces: List[Dict] | Dict) -> str:
50
+ init = init_plotly()
51
+ div_id = __div_id()
52
+ layout = __layout(title)
53
+ config = __config()
54
+
55
+ if not isinstance(traces, str):
56
+ traces = json.dumps(traces)
57
+
58
+ return f'''
59
+ <script type="text/javascript">
60
+ {init}
61
+ </script>
62
+
63
+ <div id="{div_id}"></div>
64
+ <script type="text/javascript">
65
+ Plotly.newPlot('{div_id}', {traces}, {json.dumps(layout)}, {json.dumps(config)});
66
+ </script>
67
+ '''
68
+
69
+
70
+ def draw_scatter_chart(title: Optional[str], x, **ys) -> str:
71
+ return draw_chart(title, [
72
+ {
73
+ 'x': __fix_decimal(x),
74
+ 'y': __fix_decimal(y),
75
+ 'mode': 'markers',
76
+ 'type': 'scatter',
77
+ 'name': name
78
+ }
79
+ for name, y in ys.items()
80
+ ])
81
+
82
+
83
+ def draw_line_chart(title: Optional[str], x, **ys) -> str:
84
+ return draw_chart(title, [
85
+ {
86
+ 'x': __fix_decimal(x),
87
+ 'y': __fix_decimal(y),
88
+ 'mode': 'lines+markers',
89
+ 'name': name
90
+ }
91
+ for name, y in ys.items()
92
+ ])
93
+
94
+
95
+ def draw_bar_chart(title: Optional[str], x, **ys) -> str:
96
+ return draw_chart(title, [
97
+ {
98
+ 'x': __fix_decimal(x),
99
+ 'y': __fix_decimal(y),
100
+ 'type': 'bar',
101
+ 'name': name
102
+ }
103
+ for name, y in ys.items()
104
+ ])
105
+
106
+
107
+ def draw_pie_chart(title: Optional[str], x, y) -> str:
108
+ return draw_chart(title, [{
109
+ 'values': __fix_decimal(y),
110
+ 'labels': __fix_decimal(x),
111
+ 'type': 'pie'
112
+ }])
113
+
114
+
115
+ def draw_bubble_chart(title: Optional[str], x, y, s, c) -> str:
116
+ return draw_chart(title, [{
117
+ 'x': __fix_decimal(x),
118
+ 'y': __fix_decimal(y),
119
+ 'mode': 'markers',
120
+ 'marker': {
121
+ 'size': __fix_decimal(s),
122
+ 'color': __fix_decimal(c)
123
+ }
124
+ }])
125
+
126
+
127
+ def draw_heatmap_chart(title: Optional[str], x, y, z) -> str:
128
+ return draw_chart(title, [{
129
+ 'x': __fix_decimal(x[0]),
130
+ 'y': __fix_decimal(y[0]),
131
+ 'z': [__fix_decimal(v) for v in z[0]],
132
+ 'type': 'heatmap'
133
+ }])
134
+
135
+
136
+ __all__ = [
137
+ 'draw_chart',
138
+ 'draw_scatter_chart',
139
+ 'draw_line_chart',
140
+ 'draw_bar_chart',
141
+ 'draw_pie_chart',
142
+ 'draw_bubble_chart',
143
+ 'draw_heatmap_chart',
144
+ ]
@@ -1,12 +1,14 @@
1
- from typing import Dict
1
+ from typing import Dict, Optional
2
2
 
3
3
  from graphviz import Digraph
4
+ from uuid import uuid4
4
5
 
5
6
  from duckdb_kernel.db import Table
6
7
  from duckdb_kernel.parser.elements import RAElement
7
8
  from duckdb_kernel.util.formatting import row_count
8
9
  from .Drawer import Drawer
9
10
  from ..db import Connection
11
+ from .lib import *
10
12
 
11
13
 
12
14
  class RATreeDrawer(Drawer):
@@ -15,6 +17,9 @@ class RATreeDrawer(Drawer):
15
17
  self.root_node: RAElement = root_node
16
18
  self.tables: Dict[str, Table] = tables
17
19
 
20
+ self.nodes: Dict[str, RAElement] = {}
21
+ self.root_node_id: Optional[str] = None
22
+
18
23
  def to_graph(self) -> Digraph:
19
24
  # create graph
20
25
  ps = Digraph('Schema',
@@ -31,7 +36,11 @@ class RATreeDrawer(Drawer):
31
36
 
32
37
  def __add_node(self, ps: Digraph, node: RAElement) -> str:
33
38
  # use id of node object as identifier
34
- node_id = f'node_{id(node)}'
39
+ node_id = f'node_{str(uuid4()).replace("-", "_")}'
40
+
41
+ self.nodes[node_id] = node
42
+ if node == self.root_node:
43
+ self.root_node_id = node_id
35
44
 
36
45
  # generate child nodes
37
46
  child_ids = [self.__add_node(ps, child) for child in node.children]
@@ -69,3 +78,26 @@ class RATreeDrawer(Drawer):
69
78
 
70
79
  # return node identifier to generate edges
71
80
  return node_id
81
+
82
+ def to_interactive_svg(self) -> str:
83
+ div_id = f'div-{str(uuid4())}'
84
+
85
+ css = init_css()
86
+ ra = init_ra()
87
+ svg = self.to_svg(True)
88
+
89
+ return f'''
90
+ <style type="text/css">
91
+ {css}
92
+ </style>
93
+
94
+ <div id="{div_id}">
95
+ {svg}
96
+ </div>
97
+
98
+ <script type="text/javascript">
99
+ {ra}
100
+
101
+ animate_ra('{div_id}', '{self.root_node_id}')
102
+ </script>
103
+ '''
@@ -1,2 +1,3 @@
1
+ from .Plotly import *
1
2
  from .RATreeDrawer import RATreeDrawer
2
3
  from .SchemaDrawer import SchemaDrawer
@@ -0,0 +1,53 @@
1
+ import os
2
+
3
+ __CSS_INITIALIZED = False
4
+ __RA_INITIALIZED = False
5
+ __PLOTLY_INITIALIZED = False
6
+
7
+ __location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
8
+
9
+
10
+ def init_css() -> str:
11
+ global __CSS_INITIALIZED
12
+
13
+ if not __CSS_INITIALIZED:
14
+ with open(os.path.join(__location, 'ra.css')) as ra_file:
15
+ css = ra_file.read()
16
+ else:
17
+ css = ''
18
+
19
+ __CSS_INITIALIZED = True
20
+ return css
21
+
22
+
23
+ def init_ra() -> str:
24
+ global __RA_INITIALIZED
25
+
26
+ if not __RA_INITIALIZED:
27
+ with open(os.path.join(__location, 'ra.js')) as ra_file:
28
+ ra = ra_file.read()
29
+ else:
30
+ ra = ''
31
+
32
+ __RA_INITIALIZED = True
33
+ return ra
34
+
35
+
36
+ def init_plotly() -> str:
37
+ global __PLOTLY_INITIALIZED
38
+
39
+ if not __PLOTLY_INITIALIZED:
40
+ with open(os.path.join(__location, 'plotly-3.0.1.min.js')) as plotly_file:
41
+ plotly = plotly_file.read()
42
+ else:
43
+ plotly = ''
44
+
45
+ __PLOTLY_INITIALIZED = True
46
+ return plotly
47
+
48
+
49
+ __all__ = [
50
+ 'init_css',
51
+ 'init_ra',
52
+ 'init_plotly',
53
+ ]