jupyter-duckdb 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckdb_kernel/__init__.py +1 -0
- duckdb_kernel/kernel.json +0 -0
- duckdb_kernel/kernel.py +187 -126
- duckdb_kernel/magics/MagicCommand.py +63 -0
- duckdb_kernel/magics/MagicCommandCallback.py +21 -0
- duckdb_kernel/magics/MagicCommandException.py +2 -0
- duckdb_kernel/magics/MagicCommandHandler.py +71 -0
- duckdb_kernel/magics/__init__.py +4 -0
- duckdb_kernel/util/__init__.py +0 -0
- duckdb_kernel/util/formatting.py +26 -0
- duckdb_kernel/visualization/Column.py +18 -0
- duckdb_kernel/visualization/Constraint.py +11 -0
- duckdb_kernel/visualization/ForeignKey.py +15 -0
- duckdb_kernel/visualization/Table.py +27 -0
- duckdb_kernel/visualization/VizDrawer.py +219 -0
- duckdb_kernel/visualization/__init__.py +5 -0
- jupyter_duckdb-0.4.1.dist-info/METADATA +202 -0
- jupyter_duckdb-0.4.1.dist-info/RECORD +21 -0
- {jupyter_duckdb-0.3.2.dist-info → jupyter_duckdb-0.4.1.dist-info}/WHEEL +1 -1
- jupyter_duckdb-0.3.2.dist-info/METADATA +0 -17
- jupyter_duckdb-0.3.2.dist-info/RECORD +0 -8
- {jupyter_duckdb-0.3.2.dist-info → jupyter_duckdb-0.4.1.dist-info}/top_level.txt +0 -0
duckdb_kernel/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .kernel import DuckDBKernel
|
duckdb_kernel/kernel.json
CHANGED
|
File without changes
|
duckdb_kernel/kernel.py
CHANGED
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import math
|
|
2
3
|
import os
|
|
3
|
-
import re
|
|
4
4
|
import time
|
|
5
5
|
import traceback
|
|
6
6
|
from typing import Optional, Dict, List, Tuple
|
|
7
7
|
|
|
8
8
|
import duckdb
|
|
9
9
|
from ipykernel.kernelbase import Kernel
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
from .magics import *
|
|
12
|
+
from .util.formatting import row_count, rows_table, wrap_image
|
|
13
|
+
from .visualization import *
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
class DuckDBKernel(Kernel):
|
|
17
|
+
DEFAULT_MAX_ROWS = 20
|
|
18
|
+
|
|
14
19
|
implementation = 'DuckDB'
|
|
15
|
-
implementation_version = '0.
|
|
20
|
+
implementation_version = '0.8.1'
|
|
16
21
|
banner = 'DuckDB Kernel'
|
|
17
22
|
language_info = {
|
|
18
23
|
'name': 'duckdb',
|
|
@@ -23,6 +28,20 @@ class DuckDBKernel(Kernel):
|
|
|
23
28
|
def __init__(self, **kwargs):
|
|
24
29
|
super().__init__(**kwargs)
|
|
25
30
|
|
|
31
|
+
# register magic commands
|
|
32
|
+
self._magics: MagicCommandHandler = MagicCommandHandler()
|
|
33
|
+
|
|
34
|
+
self._magics.add(
|
|
35
|
+
MagicCommand('create').arg('database').opt('of').opt('with_tests').on(self._create_magic),
|
|
36
|
+
MagicCommand('load').arg('database').opt('with_tests').on(self._load_magic),
|
|
37
|
+
MagicCommand('test').arg('name').result(True).on(self._test_magic),
|
|
38
|
+
MagicCommand('all', 'all_rows').on(self._all_magic),
|
|
39
|
+
MagicCommand('max_rows').arg('count').on(self._max_rows_magic),
|
|
40
|
+
MagicCommand('query_max_rows').arg('count').on(self._query_max_rows_magic),
|
|
41
|
+
MagicCommand('schema').opt('lr').on(self._schema_magic)
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# create placeholders for database and tests
|
|
26
45
|
self._db: Optional[duckdb.DuckDBPyConnection] = None
|
|
27
46
|
self._tests: Optional[Dict] = None
|
|
28
47
|
|
|
@@ -36,7 +55,9 @@ class DuckDBKernel(Kernel):
|
|
|
36
55
|
def print_exception(self, e: Exception):
|
|
37
56
|
if isinstance(e, AssertionError):
|
|
38
57
|
text = str(e)
|
|
39
|
-
elif isinstance(e,
|
|
58
|
+
elif isinstance(e, MagicCommandException):
|
|
59
|
+
text = str(e)
|
|
60
|
+
elif isinstance(e, (duckdb.OperationalError, duckdb.ProgrammingError, duckdb.InvalidInputException)):
|
|
40
61
|
text = str(e)
|
|
41
62
|
else:
|
|
42
63
|
text = traceback.format_exc()
|
|
@@ -70,7 +91,8 @@ class DuckDBKernel(Kernel):
|
|
|
70
91
|
else:
|
|
71
92
|
return False
|
|
72
93
|
|
|
73
|
-
def _execute_stmt(self, query: str, silent: bool
|
|
94
|
+
def _execute_stmt(self, query: str, silent: bool,
|
|
95
|
+
max_rows: Optional[int]) -> Tuple[Optional[List[str]], Optional[List[List]]]:
|
|
74
96
|
if self._db is None:
|
|
75
97
|
raise AssertionError('load a database first')
|
|
76
98
|
|
|
@@ -81,22 +103,44 @@ class DuckDBKernel(Kernel):
|
|
|
81
103
|
et = time.time()
|
|
82
104
|
|
|
83
105
|
if not silent:
|
|
106
|
+
# print EXPLAIN queries as raw text
|
|
84
107
|
if query.strip().startswith('EXPLAIN'):
|
|
85
108
|
rows = cursor.fetchall()
|
|
86
109
|
for ekey, evalue in rows:
|
|
87
110
|
self.print_data(f'<b>{ekey}</b><br><pre>{evalue}</pre>')
|
|
88
111
|
|
|
112
|
+
return None, None
|
|
113
|
+
|
|
114
|
+
# print every other query as a table
|
|
89
115
|
else:
|
|
90
116
|
# table header
|
|
91
|
-
|
|
117
|
+
if cursor.description is None:
|
|
118
|
+
columns = []
|
|
119
|
+
else:
|
|
120
|
+
columns = [e[0] for e in cursor.description]
|
|
121
|
+
|
|
122
|
+
table_header = ''.join(f'<th>{c}</th>' for c in columns)
|
|
92
123
|
|
|
93
124
|
# table data
|
|
94
125
|
rows = cursor.fetchall()
|
|
95
126
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
127
|
+
if max_rows is not None and len(rows) > max_rows:
|
|
128
|
+
table_data = f'''
|
|
129
|
+
{rows_table(rows[:math.ceil(max_rows / 2)])}
|
|
130
|
+
<tr>
|
|
131
|
+
<td colspan="{len(columns)}"
|
|
132
|
+
style="text-align: center"
|
|
133
|
+
title="{row_count(len(rows) - max_rows)} omitted">
|
|
134
|
+
...
|
|
135
|
+
</td>
|
|
136
|
+
</tr>
|
|
137
|
+
{rows_table(rows[-math.floor(max_rows // 2):])}
|
|
138
|
+
'''
|
|
139
|
+
else:
|
|
140
|
+
table_data = ''.join(map(
|
|
141
|
+
lambda row: '<tr>' + ''.join(map(lambda e: f'<td>{e}</td>', row)) + '</tr>',
|
|
142
|
+
rows
|
|
143
|
+
))
|
|
100
144
|
|
|
101
145
|
# send to client
|
|
102
146
|
self.print_data(f'''
|
|
@@ -106,12 +150,18 @@ class DuckDBKernel(Kernel):
|
|
|
106
150
|
</table>
|
|
107
151
|
''')
|
|
108
152
|
|
|
109
|
-
self.print_data(f'{
|
|
153
|
+
self.print_data(f'{row_count(len(rows))} in {et - st:.3f}s')
|
|
110
154
|
|
|
111
|
-
return
|
|
155
|
+
return columns, rows
|
|
112
156
|
|
|
113
157
|
# magic command related functions
|
|
114
|
-
def
|
|
158
|
+
def _create_magic(self, silent: bool, path: str, of: Optional[str], with_tests: Optional[str]):
|
|
159
|
+
self._load(silent, path, True, of, with_tests)
|
|
160
|
+
|
|
161
|
+
def _load_magic(self, silent: bool, path: str, with_tests: Optional[str]):
|
|
162
|
+
self._load(silent, path, False, None, with_tests)
|
|
163
|
+
|
|
164
|
+
def _load(self, silent: bool, path: str, create: bool, of: Optional[str], with_tests: Optional[str]):
|
|
115
165
|
# unload current database if necessary
|
|
116
166
|
if self._unload_database():
|
|
117
167
|
if not silent:
|
|
@@ -121,24 +171,31 @@ class DuckDBKernel(Kernel):
|
|
|
121
171
|
if not silent:
|
|
122
172
|
self.print(f'{self.implementation} {self.implementation_version}\n')
|
|
123
173
|
|
|
124
|
-
#
|
|
125
|
-
if
|
|
126
|
-
|
|
174
|
+
# clean path
|
|
175
|
+
if path.startswith(("'", '"')):
|
|
176
|
+
path = path[1:]
|
|
177
|
+
if path.endswith(("'", '"')):
|
|
178
|
+
path = path[:-1]
|
|
127
179
|
|
|
128
|
-
|
|
129
|
-
|
|
180
|
+
# load new database
|
|
181
|
+
if create and os.path.exists(path):
|
|
182
|
+
os.remove(path)
|
|
130
183
|
|
|
131
|
-
if self._load_database(
|
|
184
|
+
if self._load_database(path, read_only=False):
|
|
132
185
|
if not silent:
|
|
133
|
-
self.print(f'loaded database {
|
|
186
|
+
self.print(f'loaded database {path}\n')
|
|
134
187
|
|
|
135
188
|
# copy data from source database
|
|
136
|
-
if
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if
|
|
141
|
-
|
|
189
|
+
if of is not None:
|
|
190
|
+
# clean path
|
|
191
|
+
if of.startswith(("'", '"')):
|
|
192
|
+
of = of[1:]
|
|
193
|
+
if of.endswith(("'", '"')):
|
|
194
|
+
of = of[:-1]
|
|
195
|
+
|
|
196
|
+
# load sql files
|
|
197
|
+
if of.endswith('.sql'):
|
|
198
|
+
with open(of, 'r') as file:
|
|
142
199
|
content = file.read()
|
|
143
200
|
|
|
144
201
|
# statements = re.split(r';\r?\n', content)
|
|
@@ -148,27 +205,28 @@ class DuckDBKernel(Kernel):
|
|
|
148
205
|
self._db.execute(content)
|
|
149
206
|
|
|
150
207
|
if not silent:
|
|
151
|
-
self.print(f'executed {
|
|
208
|
+
self.print(f'executed {of}\n')
|
|
152
209
|
|
|
210
|
+
# load database files
|
|
153
211
|
else:
|
|
154
|
-
with duckdb.connect(
|
|
155
|
-
|
|
156
|
-
for table, in
|
|
157
|
-
transfer_df =
|
|
212
|
+
with duckdb.connect(of, read_only=True) as of_db:
|
|
213
|
+
of_db.execute('SHOW TABLES')
|
|
214
|
+
for table, in of_db.fetchall():
|
|
215
|
+
transfer_df = of_db.query(f'SELECT * FROM {table}').to_df()
|
|
158
216
|
self._db.execute(f'CREATE TABLE {table} AS SELECT * FROM transfer_df')
|
|
159
217
|
|
|
160
218
|
if not silent:
|
|
161
219
|
self.print(f'transferred table {table}\n')
|
|
162
220
|
|
|
163
221
|
# load tests
|
|
164
|
-
if
|
|
222
|
+
if with_tests is None:
|
|
165
223
|
self._tests = {}
|
|
166
224
|
else:
|
|
167
|
-
with open(
|
|
225
|
+
with open(with_tests, 'r') as tests_file:
|
|
168
226
|
self._tests = json.load(tests_file)
|
|
169
|
-
self.print(f'loaded tests from {
|
|
227
|
+
self.print(f'loaded tests from {with_tests}\n')
|
|
170
228
|
|
|
171
|
-
def _test_magic(self,
|
|
229
|
+
def _test_magic(self, silent: bool, _: List[str], result: List[List], name: str):
|
|
172
230
|
# Testing makes no sense if there is no output.
|
|
173
231
|
if silent:
|
|
174
232
|
return
|
|
@@ -176,124 +234,127 @@ class DuckDBKernel(Kernel):
|
|
|
176
234
|
# extract data for test
|
|
177
235
|
data = self._tests[name]
|
|
178
236
|
|
|
179
|
-
# prepare comparison functions
|
|
180
|
-
def my_equals(row1, row2):
|
|
181
|
-
return len(row1) == len(row2) and all((x == y for x, y in zip(row1, row2)))
|
|
182
|
-
|
|
183
|
-
def my_in(row, rows):
|
|
184
|
-
for r in rows:
|
|
185
|
-
if my_equals(r, row):
|
|
186
|
-
return True
|
|
187
|
-
|
|
188
|
-
return False
|
|
189
|
-
|
|
190
237
|
# ordered test
|
|
191
238
|
if data['ordered']:
|
|
239
|
+
def my_equals(row1, row2):
|
|
240
|
+
return len(row1) == len(row2) and all((x == y for x, y in zip(row1, row2)))
|
|
241
|
+
|
|
192
242
|
rows = data['equals']
|
|
193
243
|
missing = len(rows) - len(result)
|
|
194
244
|
|
|
195
245
|
if missing > 0:
|
|
196
|
-
return self.print_data(
|
|
197
|
-
False, title=f'{missing} row{"" if missing == 1 else "s"} missing'
|
|
198
|
-
))
|
|
246
|
+
return self.print_data(wrap_image(False, f'{row_count(missing)} missing'))
|
|
199
247
|
|
|
200
248
|
if missing < 0:
|
|
201
|
-
return self.print_data(
|
|
202
|
-
False, title=f'{-missing} row{"" if -missing == 1 else "s"} more than required'
|
|
203
|
-
))
|
|
249
|
+
return self.print_data(wrap_image(False, f'{row_count(-missing)} more than required'))
|
|
204
250
|
|
|
205
251
|
for data_row, result_row in zip(data['equals'], result):
|
|
206
252
|
if not my_equals(data_row, result_row):
|
|
207
|
-
return self.print_data(
|
|
253
|
+
return self.print_data(wrap_image(False, 'found row without match'))
|
|
208
254
|
|
|
209
|
-
return self.print_data(
|
|
255
|
+
return self.print_data(wrap_image(True))
|
|
210
256
|
|
|
211
257
|
# unordered test
|
|
212
258
|
else:
|
|
213
|
-
|
|
259
|
+
# prepare data structures
|
|
260
|
+
test_tuples = [tuple(row) for row in data['equals']]
|
|
261
|
+
test_counts: Dict[Tuple, int] = {}
|
|
214
262
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
263
|
+
for row in test_tuples:
|
|
264
|
+
if row not in test_counts:
|
|
265
|
+
test_counts[row] = 1
|
|
266
|
+
else:
|
|
267
|
+
test_counts[row] += 1
|
|
219
268
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
False, title=f'{missing} row{"" if missing == 1 else "s"} missing'
|
|
223
|
-
))
|
|
224
|
-
|
|
225
|
-
over = 0
|
|
226
|
-
for element in result:
|
|
227
|
-
if not my_in(element, rows):
|
|
228
|
-
over += 1
|
|
229
|
-
|
|
230
|
-
if over > 0:
|
|
231
|
-
return self.print_data(checkmarkandcross.image_html(
|
|
232
|
-
False, title=f'{over} row{"" if over == 1 else "s"} more than required'
|
|
233
|
-
))
|
|
234
|
-
|
|
235
|
-
return self.print_data(checkmarkandcross.image_html(True, title='success'))
|
|
236
|
-
|
|
237
|
-
def _handle_magic(self, code: str, silent: bool):
|
|
238
|
-
code_lower = code.lower()
|
|
239
|
-
|
|
240
|
-
if code_lower.startswith('%load'):
|
|
241
|
-
# parse line
|
|
242
|
-
match = re.match(
|
|
243
|
-
r'''^%LOAD +([^ ]+?|'.+?'|".+?")( +WITH +([^ ]+?|'.+?'|".+?"))?$''',
|
|
244
|
-
code.strip(), re.IGNORECASE
|
|
245
|
-
)
|
|
246
|
-
if match is None:
|
|
247
|
-
raise AssertionError('usage: %LOAD target.db [WITH tests.json]')
|
|
248
|
-
|
|
249
|
-
# call
|
|
250
|
-
self._load_magic(silent, match.group(1), False, None, match.group(3))
|
|
251
|
-
|
|
252
|
-
elif code_lower.startswith('%create'):
|
|
253
|
-
# parse line
|
|
254
|
-
match = re.match(
|
|
255
|
-
r'''^%CREATE +([^ ]+?|'.+?'|".+?")( +FROM +([^ ]+?|'.+?'|".+?"))?( +WITH +([^ ]+?|'.+?'|".+?"))?$''',
|
|
256
|
-
code.strip(), re.IGNORECASE
|
|
257
|
-
)
|
|
258
|
-
if match is None:
|
|
259
|
-
raise AssertionError('usage: %CREATE target.db [FROM (source.db | source.sql)] [WITH tests.json]')
|
|
260
|
-
|
|
261
|
-
# call
|
|
262
|
-
self._load_magic(silent, match.group(1), True, match.group(3), match.group(5))
|
|
263
|
-
|
|
264
|
-
elif code_lower.startswith('%test'):
|
|
265
|
-
# parse line
|
|
266
|
-
match = re.match(
|
|
267
|
-
r'''^%TEST +([^ ]+?|'.+?'|".+?")$''',
|
|
268
|
-
code, re.IGNORECASE | re.MULTILINE
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
if match is None:
|
|
272
|
-
raise AssertionError('usage: %TEST name')
|
|
273
|
-
if match.group(1) not in self._tests:
|
|
274
|
-
raise AssertionError(f'test {match.group(1)} unknown')
|
|
275
|
-
|
|
276
|
-
# execute statement
|
|
277
|
-
description, rows = self._execute_stmt(code[match.end():], silent)
|
|
278
|
-
|
|
279
|
-
# execute tests
|
|
280
|
-
self._test_magic(match.group(1), description, rows, silent)
|
|
269
|
+
result_tuples = [tuple(row) for row in result]
|
|
270
|
+
result_counts: Dict[Tuple, int] = {}
|
|
281
271
|
|
|
272
|
+
for row in result_tuples:
|
|
273
|
+
if row not in result_counts:
|
|
274
|
+
result_counts[row] = 1
|
|
275
|
+
else:
|
|
276
|
+
result_counts[row] += 1
|
|
277
|
+
|
|
278
|
+
# calculate diffs
|
|
279
|
+
diff: Dict[Tuple, int] = {}
|
|
280
|
+
|
|
281
|
+
for row, count in test_counts.items():
|
|
282
|
+
diff[row] = result_counts.get(row, 0) - count
|
|
283
|
+
|
|
284
|
+
for row, count in result_counts.items():
|
|
285
|
+
if row not in diff:
|
|
286
|
+
diff[row] = count - test_counts.get(row, 0)
|
|
287
|
+
|
|
288
|
+
below = sum(max(0, -count) for count in diff.values())
|
|
289
|
+
above = sum(max(0, count) for count in diff.values())
|
|
290
|
+
|
|
291
|
+
# print result
|
|
292
|
+
if below > 0 and above > 0:
|
|
293
|
+
self.print_data(wrap_image(False, f'{row_count(below)} missing, {row_count(above)} unnecessary'))
|
|
294
|
+
elif below > 0:
|
|
295
|
+
self.print_data(wrap_image(False, f'{row_count(below)} missing'))
|
|
296
|
+
elif above > 0:
|
|
297
|
+
self.print_data(wrap_image(False, f'{row_count(above)} unnecessary'))
|
|
298
|
+
else:
|
|
299
|
+
self.print_data(wrap_image(True))
|
|
300
|
+
|
|
301
|
+
def _all_magic(self, silent: bool):
|
|
302
|
+
return {
|
|
303
|
+
'max_rows': None
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
def _max_rows_magic(self, silent: bool, count: str):
    """Handle ``%max_rows``: change the class-wide default row limit.

    :param count: an integer literal, or ``none`` (any case) to remove the limit.
    :raises ValueError: if *count* is neither ``none`` nor parseable by ``int``.
    """
    unlimited = count.lower() == 'none'
    DuckDBKernel.DEFAULT_MAX_ROWS = None if unlimited else int(count)
|
|
311
|
+
|
|
312
|
+
def _query_max_rows_magic(self, silent: bool, count: str):
|
|
313
|
+
return {
|
|
314
|
+
'max_rows': int(count) if count.lower() != 'none' else None
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
def _schema_magic(self, silent: bool, lr: Optional[str]):
|
|
318
|
+
if silent:
|
|
319
|
+
return
|
|
320
|
+
|
|
321
|
+
if lr.lower() == 'false':
|
|
322
|
+
lr = False
|
|
323
|
+
elif lr.isnumeric():
|
|
324
|
+
lr = bool(int(lr))
|
|
282
325
|
else:
|
|
283
|
-
|
|
326
|
+
lr = bool(lr)
|
|
327
|
+
|
|
328
|
+
vd = VizDrawer(self._db)
|
|
329
|
+
svg = vd.to_svg(lr)
|
|
330
|
+
|
|
331
|
+
self.print_data(svg)
|
|
284
332
|
|
|
285
333
|
# jupyter related functions
|
|
286
334
|
def do_execute(self, code: str, silent: bool,
|
|
287
335
|
store_history: bool = True, user_expressions: dict = None, allow_stdin: bool = False,
|
|
288
336
|
**kwargs):
|
|
289
337
|
try:
|
|
290
|
-
#
|
|
291
|
-
|
|
292
|
-
|
|
338
|
+
# get magic command
|
|
339
|
+
clean_code, pre_query_callbacks, post_query_callbacks = self._magics(silent, code)
|
|
340
|
+
|
|
341
|
+
# execute magic commands here if it does not depend on query results
|
|
342
|
+
execution_args = {
|
|
343
|
+
'max_rows': DuckDBKernel.DEFAULT_MAX_ROWS
|
|
344
|
+
}
|
|
293
345
|
|
|
294
|
-
|
|
346
|
+
for callback in pre_query_callbacks:
|
|
347
|
+
execution_args.update(callback())
|
|
348
|
+
|
|
349
|
+
# execute statement if needed
|
|
350
|
+
if clean_code.strip():
|
|
351
|
+
cols, rows = self._execute_stmt(clean_code, silent, **execution_args)
|
|
295
352
|
else:
|
|
296
|
-
|
|
353
|
+
cols, rows = None, None
|
|
354
|
+
|
|
355
|
+
# execute magic command here if it does depend on query results
|
|
356
|
+
for callback in post_query_callbacks:
|
|
357
|
+
callback(cols, rows)
|
|
297
358
|
|
|
298
359
|
return {
|
|
299
360
|
'status': 'ok',
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from typing import Any, List, Tuple, Callable, Dict
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MagicCommand:
    """Declarative description of a single magic command.

    A command is known under one or more names, takes a fixed list of
    positional arguments plus named optional arguments, and forwards a
    parsed invocation to every handler registered with :meth:`on`.
    The builder methods return ``self`` so that they can be chained.
    """

    # one parameter token: a run without spaces or a quoted string
    _ARG = '''([^ ]+?|'.+?'|".+?")'''

    def __init__(self, *names: str):
        self._names: Tuple[str] = names

        self._result: bool = False
        self._on: List[Callable] = []
        self._arguments: List[Tuple[str, str]] = []
        self._optionals: List[Tuple[str, Any, str]] = []

    # --- read access ---------------------------------------------------
    @property
    def names(self) -> Tuple[str]:
        return self._names

    @property
    def args(self) -> List[Tuple[str, str]]:
        return self._arguments

    @property
    def kwargs(self) -> List[Tuple[str, Any, str]]:
        return self._optionals

    @property
    def requires_query_result(self) -> bool:
        return self._result

    @property
    def parameters(self) -> str:
        """Regular expression matching this command's parameter string.

        Each positional argument contributes one group; each optional
        contributes three (the whole clause, its name, its value).
        """
        positional = ' +'.join(self._ARG for _ in self._arguments)
        optional = ''.join(
            f'( +({entry[0]}) +{self._ARG})?'
            for entry in self._optionals
        )

        return '^ *' + positional + optional + ' *$'

    # --- builder -------------------------------------------------------
    def arg(self, name: str, description: str = None) -> 'MagicCommand':
        """Append a required positional argument."""
        self._arguments.append((name, description))
        return self

    def opt(self, name: str, default_value: Any = None, description: str = None) -> 'MagicCommand':
        """Append a named optional argument with its default value."""
        self._optionals.append((name, default_value, description))
        return self

    def result(self, result: bool) -> 'MagicCommand':
        """Declare whether the handlers need the query result."""
        self._result = result
        return self

    def on(self, fun: Callable):
        """Register a handler that is invoked when the command is called."""
        self._on.append(fun)
        return self

    # --- invocation ----------------------------------------------------
    def __call__(self, silent: bool, *args, **kwargs) -> Dict[str, Any]:
        """Run all handlers in registration order and merge what they return.

        Handlers returning ``None`` contribute nothing; later handlers
        overwrite keys of earlier ones.
        """
        merged = {}

        for handler in self._on:
            partial = handler(silent, *args, **kwargs)
            if partial is not None:
                merged.update(partial.items())

        return merged
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from typing import Optional, List
|
|
2
|
+
|
|
3
|
+
from . import MagicCommand
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MagicCommandCallback:
    """A magic command bound to the arguments parsed for one invocation."""

    def __init__(self, mc: MagicCommand, silent: bool, *args, **kwargs):
        self._mc: MagicCommand = mc
        self._silent: bool = silent
        self._args = args
        self._kwargs = kwargs

    @property
    def requires_query_result(self) -> bool:
        """Whether the wrapped command wants the query result passed in."""
        return self._mc.requires_query_result

    def __call__(self, columns: Optional[List[str]] = None, rows: Optional[List[List]] = None):
        """Invoke the wrapped command with the stored arguments.

        ``columns`` and ``rows`` are inserted right after ``silent`` only
        when the command requires the query result; otherwise they are
        ignored.
        """
        leading = (columns, rows) if self.requires_query_result else ()
        return self._mc(self._silent, *leading, *self._args, **self._kwargs)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Tuple, List
|
|
3
|
+
|
|
4
|
+
from . import MagicCommand, MagicCommandException, MagicCommandCallback
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MagicCommandHandler:
    """Registry of magic commands and parser for the leading magic lines of a cell."""

    # a line starting with '%' or '%%' (but not '%%%'), the command name,
    # and the rest of that line as the raw parameter string
    _MAGIC_LINE = re.compile(r'^%{1,2}([^% ]+?)($| .+?$)', re.MULTILINE | re.IGNORECASE)

    def __init__(self):
        self._magics: Dict[str, MagicCommand] = {}

    def add(self, *command: MagicCommand):
        """Register every command under each of its lower-cased names."""
        for cmd in command:
            self._magics.update((key.lower(), cmd) for key in cmd.names)

    def __call__(self, silent: bool, code: str) -> Tuple[str, List[MagicCommandCallback], List[MagicCommandCallback]]:
        """Strip magic commands from the start of *code* and bind them.

        :return: the remaining code, the callbacks to run before the query,
            and the callbacks to run after it (those needing its result).
        :raises MagicCommandException: for an unknown command or parameters
            that do not match the command's pattern.
        """
        before_query: List[MagicCommandCallback] = []
        after_query: List[MagicCommandCallback] = []

        found = self._MAGIC_LINE.match(code)

        while found is not None:
            # cut the magic line and the character following it out of the code
            code = code[:found.start()] + code[found.end() + 1:]

            command = found.group(1).lower()
            if command not in self._magics:
                raise MagicCommandException(f'unknown magic command "{command}"')

            magic = self._magics[command]

            parsed = re.match(magic.parameters, found.group(2), re.IGNORECASE)
            if parsed is None:
                raise MagicCommandException(f'could not parse parameters for command "{command}"')

            groups = parsed.groups()

            # positional arguments occupy the first groups
            args = list(groups[:len(magic.args)])

            # optionals start from their defaults; every optional owns three
            # groups (clause, name, value), and a missing one leaves name None
            kwargs = {name: default for name, default, _ in magic.kwargs}
            optional_groups = groups[len(args):]

            for offset in range(0, len(optional_groups), 3):
                name = optional_groups[offset + 1]
                if name is not None:
                    kwargs[name.lower()] = optional_groups[offset + 2]

            callback = MagicCommandCallback(magic, silent, *args, **kwargs)
            target = after_query if magic.requires_query_result else before_query
            target.append(callback)

            found = self._MAGIC_LINE.match(code)

        return code, before_query, after_query
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
import checkmarkandcross
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def row_count(count: int) -> str:
    """Return *count* followed by ``row`` (exactly one) or ``rows`` (otherwise)."""
    suffix = '' if count == 1 else 's'
    return f'{count} row{suffix}'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def rows_table(rows: List[List]) -> str:
    """Render *rows* as a sequence of HTML ``<tr>`` elements.

    Every cell is converted with ``str`` and HTML-escaped, so values that
    contain ``<``, ``>`` or ``&`` are shown literally instead of being
    interpreted as markup.

    :param rows: the result rows; each row is an iterable of cell values.
    :return: the concatenated ``<tr>...</tr>`` markup, or ``''`` for no rows.
    """
    import html

    return ''.join(
        '<tr>' + ''.join(f'<td>{html.escape(str(cell))}</td>' for cell in row) + '</tr>'
        for row in rows
    )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def wrap_image(val: bool, msg: str = '') -> str:
    """Return an HTML snippet showing a check/cross image next to *msg*.

    The image comes from ``checkmarkandcross.image_html`` with *msg* as
    its title; the same text is repeated in a ``<span>`` beside it.
    """
    icon = checkmarkandcross.image_html(val, size=24, title=msg)

    return f'''
        <div style="display: flex; align-items: center; margin-top: 0.5rem">
            {icon}
            <span style="margin-left: 0.5rem">
                {msg}
            </span>
        </div>
    '''
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from .Table import Table
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Column:
    """A column of a table in the schema visualisation."""

    def __init__(self, table: 'Table', name: str, data_type: str):
        # the table this column belongs to; its `id` prefixes the column id
        self.table: 'Table' = table
        self.name: str = name
        self.data_type: str = data_type

    def __hash__(self):
        # hashed by name only; equality stays the default identity comparison
        return hash(self.name)

    @property
    def id(self) -> str:
        """Identifier built from the table id and the sanitised column name.

        Every character other than an ASCII letter, digit or underscore is
        replaced by ``_``. Digits are kept so that names such as ``col1``
        and ``col2`` do not collapse into the same identifier.
        """
        name = re.sub(r'[^A-Za-z0-9_]', '_', self.name)
        return f'{self.table.id}_column_{name}'
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from typing import Tuple
|
|
2
|
+
|
|
3
|
+
from . import Column
|
|
4
|
+
from . import Table
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Constraint:
    """A key constraint: its index, the owning table and the columns it covers."""

    def __init__(self, index: int, table: Table, columns: Tuple['Column', ...]):
        # columns covered by the constraint, in the order they were passed in
        self.columns: Tuple['Column', ...] = columns
        # table the constraint is defined on
        self.table: Table = table
        # index under which the constraint is stored and looked up
        self.index: int = index
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Tuple, Iterator
|
|
2
|
+
|
|
3
|
+
from . import Column
|
|
4
|
+
from . import Constraint
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ForeignKey:
    """A foreign key: the referencing columns and the constraint they point to."""

    def __init__(self, columns: Tuple['Column', ...], constraint: Constraint):
        self.columns: Tuple['Column', ...] = columns
        self.constraint: Constraint = constraint

    @property
    def references(self) -> Iterator[Tuple['Column', 'Column']]:
        """Yield ``(source, target)`` column pairs.

        The key's own columns are paired position by position with the
        columns of the constraint; pairing stops at the shorter of the two.
        """
        yield from zip(self.columns, self.constraint.columns)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from . import Column
|
|
5
|
+
from . import ForeignKey
|
|
6
|
+
from .Constraint import Constraint
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Table:
    """A table in the schema visualisation together with its columns and keys."""

    def __init__(self, name: str):
        self.name: str = name
        self.columns: List[Column] = []
        self.primary_key: Optional[Constraint] = None
        self.unique_keys: List[Constraint] = []
        self.foreign_keys: List[ForeignKey] = []

    @property
    def id(self) -> str:
        """Identifier derived from the table name, prefixed with ``table_``.

        Every character other than an ASCII letter, digit or underscore is
        replaced by ``_``. Digits are kept so that tables such as ``t1``
        and ``t2`` receive distinct identifiers.
        """
        name = re.sub(r'[^A-Za-z0-9_]', '_', self.name)
        return f'table_{name}'

    def get_column(self, name: str) -> "Column":
        """Return the first column whose name equals *name*.

        :raises AssertionError: if the table has no such column.
        """
        for column in self.columns:
            if column.name == name:
                return column

        raise AssertionError(f'could not find column {name} in table {self.name}')
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
from typing import Dict, List
|
|
2
|
+
|
|
3
|
+
from duckdb import DuckDBPyConnection
|
|
4
|
+
from graphviz import Digraph
|
|
5
|
+
|
|
6
|
+
from . import Constraint, Column, ForeignKey, Table
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class VizDrawer:
|
|
10
|
+
def __init__(self, con: DuckDBPyConnection):
    """Read the schema reachable through *con* into ``self.tables``.

    Base tables, their columns, primary keys, unique keys and foreign
    keys are queried in that order.

    :raises AssertionError: if a constraint names a table that is not a
        known base table, a constraint index is stored twice, a table
        has a second primary key, or a foreign key's constraint index was
        not stored by the primary/unique key passes.
    """
    self.tables: List[Table] = []

    by_name: Dict[str, Table] = {}
    constraints: Dict[int, Constraint] = {}

    def owner_of(table_name, constraint_index):
        # resolve the table a constraint row belongs to
        if table_name not in by_name:
            raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')

        return by_name[table_name]

    def store_constraints(query):
        # run the query, record one Constraint per row and hand it to the caller
        for table_name, constraint_index, constraint_columns in con.execute(query).fetchall():
            owner = owner_of(table_name, constraint_index)

            if constraint_index in constraints:
                raise AssertionError(f'constraint with index {constraint_index} already stored')

            stored = Constraint(
                constraint_index,
                owner,
                tuple(owner.get_column(c) for c in constraint_columns)
            )
            constraints[constraint_index] = stored

            yield table_name, owner, stored

    # Table names come first: the columns query below is not restricted
    # to base tables, and a table may have no constraint rows at all.
    for table_name, in con.execute('''
        SELECT table_name
        FROM information_schema.tables
        WHERE table_type == 'BASE TABLE'
    ''').fetchall():
        table = Table(table_name)

        self.tables.append(table)
        by_name[table_name] = table

    # Column names and data types; rows of unknown tables are ignored.
    for table_name, column_name, data_type in con.execute('''
        SELECT
            table_name,
            column_name,
            data_type
        FROM information_schema.columns
        ORDER BY ordinal_position ASC
    ''').fetchall():
        if table_name in by_name:
            table = by_name[table_name]
            table.columns.append(Column(table, column_name, data_type))

    # Primary keys: at most one per table.
    for table_name, table, constraint in store_constraints('''
        SELECT
            table_name,
            constraint_index,
            constraint_column_names
        FROM duckdb_constraints()
        WHERE constraint_type = 'PRIMARY KEY'
        ORDER BY constraint_index ASC
    '''):
        if table.primary_key is not None:
            raise AssertionError(f'discovered second primary key for table {table_name}')

        table.primary_key = constraint

    # Unique keys.
    for _, table, constraint in store_constraints('''
        SELECT
            table_name,
            constraint_index,
            constraint_column_names
        FROM duckdb_constraints()
        WHERE constraint_type = 'UNIQUE'
        ORDER BY constraint_index ASC
    '''):
        table.unique_keys.append(constraint)

    # Foreign keys.
    # NOTE(review): the referenced key is looked up under the foreign key
    # row's own constraint_index - presumably this matches how
    # duckdb_constraints() numbers rows in the targeted DuckDB version; confirm.
    for table_name, constraint_index, constraint_columns in con.execute('''
        SELECT
            table_name,
            constraint_index,
            constraint_column_names
        FROM duckdb_constraints()
        WHERE constraint_type = 'FOREIGN KEY'
        ORDER BY constraint_index ASC
    ''').fetchall():
        table = owner_of(table_name, constraint_index)

        if constraint_index not in constraints:
            raise AssertionError(f'constraint with index {constraint_index} not discovered previously')

        referenced = constraints[constraint_index]

        key = ForeignKey(tuple(table.get_column(c) for c in constraint_columns), referenced)
        table.foreign_keys.append(key)
|
|
132
|
+
|
|
133
|
+
def to_graph(self) -> Digraph:
    """Build a Graphviz digraph describing the schema.

    Every table in ``self.tables`` becomes one ``plaintext`` node whose label
    is an HTML-like table: the table name in the header row and one row per
    column (rendered by ``__column_to_html``). Every foreign key of a table
    becomes an edge from that table to the table owning the key's constraint,
    labelled ``FK<n>`` with the number assigned while the columns were
    rendered.

    :return: the assembled ``Digraph``; rendering is left to the caller.
    """
    # Local import: only needed to escape text placed into HTML-like labels.
    from html import escape

    # create graph
    ps = Digraph('Schema',
                 graph_attr={},
                 node_attr={
                     'shape': 'plaintext'
                 })

    # add nodes
    # fk_counter is filled as a side effect of rendering the columns and is
    # read again below when the edges are labelled, so nodes must come first.
    fk_counter: Dict[str, int] = {}

    for table in self.tables:
        columns = "\n".join(self.__column_to_html(table, column, fk_counter) for column in table.columns)

        # HTML-like labels must be well-formed markup: a raw '&', '<' or '>'
        # in an identifier would break the label, so escape the name here.
        table_name = escape(str(table.name), quote=False)

        ps.node(
            table.id,
            f'''<
                <table border="0" cellborder="1" cellspacing="0" cellpadding="5">
                    <tr>
                        <td><b>{table_name}</b></td>
                    </tr>
                    <tr>
                        <td>
                            <table border="0" cellborder="0" cellspacing="0">
                                {columns}
                            </table>
                        </td>
                    </tr>
                </table>
            >'''
        )

    # add edges
    for source_table in self.tables:
        for key in source_table.foreign_keys:
            target_table = key.constraint.table
            # Must use the raw (unescaped) table name: this is the same key
            # format __column_to_html used when it assigned the number.
            fk_counter_key = f'{source_table.name}_{key.constraint.index}'

            ps.edge(source_table.id, target_table.id, label=f'FK{fk_counter[fk_counter_key]}', arrowhead='vee')

    # return graph
    return ps
|
|
175
|
+
|
|
176
|
+
def to_svg(self, lr: bool) -> str:
    """Return the schema graph as SVG markup.

    :param lr: if true, sets the graph attribute ``rankdir`` to ``'LR'``
        before rendering; otherwise the graph attributes are left untouched.
    :return: the output of the graph's ``pipe(format='svg')`` call, decoded
        as UTF-8.
    """
    graph = self.to_graph()

    # Only override the layout direction when explicitly requested.
    if lr:
        graph.graph_attr['rankdir'] = 'LR'

    svg_bytes = graph.pipe(format='svg')
    return svg_bytes.decode('utf-8')
|
|
182
|
+
|
|
183
|
+
@staticmethod
def __column_to_html(table: Table, column: Column, fk_counter: Dict[str, int]) -> str:
    """Render one column as a ``<tr>`` row for a Graphviz HTML-like label.

    The row has three cells: the column name (bold if the column belongs to
    the table's primary key, underlined if it belongs to any unique key),
    the data type, and the ``(FK<n>)`` markers of every foreign key the
    column takes part in.

    :param table: table that owns ``column``; its ``primary_key``,
        ``unique_keys`` and ``foreign_keys`` are inspected.
    :param column: the column to render.
    :param fk_counter: maps ``'<table name>_<constraint index>'`` to the
        number shown for that foreign key. It is updated in place: a foreign
        key seen for the first time gets the next free number.
    :return: the HTML fragment for this column's row.
    """
    # Local import: only needed to escape text placed into HTML-like labels.
    from html import escape

    # HTML-like labels must be well-formed markup, so identifiers and type
    # names are escaped before any styling tags are wrapped around them.
    name = escape(str(column.name), quote=False)
    data_type = escape(str(column.data_type), quote=False)

    # style column name
    if table.primary_key is not None and column in table.primary_key.columns:
        name = f'<b>{name}</b>'
    # Underline once, no matter how many unique keys contain the column.
    if any(column in key.columns for key in table.unique_keys):
        name = f'<u>{name}</u>'

    # collect the numbers of all foreign keys this column takes part in
    fk_numbers = []
    for key in table.foreign_keys:
        if column in key.columns:
            # Raw table name on purpose: the same key format is used when
            # the edge labels are looked up.
            fk_counter_key = f'{table.name}_{key.constraint.index}'
            if fk_counter_key not in fk_counter:
                # The two literal zeros keep max() valid when fk_counter is
                # empty (max needs at least two positional arguments or one
                # iterable).
                fk_counter[fk_counter_key] = max(*fk_counter.values(), 0, 0) + 1

            fk_numbers.append(fk_counter[fk_counter_key])

    if fk_numbers:
        markers = " ".join(f'(FK{number})' for number in sorted(fk_numbers))
        fk = f'<i>{markers}</i>'
    else:
        fk = ''

    # convert to html
    # NOTE(review): Graphviz documents `port` for <table>/<td>; confirm it is
    # honoured (or at least harmless) on <tr>.
    return f'''
        <tr port="{column.id}">
            <td align="left">{name}</td>
            <td align="left">: {data_type}</td>
            <td align="left">{fk}</td>
        </tr>
    '''
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: jupyter-duckdb
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: a basic wrapper kernel for DuckDB
|
|
5
|
+
Home-page: https://github.com/erictroebs/jupyter-duckdb
|
|
6
|
+
Author: Eric Tröbs
|
|
7
|
+
Author-email: eric.troebs@tu-ilmenau.de
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/erictroebs/jupyter-duckdb/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.7
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: jupyter
|
|
15
|
+
Requires-Dist: duckdb ==0.8.1
|
|
16
|
+
Requires-Dist: graphviz ==0.20.1
|
|
17
|
+
Requires-Dist: checkmarkandcross
|
|
18
|
+
|
|
19
|
+
# DuckDB Kernel for Jupyter
|
|
20
|
+
|
|
21
|
+
This is a simple DuckDB wrapper kernel which accepts SQL as input, executes it using a previously loaded DuckDB instance
|
|
22
|
+
and formats the output as a table. There are some magic commands that make teaching easier with this kernel.
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
[Launch on Binder](https://mybinder.org/v2/git/https%3A%2F%2Fdbgit.prakinf.tu-ilmenau.de%2Fertr8623%2Fjupyter-duckdb.git/master)
|
|
27
|
+
|
|
28
|
+
## Table of Contents
|
|
29
|
+
|
|
30
|
+
- [Setup](#setup)
|
|
31
|
+
- [Using pip](#using-pip)
|
|
32
|
+
- [Using Docker](#using-docker)
|
|
33
|
+
- [Usage](#usage)
|
|
34
|
+
- [A Note on Magic Commands](#a-note-on-magic-commands)
|
|
35
|
+
- [Load a Database](#load-a-database)
|
|
36
|
+
- [Schema Diagrams](#schema-diagrams)
|
|
37
|
+
- [Number of Rows](#number-of-rows)
|
|
38
|
+
- [Ship Tests With Your Notebooks](#ship-tests-with-your-notebooks)
|
|
39
|
+
|
|
40
|
+
## Setup
|
|
41
|
+
|
|
42
|
+
### Using pip
|
|
43
|
+
|
|
44
|
+
Run `pip` to install the corresponding package from [pypi](https://pypi.org/project/jupyter-duckdb/) **after**
|
|
45
|
+
Jupyter is already installed.
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install jupyter-duckdb
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Register the kernel.
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
jupyter kernelspec install <path to the site-packages directory>/duckdb_kernel
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Now start Jupyter the usual way and the kernel should be available.
|
|
58
|
+
|
|
59
|
+
### Using Docker
|
|
60
|
+
|
|
61
|
+
Execute the following command to pull and run a prepared image.
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
docker run -p 8888:8888 troebs/jupyter:duckdb
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
This image can also be used with JupyterHub and the
|
|
68
|
+
[DockerSpawner / SwarmSpawner](https://github.com/jupyterhub/dockerspawner)
|
|
69
|
+
and probably with the
|
|
70
|
+
[kubespawner](https://github.com/jupyterhub/kubespawner).
|
|
71
|
+
You can also build your own image using the [Dockerfile](Dockerfile) in the repository.
|
|
72
|
+
|
|
73
|
+
## Usage
|
|
74
|
+
|
|
75
|
+
A detailed example can be found [in the repository](example/). The rest of this section describes the magic commands.
|
|
76
|
+
|
|
77
|
+
### A Note on Magic Commands
|
|
78
|
+
|
|
79
|
+
Many Jupyter kernels distinguish between magic commands for a single line starting with one percent sign and
|
|
80
|
+
others for a whole cell starting with two percent signs. The upcoming magic commands always apply to a whole cell.
|
|
81
|
+
Therefore, it does not matter whether you use one or two percent signs. However, the magic commands must always
|
|
82
|
+
be used at the beginning of a cell.
|
|
83
|
+
|
|
84
|
+
It is also possible to use more than one magic command per cell.
|
|
85
|
+
|
|
86
|
+
### Load a Database
|
|
87
|
+
|
|
88
|
+
To load the database two magic commands are available.
|
|
89
|
+
|
|
90
|
+
`CREATE` creates a new database and therefore overwrites files with the same name without prompting. Using the optional
|
|
91
|
+
parameter `OF` you can either provide another DuckDB file or a file with SQL statements. In the first case the included
|
|
92
|
+
tables will be copied to the new database, while in the second case the SQL statements are just executed. We find this
|
|
93
|
+
feature very useful to work in a temporary copy of the data and therefore be able to restart at any time. The last
|
|
94
|
+
optional parameter `WITH_TESTS` is described in detail [below](#ship-tests-with-your-notebooks).
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
%CREATE data.duckdb OF my_statements.sql
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
`LOAD` on the other hand loads an existing database and returns an error if it does not exist. (That is why `OF` cannot
|
|
101
|
+
be used with `LOAD`! `WITH_TESTS`, on the other hand, is also available with this magic command.)
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
%LOAD data.duckdb
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Only one database can be open at any time. If a new database is created or loaded, the current one is closed first and
|
|
108
|
+
saved to disk if necessary.
|
|
109
|
+
|
|
110
|
+
Please note that `:memory:` is also a valid file path for DuckDB. The data is then stored exclusively in the main
|
|
111
|
+
memory. In combination with `CREATE` and `OF` this makes it possible to work on a temporary copy in memory.
|
|
112
|
+
|
|
113
|
+
### Schema Diagrams
|
|
114
|
+
|
|
115
|
+
The magic command `SCHEMA` can be used to create a simple schema diagram of the loaded database, showing all created
|
|
116
|
+
tables, their columns and data types, but without any views. Primary keys are printed in bold and unique keys are
|
|
117
|
+
underlined. Foreign keys are also highlighted and the dependencies between the tables are shown by arrows.
|
|
118
|
+
|
|
119
|
+
The optional parameter `LR` can be set to a true value to force a horizontal layout. This saves visual space especially
|
|
120
|
+
for larger numbers of tables.
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
%SCHEMA LR 1
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Number of Rows
|
|
127
|
+
|
|
128
|
+
By default, only 20 rows are shown. All further lines are replaced by three dots. When hovering over the three dots
|
|
129
|
+
using the cursor, the number of omitted lines is displayed. Of course, the number of lines displayed can be changed.
|
|
130
|
+
|
|
131
|
+
The magic command `ALL_ROWS` and its short form `ALL` can be used to display **all** rows of the query in the same
|
|
132
|
+
cell. **Caution**: With large result sets this can lead to a frozen Jupyter instance.
|
|
133
|
+
|
|
134
|
+
```sql
|
|
135
|
+
%ALL_ROWS
|
|
136
|
+
SELECT *
|
|
137
|
+
FROM foo
|
|
138
|
+
-- all rows
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The magic command `QUERY_MAX_ROWS` followed by an integer can be used to change the number of displayed rows for the
|
|
142
|
+
current cell.
|
|
143
|
+
|
|
144
|
+
```sql
|
|
145
|
+
%QUERY_MAX_ROWS 50
|
|
146
|
+
SELECT *
|
|
147
|
+
FROM foo
|
|
148
|
+
-- 50 rows
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
The magic command `MAX_ROWS` followed by an integer can be used to change the number of displayed rows for all future
|
|
152
|
+
queries including the current cell.
|
|
153
|
+
|
|
154
|
+
```sql
|
|
155
|
+
%MAX_ROWS 30
|
|
156
|
+
SELECT *
|
|
157
|
+
FROM foo
|
|
158
|
+
-- 30 rows
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
```sql
|
|
162
|
+
SELECT *
|
|
163
|
+
FROM bar
|
|
164
|
+
-- 30 rows
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Ship Tests With Your Notebooks
|
|
168
|
+
|
|
169
|
+
Simple tests can be loaded together with the database with the help of the `WITH_TESTS` parameter. These tests are
|
|
170
|
+
stored as a JSON file. Each test is assigned a unique name, a result set and whether the test should check the order
|
|
171
|
+
of the result. A very simple test file looks like the following JSON object:
|
|
172
|
+
|
|
173
|
+
```json
|
|
174
|
+
{
|
|
175
|
+
"task1": {
|
|
176
|
+
"ordered": false,
|
|
177
|
+
"equals": [
|
|
178
|
+
[
|
|
179
|
+
1,
|
|
180
|
+
"Name 1"
|
|
181
|
+
],
|
|
182
|
+
[
|
|
183
|
+
2,
|
|
184
|
+
"Name 2"
|
|
185
|
+
]
|
|
186
|
+
]
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
To bind a test to a cell, use the magic command `TEST` in combination with a name. After the cell is executed, the
|
|
192
|
+
result is evaluated and then displayed below the query result.
|
|
193
|
+
|
|
194
|
+
```sql
|
|
195
|
+
%TEST task1
|
|
196
|
+
SELECT 2, 'Name 2'
|
|
197
|
+
UNION
|
|
198
|
+
SELECT 1, 'Name 1'
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Disclaimer: The integrated testing is work-in-progress and thus subject to potentially incompatible changes and
|
|
202
|
+
enhancements.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
duckdb_kernel/__init__.py,sha256=6auU6zeJrsA4fxPSr2PYamS8fG-SMXTn5YQFXF2cseo,33
|
|
2
|
+
duckdb_kernel/__main__.py,sha256=Z3GwHEBWoQjNm2Y84ijnbA0Lk66L7nsFREuqhZ_ptk0,165
|
|
3
|
+
duckdb_kernel/kernel.json,sha256=_7E8Ci2FSdCvnzCjsOaue8QE8AvpS5JLQuxORO5IGtA,127
|
|
4
|
+
duckdb_kernel/kernel.py,sha256=n83u1M3I2dID_CxZRp9atQq1yk168NwICAJo6nVyRKs,13196
|
|
5
|
+
duckdb_kernel/magics/MagicCommand.py,sha256=d4Chj2G9CfX18Y5ZcH5E_Ovx0fueh-Eq54nLH--cgis,1779
|
|
6
|
+
duckdb_kernel/magics/MagicCommandCallback.py,sha256=sCGsUbQUmUctGpBQRtkca44tYCLI8u4Spo6ntMggmFc,706
|
|
7
|
+
duckdb_kernel/magics/MagicCommandException.py,sha256=MwuWkpA6NoCqz437urdI0RVXhbSbVdziuRoi7slYFPc,49
|
|
8
|
+
duckdb_kernel/magics/MagicCommandHandler.py,sha256=V47ef_nWptg7ClwNPKaEVxjQ5prAcMpCk5jXI29RpPA,2319
|
|
9
|
+
duckdb_kernel/magics/__init__.py,sha256=DA8gnQeRCUt1Scy3_NQ9w5CPmMEY9i8YwB-g392pN1U,204
|
|
10
|
+
duckdb_kernel/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
duckdb_kernel/util/formatting.py,sha256=rxY6rBF-p_mk_HS1Z2PrHelJ-IElxYl6GLaDS9hZJ1U,653
|
|
12
|
+
duckdb_kernel/visualization/Column.py,sha256=UXHxczsT6HalANH0CaklEVCyJZg1l0cmq-KGRWXt2-A,422
|
|
13
|
+
duckdb_kernel/visualization/Constraint.py,sha256=1YgUHk7s8mHCVedbcuJKyXDykj7_ybbwT3Dk9p2VMis,287
|
|
14
|
+
duckdb_kernel/visualization/ForeignKey.py,sha256=iurUAXwTwSIpLXsL0B7BA8jqDTfW4_wkeHxoqQbZwiU,470
|
|
15
|
+
duckdb_kernel/visualization/Table.py,sha256=Jv9un_oX-nupx2EqzJDn_UHtAwddgFGSEapho2kIDrY,756
|
|
16
|
+
duckdb_kernel/visualization/VizDrawer.py,sha256=435Ejrp4nEnlnnL2-cu9IHUmgkhSSQQ-04EJtbm3T8g,7568
|
|
17
|
+
duckdb_kernel/visualization/__init__.py,sha256=BfWfACqoxtagVQxK1eAM2r_VbxDf0psPO_0fQWCiiro,155
|
|
18
|
+
jupyter_duckdb-0.4.1.dist-info/METADATA,sha256=CvKQQeaSEgPzWIoLXv1UkRVmeaAsqY7i-VMrA2GDqoU,6563
|
|
19
|
+
jupyter_duckdb-0.4.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
20
|
+
jupyter_duckdb-0.4.1.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
|
|
21
|
+
jupyter_duckdb-0.4.1.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: jupyter-duckdb
|
|
3
|
-
Version: 0.3.2
|
|
4
|
-
Summary: a basic wrapper kernel for DuckDB
|
|
5
|
-
Home-page: https://github.com/erictroebs/jupyter-duckdb
|
|
6
|
-
Author: Eric Tröbs
|
|
7
|
-
Author-email: eric.troebs@tu-ilmenau.de
|
|
8
|
-
Project-URL: Bug Tracker, https://github.com/erictroebs/jupyter-duckdb/issues
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
-
Classifier: Operating System :: OS Independent
|
|
12
|
-
Requires-Python: >=3.6
|
|
13
|
-
Description-Content-Type: text/markdown
|
|
14
|
-
Requires-Dist: jupyter
|
|
15
|
-
Requires-Dist: duckdb (==0.6.1)
|
|
16
|
-
|
|
17
|
-
# DuckDB Kernel for Jupyter
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
duckdb_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
duckdb_kernel/__main__.py,sha256=Z3GwHEBWoQjNm2Y84ijnbA0Lk66L7nsFREuqhZ_ptk0,165
|
|
3
|
-
duckdb_kernel/kernel.json,sha256=_7E8Ci2FSdCvnzCjsOaue8QE8AvpS5JLQuxORO5IGtA,127
|
|
4
|
-
duckdb_kernel/kernel.py,sha256=cYMSgJgcTjPOMvxZuXS9wsXZpP2TsovzTovI2VYOgQY,10762
|
|
5
|
-
jupyter_duckdb-0.3.2.dist-info/METADATA,sha256=QQ2rgkWRmphAfVzlIfM-cLyg0s3bkz8e3jOUwmFrxEM,588
|
|
6
|
-
jupyter_duckdb-0.3.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
7
|
-
jupyter_duckdb-0.3.2.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
|
|
8
|
-
jupyter_duckdb-0.3.2.dist-info/RECORD,,
|
|
File without changes
|