PyPI - jupyter-duckdb - Versions diffs - 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

jupyter-duckdb 0.3.2 (py3-none-any.whl) → 0.4.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

duckdb_kernel/__init__.py +1 -0
duckdb_kernel/kernel.json +0 -0
duckdb_kernel/kernel.py +187 -126
duckdb_kernel/magics/MagicCommand.py +63 -0
duckdb_kernel/magics/MagicCommandCallback.py +21 -0
duckdb_kernel/magics/MagicCommandException.py +2 -0
duckdb_kernel/magics/MagicCommandHandler.py +71 -0
duckdb_kernel/magics/__init__.py +4 -0
duckdb_kernel/util/__init__.py +0 -0
duckdb_kernel/util/formatting.py +26 -0
duckdb_kernel/visualization/Column.py +18 -0
duckdb_kernel/visualization/Constraint.py +11 -0
duckdb_kernel/visualization/ForeignKey.py +15 -0
duckdb_kernel/visualization/Table.py +27 -0
duckdb_kernel/visualization/VizDrawer.py +219 -0
duckdb_kernel/visualization/__init__.py +5 -0
jupyter_duckdb-0.4.1.dist-info/METADATA +202 -0
jupyter_duckdb-0.4.1.dist-info/RECORD +21 -0
{jupyter_duckdb-0.3.2.dist-info → jupyter_duckdb-0.4.1.dist-info}/WHEEL +1 -1
jupyter_duckdb-0.3.2.dist-info/METADATA +0 -17
jupyter_duckdb-0.3.2.dist-info/RECORD +0 -8
{jupyter_duckdb-0.3.2.dist-info → jupyter_duckdb-0.4.1.dist-info}/top_level.txt +0 -0

duckdb_kernel/__init__.py CHANGED Viewed

	@@ -0,0 +1 @@
1	+ from .kernel import DuckDBKernel

duckdb_kernel/kernel.json CHANGED Viewed

File without changes

duckdb_kernel/kernel.py CHANGED Viewed

@@ -1,18 +1,23 @@
 import json
+import math
 import os
-import re
 import time
 import traceback
 from typing import Optional, Dict, List, Tuple
 import duckdb
 from ipykernel.kernelbase import Kernel
-import checkmarkandcross
+from .magics import *
+from .util.formatting import row_count, rows_table, wrap_image
+from .visualization import *
 class DuckDBKernel(Kernel):
+    DEFAULT_MAX_ROWS = 20
     implementation = 'DuckDB'
-    implementation_version = '0.6.1'
+    implementation_version = '0.8.1'
     banner = 'DuckDB Kernel'
     language_info = {
         'name': 'duckdb',
@@ -23,6 +28,20 @@ class DuckDBKernel(Kernel):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
+        # register magic commands
+        self._magics: MagicCommandHandler = MagicCommandHandler()
+        self._magics.add(
+            MagicCommand('create').arg('database').opt('of').opt('with_tests').on(self._create_magic),
+            MagicCommand('load').arg('database').opt('with_tests').on(self._load_magic),
+            MagicCommand('test').arg('name').result(True).on(self._test_magic),
+            MagicCommand('all', 'all_rows').on(self._all_magic),
+            MagicCommand('max_rows').arg('count').on(self._max_rows_magic),
+            MagicCommand('query_max_rows').arg('count').on(self._query_max_rows_magic),
+            MagicCommand('schema').opt('lr').on(self._schema_magic)
+        )
+        # create placeholders for database and tests
         self._db: Optional[duckdb.DuckDBPyConnection] = None
         self._tests: Optional[Dict] = None
@@ -36,7 +55,9 @@ class DuckDBKernel(Kernel):
     def print_exception(self, e: Exception):
         if isinstance(e, AssertionError):
             text = str(e)
-        elif isinstance(e, (duckdb.OperationalError, duckdb.ProgrammingError)):
+        elif isinstance(e, MagicCommandException):
+            text = str(e)
+        elif isinstance(e, (duckdb.OperationalError, duckdb.ProgrammingError, duckdb.InvalidInputException)):
             text = str(e)
         else:
             text = traceback.format_exc()
@@ -70,7 +91,8 @@ class DuckDBKernel(Kernel):
         else:
             return False
-    def _execute_stmt(self, query: str, silent: bool) -> Tuple[List[str], List[List]]:
+    def _execute_stmt(self, query: str, silent: bool,
+                      max_rows: Optional[int]) -> Tuple[Optional[List[str]], Optional[List[List]]]:
         if self._db is None:
             raise AssertionError('load a database first')
@@ -81,22 +103,44 @@ class DuckDBKernel(Kernel):
             et = time.time()
             if not silent:
+                # print EXPLAIN queries as raw text
                 if query.strip().startswith('EXPLAIN'):
                     rows = cursor.fetchall()
                     for ekey, evalue in rows:
                         self.print_data(f'<b>{ekey}</b><br><pre>{evalue}</pre>')
+                    return None, None
+                # print every other query as a table
                 else:
                     # table header
-                    table_header = ''.join(map(lambda e: f'<th>{e[0]}</th>', cursor.description))
+                    if cursor.description is None:
+                        columns = []
+                    else:
+                        columns = [e[0] for e in cursor.description]
+                    table_header = ''.join(f'<th>{c}</th>' for c in columns)
                     # table data
                     rows = cursor.fetchall()
-                    table_data = ''.join(map(
-                        lambda row: '<tr>' + ''.join(map(lambda e: f'<td>{e}</td>', row)) + '</tr>',
-                        rows
-                    ))
+                    if max_rows is not None and len(rows) > max_rows:
+                        table_data = f'''
+                            {rows_table(rows[:math.ceil(max_rows / 2)])}
+                            <tr>
+                                <td colspan="{len(columns)}"
+                                    style="text-align: center"
+                                    title="{row_count(len(rows) - max_rows)} omitted">
+                                    ...
+                                </td>
+                            </tr>
+                            {rows_table(rows[-math.floor(max_rows // 2):])}
+                        '''
+                    else:
+                        table_data = ''.join(map(
+                            lambda row: '<tr>' + ''.join(map(lambda e: f'<td>{e}</td>', row)) + '</tr>',
+                            rows
+                        ))
                     # send to client
                     self.print_data(f'''
@@ -106,12 +150,18 @@ class DuckDBKernel(Kernel):
                         </table>
                     ''')
-                    self.print_data(f'{len(rows)} row{"" if len(rows) == 1 else "s"} in {et - st:.3f}s')
+                    self.print_data(f'{row_count(len(rows))} in {et - st:.3f}s')
-            return [e[0] for e in cursor.description], rows
+            return columns, rows
     # magic command related functions
-    def _load_magic(self, silent: bool, target: str, create: bool, source: Optional[str], tests: Optional[str]):
+    def _create_magic(self, silent: bool, path: str, of: Optional[str], with_tests: Optional[str]):
+        self._load(silent, path, True, of, with_tests)
+    def _load_magic(self, silent: bool, path: str, with_tests: Optional[str]):
+        self._load(silent, path, False, None, with_tests)
+    def _load(self, silent: bool, path: str, create: bool, of: Optional[str], with_tests: Optional[str]):
         # unload current database if necessary
         if self._unload_database():
             if not silent:
@@ -121,24 +171,31 @@ class DuckDBKernel(Kernel):
         if not silent:
             self.print(f'{self.implementation} {self.implementation_version}\n')
-        # load new database
-        if target.startswith(("'", '"')):
-            target = target[1:-1]
+        # clean path
+        if path.startswith(("'", '"')):
+            path = path[1:]
+        if path.endswith(("'", '"')):
+            path = path[:-1]
-        if create and os.path.exists(target):
-            os.remove(target)
+        # load new database
+        if create and os.path.exists(path):
+            os.remove(path)
-        if self._load_database(target, read_only=False):
+        if self._load_database(path, read_only=False):
             if not silent:
-                self.print(f'loaded database {target}\n')
+                self.print(f'loaded database {path}\n')
         # copy data from source database
-        if source is not None:
-            if source.startswith(("'", '"')):
-                source = source[1:-1]
-            if source.endswith('.sql'):
-                with open(source, 'r') as file:
+        if of is not None:
+            # clean path
+            if of.startswith(("'", '"')):
+                of = of[1:]
+            if of.endswith(("'", '"')):
+                of = of[:-1]
+            # load sql files
+            if of.endswith('.sql'):
+                with open(of, 'r') as file:
                     content = file.read()
                     # statements = re.split(r';\r?\n', content)
@@ -148,27 +205,28 @@ class DuckDBKernel(Kernel):
                     self._db.execute(content)
                     if not silent:
-                        self.print(f'executed {source}')
+                        self.print(f'executed {of}\n')
+            # load database files
             else:
-                with duckdb.connect(source, read_only=True) as source_db:
-                    source_db.execute('SHOW TABLES')
-                    for table, in source_db.fetchall():
-                        transfer_df = source_db.query(f'SELECT * FROM {table}').to_df()
+                with duckdb.connect(of, read_only=True) as of_db:
+                    of_db.execute('SHOW TABLES')
+                    for table, in of_db.fetchall():
+                        transfer_df = of_db.query(f'SELECT * FROM {table}').to_df()
                         self._db.execute(f'CREATE TABLE {table} AS SELECT * FROM transfer_df')
                         if not silent:
                             self.print(f'transferred table {table}\n')
         # load tests
-        if tests is None:
+        if with_tests is None:
             self._tests = {}
         else:
-            with open(tests, 'r') as tests_file:
+            with open(with_tests, 'r') as tests_file:
                 self._tests = json.load(tests_file)
-                self.print(f'loaded tests from {tests}')
+                self.print(f'loaded tests from {with_tests}\n')
-    def _test_magic(self, name: str, description: List[str], result: List[List], silent: bool):
+    def _test_magic(self, silent: bool, _: List[str], result: List[List], name: str):
         # Testing makes no sense if there is no output.
         if silent:
             return
@@ -176,124 +234,127 @@ class DuckDBKernel(Kernel):
         # extract data for test
         data = self._tests[name]
-        # prepare comparison functions
-        def my_equals(row1, row2):
-            return len(row1) == len(row2) and all((x == y for x, y in zip(row1, row2)))
-        def my_in(row, rows):
-            for r in rows:
-                if my_equals(r, row):
-                    return True
-            return False
         # ordered test
         if data['ordered']:
+            def my_equals(row1, row2):
+                return len(row1) == len(row2) and all((x == y for x, y in zip(row1, row2)))
             rows = data['equals']
             missing = len(rows) - len(result)
             if missing > 0:
-                return self.print_data(checkmarkandcross.image_html(
-                    False, title=f'{missing} row{"" if missing == 1 else "s"} missing'
-                ))
+                return self.print_data(wrap_image(False, f'{row_count(missing)} missing'))
             if missing < 0:
-                return self.print_data(checkmarkandcross.image_html(
-                    False, title=f'{-missing} row{"" if -missing == 1 else "s"} more than required'
-                ))
+                return self.print_data(wrap_image(False, f'{row_count(-missing)} more than required'))
             for data_row, result_row in zip(data['equals'], result):
                 if not my_equals(data_row, result_row):
-                    return self.print_data(checkmarkandcross.image_html(False, title='found row without match'))
+                    return self.print_data(wrap_image(False, 'found row without match'))
-            return self.print_data(checkmarkandcross.image_html(True, title='success'))
+            return self.print_data(wrap_image(True))
         # unordered test
         else:
-            rows = data['equals']
+            # prepare data structures
+            test_tuples = [tuple(row) for row in data['equals']]
+            test_counts: Dict[Tuple, int] = {}
-            missing = 0
-            for element in rows:
-                if not my_in(element, result):
-                    missing += 1
+            for row in test_tuples:
+                if row not in test_counts:
+                    test_counts[row] = 1
+                else:
+                    test_counts[row] += 1
-            if missing > 0:
-                return self.print_data(checkmarkandcross.image_html(
-                    False, title=f'{missing} row{"" if missing == 1 else "s"} missing'
-                ))
-            over = 0
-            for element in result:
-                if not my_in(element, rows):
-                    over += 1
-            if over > 0:
-                return self.print_data(checkmarkandcross.image_html(
-                    False, title=f'{over} row{"" if over == 1 else "s"} more than required'
-                ))
-            return self.print_data(checkmarkandcross.image_html(True, title='success'))
-    def _handle_magic(self, code: str, silent: bool):
-        code_lower = code.lower()
-        if code_lower.startswith('%load'):
-            # parse line
-            match = re.match(
-                r'''^%LOAD +([^ ]+?|'.+?'|".+?")( +WITH +([^ ]+?|'.+?'|".+?"))?$''',
-                code.strip(), re.IGNORECASE
-            )
-            if match is None:
-                raise AssertionError('usage: %LOAD target.db [WITH tests.json]')
-            # call
-            self._load_magic(silent, match.group(1), False, None, match.group(3))
-        elif code_lower.startswith('%create'):
-            # parse line
-            match = re.match(
-                r'''^%CREATE +([^ ]+?|'.+?'|".+?")( +FROM +([^ ]+?|'.+?'|".+?"))?( +WITH +([^ ]+?|'.+?'|".+?"))?$''',
-                code.strip(), re.IGNORECASE
-            )
-            if match is None:
-                raise AssertionError('usage: %CREATE target.db [FROM (source.db | source.sql)] [WITH tests.json]')
-            # call
-            self._load_magic(silent, match.group(1), True, match.group(3), match.group(5))
-        elif code_lower.startswith('%test'):
-            # parse line
-            match = re.match(
-                r'''^%TEST +([^ ]+?|'.+?'|".+?")$''',
-                code, re.IGNORECASE | re.MULTILINE
-            )
-            if match is None:
-                raise AssertionError('usage: %TEST name')
-            if match.group(1) not in self._tests:
-                raise AssertionError(f'test {match.group(1)} unknown')
-            # execute statement
-            description, rows = self._execute_stmt(code[match.end():], silent)
-            # execute tests
-            self._test_magic(match.group(1), description, rows, silent)
+            result_tuples = [tuple(row) for row in result]
+            result_counts: Dict[Tuple, int] = {}
+            for row in result_tuples:
+                if row not in result_counts:
+                    result_counts[row] = 1
+                else:
+                    result_counts[row] += 1
+            # calculate diffs
+            diff: Dict[Tuple, int] = {}
+            for row, count in test_counts.items():
+                diff[row] = result_counts.get(row, 0) - count
+            for row, count in result_counts.items():
+                if row not in diff:
+                    diff[row] = count - test_counts.get(row, 0)
+            below = sum(max(0, -count) for count in diff.values())
+            above = sum(max(0, count) for count in diff.values())
+            # print result
+            if below > 0 and above > 0:
+                self.print_data(wrap_image(False, f'{row_count(below)} missing, {row_count(above)} unnecessary'))
+            elif below > 0:
+                self.print_data(wrap_image(False, f'{row_count(below)} missing'))
+            elif above > 0:
+                self.print_data(wrap_image(False, f'{row_count(above)} unnecessary'))
+            else:
+                self.print_data(wrap_image(True))
+    def _all_magic(self, silent: bool):
+        return {
+            'max_rows': None
+        }
+    def _max_rows_magic(self, silent: bool, count: str):
+        if count.lower() != 'none':
+            DuckDBKernel.DEFAULT_MAX_ROWS = int(count)
+        else:
+            DuckDBKernel.DEFAULT_MAX_ROWS = None
+    def _query_max_rows_magic(self, silent: bool, count: str):
+        return {
+            'max_rows': int(count) if count.lower() != 'none' else None
+        }
+    def _schema_magic(self, silent: bool, lr: Optional[str]):
+        if silent:
+            return
+        if lr.lower() == 'false':
+            lr = False
+        elif lr.isnumeric():
+            lr = bool(int(lr))
         else:
-            raise AssertionError('unknown magic command')
+            lr = bool(lr)
+        vd = VizDrawer(self._db)
+        svg = vd.to_svg(lr)
+        self.print_data(svg)
     # jupyter related functions
     def do_execute(self, code: str, silent: bool,
                    store_history: bool = True, user_expressions: dict = None, allow_stdin: bool = False,
                    **kwargs):
         try:
-            # handle magic commands
-            if code.startswith('%'):
-                self._handle_magic(code, silent)
+            # get magic command
+            clean_code, pre_query_callbacks, post_query_callbacks = self._magics(silent, code)
+            # execute magic commands here if it does not depend on query results
+            execution_args = {
+                'max_rows': DuckDBKernel.DEFAULT_MAX_ROWS
+            }
-            # execute statement otherwise
+            for callback in pre_query_callbacks:
+                execution_args.update(callback())
+            # execute statement if needed
+            if clean_code.strip():
+                cols, rows = self._execute_stmt(clean_code, silent, **execution_args)
             else:
-                self._execute_stmt(code, silent)
+                cols, rows = None, None
+            # execute magic command here if it does depend on query results
+            for callback in post_query_callbacks:
+                callback(cols, rows)
             return {
                 'status': 'ok',

duckdb_kernel/magics/MagicCommand.py ADDED Viewed

@@ -0,0 +1,63 @@
+from typing import Any, List, Tuple, Callable, Dict
+class MagicCommand:
+    _ARG = '''([^ ]+?|'.+?'|".+?")'''
+    def __init__(self, *names: str):
+        self._names: Tuple[str] = names
+        self._arguments: List[Tuple[str, str]] = []
+        self._optionals: List[Tuple[str, Any, str]] = []
+        self._on: List[Callable] = []
+        self._result: bool = False
+    @property
+    def names(self) -> Tuple[str]:
+        return self._names
+    @property
+    def args(self) -> List[Tuple[str, str]]:
+        return self._arguments
+    @property
+    def kwargs(self) -> List[Tuple[str, Any, str]]:
+        return self._optionals
+    @property
+    def requires_query_result(self) -> bool:
+        return self._result
+    def arg(self, name: str, description: str = None) -> 'MagicCommand':
+        self._arguments.append((name, description))
+        return self
+    def opt(self, name: str, default_value: Any = None, description: str = None) -> 'MagicCommand':
+        self._optionals.append((name, default_value, description))
+        return self
+    def result(self, result: bool) -> 'MagicCommand':
+        self._result = result
+        return self
+    def on(self, fun: Callable):
+        self._on.append(fun)
+        return self
+    @property
+    def parameters(self) -> str:
+        args = ' +'.join([self._ARG] * len(self._arguments))
+        opts = ''.join([f'( +({name}) +{self._ARG})?' for name, *_ in self._optionals])
+        return f'^ *{args}{opts} *$'
+    def __call__(self, silent: bool, *args, **kwargs) -> Dict[str, Any]:
+        result = {}
+        for fun in self._on:
+            r = fun(silent, *args, **kwargs)
+            if r is not None:
+                for k, v in r.items():
+                    result[k] = v
+        return result

duckdb_kernel/magics/MagicCommandCallback.py ADDED Viewed

@@ -0,0 +1,21 @@
+from typing import Optional, List
+from . import MagicCommand
+class MagicCommandCallback:
+    def __init__(self, mc: MagicCommand, silent: bool, *args, **kwargs):
+        self._mc: MagicCommand = mc
+        self._silent: bool = silent
+        self._args = args
+        self._kwargs = kwargs
+    @property
+    def requires_query_result(self) -> bool:
+        return self._mc.requires_query_result
+    def __call__(self, columns: Optional[List[str]] = None, rows: Optional[List[List]] = None):
+        if self.requires_query_result:
+            return self._mc(self._silent, columns, rows, *self._args, **self._kwargs)
+        else:
+            return self._mc(self._silent, *self._args, **self._kwargs)

duckdb_kernel/magics/MagicCommandException.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ class MagicCommandException(Exception):
2	+     pass

duckdb_kernel/magics/MagicCommandHandler.py ADDED Viewed

@@ -0,0 +1,71 @@
+import re
+from typing import Dict, Tuple, List
+from . import MagicCommand, MagicCommandException, MagicCommandCallback
+class MagicCommandHandler:
+    def __init__(self):
+        self._magics: Dict[str, MagicCommand] = {}
+    def add(self, *command: MagicCommand):
+        for cmd in command:
+            for key in cmd.names:
+                key = key.lower()
+                self._magics[key] = cmd
+    def __call__(self, silent: bool, code: str) -> Tuple[str, List[MagicCommandCallback], List[MagicCommandCallback]]:
+        pre_query_callbacks = []
+        post_query_callbacks = []
+        while True:
+            # ensure code starts with '%' or '%%' but not with '%%%'
+            match = re.match(r'^%{1,2}([^% ]+?)($| .+?$)', code, re.MULTILINE | re.IGNORECASE)
+            if match is None:
+                break
+            # remove magic command from code
+            start, end = match.span()
+            code = code[:start] + code[end + 1:]
+            # extract command
+            command = match.group(1).lower()
+            if command not in self._magics:
+                raise MagicCommandException(f'unknown magic command "{command}"')
+            magic = self._magics[command]
+            # extract parameters
+            params = match.group(2)
+            match = re.match(magic.parameters, params, re.IGNORECASE)
+            if match is None:
+                raise MagicCommandException(f'could not parse parameters for command "{command}"')
+            # extract args
+            args = [g for g, _ in zip(match.groups(), magic.args)]
+            # extract kwargs
+            kwargs = {name: default for name, default, _ in magic.kwargs}
+            i = len(args) + 1
+            while i < len(match.groups()):
+                name = match.group(i + 1)
+                value = match.group(i + 2)
+                i += 3
+                if name is not None:
+                    kwargs[name.lower()] = value
+            # add to callbacks
+            callback = MagicCommandCallback(magic, silent, *args, **kwargs)
+            if not magic.requires_query_result:
+                pre_query_callbacks.append(callback)
+            else:
+                post_query_callbacks.append(callback)
+        # return callbacks
+        return code, pre_query_callbacks, post_query_callbacks

duckdb_kernel/magics/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .MagicCommand import MagicCommand
+from .MagicCommandCallback import MagicCommandCallback
+from .MagicCommandException import MagicCommandException
+from .MagicCommandHandler import MagicCommandHandler

duckdb_kernel/util/__init__.py ADDED Viewed

File without changes

duckdb_kernel/util/formatting.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import List
+import checkmarkandcross
+def row_count(count: int) -> str:
+    return f'{count} row{"" if count == 1 else "s"}'
+def rows_table(rows: List[List]) -> str:
+    return ''.join(map(
+        lambda row: '<tr>' + ''.join(map(lambda e: f'<td>{e}</td>', row)) + '</tr>',
+        rows
+    ))
+def wrap_image(val: bool, msg: str = '') -> str:
+    image = checkmarkandcross.image_html(val, size=24, title=msg)
+    return f'''
+        <div style="display: flex; align-items: center; margin-top: 0.5rem">
+            {image}
+            <span style="margin-left: 0.5rem">
+                {msg}
+            </span>
+        </div>
+    '''

duckdb_kernel/visualization/Column.py ADDED Viewed

@@ -0,0 +1,18 @@
+import re
+from .Table import Table
+class Column:
+    def __init__(self, table: Table, name: str, data_type: str):
+        self.table: Table = table
+        self.name: str = name
+        self.data_type: str = data_type
+    def __hash__(self):
+        return self.name.__hash__()
+    @property
+    def id(self) -> str:
+        name = re.sub(r'[^A-Za-z]', '_', self.name)
+        return f'{self.table.id}_column_{name}'

duckdb_kernel/visualization/Constraint.py ADDED Viewed

@@ -0,0 +1,11 @@
+from typing import Tuple
+from . import Column
+from . import Table
+class Constraint:
+    def __init__(self, index: int, table: Table, columns: Tuple['Column', ...]):
+        self.index: int = index
+        self.table: Table = table
+        self.columns: Tuple['Column', ...] = columns

duckdb_kernel/visualization/ForeignKey.py ADDED Viewed

@@ -0,0 +1,15 @@
+from typing import Tuple, Iterator
+from . import Column
+from . import Constraint
+class ForeignKey:
+    def __init__(self, columns: Tuple['Column', ...], constraint: Constraint):
+        self.columns: Tuple['Column', ...] = columns
+        self.constraint: Constraint = constraint
+    @property
+    def references(self) -> Iterator[Tuple['Column', 'Column']]:
+        for source, target in zip(self.columns, self.constraint.columns):
+            yield source, target

duckdb_kernel/visualization/Table.py ADDED Viewed

@@ -0,0 +1,27 @@
+import re
+from typing import List, Optional
+from . import Column
+from . import ForeignKey
+from .Constraint import Constraint
+class Table:
+    def __init__(self, name: str):
+        self.name: str = name
+        self.columns: List[Column] = []
+        self.primary_key: Optional[Constraint] = None
+        self.unique_keys: List[Constraint] = []
+        self.foreign_keys: List[ForeignKey] = []
+    @property
+    def id(self) -> str:
+        name = re.sub(r'[^A-Za-z]', '_', self.name)
+        return f'table_{name}'
+    def get_column(self, name: str) -> "Column":
+        for column in self.columns:
+            if column.name == name:
+                return column
+        raise AssertionError(f'could not find column {name} in table {self.name}')

duckdb_kernel/visualization/VizDrawer.py ADDED Viewed

@@ -0,0 +1,219 @@
+from typing import Dict, List
+from duckdb import DuckDBPyConnection
+from graphviz import Digraph
+from . import Constraint, Column, ForeignKey, Table
+class VizDrawer:
+    def __init__(self, con: DuckDBPyConnection):
+        self.tables: List[Table] = []
+        tables: Dict[str, Table] = {}
+        constraints: Dict[int, Constraint] = {}
+        # Get table names first. In the columns table we can not filter
+        # for base tables and some of the tables might not be contained
+        # in the constraints' information.
+        for table_name, in con.execute('''
+            SELECT table_name
+            FROM information_schema.tables
+            WHERE table_type == 'BASE TABLE'
+        ''').fetchall():
+            table = Table(table_name)
+            self.tables.append(table)
+            tables[table_name] = table
+        # Get column names and data types for each table.
+        for table_name, column_name, data_type in con.execute('''
+            SELECT
+                table_name,
+                column_name,
+                data_type
+            FROM information_schema.columns
+            ORDER BY ordinal_position ASC
+        ''').fetchall():
+            if table_name in tables:
+                table = tables[table_name]
+                column = Column(table, column_name, data_type)
+                table.columns.append(column)
+        # Find primary keys.
+        for table_name, constraint_index, constraint_columns in con.execute('''
+            SELECT
+                table_name,
+                constraint_index,
+                constraint_column_names
+            FROM duckdb_constraints()
+            WHERE constraint_type = 'PRIMARY KEY'
+            ORDER BY constraint_index ASC
+        ''').fetchall():
+            # get table
+            if table_name not in tables:
+                raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')
+            table = tables[table_name]
+            # store constraint
+            if constraint_index in constraints:
+                raise AssertionError(f'constraint with index {constraint_index} already stored')
+            constraint = Constraint(
+                constraint_index,
+                table,
+                tuple(table.get_column(c) for c in constraint_columns)
+            )
+            constraints[constraint_index] = constraint
+            # store key
+            if table.primary_key is not None:
+                raise AssertionError(f'discovered second primary key for table {table_name}')
+            table.primary_key = constraint
+        # Find unique keys.
+        for table_name, constraint_index, constraint_columns in con.execute('''
+            SELECT
+                table_name,
+                constraint_index,
+                constraint_column_names
+            FROM duckdb_constraints()
+            WHERE constraint_type = 'UNIQUE'
+            ORDER BY constraint_index ASC
+        ''').fetchall():
+            # get table
+            if table_name not in tables:
+                raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')
+            table = tables[table_name]
+            # store constraint
+            if constraint_index in constraints:
+                raise AssertionError(f'constraint with index {constraint_index} already stored')
+            constraint = Constraint(
+                constraint_index,
+                table,
+                tuple(table.get_column(c) for c in constraint_columns)
+            )
+            constraints[constraint_index] = constraint
+            # store key
+            table.unique_keys.append(constraint)
+        # Find foreign keys.
+        for table_name, constraint_index, constraint_columns in con.execute('''
+            SELECT
+                table_name,
+                constraint_index,
+                constraint_column_names
+            FROM duckdb_constraints()
+            WHERE constraint_type = 'FOREIGN KEY'
+            ORDER BY constraint_index ASC
+        ''').fetchall():
+            # get table
+            if table_name not in tables:
+                raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')
+            table = tables[table_name]
+            # lookup constraint
+            if constraint_index not in constraints:
+                raise AssertionError(f'constraint with index {constraint_index} not discovered previously')
+            constraint = constraints[constraint_index]
+            # store key
+            key = ForeignKey(tuple(table.get_column(c) for c in constraint_columns), constraint)
+            table.foreign_keys.append(key)
+    def to_graph(self) -> Digraph:
+        # create graph
+        ps = Digraph('Schema',
+                     graph_attr={},
+                     node_attr={
+                         'shape': 'plaintext'
+                     })
+        # add nodes
+        fk_counter: Dict[str, int] = {}
+        for table in self.tables:
+            columns = "\n".join(self.__column_to_html(table, column, fk_counter) for column in table.columns)
+            ps.node(
+                table.id,
+                f'''<
+                    <table border="0" cellborder="1" cellspacing="0" cellpadding="5">
+                        <tr>
+                            <td><b>{table.name}</b></td>
+                        </tr>
+                        <tr>
+                            <td>
+                                <table border="0" cellborder="0" cellspacing="0">
+                                    {columns}
+                                </table>
+                            </td>
+                        </tr>
+                    </table>
+                >'''
+            )
+        # add edges
+        for source_table in self.tables:
+            for key in source_table.foreign_keys:
+                target_table = key.constraint.table
+                fk_counter_key = f'{source_table.name}_{key.constraint.index}'
+                ps.edge(source_table.id, target_table.id, label=f'FK{fk_counter[fk_counter_key]}', arrowhead='vee')
+        # return graph
+        return ps
+    def to_svg(self, lr: bool) -> str:
+        ps = self.to_graph()
+        if lr:
+            ps.graph_attr['rankdir'] = 'LR'
+        return ps.pipe(format='svg').decode('utf-8')
+    @staticmethod
+    def __column_to_html(table: Table, column: Column, fk_counter: Dict[str, int]):
+        name = column.name
+        data_type = column.data_type
+        # extract and style column name
+        if table.primary_key is not None and column in table.primary_key.columns:
+            name = f'<b>{name}</b>'
+        for key in table.unique_keys:
+            if column in key.columns:
+                name = f'<u>{name}</u>'
+                break
+        # extract foreign keys
+        fk = []
+        for key in table.foreign_keys:
+            if column in key.columns:
+                fk_counter_key = f'{table.name}_{key.constraint.index}'
+                if fk_counter_key not in fk_counter:
+                    fk_counter[fk_counter_key] = max(*fk_counter.values(), 0, 0) + 1
+                fk.append(fk_counter[fk_counter_key])
+        if len(fk) > 0:
+            fk = map(lambda x: f'(FK{x})', sorted(fk))
+            fk = f'<i>{" ".join(fk)}</i>'
+        else:
+            fk = ''
+        # convert to html
+        return f'''
+            <tr port="{column.id}">
+                <td align="left">{name}</td>
+                <td align="left">: {data_type}</td>
+                <td align="left">{fk}</td>
+            </tr>
+        '''

duckdb_kernel/visualization/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .Column import Column
+from .Constraint import Constraint
+from .ForeignKey import ForeignKey
+from .Table import Table
+from .VizDrawer import VizDrawer

jupyter_duckdb-0.4.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,202 @@
+Metadata-Version: 2.1
+Name: jupyter-duckdb
+Version: 0.4.1
+Summary: a basic wrapper kernel for DuckDB
+Home-page: https://github.com/erictroebs/jupyter-duckdb
+Author: Eric Tröbs
+Author-email: eric.troebs@tu-ilmenau.de
+Project-URL: Bug Tracker, https://github.com/erictroebs/jupyter-duckdb/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+Requires-Dist: jupyter
+Requires-Dist: duckdb ==0.8.1
+Requires-Dist: graphviz ==0.20.1
+Requires-Dist: checkmarkandcross
+# DuckDB Kernel for Jupyter
+This is a simple DuckDB wrapper kernel which accepts SQL as input, executes it using a previously loaded DuckDB instance
+and formats the output as a table. There are some magic commands that make teaching easier with this kernel.
+## Quick Start
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Fdbgit.prakinf.tu-ilmenau.de%2Fertr8623%2Fjupyter-duckdb.git/master)
+## Table of Contents
+- [Setup](#setup)
+    - [Using pip](#using-pip)
+    - [Using Docker](#using-docker)
+- [Usage](#usage)
+    - [A Note on Magic Commands](#a-note-on-magic-commands)
+    - [Load a Database](#load-a-database)
+    - [Schema Diagrams](#schema-diagrams)
+    - [Number of Rows](#number-of-rows)
+    - [Ship Tests With Your Notebooks](#ship-tests-with-your-notebooks)
+## Setup
+### Using pip
+Run `pip` to install the corresponding package from [pypi](https://pypi.org/project/jupyter-duckdb/) **after**
+Jupyter is already installed.
+```bash
+pip install jupyter-duckdb
+```
+Register the kernel.
+```bash
+jupyter kernelspec install <path to the site-packages directory>/duckdb_kernel
+```
+Now start Jupyter the usual way and the kernel should be available.
+### Using Docker
+Execute the following command to pull and run a prepared image.
+```bash
+docker run -p 8888:8888 troebs/jupyter:duckdb
+```
+This image can also be used with JupyterHub and the
+[DockerSpawner / SwarmSpawner](https://github.com/jupyterhub/dockerspawner)
+and probably with the
+[kubespawner](https://github.com/jupyterhub/kubespawner).
+You can also build your own image using the [Dockerfile](Dockerfile) in the repository.
+## Usage
+A detailed example can be found [in the repository](example/). The rest of this section describes the magic commands.
+### A Note on Magic Commands
+Many Jupyter kernels distinguish between magic commands for a single line, starting with one percent sign, and
+others for a whole cell, starting with two percent signs. The upcoming magic commands always apply to a whole cell.
+Therefore, it does not matter whether you use a single or two percent signs. However, the magic commands must always
+be used at the beginning of a cell.
+It is also possible to use more than one magic command per cell.
+### Load a Database
+To load the database two magic commands are available.
+`CREATE` creates a new database and therefore overwrites files with the same name without prompting. Using the optional
+parameter `OF` you can either provide another DuckDB file or a file with SQL statements. In the first case the included
+tables will be copied to the new database, while in the second case the SQL statements are just executed. We find this
+feature very useful to work in a temporary copy of the data and therefore be able to restart at any time. The last
+optional parameter `WITH_TESTS` is described in detail [below](#ship-tests-with-your-notebooks).
+```
+%CREATE data.duckdb OF my_statements.sql
+```
+`LOAD`, on the other hand, loads an existing database and returns an error if it does not exist. (That is why `OF` cannot
+be used with `LOAD`! `WITH_TESTS`, on the other hand, is also available with this magic command.)
+```
+%LOAD data.duckdb
+```
+Only one database can be open at any time. If a new database is created or loaded, the current one is closed first and
+saved to disk if necessary.
+Please note that `:memory:` is also a valid file path for DuckDB. The data is then stored exclusively in the main
+memory. In combination with `CREATE` and `OF` this makes it possible to work on a temporary copy in memory.
+### Schema Diagrams
+The magic command `SCHEMA` can be used to create a simple schema diagram of the loaded database, showing all created
+tables, their columns and data types, but without any views. Primary keys are printed in bold and unique keys are
+underlined. Foreign keys are also highlighted and the dependencies between the tables are shown by arrows.
+The optional parameter `LR` can be set to a true value to force a horizontal layout. This saves visual space, especially
+for a larger number of tables.
+```
+%SCHEMA LR 1
+```
+### Number of Rows
+By default, only 20 rows are shown. All further lines are replaced by three dots. When hovering over the three dots
+using the cursor, the number of omitted lines is displayed. Of course, the number of lines displayed can be changed.
+The magic command `ALL_ROWS` and its short form `ALL` can be used to display **all** rows of the query in the same
+cell. **Caution**: With large result sets this can lead to a frozen Jupyter instance.
+```sql
+%ALL_ROWS
+SELECT *
+FROM foo
+-- all rows
+```
+The magic command `QUERY_MAX_ROWS` followed by an integer can be used to change the number of displayed rows for the
+current cell.
+```sql
+%QUERY_MAX_ROWS 50
+SELECT *
+FROM foo
+-- 50 rows
+```
+The magic command `MAX_ROWS` followed by an integer can be used to change the number of displayed rows for all future
+queries including the current cell.
+```sql
+%MAX_ROWS 30
+SELECT *
+FROM foo
+-- 30 rows
+```
+```sql
+SELECT *
+FROM bar
+-- 30 rows
+```
+### Ship Tests With Your Notebooks
+Simple tests can be loaded together with the database with the help of the `WITH_TESTS` parameter. These tests are
+stored as a JSON file. Each test is assigned a unique name, a result set and whether the test should check the order
+of the result. A very simple test file looks like the following JSON object:
+```json
+{
+  "task1": {
+    "ordered": false,
+    "equals": [
+      [
+        1,
+        "Name 1"
+      ],
+      [
+        2,
+        "Name 2"
+      ]
+    ]
+  }
+}
+```
+To bind a test to a cell, use the magic command `TEST` in combination with a name. After the cell is executed, the
+result is evaluated and then displayed below the query result.
+```sql
+%TEST task1
+SELECT 2, 'Name 2'
+UNION
+SELECT 1, 'Name 1'
+```
+Disclaimer: The integrated testing is work-in-progress and thus subject to potentially incompatible changes and
+enhancements.

jupyter_duckdb-0.4.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,21 @@
+duckdb_kernel/__init__.py,sha256=6auU6zeJrsA4fxPSr2PYamS8fG-SMXTn5YQFXF2cseo,33
+duckdb_kernel/__main__.py,sha256=Z3GwHEBWoQjNm2Y84ijnbA0Lk66L7nsFREuqhZ_ptk0,165
+duckdb_kernel/kernel.json,sha256=_7E8Ci2FSdCvnzCjsOaue8QE8AvpS5JLQuxORO5IGtA,127
+duckdb_kernel/kernel.py,sha256=n83u1M3I2dID_CxZRp9atQq1yk168NwICAJo6nVyRKs,13196
+duckdb_kernel/magics/MagicCommand.py,sha256=d4Chj2G9CfX18Y5ZcH5E_Ovx0fueh-Eq54nLH--cgis,1779
+duckdb_kernel/magics/MagicCommandCallback.py,sha256=sCGsUbQUmUctGpBQRtkca44tYCLI8u4Spo6ntMggmFc,706
+duckdb_kernel/magics/MagicCommandException.py,sha256=MwuWkpA6NoCqz437urdI0RVXhbSbVdziuRoi7slYFPc,49
+duckdb_kernel/magics/MagicCommandHandler.py,sha256=V47ef_nWptg7ClwNPKaEVxjQ5prAcMpCk5jXI29RpPA,2319
+duckdb_kernel/magics/__init__.py,sha256=DA8gnQeRCUt1Scy3_NQ9w5CPmMEY9i8YwB-g392pN1U,204
+duckdb_kernel/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+duckdb_kernel/util/formatting.py,sha256=rxY6rBF-p_mk_HS1Z2PrHelJ-IElxYl6GLaDS9hZJ1U,653
+duckdb_kernel/visualization/Column.py,sha256=UXHxczsT6HalANH0CaklEVCyJZg1l0cmq-KGRWXt2-A,422
+duckdb_kernel/visualization/Constraint.py,sha256=1YgUHk7s8mHCVedbcuJKyXDykj7_ybbwT3Dk9p2VMis,287
+duckdb_kernel/visualization/ForeignKey.py,sha256=iurUAXwTwSIpLXsL0B7BA8jqDTfW4_wkeHxoqQbZwiU,470
+duckdb_kernel/visualization/Table.py,sha256=Jv9un_oX-nupx2EqzJDn_UHtAwddgFGSEapho2kIDrY,756
+duckdb_kernel/visualization/VizDrawer.py,sha256=435Ejrp4nEnlnnL2-cu9IHUmgkhSSQQ-04EJtbm3T8g,7568
+duckdb_kernel/visualization/__init__.py,sha256=BfWfACqoxtagVQxK1eAM2r_VbxDf0psPO_0fQWCiiro,155
+jupyter_duckdb-0.4.1.dist-info/METADATA,sha256=CvKQQeaSEgPzWIoLXv1UkRVmeaAsqY7i-VMrA2GDqoU,6563
+jupyter_duckdb-0.4.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+jupyter_duckdb-0.4.1.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
+jupyter_duckdb-0.4.1.dist-info/RECORD,,

{jupyter_duckdb-0.3.2.dist-info → jupyter_duckdb-0.4.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.38.4)
+Generator: bdist_wheel (0.41.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

jupyter_duckdb-0.3.2.dist-info/METADATA DELETED Viewed

@@ -1,17 +0,0 @@
-Metadata-Version: 2.1
-Name: jupyter-duckdb
-Version: 0.3.2
-Summary: a basic wrapper kernel for DuckDB
-Home-page: https://github.com/erictroebs/jupyter-duckdb
-Author: Eric Tröbs
-Author-email: eric.troebs@tu-ilmenau.de
-Project-URL: Bug Tracker, https://github.com/erictroebs/jupyter-duckdb/issues
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.6
-Description-Content-Type: text/markdown
-Requires-Dist: jupyter
-Requires-Dist: duckdb (==0.6.1)
-# DuckDB Kernel for Jupyter

jupyter_duckdb-0.3.2.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-duckdb_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-duckdb_kernel/__main__.py,sha256=Z3GwHEBWoQjNm2Y84ijnbA0Lk66L7nsFREuqhZ_ptk0,165
-duckdb_kernel/kernel.json,sha256=_7E8Ci2FSdCvnzCjsOaue8QE8AvpS5JLQuxORO5IGtA,127
-duckdb_kernel/kernel.py,sha256=cYMSgJgcTjPOMvxZuXS9wsXZpP2TsovzTovI2VYOgQY,10762
-jupyter_duckdb-0.3.2.dist-info/METADATA,sha256=QQ2rgkWRmphAfVzlIfM-cLyg0s3bkz8e3jOUwmFrxEM,588
-jupyter_duckdb-0.3.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-jupyter_duckdb-0.3.2.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
-jupyter_duckdb-0.3.2.dist-info/RECORD,,

{jupyter_duckdb-0.3.2.dist-info → jupyter_duckdb-0.4.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

jupyter-duckdb 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

jupyter-duckdb 0.3.2py3-none-any.whl → 0.4.1py3-none-any.whl