jupyter-duckdb 1.2.7__py3-none-any.whl → 1.2.101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckdb_kernel/kernel.py CHANGED
@@ -54,6 +54,8 @@ class DuckDBKernel(Kernel):
54
54
  MagicCommand('all_dc').arg('value', '1').on(self._all_dc_magic),
55
55
  MagicCommand('auto_parser').disable('sql', 'ra', 'dc').code(True).on(self._auto_parser_magic),
56
56
  MagicCommand('guess_parser').arg('value', '1').on(self._guess_parser_magic),
57
+ MagicCommand('plotly').arg('type').arg('mapping').opt('title').result(True).on(self._plotly_magic),
58
+ MagicCommand('plotly_raw').opt('title').result(True).on(self._plotly_raw_magic)
57
59
  )
58
60
 
59
61
  # create placeholders for database and tests
@@ -577,6 +579,90 @@ class DuckDBKernel(Kernel):
577
579
  if e.depth > 0:
578
580
  raise e
579
581
 
582
+ def _plotly_magic(self, silent: bool, cols: List, rows: List[Tuple], type: str, mapping: str, title: str = None):
583
+ # split mapping and handle asterisks
584
+ mapping = [m.strip() for m in mapping.split(',')]
585
+
586
+ for i in range(len(mapping)):
587
+ if mapping[i] == '*':
588
+ mapping = mapping[:i] + cols + mapping[i+1:]
589
+
590
+ # convert all column names to lower case
591
+ lower_cols = [c.lower() for c in cols]
592
+ lower_mapping = [m.lower() for m in mapping]
593
+
594
+ # map desired columns to indices
595
+ mapped_indices = {}
596
+ for ok, lk in zip(mapping, lower_mapping):
597
+ for i in range(len(lower_cols)):
598
+ if lk == lower_cols[i]:
599
+ mapped_indices[ok] = i
600
+ break
601
+ else:
602
+ raise ValueError(f'unknown column {ok}')
603
+
604
+ # map desired columns to value lists
605
+ mapped_values = {
606
+ m: [r[i] for r in rows]
607
+ for m, i in mapped_indices.items()
608
+ }
609
+ mapped_keys = iter(mapped_values.keys())
610
+
611
+ # get required chart type
612
+ match type.lower():
613
+ case 'scatter':
614
+ if len(lower_mapping) < 2: raise ValueError('scatter requires at least x and y values')
615
+ html = draw_scatter_chart(title,
616
+ mapped_values[next(mapped_keys)],
617
+ **{k: mapped_values[k] for k in mapped_keys})
618
+ case 'line':
619
+ if len(lower_mapping) < 2: raise ValueError('lines requires at least x and y values')
620
+ html = draw_line_chart(title,
621
+ mapped_values[next(mapped_keys)],
622
+ **{k: mapped_values[k] for k in mapped_keys})
623
+
624
+ case 'bar':
625
+ if len(lower_mapping) < 2: raise ValueError('bar requires at least x and y values')
626
+ html = draw_bar_chart(title,
627
+ mapped_values[next(mapped_keys)],
628
+ **{k: mapped_values[k] for k in mapped_keys})
629
+
630
+ case 'pie':
631
+ if len(lower_mapping) != 2: raise ValueError('pie requires labels and values')
632
+ html = draw_pie_chart(title,
633
+ mapped_values[next(mapped_keys)],
634
+ mapped_values[next(mapped_keys)])
635
+
636
+ case 'bubble':
637
+ if len(lower_mapping) != 4: raise ValueError('bubble requires x, y, size and color')
638
+ html = draw_bubble_chart(title,
639
+ mapped_values[next(mapped_keys)],
640
+ mapped_values[next(mapped_keys)],
641
+ mapped_values[next(mapped_keys)],
642
+ mapped_values[next(mapped_keys)])
643
+
644
+ case 'heatmap':
645
+ if len(lower_mapping) != 3: raise ValueError('heatmap requires x, y and z values')
646
+ html = draw_heatmap_chart(title,
647
+ mapped_values[next(mapped_keys)],
648
+ mapped_values[next(mapped_keys)],
649
+ mapped_values[next(mapped_keys)])
650
+
651
+ case _:
652
+ raise ValueError(f'unknown type: {type}')
653
+
654
+ # finally print the code
655
+ self.print_data(html, mime='text/html')
656
+
657
+ def _plotly_raw_magic(self, silent: bool, cols: List, rows: List[Tuple], title: str = None):
658
+ if len(cols) != 1 and len(rows) != 1:
659
+ raise ValueError(f'expected exactly one column and one row')
660
+
661
+ self.print_data(
662
+ draw_chart(title, rows[0][0]),
663
+ mime='text/html'
664
+ )
665
+
580
666
  # jupyter related functions
581
667
  def do_execute(self, code: str, silent: bool,
582
668
  store_history: bool = True, user_expressions: dict = None, allow_stdin: bool = False,
@@ -1,5 +1,5 @@
1
1
  import re
2
- from typing import Dict, Tuple, List, Optional
2
+ from typing import Dict, Tuple, List
3
3
 
4
4
  from . import MagicCommand, MagicCommandException, MagicCommandCallback
5
5
  from .StringWrapper import StringWrapper
@@ -62,6 +62,11 @@ class MagicCommandHandler:
62
62
  args = [group if group is not None else default
63
63
  for group, (_, default, _) in zip(match.groups(), magic.args)]
64
64
 
65
+ args = [arg[1:-1]
66
+ if arg is not None and (arg[0] == '"' and arg[-1] == '"' or arg[0] == "'" and arg[-1] == "'")
67
+ else arg
68
+ for arg in args]
69
+
65
70
  if any(arg is None for arg in args):
66
71
  raise MagicCommandException(f'could not parse parameters for command "{command}"')
67
72
 
@@ -87,6 +92,9 @@ class MagicCommandHandler:
87
92
  value = match.group(i + 2)
88
93
  i += 3
89
94
 
95
+ if value is not None and (value[0] == '"' and value[-1] == '"' or value[0] == "'" and value[-1] == "'"):
96
+ value = value[1:-1]
97
+
90
98
  if name is not None:
91
99
  optionals[name.lower()] = value
92
100
 
@@ -8,7 +8,7 @@ from ...util.RenamableColumnList import RenamableColumnList
8
8
  class Difference(RABinaryOperator):
9
9
  @staticmethod
10
10
  def symbols() -> Tuple[str, ...]:
11
- return '\\',
11
+ return '-', '\\'
12
12
 
13
13
  def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
14
14
  # execute subqueries
@@ -8,7 +8,7 @@ class Divide(LogicOperator):
8
8
 
9
9
  @staticmethod
10
10
  def symbols() -> Tuple[str, ...]:
11
- return '÷', '/'
11
+ return '/', '÷'
12
12
 
13
13
  @property
14
14
  def sql_symbol(self) -> str:
@@ -27,10 +27,6 @@ class Division(RABinaryOperator):
27
27
  # inter_name_left = ', '.join(l.current_name for l, _ in inter_cols)
28
28
  inter_name_right = ', '.join(r.current_name for _, r in inter_cols)
29
29
 
30
- print('-', diff_name)
31
- print(inter_name)
32
- print(inter_name_right)
33
-
34
30
  # create sql
35
31
  return f'''
36
32
  SELECT {diff_name}
@@ -1,7 +1,9 @@
1
+ from typing import Optional
1
2
  from typing import Tuple, Dict
2
3
 
3
4
  from duckdb_kernel.db import Table
4
5
  from ..RABinaryOperator import RABinaryOperator
6
+ from ...util.RenamableColumn import RenamableColumn
5
7
  from ...util.RenamableColumnList import RenamableColumnList
6
8
 
7
9
 
@@ -10,6 +12,13 @@ class FullOuterJoin(RABinaryOperator):
10
12
  def symbols() -> Tuple[str, ...]:
11
13
  return chr(10199), 'fjoin', 'ojoin'
12
14
 
15
@staticmethod
def _coalesce(c1: RenamableColumn, c2: Optional[RenamableColumn]) -> str:
    """
    Build the select expression for one output column of the join.

    :param c1: column that appears in the output
    :param c2: column to merge with `c1`, or `None` if there is none
    :return: the current name of `c1` if `c2` is `None`, otherwise a
        `COALESCE` over both current names aliased to the name of `c1`
    """
    name = c1.current_name

    # nothing to merge with: select the column as it is
    if c2 is None:
        return name

    return f'COALESCE({name}, {c2.current_name}) AS {name}'
21
+
13
22
  def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
14
23
  # execute subqueries
15
24
  lq, lcols = self.left.to_sql(tables)
@@ -19,9 +28,7 @@ class FullOuterJoin(RABinaryOperator):
19
28
  join_cols, all_cols = lcols.intersect(rcols)
20
29
 
21
30
  replacements = {c1: c2 for c1, c2 in join_cols}
22
- select_cols = [
23
- f'COALESCE({c.current_name}, {replacements.get(c).current_name})' if c in replacements else c.current_name
24
- for c in all_cols]
31
+ select_cols = [self._coalesce(c, replacements.get(c)) for c in all_cols]
25
32
  select_clause = ', '.join(select_cols)
26
33
 
27
34
  on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
@@ -0,0 +1,76 @@
1
+ import os
2
+ from typing import Dict, List, Tuple
3
+
4
+ from duckdb_kernel.db import Connection as DB, Table
5
+ from duckdb_kernel.db.error import EmptyResultError
6
+ from duckdb_kernel.parser.elements import RAElement
7
+ from duckdb_kernel.parser.elements.binary import ConditionalSet
8
+
9
# Read the example SQL script once at import time and keep every
# semicolon separated fragment that is not just whitespace.
with open('examples/tables.sql', 'r', encoding='utf-8') as file:
    EXAMPLE_STMTS = list(filter(str.strip, file.read().split(';')))
13
+
14
+
15
class Connection:
    """
    Context manager around a database connection that has executed the
    statements in `EXAMPLE_STMTS`.

    The backend is chosen with the environment variable `DB_TYPE`:
    `postgres` and `sqlite` select the respective implementation, any other
    value (or none) selects an in-memory DuckDB database. The PostgreSQL
    connection is configured with `POSTGRES_HOST`, `POSTGRES_PORT`,
    `POSTGRES_USER` and `POSTGRES_PASSWORD`.
    """

    @staticmethod
    def _nulls_first(row: Tuple) -> Tuple:
        """
        Return a sort key for `row` that orders `None` before any value.

        Each value is paired with a flag, so `None` is never compared with
        a real value. Substituting a number for `None` would raise a
        TypeError as soon as it is compared with e.g. a string.
        """
        return tuple((value is not None, value) for value in row)

    def __enter__(self):
        db_type = os.environ.get('DB_TYPE')
        if db_type == 'postgres':
            host = os.environ.get('POSTGRES_HOST', 'localhost')
            port = int(os.environ.get('POSTGRES_PORT', 5432))
            username = os.environ.get('POSTGRES_USER', 'postgres')
            password = os.environ.get('POSTGRES_PASSWORD', 'postgres')

            from duckdb_kernel.db.implementation.postgres import Connection as PostgreSQL
            self.con: DB = PostgreSQL(host, port, username, password, None)
        elif db_type == 'sqlite':
            from duckdb_kernel.db.implementation.sqlite import Connection as SQLite
            self.con: DB = SQLite(':memory:')
        else:
            from duckdb_kernel.db.implementation.duckdb import Connection as DuckDB
            self.con: DB = DuckDB(':memory:')

        # `__exit__` is not called when `__enter__` raises, so the
        # connection has to be closed here if loading the examples fails.
        try:
            for stmt in EXAMPLE_STMTS:
                try:
                    self.con.execute(stmt)
                except EmptyResultError:
                    # presumably raised for statements that return no
                    # rows, which is expected for a setup script
                    pass
        except BaseException:
            self.con.close()
            raise

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.con.close()

    @property
    def tables(self) -> Dict[str, Table]:
        """Return the result of analyzing the connected database."""
        return self.con.analyze()

    def execute_sql_return_cols(self, query: str) -> Tuple[List, List]:
        """Execute `query` and return columns and rows as delivered by the connection."""
        return self.con.execute(query)

    def execute_sql(self, query: str) -> List:
        """Execute `query` and return only the rows."""
        _, rows = self.execute_sql_return_cols(query)
        return rows

    def execute_ra_return_cols(self, root: RAElement) -> Tuple[List, List]:
        """
        Execute a relational algebra tree and return columns and sorted rows.

        Rows are sorted with `None` ordered before every other value.
        """
        sql = root.to_sql_with_renamed_columns(self.tables)
        cols, rows = self.execute_sql_return_cols(sql)

        return cols, sorted(rows, key=self._nulls_first)

    def execute_ra(self, root: RAElement) -> List:
        """Execute a relational algebra tree and return only the sorted rows."""
        _, rows = self.execute_ra_return_cols(root)
        return rows

    def execute_dc_return_cols(self, root: ConditionalSet) -> Tuple[List, List]:
        """
        Execute a domain calculus expression and return columns and sorted rows.

        Column names are translated with the mapping returned alongside the
        SQL; names without an entry are kept. Rows without `None` sort
        exactly like `sorted(rows)`, rows with `None` sort it first
        instead of failing.
        """
        sql, cnm = root.to_sql_with_renamed_columns(self.tables)
        cols, rows = self.execute_sql_return_cols(sql)

        return [cnm.get(c, c) for c in cols], sorted(rows, key=self._nulls_first)

    def execute_dc(self, root: ConditionalSet) -> List:
        """Execute a domain calculus expression and return only the sorted rows."""
        _, rows = self.execute_dc_return_cols(root)
        return rows
74
+
75
+
76
+ __all__ = ['Connection']