jupyter-duckdb 1.4.0__py3-none-any.whl → 1.4.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckdb_kernel/kernel.py CHANGED
@@ -13,6 +13,7 @@ from .db import Connection, DatabaseError, Table
13
13
  from .db.error import *
14
14
  from .magics import *
15
15
  from .parser import RAParser, DCParser, ParserError
16
+ from .parser.util.QuerySplitter import split_queries, get_last_query
16
17
  from .util.ResultSetComparator import ResultSetComparator
17
18
  from .util.SQL import SQL_KEYWORDS
18
19
  from .util.TestError import TestError
@@ -27,9 +28,11 @@ class DuckDBKernel(Kernel):
27
28
  implementation_version = '1.0'
28
29
  banner = 'DuckDB Kernel'
29
30
  language_info = {
30
- 'name': 'duckdb',
31
- 'mimetype': 'application/sql',
31
+ 'name': 'sql',
32
32
  'file_extension': '.sql',
33
+ 'mimetype': 'text/x-sql',
34
+ 'codemirror_mode': 'sql',
35
+ 'pygments_lexer': 'sql',
33
36
  }
34
37
 
35
38
  def __init__(self, **kwargs):
@@ -166,7 +169,9 @@ class DuckDBKernel(Kernel):
166
169
  # print result if not silent
167
170
  if not silent:
168
171
  # print EXPLAIN queries as raw text if using DuckDB
169
- if query.strip().startswith('EXPLAIN') and state.db.plain_explain():
172
+ last_query = get_last_query(query, remove_comments=True).strip()
173
+
174
+ if last_query.startswith('EXPLAIN') and state.db.plain_explain():
170
175
  for ekey, evalue in rows:
171
176
  html = f'<b>{ekey}</b><br><pre>{evalue}</pre>'
172
177
  break
@@ -273,8 +278,7 @@ class DuckDBKernel(Kernel):
273
278
 
274
279
  # You can only execute one statement at a time using SQLite.
275
280
  if not state.db.multiple_statements_per_query():
276
- statements = re.split(r';\r?\n', content)
277
- for statement in statements:
281
+ for statement in split_queries(content):
278
282
  try:
279
283
  state.db.execute(statement)
280
284
  except EmptyResultError:
@@ -2,6 +2,7 @@ from .LogicParser import LogicParser
2
2
  from .ParserError import RAParserError
3
3
  from .elements import *
4
4
  from .tokenizer import *
5
+ from .util.QuerySplitter import get_last_query
5
6
 
6
7
 
7
8
  # Instead of multiple nested loops, a tree with rotation can
@@ -10,6 +11,10 @@ from .tokenizer import *
10
11
  class RAParser:
11
12
  @staticmethod
12
13
  def parse_query(query: str) -> RAElement:
14
+ # remove comments from query
15
+ query = get_last_query(query, split_at=None, remove_comments=True)
16
+
17
+ # parse query
13
18
  initial_token = Token(query)
14
19
  return RAParser.parse_tokens(initial_token, depth=0)
15
20
 
@@ -36,10 +36,13 @@ class RAElement:
36
36
  # if all columns are from the same relation we can skip the relation name
37
37
  if len(set(c.table for c in columns)) == 1:
38
38
  column_names = ', '.join(f'{c.current_name} AS "{c.name}"' for c in columns)
39
+ order_names = ', '.join(f'"{c.name}" ASC' for c in columns)
39
40
  else:
40
41
  column_names = ', '.join(f'{c.current_name} AS "{c.full_name}"' for c in columns)
42
+ order_names = ', '.join(f'"{c.full_name}" ASC' for c in columns)
41
43
 
42
- return f'SELECT {column_names} FROM ({sql}) {self._name()}'
44
+ # create sql
45
+ return f'SELECT {column_names} FROM ({sql}) {self._name()} ORDER BY {order_names}'
43
46
 
44
47
  def to_sql_with_count(self, tables: Dict[str, Table]) -> str:
45
48
  sql, _ = self.to_sql(tables)
@@ -347,7 +347,13 @@ class ConditionalSet:
347
347
  sql_join_filters += f' AND {join_filter}'
348
348
 
349
349
  sql_condition = condition.to_sql(joined_columns) if condition is not None else '1=1'
350
- sql_query = f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition})'
350
+
351
+ if self.projection == ('*',):
352
+ sql_order = ', '.join(f'{rc.name} ASC' for rcl in rcls for rc in rcl)
353
+ else:
354
+ sql_order = ', '.join(f'{col} ASC' for col in self.projection)
355
+
356
+ sql_query = f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition}) ORDER BY {sql_order}'
351
357
 
352
358
  # Create a mapping from intermediate column names to constant values.
353
359
  column_name_mapping = {
@@ -0,0 +1,87 @@
1
+ from typing import Iterator
2
+
3
+
4
+ def split_queries(query: str, split_at: str | None = ';', remove_comments: bool = False) -> Iterator[str]:
5
+ quotes = '\'"`'
6
+
7
+ escaped = False
8
+ in_quotes = None
9
+ in_singleline_comment = False
10
+ in_multiline_comment = False
11
+
12
+ previous = None
13
+ current_query = []
14
+
15
+ for symbol in query:
16
+ keep_symbol = True
17
+
18
+ # escaped symbol
19
+ if escaped:
20
+ escaped = False
21
+
22
+ # backslash (escape)
23
+ elif symbol == '\\':
24
+ escaped = True
25
+
26
+ # if in quotes
27
+ elif in_quotes is not None:
28
+ if symbol == in_quotes:
29
+ in_quotes = False
30
+
31
+ # if in single line comment
32
+ elif in_singleline_comment:
33
+ if symbol == '\n':
34
+ in_singleline_comment = False
35
+ elif remove_comments:
36
+ keep_symbol = False
37
+
38
+ # if in multiline comment
39
+ elif in_multiline_comment:
40
+ if previous == '*' and symbol == '/':
41
+ in_multiline_comment = False
42
+
43
+ if remove_comments:
44
+ keep_symbol = False
45
+
46
+ # start of quotes
47
+ elif symbol in quotes:
48
+ in_quotes = symbol
49
+
50
+ # start of single line comment
51
+ elif previous == '-' and symbol == '-':
52
+ in_singleline_comment = True
53
+
54
+ if remove_comments:
55
+ keep_symbol = False
56
+ current_query.pop()
57
+
58
+ # start of multiline comment
59
+ elif previous == '/' and symbol == '*':
60
+ in_multiline_comment = True
61
+
62
+ if remove_comments:
63
+ keep_symbol = False
64
+ current_query.pop()
65
+
66
+ # semicolon
67
+ elif split_at is not None and symbol == split_at:
68
+ yield ''.join(current_query)
69
+
70
+ current_query = []
71
+ keep_symbol = False
72
+
73
+ # store symbol
74
+ if keep_symbol:
75
+ current_query.append(symbol)
76
+
77
+ previous = symbol
78
+
79
+ # yield remaining symbols
80
+ yield ''.join(current_query)
81
+
82
+
83
def get_last_query(query: str, split_at: str | None = ';', remove_comments: bool = False) -> str:
    """
    Return the last statement of a text that actually contains content.

    The text is split using `split_queries`. Fragments consisting only of
    whitespace are skipped, so a trailing separator or a trailing comment
    (e.g. "EXPLAIN SELECT 1;") does not hide the real last statement.

    :param query: the text to inspect
    :param split_at: single separator character, or None to disable splitting
    :param remove_comments: if True, comments are removed from the result
    :return: the last non-blank statement; if every fragment is blank, the
             final fragment is returned unchanged
    """
    statement = ''
    last_with_content = None

    for statement in split_queries(query, split_at, remove_comments):
        if statement.strip():
            last_with_content = statement

    # fall back to the final fragment if nothing but whitespace was found
    if last_with_content is None:
        return statement

    return last_with_content
@@ -56,7 +56,7 @@ class Connection:
56
56
  sql = root.to_sql_with_renamed_columns(self.tables)
57
57
  cols, rows = self.execute_sql_return_cols(sql)
58
58
 
59
- return cols, sorted(rows, key=lambda t: tuple(-1 if x is None else x for x in t))
59
+ return cols, rows # sorted(rows, key=lambda t: tuple(-1 if x is None else x for x in t))
60
60
 
61
61
  def execute_ra(self, root: RAElement) -> List:
62
62
  _, rows = self.execute_ra_return_cols(root)
@@ -66,7 +66,7 @@ class Connection:
66
66
  sql, cnm = root.to_sql_with_renamed_columns(self.tables)
67
67
  cols, rows = self.execute_sql_return_cols(sql)
68
68
 
69
- return [cnm.get(c, c) for c in cols], sorted(rows)
69
+ return [cnm.get(c, c) for c in cols], rows # sorted(rows)
70
70
 
71
71
  def execute_dc(self, root: ConditionalSet) -> List:
72
72
  _, rows = self.execute_dc_return_cols(root)
@@ -79,6 +79,29 @@ def test_case_insensitivity():
79
79
  ]
80
80
 
81
81
 
82
+ def test_comments():
83
+ for query in (
84
+ # single line
85
+ 'Shows -- x Users\n x Seasons',
86
+ 'Shows x Seasons -- x Users',
87
+ 'Shows x Seasons--',
88
+ 'Shows x Seasons--\n',
89
+ # multi line
90
+ 'Shows /* x Users */ x Seasons',
91
+ 'Shows /* x Users */\n x Seasons',
92
+ 'Shows /* x Users\n */ x Seasons',
93
+ 'Shows x Seasons/**/',
94
+ 'Shows x Seasons/*\n*/',
95
+ 'Shows x Seasons\n/**/',
96
+ 'Shows x Seasons/* x Users'
97
+ ):
98
+ root = RAParser.parse_query(query)
99
+
100
+ assert isinstance(root, BinaryOperators.Cross)
101
+ assert isinstance(root.left, RAOperand) and root.left.name == 'Shows'
102
+ assert isinstance(root.right, RAOperand) and root.right.name == 'Seasons'
103
+
104
+
82
105
  def test_binary_operator_cross():
83
106
  for query in (
84
107
  r'Shows x Seasons',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jupyter-duckdb
3
- Version: 1.4.0
3
+ Version: 1.4.105
4
4
  Summary: a basic wrapper kernel for DuckDB
5
5
  Home-page: https://github.com/erictroebs/jupyter-duckdb
6
6
  Author: Eric Tröbs
@@ -14,7 +14,7 @@ Description-Content-Type: text/markdown
14
14
  Requires-Dist: jupyter
15
15
  Requires-Dist: graphviz==0.21
16
16
  Requires-Dist: checkmarkandcross
17
- Requires-Dist: duckdb==1.4.0
17
+ Requires-Dist: duckdb==1.4.1
18
18
  Dynamic: author
19
19
  Dynamic: author-email
20
20
  Dynamic: classifier
@@ -45,6 +45,7 @@ There are some magic commands that make teaching easier with this kernel.
45
45
  - [Ship Tests With Your Notebook](#ship-tests-with-your-notebooks)
46
46
  - [Relational Algebra](#relational-algebra)
47
47
  - [Domain Calculus](#domain-calculus)
48
+ - [Automated Parser Selection](#automated-parser-selection)
48
49
 
49
50
  ## Setup
50
51
 
@@ -281,6 +282,8 @@ The supported operations are:
281
282
  The optional flag `ANALYZE` can be used to add an execution diagram to the
282
283
  output.
283
284
 
285
+ You can also add comments to queries using `--` or `/* */`, just like in SQL.
286
+
284
287
  The Dockerfile also installs the Jupyter Lab plugin
285
288
  [jupyter-ra-extension](https://pypi.org/project/jupyter-ra-extension/). It adds
286
289
  the symbols mentioned above and some other supported symbols to the toolbar for
@@ -304,4 +307,3 @@ executed cells.
304
307
  If the magic command `%AUTO_PARSER` is added to a cell, a parser is
305
308
  automatically selected. If `%GUESS_PARSER` is executed, the parser is
306
309
  automatically selected for all subsequent cells.
307
-
@@ -1,7 +1,7 @@
1
1
  duckdb_kernel/__init__.py,sha256=6auU6zeJrsA4fxPSr2PYamS8fG-SMXTn5YQFXF2cseo,33
2
2
  duckdb_kernel/__main__.py,sha256=Z3GwHEBWoQjNm2Y84ijnbA0Lk66L7nsFREuqhZ_ptk0,165
3
3
  duckdb_kernel/kernel.json,sha256=_7E8Ci2FSdCvnzCjsOaue8QE8AvpS5JLQuxORO5IGtA,127
4
- duckdb_kernel/kernel.py,sha256=jgmFdxNfv89_r6pbDMK1F5azjhCz7ABJRlS_Nn9hq1s,28237
4
+ duckdb_kernel/kernel.py,sha256=pn7I_mZj910A1o_RF2_n1fU2aIRIU0fKgvUzU0GdxUw,28386
5
5
  duckdb_kernel/db/Column.py,sha256=GM5P6sFdlYK92hiKln5-6038gIDOTxh1AYbR4kiga_w,559
6
6
  duckdb_kernel/db/Connection.py,sha256=tBXQBYt9c52RLbpl9sakNuAm0Z84--fhZ4efo8ACz-U,670
7
7
  duckdb_kernel/db/Constraint.py,sha256=1YgUHk7s8mHCVedbcuJKyXDykj7_ybbwT3Dk9p2VMis,287
@@ -27,14 +27,14 @@ duckdb_kernel/magics/__init__.py,sha256=ggxzDzDEsKMZzYsWw9JqYVJhciJPvPVYGV7oNo9Y
27
27
  duckdb_kernel/parser/DCParser.py,sha256=16c1mxa494KP9OreUKQHsSQKoDGZ7NNp2u_gi_D-dkw,2293
28
28
  duckdb_kernel/parser/LogicParser.py,sha256=_vZwE5OPRUEN8aEC_fSZAYKR_dpexqNthXog9OFHYRY,1233
29
29
  duckdb_kernel/parser/ParserError.py,sha256=qJQVloFtID1HgVDQ1Io247bODT1ic3oO9Z1ZrWR-2Mk,321
30
- duckdb_kernel/parser/RAParser.py,sha256=vLR2X8zJIJ3cPKdLvU3PKjNakEOvsoFuoK8pr4bA-Fg,3102
30
+ duckdb_kernel/parser/RAParser.py,sha256=YiXjJGdofnajqDbL3UYofIEj3mXll8Hb6dc2bX58ZNY,3284
31
31
  duckdb_kernel/parser/__init__.py,sha256=nTmDm1ADvNPDHhVJQLxKYmArNJk6967EUXqn5AkT8FM,126
32
32
  duckdb_kernel/parser/elements/DCOperand.py,sha256=qEg_6Us4WV1eK4Bq6oUsmFt_L_x5pJPGce_wSapzIYA,1149
33
33
  duckdb_kernel/parser/elements/LogicElement.py,sha256=YasKHxWLDDP8UdyLIKbXzqIRA8-XaakjmvTj-1Iuzyc,280
34
34
  duckdb_kernel/parser/elements/LogicOperand.py,sha256=B9NvriloQE5eP734dNMZBZwrdaaIfsuAmZlG1t2eMhs,1021
35
35
  duckdb_kernel/parser/elements/LogicOperator.py,sha256=lkM4TAGkXUhsO4w4PLKVA0bgCRGPQQFpNA1FcWWOW9Q,1028
36
36
  duckdb_kernel/parser/elements/RABinaryOperator.py,sha256=XN41stGc1e-a4dZ1AQVtQ3lEgjUGNt3dMfYXp85LEeE,538
37
- duckdb_kernel/parser/elements/RAElement.py,sha256=d6QWO8KlWcwhA4tXtWsf8K4oT3OMMjTvEnUD4yaWQ_c,1468
37
+ duckdb_kernel/parser/elements/RAElement.py,sha256=3qf-ZLQU5WAH_3TvEnfXUg8Y9lE2Fg01D82XutIfgjg,1661
38
38
  duckdb_kernel/parser/elements/RAOperand.py,sha256=pghnTYCrrT6MkvynJRgVFPRoMvxIGNB3FTjaq-uCpDQ,1078
39
39
  duckdb_kernel/parser/elements/RAOperator.py,sha256=rtqMFBIBBqT-Bwg7Qm4WQwbDrE28Nb74F_7XMeR3ks4,255
40
40
  duckdb_kernel/parser/elements/RAUnaryOperator.py,sha256=XC1nphkSm88JaEu5V_HKnb_8JNoeBfE3EvNL4o0qh2c,654
@@ -42,7 +42,7 @@ duckdb_kernel/parser/elements/__init__.py,sha256=t5H6SVOm3z8r6UWRYOI7HmMIuB4Yh6T
42
42
  duckdb_kernel/parser/elements/binary/Add.py,sha256=XGkZMfab01huk9EaI6JUfzkd2STbV1C_-TyC2guKE8I,190
43
43
  duckdb_kernel/parser/elements/binary/And.py,sha256=0jgetTG8yo5TJSeK70Kj-PI9ERyek1eyMQXX5HBxa4Y,274
44
44
  duckdb_kernel/parser/elements/binary/ArrowLeft.py,sha256=u4fZSoyT9lfvWXBwuhUl4DdjVZAOqyVIKmMVbpElLD4,203
45
- duckdb_kernel/parser/elements/binary/ConditionalSet.py,sha256=sZ3qrxPux7pb3fMrlyBg4Hw7n4-Ln-AeN70_Jp5dAPo,17652
45
+ duckdb_kernel/parser/elements/binary/ConditionalSet.py,sha256=yzUPWO4KL8BEIQ-SiV00bdLulwHoxgoJRxebyhsRSPU,17884
46
46
  duckdb_kernel/parser/elements/binary/Cross.py,sha256=jVY3cvD6qDWZkJ7q74lFUPO2VdDt4aAjdk2YAfg-ZC4,687
47
47
  duckdb_kernel/parser/elements/binary/Difference.py,sha256=4nyHhjo09UmYjtGNC3xGQxV5ROzCGOpPkyerUN-AlF4,746
48
48
  duckdb_kernel/parser/elements/binary/Divide.py,sha256=ubekU4C1wkCTidUSMLEj5neheRx0QjhWSrsPGuXTa1g,265
@@ -71,12 +71,13 @@ duckdb_kernel/parser/elements/unary/__init__.py,sha256=48EDygy0pD7l3J_BlXGc-b7HY
71
71
  duckdb_kernel/parser/tokenizer/Token.py,sha256=gsCzgU_zLiA-yD0FWvd2qS9LQUXbivESYH-34Glffqs,2404
72
72
  duckdb_kernel/parser/tokenizer/Tokenizer.py,sha256=PWGgS7gYgpULiKGDho842UbaXuqmwEkccixuF10oi5g,5081
73
73
  duckdb_kernel/parser/tokenizer/__init__.py,sha256=EOSmfc2RJwtB5cE1Hhj1JAra97tckxxS8-legybPy60,58
74
+ duckdb_kernel/parser/util/QuerySplitter.py,sha256=CXpF--muxC5NuSr1xc6-EVaP-ZBXLCkDNZb6zYkRTJk,2222
74
75
  duckdb_kernel/parser/util/RenamableColumn.py,sha256=LxJhFDMUv_OxYYDLwKn63QGpBRfs08jVvhuJTzRtc9c,704
75
76
  duckdb_kernel/parser/util/RenamableColumnList.py,sha256=5oEDbtvl4YfHbkxu_Ny2pc0EYnhCZsf7EeoNQvftbrU,3281
76
77
  duckdb_kernel/parser/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- duckdb_kernel/tests/__init__.py,sha256=MxC-m85ZyYQVn5_AJyEtXX1T5oQPngjW-Zxa_gpfXLE,2715
78
+ duckdb_kernel/tests/__init__.py,sha256=-BoPfo1FNQKnvAYt22Ioc21dbuO67QVFaV_SmS1zQw8,2731
78
79
  duckdb_kernel/tests/test_dc.py,sha256=HPJ6JGB7yXVKIOnDHB8KwX1A16ljU0I5Y8VFcJs-KVI,15192
79
- duckdb_kernel/tests/test_ra.py,sha256=UENizwUqSnaObrvfliwkOakBQqNIh8tsQsgtYQ807MY,52371
80
+ duckdb_kernel/tests/test_ra.py,sha256=YEL-Jwc9tIvCT-UGOaqfXJZZH0FAok3cj9sXK59xzkQ,53172
80
81
  duckdb_kernel/tests/test_result_comparison.py,sha256=TQVLPKKNyV2k3i4jCfasetPfVfCzgYZr92wxQmlzPnA,3859
81
82
  duckdb_kernel/tests/test_sql.py,sha256=p7UEokoJs2xc-url7xQ4PmWKxtExrDDYnMeoyR1JD0A,1208
82
83
  duckdb_kernel/util/ResultSetComparator.py,sha256=RZDIfjJyx8-eR-HIqQlEYgZd_V1ympbszpVRF4TlA7o,2262
@@ -93,7 +94,7 @@ duckdb_kernel/visualization/lib/__init__.py,sha256=LYi0YPtn5fXOejbLIqbt_3KzP-Xrw
93
94
  duckdb_kernel/visualization/lib/plotly-3.0.1.min.js,sha256=oy6Be7Eh6eiQFs5M7oXuPxxm9qbJXEtTpfSI93dW16Q,4653932
94
95
  duckdb_kernel/visualization/lib/ra.css,sha256=foz1v69EQ117BDduB9QyHH978PbRs2TG1kBS4VGqZbI,57
95
96
  duckdb_kernel/visualization/lib/ra.js,sha256=VzMRn55ztcd5Kfu2B6gdRPARpi8n-fvs8oNFnfp55Ec,1845
96
- jupyter_duckdb-1.4.0.dist-info/METADATA,sha256=j9NzAabdqOabEew9oJrboOxxHUnURZqE0FQx0N6maU4,9128
97
- jupyter_duckdb-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
98
- jupyter_duckdb-1.4.0.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
99
- jupyter_duckdb-1.4.0.dist-info/RECORD,,
97
+ jupyter_duckdb-1.4.105.dist-info/METADATA,sha256=uuenj_81-ZaSUb0TQ2Sy8-rcrtZsa7GHfZQ7nElAzZ0,9272
98
+ jupyter_duckdb-1.4.105.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
99
+ jupyter_duckdb-1.4.105.dist-info/top_level.txt,sha256=KvRRPMnmkQNuhyBsXoPmwyt26LRDp0O-0HN6u0Dm5jA,14
100
+ jupyter_duckdb-1.4.105.dist-info/RECORD,,