jupyter-duckdb 1.2.0.0__py3-none-any.whl → 1.4.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckdb_kernel/db/Connection.py +3 -0
- duckdb_kernel/db/Table.py +8 -0
- duckdb_kernel/db/implementation/duckdb/Connection.py +27 -13
- duckdb_kernel/db/implementation/postgres/Connection.py +27 -12
- duckdb_kernel/db/implementation/sqlite/Connection.py +9 -3
- duckdb_kernel/kernel.py +407 -200
- duckdb_kernel/magics/MagicCommand.py +34 -10
- duckdb_kernel/magics/MagicCommandCallback.py +11 -7
- duckdb_kernel/magics/MagicCommandHandler.py +58 -9
- duckdb_kernel/magics/MagicState.py +11 -0
- duckdb_kernel/magics/__init__.py +1 -0
- duckdb_kernel/parser/DCParser.py +17 -7
- duckdb_kernel/parser/LogicParser.py +6 -6
- duckdb_kernel/parser/ParserError.py +18 -0
- duckdb_kernel/parser/RAParser.py +29 -21
- duckdb_kernel/parser/__init__.py +1 -0
- duckdb_kernel/parser/elements/DCOperand.py +7 -4
- duckdb_kernel/parser/elements/LogicElement.py +0 -2
- duckdb_kernel/parser/elements/RAElement.py +4 -1
- duckdb_kernel/parser/elements/RARelationReference.py +86 -0
- duckdb_kernel/parser/elements/RAUnaryOperator.py +6 -0
- duckdb_kernel/parser/elements/__init__.py +2 -1
- duckdb_kernel/parser/elements/binary/And.py +1 -1
- duckdb_kernel/parser/elements/binary/ConditionalSet.py +37 -10
- duckdb_kernel/parser/elements/binary/Cross.py +2 -2
- duckdb_kernel/parser/elements/binary/Difference.py +1 -1
- duckdb_kernel/parser/elements/binary/Divide.py +1 -1
- duckdb_kernel/parser/elements/binary/Division.py +0 -4
- duckdb_kernel/parser/elements/binary/FullOuterJoin.py +40 -0
- duckdb_kernel/parser/elements/binary/Join.py +4 -1
- duckdb_kernel/parser/elements/binary/LeftOuterJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/LeftSemiJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/RightOuterJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/RightSemiJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/__init__.py +21 -6
- duckdb_kernel/parser/elements/unary/AttributeRename.py +39 -0
- duckdb_kernel/parser/elements/unary/Projection.py +1 -1
- duckdb_kernel/parser/elements/unary/Rename.py +68 -14
- duckdb_kernel/parser/elements/unary/__init__.py +2 -0
- duckdb_kernel/parser/tokenizer/Token.py +24 -3
- duckdb_kernel/parser/util/QuerySplitter.py +87 -0
- duckdb_kernel/parser/util/RenamableColumn.py +1 -1
- duckdb_kernel/parser/util/RenamableColumnList.py +10 -2
- duckdb_kernel/tests/__init__.py +76 -0
- duckdb_kernel/tests/test_dc.py +483 -0
- duckdb_kernel/tests/test_ra.py +1966 -0
- duckdb_kernel/tests/test_result_comparison.py +173 -0
- duckdb_kernel/tests/test_sql.py +48 -0
- duckdb_kernel/util/ResultSetComparator.py +22 -4
- duckdb_kernel/util/SQL.py +6 -0
- duckdb_kernel/util/TestError.py +4 -0
- duckdb_kernel/visualization/Plotly.py +144 -0
- duckdb_kernel/visualization/RATreeDrawer.py +34 -2
- duckdb_kernel/visualization/__init__.py +1 -0
- duckdb_kernel/visualization/lib/__init__.py +53 -0
- duckdb_kernel/visualization/lib/plotly-3.0.1.min.js +3879 -0
- duckdb_kernel/visualization/lib/ra.css +3 -0
- duckdb_kernel/visualization/lib/ra.js +55 -0
- {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/METADATA +53 -19
- jupyter_duckdb-1.4.111.dist-info/RECORD +104 -0
- {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/WHEEL +1 -1
- jupyter_duckdb-1.2.0.0.dist-info/RECORD +0 -82
- {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/top_level.txt +0 -0
|
@@ -1,27 +1,28 @@
|
|
|
1
|
-
from typing import Any, List, Tuple, Callable, Dict
|
|
1
|
+
from typing import Any, List, Tuple, Callable, Dict, Set
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class MagicCommand:
|
|
5
|
-
_ARG = '''([^ ]+?|'.+?'|".+?")'''
|
|
5
|
+
_ARG = '''([^ ]+?|'.+?'|".+?")?'''
|
|
6
6
|
|
|
7
7
|
def __init__(self, *names: str):
|
|
8
|
-
self._names: Tuple[str] = names
|
|
8
|
+
self._names: Tuple[str, ...] = names
|
|
9
9
|
|
|
10
|
-
self._arguments: List[Tuple[str, str]] = []
|
|
10
|
+
self._arguments: List[Tuple[str, Any, str]] = []
|
|
11
11
|
self._flags: List[Tuple[str, str]] = []
|
|
12
12
|
self._optionals: List[Tuple[str, Any, str]] = []
|
|
13
|
-
|
|
13
|
+
self._disables: Set[str] = set()
|
|
14
14
|
self._code: bool = False
|
|
15
15
|
self._result: bool = False
|
|
16
|
+
self._default: bool = False
|
|
16
17
|
|
|
17
18
|
self._on: List[Callable] = []
|
|
18
19
|
|
|
19
20
|
@property
|
|
20
|
-
def names(self) -> Tuple[str]:
|
|
21
|
+
def names(self) -> Tuple[str, ...]:
|
|
21
22
|
return self._names
|
|
22
23
|
|
|
23
24
|
@property
|
|
24
|
-
def args(self) -> List[Tuple[str, str]]:
|
|
25
|
+
def args(self) -> List[Tuple[str, Any, str]]:
|
|
25
26
|
return self._arguments
|
|
26
27
|
|
|
27
28
|
@property
|
|
@@ -32,6 +33,10 @@ class MagicCommand:
|
|
|
32
33
|
def optionals(self) -> List[Tuple[str, Any, str]]:
|
|
33
34
|
return self._optionals
|
|
34
35
|
|
|
36
|
+
@property
|
|
37
|
+
def disables(self) -> Set[str]:
|
|
38
|
+
return self._disables
|
|
39
|
+
|
|
35
40
|
@property
|
|
36
41
|
def requires_code(self) -> bool:
|
|
37
42
|
return self._code
|
|
@@ -40,8 +45,17 @@ class MagicCommand:
|
|
|
40
45
|
def requires_query_result(self) -> bool:
|
|
41
46
|
return self._result
|
|
42
47
|
|
|
43
|
-
|
|
44
|
-
|
|
48
|
+
@property
|
|
49
|
+
def is_default(self) -> bool:
|
|
50
|
+
return self._default
|
|
51
|
+
|
|
52
|
+
def arg(self, name: str, default_value: Any = None, description: str = None) -> 'MagicCommand':
|
|
53
|
+
if len(self._arguments) > 0:
|
|
54
|
+
ln, ldv, _ = self._arguments[-1]
|
|
55
|
+
if ldv is not None and default_value is None:
|
|
56
|
+
raise ValueError(f'argument {name} without default value registered after argument {ln} with default value {ldv}')
|
|
57
|
+
|
|
58
|
+
self._arguments.append((name, default_value, description))
|
|
45
59
|
return self
|
|
46
60
|
|
|
47
61
|
def opt(self, name: str, default_value: Any = None, description: str = None) -> 'MagicCommand':
|
|
@@ -52,6 +66,12 @@ class MagicCommand:
|
|
|
52
66
|
self._flags.append((name, description))
|
|
53
67
|
return self
|
|
54
68
|
|
|
69
|
+
def disable(self, *name: str) -> 'MagicCommand':
|
|
70
|
+
for n in name:
|
|
71
|
+
self._disables.add(n)
|
|
72
|
+
|
|
73
|
+
return self
|
|
74
|
+
|
|
55
75
|
def code(self, code: bool) -> 'MagicCommand':
|
|
56
76
|
self._code = code
|
|
57
77
|
return self
|
|
@@ -60,10 +80,14 @@ class MagicCommand:
|
|
|
60
80
|
self._result = result
|
|
61
81
|
return self
|
|
62
82
|
|
|
63
|
-
def on(self, fun: Callable):
|
|
83
|
+
def on(self, fun: Callable) -> 'MagicCommand':
|
|
64
84
|
self._on.append(fun)
|
|
65
85
|
return self
|
|
66
86
|
|
|
87
|
+
def default(self, default: bool) -> 'MagicCommand':
|
|
88
|
+
self._default = default
|
|
89
|
+
return self
|
|
90
|
+
|
|
67
91
|
@property
|
|
68
92
|
def parameters(self) -> str:
|
|
69
93
|
args = ' +'.join([self._ARG] * len(self._arguments))
|
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
from typing import Optional, List
|
|
2
2
|
|
|
3
|
-
from . import MagicCommand
|
|
3
|
+
from . import MagicCommand, MagicState
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class MagicCommandCallback:
|
|
7
|
-
def __init__(self, mc: MagicCommand, silent: bool,
|
|
7
|
+
def __init__(self, mc: MagicCommand, silent: bool, state: MagicState, *args, **kwargs):
|
|
8
8
|
self._mc: MagicCommand = mc
|
|
9
9
|
self._silent: bool = silent
|
|
10
|
-
self.
|
|
10
|
+
self._state: MagicState = state
|
|
11
11
|
self._args = args
|
|
12
12
|
self._kwargs = kwargs
|
|
13
13
|
|
|
14
|
+
@property
|
|
15
|
+
def magic(self) -> MagicCommand:
|
|
16
|
+
return self._mc
|
|
17
|
+
|
|
14
18
|
def __call__(self, columns: Optional[List[str]] = None, rows: Optional[List[List]] = None):
|
|
15
19
|
if self._mc.requires_code:
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
20
|
+
self._mc(self._silent, self._state, *self._args, **self._kwargs)
|
|
21
|
+
elif self._mc.requires_query_result:
|
|
22
|
+
self._mc(self._silent, self._state, columns, rows, *self._args, **self._kwargs)
|
|
19
23
|
else:
|
|
20
|
-
|
|
24
|
+
self._mc(self._silent, self._state, *self._args, **self._kwargs)
|
|
@@ -2,6 +2,8 @@ import re
|
|
|
2
2
|
from typing import Dict, Tuple, List
|
|
3
3
|
|
|
4
4
|
from . import MagicCommand, MagicCommandException, MagicCommandCallback
|
|
5
|
+
from .MagicState import MagicState
|
|
6
|
+
from ..db import Connection
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
class MagicCommandHandler:
|
|
@@ -14,20 +16,33 @@ class MagicCommandHandler:
|
|
|
14
16
|
key = key.lower()
|
|
15
17
|
self._magics[key] = cmd
|
|
16
18
|
|
|
17
|
-
def
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
def __getitem__(self, key: str) -> MagicCommand:
|
|
20
|
+
return self._magics[key.lower()]
|
|
21
|
+
|
|
22
|
+
def __call__(self, silent: bool, state: MagicState) \
|
|
23
|
+
-> Tuple[List[MagicCommandCallback], List[MagicCommandCallback]]:
|
|
24
|
+
enabled_callbacks: List[MagicCommandCallback] = []
|
|
25
|
+
|
|
26
|
+
# enable commands with default==True
|
|
27
|
+
for magic in self._magics.values():
|
|
28
|
+
if magic.is_default:
|
|
29
|
+
flags = {name: False for name, _ in magic.flags}
|
|
30
|
+
optionals = {name: default for name, default, _ in magic.optionals}
|
|
31
|
+
callback = MagicCommandCallback(magic, silent, state, **flags, **optionals)
|
|
20
32
|
|
|
33
|
+
enabled_callbacks.append(callback)
|
|
34
|
+
|
|
35
|
+
# search for magic commands in code
|
|
21
36
|
while True:
|
|
22
37
|
# ensure code starts with '%' or '%%' but not with '%%%'
|
|
23
|
-
match = re.match(r'^%{1,2}([^% ]+?)([ \t]*$| .+?$)', code, re.MULTILINE | re.IGNORECASE)
|
|
38
|
+
match = re.match(r'^%{1,2}([^% ]+?)([ \t]*$| .+?$)', state.code, re.MULTILINE | re.IGNORECASE)
|
|
24
39
|
|
|
25
40
|
if match is None:
|
|
26
41
|
break
|
|
27
42
|
|
|
28
43
|
# remove magic command from code
|
|
29
44
|
start, end = match.span()
|
|
30
|
-
code = code[:start] + code[end + 1:]
|
|
45
|
+
state.code = state.code[:start] + state.code[end + 1:]
|
|
31
46
|
|
|
32
47
|
# extract command
|
|
33
48
|
command = match.group(1).lower()
|
|
@@ -45,7 +60,16 @@ class MagicCommandHandler:
|
|
|
45
60
|
raise MagicCommandException(f'could not parse parameters for command "{command}"')
|
|
46
61
|
|
|
47
62
|
# extract args
|
|
48
|
-
args = [
|
|
63
|
+
args = [group if group is not None else default
|
|
64
|
+
for group, (_, default, _) in zip(match.groups(), magic.args)]
|
|
65
|
+
|
|
66
|
+
args = [arg[1:-1]
|
|
67
|
+
if arg is not None and (arg[0] == '"' and arg[-1] == '"' or arg[0] == "'" and arg[-1] == "'")
|
|
68
|
+
else arg
|
|
69
|
+
for arg in args]
|
|
70
|
+
|
|
71
|
+
if any(arg is None for arg in args):
|
|
72
|
+
raise MagicCommandException(f'could not parse parameters for command "{command}"')
|
|
49
73
|
|
|
50
74
|
i = len(args) + 1
|
|
51
75
|
|
|
@@ -69,16 +93,41 @@ class MagicCommandHandler:
|
|
|
69
93
|
value = match.group(i + 2)
|
|
70
94
|
i += 3
|
|
71
95
|
|
|
96
|
+
if value is not None and (value[0] == '"' and value[-1] == '"' or value[0] == "'" and value[-1] == "'"):
|
|
97
|
+
value = value[1:-1]
|
|
98
|
+
|
|
72
99
|
if name is not None:
|
|
73
100
|
optionals[name.lower()] = value
|
|
74
101
|
|
|
75
102
|
# add to callbacks
|
|
76
|
-
callback = MagicCommandCallback(magic, silent,
|
|
103
|
+
callback = MagicCommandCallback(magic, silent, state, *args, **flags, **optionals)
|
|
104
|
+
enabled_callbacks.append(callback)
|
|
105
|
+
|
|
106
|
+
# disable overwritten callbacks
|
|
107
|
+
callbacks = []
|
|
108
|
+
blacklist = set()
|
|
109
|
+
|
|
110
|
+
for callback in reversed(enabled_callbacks):
|
|
111
|
+
for name in callback.magic.names:
|
|
112
|
+
if name in blacklist:
|
|
113
|
+
break
|
|
114
|
+
else:
|
|
115
|
+
callbacks.append(callback)
|
|
116
|
+
|
|
117
|
+
for name in callback.magic.names:
|
|
118
|
+
blacklist.add(name)
|
|
119
|
+
for disable in callback.magic.disables:
|
|
120
|
+
blacklist.add(disable)
|
|
121
|
+
|
|
122
|
+
# prepare callback lists
|
|
123
|
+
pre_query_callbacks = []
|
|
124
|
+
post_query_callbacks = []
|
|
77
125
|
|
|
78
|
-
|
|
126
|
+
for callback in reversed(callbacks):
|
|
127
|
+
if not callback.magic.requires_query_result:
|
|
79
128
|
pre_query_callbacks.append(callback)
|
|
80
129
|
else:
|
|
81
130
|
post_query_callbacks.append(callback)
|
|
82
131
|
|
|
83
132
|
# return callbacks
|
|
84
|
-
return
|
|
133
|
+
return pre_query_callbacks, post_query_callbacks
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from typing import Union, Dict, Optional
|
|
2
|
+
|
|
3
|
+
from ..db import Connection
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MagicState:
|
|
7
|
+
def __init__(self, db: Connection, code: str, max_rows: Optional[int]):
|
|
8
|
+
self.db: Connection = db
|
|
9
|
+
self.code: Union[str, Dict] = code
|
|
10
|
+
self.max_rows: Optional[int] = max_rows
|
|
11
|
+
self.column_name_mapping: Dict[str, str] = {}
|
duckdb_kernel/magics/__init__.py
CHANGED
duckdb_kernel/parser/DCParser.py
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
|
+
from .ParserError import DCParserError
|
|
1
2
|
from .elements import *
|
|
2
3
|
from .tokenizer import *
|
|
4
|
+
from .util.QuerySplitter import get_last_query
|
|
3
5
|
|
|
4
6
|
|
|
5
7
|
class DCParser:
|
|
6
8
|
@staticmethod
|
|
7
9
|
def parse_query(query: str) -> DC_SET:
|
|
10
|
+
# remove comments from query
|
|
11
|
+
query = get_last_query(query, split_at=None, remove_comments=True)
|
|
12
|
+
|
|
8
13
|
# create initial token set
|
|
9
14
|
initial_token = Token(query)
|
|
10
15
|
tokens = tuple(Tokenizer.tokenize(initial_token))
|
|
11
16
|
|
|
17
|
+
if len(tokens) == 0:
|
|
18
|
+
return None
|
|
19
|
+
|
|
12
20
|
# split at |
|
|
13
21
|
for i, token in enumerate(tokens):
|
|
14
22
|
if token in DC_SET.symbols():
|
|
@@ -18,17 +26,17 @@ class DCParser:
|
|
|
18
26
|
)
|
|
19
27
|
|
|
20
28
|
# raise exception if query is not in the correct format
|
|
21
|
-
raise
|
|
29
|
+
raise DCParserError('The expression shall be of the format "{ x1, ..., xn | f(x1, ..., xn) }".', 0)
|
|
22
30
|
|
|
23
31
|
@staticmethod
|
|
24
|
-
def parse_projection(*tokens: Token) -> LogicOperand:
|
|
32
|
+
def parse_projection(*tokens: Token, depth: int = 0) -> LogicOperand:
|
|
25
33
|
if len(tokens) == 1:
|
|
26
34
|
tokens = tuple(Tokenizer.tokenize(tokens[0]))
|
|
27
35
|
|
|
28
36
|
return LogicOperand(*tokens)
|
|
29
37
|
|
|
30
38
|
@staticmethod
|
|
31
|
-
def parse_condition(*tokens: Token) -> LogicElement:
|
|
39
|
+
def parse_condition(*tokens: Token, depth: int = 0) -> LogicElement:
|
|
32
40
|
if len(tokens) == 1:
|
|
33
41
|
tokens = tuple(Tokenizer.tokenize(tokens[0]))
|
|
34
42
|
|
|
@@ -40,8 +48,8 @@ class DCParser:
|
|
|
40
48
|
# return the operator
|
|
41
49
|
# with left part of tokens and right part of tokens
|
|
42
50
|
return operator(
|
|
43
|
-
DCParser.parse_condition(*tokens[:-i]),
|
|
44
|
-
DCParser.parse_condition(*tokens[-i + 1:])
|
|
51
|
+
DCParser.parse_condition(*tokens[:-i], depth=depth + 1),
|
|
52
|
+
DCParser.parse_condition(*tokens[-i + 1:], depth=depth + 1)
|
|
45
53
|
)
|
|
46
54
|
|
|
47
55
|
# not
|
|
@@ -56,10 +64,12 @@ class DCParser:
|
|
|
56
64
|
elif len(tokens) == 2:
|
|
57
65
|
return DCOperand(
|
|
58
66
|
tokens[0],
|
|
59
|
-
tuple(Tokenizer.tokenize(tokens[1]))
|
|
67
|
+
tuple(Tokenizer.tokenize(tokens[1])),
|
|
68
|
+
depth=depth + 1
|
|
60
69
|
)
|
|
61
70
|
else:
|
|
62
71
|
return DCOperand(
|
|
63
72
|
tokens[0],
|
|
64
|
-
tokens[1:]
|
|
73
|
+
tokens[1:],
|
|
74
|
+
depth=depth + 1
|
|
65
75
|
)
|
|
@@ -4,12 +4,12 @@ from .tokenizer import *
|
|
|
4
4
|
|
|
5
5
|
class LogicParser:
|
|
6
6
|
@staticmethod
|
|
7
|
-
def parse_query(query: str) -> LogicElement:
|
|
7
|
+
def parse_query(query: str, depth: int = 0) -> LogicElement:
|
|
8
8
|
initial_token = Token(query)
|
|
9
|
-
return LogicParser.parse_tokens(initial_token)
|
|
9
|
+
return LogicParser.parse_tokens(initial_token, depth=depth)
|
|
10
10
|
|
|
11
11
|
@staticmethod
|
|
12
|
-
def parse_tokens(*tokens: Token) -> LogicElement:
|
|
12
|
+
def parse_tokens(*tokens: Token, depth: int = 0) -> LogicElement:
|
|
13
13
|
if len(tokens) == 1:
|
|
14
14
|
tokens = tuple(Tokenizer.tokenize(tokens[0]))
|
|
15
15
|
|
|
@@ -21,14 +21,14 @@ class LogicParser:
|
|
|
21
21
|
# return the operator
|
|
22
22
|
# with left part of tokens and right part of tokens
|
|
23
23
|
return operator(
|
|
24
|
-
LogicParser.parse_tokens(*tokens[:-i]),
|
|
25
|
-
LogicParser.parse_tokens(*tokens[-i + 1:])
|
|
24
|
+
LogicParser.parse_tokens(*tokens[:-i], depth=depth + 1),
|
|
25
|
+
LogicParser.parse_tokens(*tokens[-i + 1:], depth=depth + 1)
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
# not
|
|
29
29
|
if tokens[0] in LOGIC_NOT.symbols():
|
|
30
30
|
return LOGIC_NOT(
|
|
31
|
-
LogicParser.parse_tokens(*tokens[1:])
|
|
31
|
+
LogicParser.parse_tokens(*tokens[1:], depth=depth + 1)
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
# ArgList
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class ParserError(Exception):
|
|
2
|
+
def __init__(self, message: str, depth: int):
|
|
3
|
+
super().__init__(message)
|
|
4
|
+
|
|
5
|
+
self.message: str = message
|
|
6
|
+
self.depth: int = depth
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RAParserError(ParserError):
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DCParserError(ParserError):
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LogicParserError(ParserError):
|
|
18
|
+
pass
|
duckdb_kernel/parser/RAParser.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
from .
|
|
1
|
+
from .ParserError import RAParserError
|
|
2
2
|
from .elements import *
|
|
3
3
|
from .tokenizer import *
|
|
4
|
+
from .util.QuerySplitter import get_last_query
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
# Instead of multiple nested loops, a tree with rotation can
|
|
@@ -8,31 +9,39 @@ from .tokenizer import *
|
|
|
8
9
|
|
|
9
10
|
class RAParser:
|
|
10
11
|
@staticmethod
|
|
11
|
-
def parse_query(query: str) -> RAElement:
|
|
12
|
+
def parse_query(query: str) -> RAElement | None:
|
|
13
|
+
# remove comments from query
|
|
14
|
+
query = get_last_query(query, split_at=None, remove_comments=True)
|
|
15
|
+
|
|
16
|
+
# parse query
|
|
12
17
|
initial_token = Token(query)
|
|
13
|
-
return RAParser.parse_tokens(initial_token)
|
|
18
|
+
return RAParser.parse_tokens(initial_token, depth=0)
|
|
14
19
|
|
|
15
20
|
@staticmethod
|
|
16
|
-
def parse_tokens(*tokens: Token, target: RAOperator | RAOperand = None) -> RAElement:
|
|
21
|
+
def parse_tokens(*tokens: Token, target: RAOperator | RAOperand = None, depth: int = 0) -> RAElement | None:
|
|
17
22
|
if len(tokens) == 1:
|
|
18
23
|
tokens = tuple(Tokenizer.tokenize(tokens[0]))
|
|
19
24
|
|
|
20
25
|
# binary operators
|
|
21
|
-
for
|
|
26
|
+
for operator_symbols in RA_BINARY_SYMBOLS:
|
|
22
27
|
# iterate tokens and match symbol
|
|
23
28
|
for i in range(1, len(tokens) + 1):
|
|
24
|
-
|
|
29
|
+
lower_token = tokens[-i].lower()
|
|
30
|
+
|
|
31
|
+
if lower_token in operator_symbols:
|
|
32
|
+
operator = operator_symbols[lower_token]
|
|
33
|
+
|
|
25
34
|
# raise error if left or right operand missing
|
|
26
35
|
if i == 1:
|
|
27
|
-
raise
|
|
36
|
+
raise RAParserError(f'right operand missing after {tokens[-i]}', depth)
|
|
28
37
|
if i == len(tokens):
|
|
29
|
-
raise
|
|
38
|
+
raise RAParserError(f'left operand missing before {tokens[-i]}', depth)
|
|
30
39
|
|
|
31
40
|
# return the operator
|
|
32
41
|
# with left part of tokens and right part of tokens
|
|
33
42
|
return operator(
|
|
34
|
-
RAParser.parse_tokens(*tokens[:-i]),
|
|
35
|
-
RAParser.parse_tokens(*tokens[-i + 1:])
|
|
43
|
+
RAParser.parse_tokens(*tokens[:-i], depth=depth + 1),
|
|
44
|
+
RAParser.parse_tokens(*tokens[-i + 1:], depth=depth + 1)
|
|
36
45
|
)
|
|
37
46
|
|
|
38
47
|
# unary operators
|
|
@@ -44,8 +53,8 @@ class RAParser:
|
|
|
44
53
|
# the last token is the operators target.
|
|
45
54
|
if target is None:
|
|
46
55
|
op = operator(
|
|
47
|
-
RAParser.parse_tokens(tokens[-1]),
|
|
48
|
-
|
|
56
|
+
RAParser.parse_tokens(tokens[-1], depth=depth + 1),
|
|
57
|
+
operator.parse_args(*tokens[-i + 1:-1], depth=depth + 1)
|
|
49
58
|
)
|
|
50
59
|
|
|
51
60
|
# Otherwise the handed target is this operator's
|
|
@@ -53,23 +62,22 @@ class RAParser:
|
|
|
53
62
|
else:
|
|
54
63
|
op = operator(
|
|
55
64
|
target,
|
|
56
|
-
|
|
65
|
+
operator.parse_args(*tokens[-i + 1:], depth=depth + 1)
|
|
57
66
|
)
|
|
58
67
|
|
|
59
68
|
# If there are any more tokens the operator is
|
|
60
69
|
# the target for the next step.
|
|
61
70
|
if i < len(tokens):
|
|
62
|
-
return RAParser.parse_tokens(
|
|
63
|
-
*tokens[:-i],
|
|
64
|
-
target=op
|
|
65
|
-
)
|
|
71
|
+
return RAParser.parse_tokens(*tokens[:-i], target=op, depth=depth + 1)
|
|
66
72
|
|
|
67
73
|
# Otherwise the operator is the return value.
|
|
68
74
|
else:
|
|
69
75
|
return op
|
|
70
76
|
|
|
71
77
|
# return as name
|
|
72
|
-
if len(tokens)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
78
|
+
if len(tokens) == 0:
|
|
79
|
+
return None
|
|
80
|
+
elif len(tokens) == 1:
|
|
81
|
+
return RAOperand(tokens[0])
|
|
82
|
+
else:
|
|
83
|
+
raise RAParserError(f'{tokens=}', depth)
|
duckdb_kernel/parser/__init__.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from typing import Tuple
|
|
2
2
|
|
|
3
3
|
from .LogicOperand import LogicOperand
|
|
4
|
+
from ..ParserError import DCParserError
|
|
4
5
|
from ..tokenizer import Token
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class DCOperand(LogicOperand):
|
|
8
|
-
def __new__(cls, relation: Token, columns: Tuple[Token], skip_comma: bool = False):
|
|
9
|
+
def __new__(cls, relation: Token, columns: Tuple[Token, ...], skip_comma: bool = False, depth: int = 0):
|
|
9
10
|
if not skip_comma and not all(t == ',' for i, t in enumerate(columns) if i % 2 == 1):
|
|
10
|
-
raise
|
|
11
|
+
raise DCParserError('arguments must be separated by commas', 0)
|
|
11
12
|
|
|
12
13
|
return tuple.__new__(
|
|
13
14
|
cls,
|
|
@@ -18,9 +19,11 @@ class DCOperand(LogicOperand):
|
|
|
18
19
|
))
|
|
19
20
|
)
|
|
20
21
|
|
|
21
|
-
def __init__(self,
|
|
22
|
+
def __init__(self, relation: Token, columns: Tuple[Token, ...], skip_comma: bool = False, depth: int = 0):
|
|
22
23
|
super().__init__()
|
|
23
|
-
|
|
24
|
+
|
|
25
|
+
self.depth: int = depth
|
|
26
|
+
self.invert: bool = False
|
|
24
27
|
|
|
25
28
|
@property
|
|
26
29
|
def relation(self) -> Token:
|
|
@@ -36,10 +36,13 @@ class RAElement:
|
|
|
36
36
|
# if all columns are from the same relation we can skip the relation name
|
|
37
37
|
if len(set(c.table for c in columns)) == 1:
|
|
38
38
|
column_names = ', '.join(f'{c.current_name} AS "{c.name}"' for c in columns)
|
|
39
|
+
order_names = ', '.join(f'"{c.name}" ASC' for c in columns)
|
|
39
40
|
else:
|
|
40
41
|
column_names = ', '.join(f'{c.current_name} AS "{c.full_name}"' for c in columns)
|
|
42
|
+
order_names = ', '.join(f'"{c.full_name}" ASC' for c in columns)
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
# create sql
|
|
45
|
+
return f'SELECT {column_names} FROM ({sql}) {self._name()} ORDER BY {order_names}'
|
|
43
46
|
|
|
44
47
|
def to_sql_with_count(self, tables: Dict[str, Table]) -> str:
|
|
45
48
|
sql, _ = self.to_sql(tables)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from . import RAUnaryOperator
|
|
4
|
+
from .LogicElement import LogicElement
|
|
5
|
+
from ..ParserError import RAParserError
|
|
6
|
+
from ..tokenizer import Token
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RARelationReference(LogicElement):
|
|
10
|
+
@staticmethod
|
|
11
|
+
def parse_tokens(operator: type[RAUnaryOperator], *tokens: Token, depth: int = 0) -> 'RARelationReference':
|
|
12
|
+
try:
|
|
13
|
+
# If we get one single token, it should be like
|
|
14
|
+
# R -> "R"
|
|
15
|
+
# [ R ] -> "R"
|
|
16
|
+
# [ R(A, B, C) ] -> "R(A, B, C)"
|
|
17
|
+
# (A, B, C) -> "(A, B, C")
|
|
18
|
+
# [ (A, B, C) ] -> "(A, B, C)"
|
|
19
|
+
if len(tokens) == 1:
|
|
20
|
+
return RARelationReference._parse_one_token(*tokens)
|
|
21
|
+
|
|
22
|
+
# If we get two tokens, it should be like
|
|
23
|
+
# R(A, B, C) -> "R", "A, B, C"
|
|
24
|
+
# R A -> "R", "A"
|
|
25
|
+
# (The latter equals R(A), but we should think about rejecting this type.)
|
|
26
|
+
elif len(tokens) == 2:
|
|
27
|
+
return RARelationReference._parse_two_tokens(*tokens)
|
|
28
|
+
|
|
29
|
+
# Otherwise, the input is malformed.
|
|
30
|
+
else:
|
|
31
|
+
raise AssertionError()
|
|
32
|
+
|
|
33
|
+
except AssertionError:
|
|
34
|
+
raise RAParserError(f'malformed input for operator {operator.symbols()[0]} {tokens=}', depth=depth)
|
|
35
|
+
|
|
36
|
+
@staticmethod
|
|
37
|
+
def _parse_one_token(token: Token) -> 'RARelationReference':
|
|
38
|
+
match = re.fullmatch(r'^\s*([A-Za-z0-9]+)?\s*(\(?((\s*[A-Za-z0-9]+\s*,\s*)*(\s*[A-Za-z0-9]+\s*,?\s*))\)?)?\s*$', token)
|
|
39
|
+
if match is None:
|
|
40
|
+
raise AssertionError()
|
|
41
|
+
|
|
42
|
+
if match.group(1) is not None:
|
|
43
|
+
relation = match.group(1).strip()
|
|
44
|
+
else:
|
|
45
|
+
relation = None
|
|
46
|
+
|
|
47
|
+
if match.group(3) is not None:
|
|
48
|
+
attributes = [b for b in (a.strip() for a in match.group(3).split(',')) if b != '']
|
|
49
|
+
else:
|
|
50
|
+
attributes = None
|
|
51
|
+
|
|
52
|
+
if relation is None and attributes is None:
|
|
53
|
+
raise AssertionError()
|
|
54
|
+
|
|
55
|
+
return RARelationReference(relation, attributes)
|
|
56
|
+
|
|
57
|
+
@staticmethod
|
|
58
|
+
def _parse_two_tokens(token1: Token, token2: Token) -> 'RARelationReference':
|
|
59
|
+
# We expect the first token to be a relation name and the second one
|
|
60
|
+
# to be a list of column names separated by commas.
|
|
61
|
+
relation = token1.strip()
|
|
62
|
+
attributes = [b for b in (a.strip() for a in token2.split(',')) if b != '']
|
|
63
|
+
|
|
64
|
+
return RARelationReference(relation, attributes)
|
|
65
|
+
|
|
66
|
+
def __init__(self, relation: str | None, attributes: list[str] | None):
|
|
67
|
+
# check duplicated attributes
|
|
68
|
+
if attributes is not None:
|
|
69
|
+
for i in range(len(attributes)):
|
|
70
|
+
for k in range(i + 1, len(attributes)):
|
|
71
|
+
if attributes[i] == attributes[k]:
|
|
72
|
+
raise RAParserError(f'duplicate attribute {attributes[i]}', 0)
|
|
73
|
+
if attributes[i].lower() == attributes[k].lower():
|
|
74
|
+
raise RAParserError(f'duplicate attribute {attributes[i]}={attributes[k]}', 0)
|
|
75
|
+
|
|
76
|
+
# store
|
|
77
|
+
self.relation: str | None = relation
|
|
78
|
+
self.attributes: list[str] | None = attributes
|
|
79
|
+
|
|
80
|
+
def __str__(self) -> str:
|
|
81
|
+
if self.relation is not None and self.attributes is None:
|
|
82
|
+
return self.relation
|
|
83
|
+
elif self.relation is None and self.attributes is not None:
|
|
84
|
+
return f'({", ".join(self.attributes)})'
|
|
85
|
+
else:
|
|
86
|
+
return f'{self.relation}({", ".join(self.attributes)})'
|
|
@@ -3,9 +3,15 @@ from typing import Iterator
|
|
|
3
3
|
from .LogicElement import LogicElement
|
|
4
4
|
from .RAElement import RAElement
|
|
5
5
|
from .RAOperator import RAOperator
|
|
6
|
+
from ..tokenizer import Token
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class RAUnaryOperator(RAOperator):
|
|
10
|
+
@classmethod
|
|
11
|
+
def parse_args(cls: type['RAUnaryOperator'], *tokens: Token, depth: int):
|
|
12
|
+
from .. import LogicParser
|
|
13
|
+
return LogicParser.parse_tokens(*tokens, depth=depth)
|
|
14
|
+
|
|
9
15
|
def __init__(self, target: RAElement):
|
|
10
16
|
self.target: RAElement = target
|
|
11
17
|
|
|
@@ -9,8 +9,9 @@ from .RAOperator import RAOperator
|
|
|
9
9
|
from .RABinaryOperator import RABinaryOperator
|
|
10
10
|
from .RAUnaryOperator import RAUnaryOperator
|
|
11
11
|
from .RAOperand import RAOperand
|
|
12
|
-
from .binary import RA_BINARY_OPERATORS
|
|
12
|
+
from .binary import RA_BINARY_OPERATORS, RA_BINARY_SYMBOLS
|
|
13
13
|
from .unary import RA_UNARY_OPERATORS
|
|
14
|
+
from .RARelationReference import RARelationReference
|
|
14
15
|
|
|
15
16
|
from .DCOperand import DCOperand
|
|
16
17
|
from .binary import DC_SET
|