jupyter-duckdb 1.2.0.0__py3-none-any.whl → 1.4.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckdb_kernel/db/Connection.py +3 -0
- duckdb_kernel/db/Table.py +8 -0
- duckdb_kernel/db/implementation/duckdb/Connection.py +27 -13
- duckdb_kernel/db/implementation/postgres/Connection.py +27 -12
- duckdb_kernel/db/implementation/sqlite/Connection.py +9 -3
- duckdb_kernel/kernel.py +407 -200
- duckdb_kernel/magics/MagicCommand.py +34 -10
- duckdb_kernel/magics/MagicCommandCallback.py +11 -7
- duckdb_kernel/magics/MagicCommandHandler.py +58 -9
- duckdb_kernel/magics/MagicState.py +11 -0
- duckdb_kernel/magics/__init__.py +1 -0
- duckdb_kernel/parser/DCParser.py +17 -7
- duckdb_kernel/parser/LogicParser.py +6 -6
- duckdb_kernel/parser/ParserError.py +18 -0
- duckdb_kernel/parser/RAParser.py +29 -21
- duckdb_kernel/parser/__init__.py +1 -0
- duckdb_kernel/parser/elements/DCOperand.py +7 -4
- duckdb_kernel/parser/elements/LogicElement.py +0 -2
- duckdb_kernel/parser/elements/RAElement.py +4 -1
- duckdb_kernel/parser/elements/RARelationReference.py +86 -0
- duckdb_kernel/parser/elements/RAUnaryOperator.py +6 -0
- duckdb_kernel/parser/elements/__init__.py +2 -1
- duckdb_kernel/parser/elements/binary/And.py +1 -1
- duckdb_kernel/parser/elements/binary/ConditionalSet.py +37 -10
- duckdb_kernel/parser/elements/binary/Cross.py +2 -2
- duckdb_kernel/parser/elements/binary/Difference.py +1 -1
- duckdb_kernel/parser/elements/binary/Divide.py +1 -1
- duckdb_kernel/parser/elements/binary/Division.py +0 -4
- duckdb_kernel/parser/elements/binary/FullOuterJoin.py +40 -0
- duckdb_kernel/parser/elements/binary/Join.py +4 -1
- duckdb_kernel/parser/elements/binary/LeftOuterJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/LeftSemiJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/RightOuterJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/RightSemiJoin.py +27 -0
- duckdb_kernel/parser/elements/binary/__init__.py +21 -6
- duckdb_kernel/parser/elements/unary/AttributeRename.py +39 -0
- duckdb_kernel/parser/elements/unary/Projection.py +1 -1
- duckdb_kernel/parser/elements/unary/Rename.py +68 -14
- duckdb_kernel/parser/elements/unary/__init__.py +2 -0
- duckdb_kernel/parser/tokenizer/Token.py +24 -3
- duckdb_kernel/parser/util/QuerySplitter.py +87 -0
- duckdb_kernel/parser/util/RenamableColumn.py +1 -1
- duckdb_kernel/parser/util/RenamableColumnList.py +10 -2
- duckdb_kernel/tests/__init__.py +76 -0
- duckdb_kernel/tests/test_dc.py +483 -0
- duckdb_kernel/tests/test_ra.py +1966 -0
- duckdb_kernel/tests/test_result_comparison.py +173 -0
- duckdb_kernel/tests/test_sql.py +48 -0
- duckdb_kernel/util/ResultSetComparator.py +22 -4
- duckdb_kernel/util/SQL.py +6 -0
- duckdb_kernel/util/TestError.py +4 -0
- duckdb_kernel/visualization/Plotly.py +144 -0
- duckdb_kernel/visualization/RATreeDrawer.py +34 -2
- duckdb_kernel/visualization/__init__.py +1 -0
- duckdb_kernel/visualization/lib/__init__.py +53 -0
- duckdb_kernel/visualization/lib/plotly-3.0.1.min.js +3879 -0
- duckdb_kernel/visualization/lib/ra.css +3 -0
- duckdb_kernel/visualization/lib/ra.js +55 -0
- {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/METADATA +53 -19
- jupyter_duckdb-1.4.111.dist-info/RECORD +104 -0
- {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/WHEEL +1 -1
- jupyter_duckdb-1.2.0.0.dist-info/RECORD +0 -82
- {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/top_level.txt +0 -0
|
@@ -8,6 +8,7 @@ from ..LogicElement import LogicElement
|
|
|
8
8
|
from ..LogicOperand import LogicOperand
|
|
9
9
|
from ..LogicOperator import LogicOperator
|
|
10
10
|
from ..unary import Not
|
|
11
|
+
from ...ParserError import DCParserError
|
|
11
12
|
from ...tokenizer import Token
|
|
12
13
|
from ...util.RenamableColumnList import RenamableColumnList
|
|
13
14
|
from ....db import Table
|
|
@@ -42,8 +43,11 @@ class ConditionalSet:
|
|
|
42
43
|
|
|
43
44
|
# If a constant was found, we store the value and replace it with a random attribute name.
|
|
44
45
|
constant = le.names[i]
|
|
45
|
-
new_token = Token.random()
|
|
46
|
-
new_operand = DCOperand(le.relation,
|
|
46
|
+
new_token = Token.random(constant)
|
|
47
|
+
new_operand = DCOperand(le.relation,
|
|
48
|
+
le.names[:i] + (new_token,) + le.names[i + 1:],
|
|
49
|
+
skip_comma=True,
|
|
50
|
+
depth=le.depth)
|
|
47
51
|
|
|
48
52
|
# We now need an equality comparison to ensure the introduced attribute is equal to the constant.
|
|
49
53
|
equality = Equal(
|
|
@@ -103,7 +107,7 @@ class ConditionalSet:
|
|
|
103
107
|
# The default case is to return the LogicElement with no DCOperands.
|
|
104
108
|
return le, []
|
|
105
109
|
|
|
106
|
-
def
|
|
110
|
+
def to_sql_with_renamed_columns(self, tables: Dict[str, Table]) -> Tuple[str, Dict[str, str]]:
|
|
107
111
|
# First we have to find and remove all DCOperands from the operator tree.
|
|
108
112
|
condition, dc_operands = self.split_tree(self.condition)
|
|
109
113
|
|
|
@@ -116,12 +120,13 @@ class ConditionalSet:
|
|
|
116
120
|
underscore_regex = re.compile(r'_{1,}')
|
|
117
121
|
|
|
118
122
|
for operand_i, operand in enumerate(dc_operands):
|
|
119
|
-
source_columns = tables[operand.relation].columns
|
|
123
|
+
source_columns = tables[Table.normalize_name(operand.relation)].columns
|
|
120
124
|
|
|
121
125
|
# Raise an exception if the given number of operands does not match
|
|
122
126
|
# the number of attributes in the relation.
|
|
123
127
|
if len(source_columns) != len(operand.names):
|
|
124
|
-
raise
|
|
128
|
+
raise DCParserError(f'invalid number of attributes for relation {operand.relation}',
|
|
129
|
+
depth=operand.depth)
|
|
125
130
|
|
|
126
131
|
# Create a column list for this operand.
|
|
127
132
|
rcl: RenamableColumnList = RenamableColumnList.from_iter(source_columns)
|
|
@@ -215,7 +220,8 @@ class ConditionalSet:
|
|
|
215
220
|
if left_name != right_name:
|
|
216
221
|
break
|
|
217
222
|
else:
|
|
218
|
-
raise
|
|
223
|
+
raise DCParserError(f'could not build join for relation {left_name}',
|
|
224
|
+
depth=left_op.depth)
|
|
219
225
|
|
|
220
226
|
join_tuple = min(left_name, right_name), max(left_name, right_name)
|
|
221
227
|
|
|
@@ -253,7 +259,7 @@ class ConditionalSet:
|
|
|
253
259
|
|
|
254
260
|
# If no joins were discovered using this table, an exception is raised.
|
|
255
261
|
if discovered_joins == 0:
|
|
256
|
-
raise
|
|
262
|
+
raise DCParserError('no common attributes found for join', depth=right_op.depth)
|
|
257
263
|
|
|
258
264
|
# The joins have to be sorted in a topological order starting from t0.
|
|
259
265
|
used_relations: Set[str] = {'t0'}
|
|
@@ -287,7 +293,8 @@ class ConditionalSet:
|
|
|
287
293
|
break
|
|
288
294
|
|
|
289
295
|
else:
|
|
290
|
-
raise
|
|
296
|
+
raise DCParserError('no valid topologic order found for positive joins',
|
|
297
|
+
depth=min(op.depth for _, _, op in relevant_positive))
|
|
291
298
|
|
|
292
299
|
all_negative_conditions: Dict[str, List[str]] = {}
|
|
293
300
|
all_negative_filters: Dict[str, List[str]] = {}
|
|
@@ -317,7 +324,8 @@ class ConditionalSet:
|
|
|
317
324
|
used_relations.add(target_name)
|
|
318
325
|
break
|
|
319
326
|
else:
|
|
320
|
-
raise
|
|
327
|
+
raise DCParserError('no valid topologic order found for negative joins',
|
|
328
|
+
depth=min(op.depth for _, _, op in relevant_negative))
|
|
321
329
|
|
|
322
330
|
# Build the SQL statement.
|
|
323
331
|
sql_select = ', '.join(select_columns[col] if col in select_columns else col
|
|
@@ -340,4 +348,23 @@ class ConditionalSet:
|
|
|
340
348
|
|
|
341
349
|
sql_condition = condition.to_sql(joined_columns) if condition is not None else '1=1'
|
|
342
350
|
|
|
343
|
-
|
|
351
|
+
if self.projection == ('*',):
|
|
352
|
+
sql_order = ', '.join(f'{rc.name} ASC' for rcl in rcls for rc in rcl)
|
|
353
|
+
else:
|
|
354
|
+
sql_order = ', '.join(f'{col} ASC' for col in self.projection)
|
|
355
|
+
|
|
356
|
+
sql_query = f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition}) ORDER BY {sql_order}'
|
|
357
|
+
|
|
358
|
+
# Create a mapping from intermediate column names to constant values.
|
|
359
|
+
column_name_mapping = {
|
|
360
|
+
p: p.constant
|
|
361
|
+
for o in dc_operands
|
|
362
|
+
for p in o.names
|
|
363
|
+
if p.constant is not None
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
return sql_query, column_name_mapping
|
|
367
|
+
|
|
368
|
+
def to_sql(self, tables: Dict[str, Table]) -> str:
|
|
369
|
+
sql, _ = self.to_sql_with_renamed_columns(tables)
|
|
370
|
+
return sql
|
|
@@ -8,7 +8,7 @@ from ...util.RenamableColumnList import RenamableColumnList
|
|
|
8
8
|
class Cross(RABinaryOperator):
|
|
9
9
|
@staticmethod
|
|
10
10
|
def symbols() -> Tuple[str, ...]:
|
|
11
|
-
return chr(215), 'x'
|
|
11
|
+
return chr(215), 'x', 'times'
|
|
12
12
|
|
|
13
13
|
def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
|
|
14
14
|
# execute subqueries
|
|
@@ -19,4 +19,4 @@ class Cross(RABinaryOperator):
|
|
|
19
19
|
cols = lcols.merge(rcols)
|
|
20
20
|
|
|
21
21
|
# create statement
|
|
22
|
-
return f'SELECT {cols.list} FROM ({lq}) {self._name()} CROSS JOIN ({rq}) {self._name()}', cols
|
|
22
|
+
return f'SELECT DISTINCT {cols.list} FROM ({lq}) {self._name()} CROSS JOIN ({rq}) {self._name()}', cols
|
|
@@ -8,7 +8,7 @@ from ...util.RenamableColumnList import RenamableColumnList
|
|
|
8
8
|
class Difference(RABinaryOperator):
|
|
9
9
|
@staticmethod
|
|
10
10
|
def symbols() -> Tuple[str, ...]:
|
|
11
|
-
return '\\'
|
|
11
|
+
return '-', '\\'
|
|
12
12
|
|
|
13
13
|
def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
|
|
14
14
|
# execute subqueries
|
|
@@ -27,10 +27,6 @@ class Division(RABinaryOperator):
|
|
|
27
27
|
# inter_name_left = ', '.join(l.current_name for l, _ in inter_cols)
|
|
28
28
|
inter_name_right = ', '.join(r.current_name for _, r in inter_cols)
|
|
29
29
|
|
|
30
|
-
print('-', diff_name)
|
|
31
|
-
print(inter_name)
|
|
32
|
-
print(inter_name_right)
|
|
33
|
-
|
|
34
30
|
# create sql
|
|
35
31
|
return f'''
|
|
36
32
|
SELECT {diff_name}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from typing import Tuple, Dict
|
|
3
|
+
|
|
4
|
+
from duckdb_kernel.db import Table
|
|
5
|
+
from ..RABinaryOperator import RABinaryOperator
|
|
6
|
+
from ...ParserError import RAParserError
|
|
7
|
+
from ...util.RenamableColumn import RenamableColumn
|
|
8
|
+
from ...util.RenamableColumnList import RenamableColumnList
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FullOuterJoin(RABinaryOperator):
    """Relational algebra full outer join of ``self.left`` and ``self.right``.

    The generated statement joins both sub-queries on the column pairs
    reported by ``RenamableColumnList.intersect`` and emits every joined
    column through ``COALESCE`` so that the value is taken from whichever
    side is not NULL.
    """

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator.

        ``chr(10199)`` is the character U+27D7; ``fjoin`` and ``ojoin`` are
        the keyword alternatives.
        """
        return chr(10199), 'fjoin', 'ojoin'

    @staticmethod
    def _coalesce(c1: RenamableColumn, c2: Optional[RenamableColumn]) -> str:
        """Build the SELECT expression for one output column.

        :param c1: column that appears in the output.
        :param c2: the join partner of ``c1``, or ``None`` if ``c1`` is not
            a join column.
        :return: ``c1``'s current name, wrapped in ``COALESCE(...) AS`` when
            a partner column exists.
        """
        if c2 is None:
            return c1.current_name

        return f'COALESCE({c1.current_name}, {c2.current_name}) AS {c1.current_name}'

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate this operator into a SQL statement.

        :param tables: known tables, passed through to both operands.
        :return: the SQL string and the column list of the result.
        :raises RAParserError: if ``intersect`` reports no join columns.
        """
        # translate both operands first
        left_sql, left_columns = self.left.to_sql(tables)
        right_sql, right_columns = self.right.to_sql(tables)

        # determine the column pairs to join on
        join_cols, all_cols = left_columns.intersect(right_columns)
        if len(join_cols) == 0:
            raise RAParserError('no common attributes found for full outer join', 0)

        # map every join column to its partner on the other side
        partner_of = {}
        for first, second in join_cols:
            partner_of[first] = second

        select_clause = ', '.join(
            self._coalesce(column, partner_of.get(column))
            for column in all_cols
        )

        comparisons = [f'{l.current_name} = {r.current_name}' for l, r in join_cols]
        on_clause = ' AND '.join(comparisons)

        # assemble the statement; _name() is called once per sub-query alias
        return f'SELECT DISTINCT {select_clause} FROM ({left_sql}) {self._name()} FULL OUTER JOIN ({right_sql}) {self._name()} ON {on_clause}', all_cols
|
|
@@ -2,6 +2,7 @@ from typing import Tuple, Dict
|
|
|
2
2
|
|
|
3
3
|
from duckdb_kernel.db import Table
|
|
4
4
|
from ..RABinaryOperator import RABinaryOperator
|
|
5
|
+
from ...ParserError import RAParserError
|
|
5
6
|
from ...util.RenamableColumnList import RenamableColumnList
|
|
6
7
|
|
|
7
8
|
|
|
@@ -17,8 +18,10 @@ class Join(RABinaryOperator):
|
|
|
17
18
|
|
|
18
19
|
# find matching columns
|
|
19
20
|
join_cols, all_cols = lcols.intersect(rcols)
|
|
21
|
+
if len(join_cols) == 0:
|
|
22
|
+
raise RAParserError('no common attributes found for join', 0)
|
|
20
23
|
|
|
21
24
|
on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
|
|
22
25
|
|
|
23
26
|
# create sql
|
|
24
|
-
return f'SELECT {all_cols.list} FROM ({lq}) {self._name()} JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
|
|
27
|
+
return f'SELECT DISTINCT {all_cols.list} FROM ({lq}) {self._name()} JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Tuple, Dict
|
|
2
|
+
|
|
3
|
+
from duckdb_kernel.db import Table
|
|
4
|
+
from ..RABinaryOperator import RABinaryOperator
|
|
5
|
+
from ...ParserError import RAParserError
|
|
6
|
+
from ...util.RenamableColumnList import RenamableColumnList
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LeftOuterJoin(RABinaryOperator):
    """Relational algebra left outer join of ``self.left`` and ``self.right``."""

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator.

        ``chr(10197)`` is the character U+27D5; ``ljoin`` is the keyword
        alternative.
        """
        return chr(10197), 'ljoin'

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate this operator into a SQL statement.

        :param tables: known tables, passed through to both operands.
        :return: the SQL string and the merged column list of the result.
        :raises RAParserError: if ``intersect`` reports no join columns.
        """
        # translate both operands first
        left_sql, left_columns = self.left.to_sql(tables)
        right_sql, right_columns = self.right.to_sql(tables)

        # determine the column pairs to join on
        shared, merged = left_columns.intersect(right_columns)
        if len(shared) == 0:
            raise RAParserError('no common attributes found for left outer join', 0)

        comparisons = [f'{l.current_name} = {r.current_name}' for l, r in shared]
        on_clause = ' AND '.join(comparisons)

        # assemble the statement; _name() is called once per sub-query alias
        return f'SELECT DISTINCT {merged.list} FROM ({left_sql}) {self._name()} LEFT OUTER JOIN ({right_sql}) {self._name()} ON {on_clause}', merged
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Tuple, Dict
|
|
2
|
+
|
|
3
|
+
from duckdb_kernel.db import Table
|
|
4
|
+
from ..RABinaryOperator import RABinaryOperator
|
|
5
|
+
from ...ParserError import RAParserError
|
|
6
|
+
from ...util.RenamableColumnList import RenamableColumnList
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LeftSemiJoin(RABinaryOperator):
    """Relational algebra left semi join of ``self.left`` and ``self.right``.

    The statement is an inner join that selects only the left operand's
    columns; ``DISTINCT`` removes the duplicates the join may introduce.
    """

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator.

        ``chr(8905)`` is the character U+22C9; ``lsjoin`` is the keyword
        alternative.
        """
        return chr(8905), 'lsjoin'

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate this operator into a SQL statement.

        :param tables: known tables, passed through to both operands.
        :return: the SQL string and the left operand's column list.
        :raises RAParserError: if ``intersect`` reports no join columns.
        """
        # translate both operands first
        left_sql, left_columns = self.left.to_sql(tables)
        right_sql, right_columns = self.right.to_sql(tables)

        # determine the column pairs to join on; the merged list is not needed
        shared, _ = left_columns.intersect(right_columns)
        if len(shared) == 0:
            raise RAParserError('no common attributes found for left semi join', 0)

        comparisons = [f'{l.current_name} = {r.current_name}' for l, r in shared]
        on_clause = ' AND '.join(comparisons)

        # assemble the statement; only the left columns are projected
        return f'SELECT DISTINCT {left_columns.list} FROM ({left_sql}) {self._name()} JOIN ({right_sql}) {self._name()} ON {on_clause}', left_columns
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Tuple, Dict
|
|
2
|
+
|
|
3
|
+
from duckdb_kernel.db import Table
|
|
4
|
+
from ..RABinaryOperator import RABinaryOperator
|
|
5
|
+
from ...ParserError import RAParserError
|
|
6
|
+
from ...util.RenamableColumnList import RenamableColumnList
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RightOuterJoin(RABinaryOperator):
    """Relational algebra right outer join of ``self.left`` and ``self.right``."""

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator.

        ``chr(10198)`` is the character U+27D6; ``rjoin`` is the keyword
        alternative.
        """
        return chr(10198), 'rjoin'

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate this operator into a SQL statement.

        :param tables: known tables, passed through to both operands.
        :return: the SQL string and the merged column list of the result.
        :raises RAParserError: if ``intersect`` reports no join columns.
        """
        # translate both operands first
        left_sql, left_columns = self.left.to_sql(tables)
        right_sql, right_columns = self.right.to_sql(tables)

        # determine the column pairs to join on, asking intersect to prefer
        # the right operand's columns
        shared, merged = left_columns.intersect(right_columns, prefer_right=True)
        if len(shared) == 0:
            raise RAParserError('no common attributes found for right outer join', 0)

        comparisons = [f'{l.current_name} = {r.current_name}' for l, r in shared]
        on_clause = ' AND '.join(comparisons)

        # assemble the statement; _name() is called once per sub-query alias
        return f'SELECT DISTINCT {merged.list} FROM ({left_sql}) {self._name()} RIGHT OUTER JOIN ({right_sql}) {self._name()} ON {on_clause}', merged
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Tuple, Dict
|
|
2
|
+
|
|
3
|
+
from duckdb_kernel.db import Table
|
|
4
|
+
from ..RABinaryOperator import RABinaryOperator
|
|
5
|
+
from ...ParserError import RAParserError
|
|
6
|
+
from ...util.RenamableColumnList import RenamableColumnList
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RightSemiJoin(RABinaryOperator):
    """Relational algebra right semi join of ``self.left`` and ``self.right``.

    The statement is an inner join that selects only the right operand's
    columns; ``DISTINCT`` removes the duplicates the join may introduce.
    """

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator.

        ``chr(8906)`` is the character U+22CA; ``rsjoin`` is the keyword
        alternative.
        """
        return chr(8906), 'rsjoin'

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate this operator into a SQL statement.

        :param tables: known tables, passed through to both operands.
        :return: the SQL string and the right operand's column list.
        :raises RAParserError: if ``intersect`` reports no join columns.
        """
        # translate both operands first
        left_sql, left_columns = self.left.to_sql(tables)
        right_sql, right_columns = self.right.to_sql(tables)

        # determine the column pairs to join on; the merged list is not needed
        shared, _ = left_columns.intersect(right_columns, prefer_right=True)
        if len(shared) == 0:
            raise RAParserError('no common attributes found for right semi join', 0)

        comparisons = [f'{l.current_name} = {r.current_name}' for l, r in shared]
        on_clause = ' AND '.join(comparisons)

        # assemble the statement; only the right columns are projected
        return f'SELECT DISTINCT {right_columns.list} FROM ({left_sql}) {self._name()} JOIN ({right_sql}) {self._name()} ON {on_clause}', right_columns
|
|
@@ -2,6 +2,11 @@ from .Cross import Cross
|
|
|
2
2
|
from .Difference import Difference
|
|
3
3
|
from .Intersection import Intersection
|
|
4
4
|
from .Join import Join
|
|
5
|
+
from .LeftOuterJoin import LeftOuterJoin
|
|
6
|
+
from .LeftSemiJoin import LeftSemiJoin
|
|
7
|
+
from .RightOuterJoin import RightOuterJoin
|
|
8
|
+
from .RightSemiJoin import RightSemiJoin
|
|
9
|
+
from .FullOuterJoin import FullOuterJoin
|
|
5
10
|
from .Union import Union
|
|
6
11
|
|
|
7
12
|
from .Add import Add
|
|
@@ -30,12 +35,22 @@ LOGIC_BINARY_OPERATORS = sorted([
|
|
|
30
35
|
], key=lambda x: x.order, reverse=True)
|
|
31
36
|
|
|
32
37
|
RA_BINARY_OPERATORS = [
|
|
33
|
-
Difference,
|
|
34
|
-
Union,
|
|
35
|
-
Intersection,
|
|
36
|
-
Join,
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
[Difference],
|
|
39
|
+
[Union],
|
|
40
|
+
[Intersection],
|
|
41
|
+
[Join],
|
|
42
|
+
[LeftOuterJoin, RightOuterJoin, FullOuterJoin, LeftSemiJoin, RightSemiJoin],
|
|
43
|
+
[Cross],
|
|
44
|
+
[Division]
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
RA_BINARY_SYMBOLS = [
|
|
48
|
+
{
|
|
49
|
+
symbol: operator
|
|
50
|
+
for operator in level
|
|
51
|
+
for symbol in operator.symbols()
|
|
52
|
+
}
|
|
53
|
+
for level in RA_BINARY_OPERATORS
|
|
39
54
|
]
|
|
40
55
|
|
|
41
56
|
DC_SET = ConditionalSet
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from typing import Dict, Tuple
|
|
2
|
+
|
|
3
|
+
from ..LogicElement import LogicElement
|
|
4
|
+
from ..RAElement import RAElement
|
|
5
|
+
from ..RAUnaryOperator import RAUnaryOperator
|
|
6
|
+
from ..binary import ArrowLeft
|
|
7
|
+
from ...util.RenamableColumnList import RenamableColumnList
|
|
8
|
+
from ....db import Table
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AttributeRename(RAUnaryOperator):
    """Unary relational algebra operator that renames a single attribute.

    The parameter must be an ``ArrowLeft`` expression; its two sides are
    converted to strings and handed to ``RenamableColumnList.rename``.
    """

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator."""
        return 'β', 'beta'

    def __init__(self, target: RAElement, arg: LogicElement):
        """Store the arrow expression after validating its type.

        :param target: the expression whose attribute is renamed.
        :param arg: the operator parameter; has to be an ``ArrowLeft``.
        :raises AssertionError: if ``arg`` is not an ``ArrowLeft``.
        """
        if not isinstance(arg, ArrowLeft):
            raise AssertionError('only arrow statements allowed as parameter')

        super().__init__(target)
        self.arrow: ArrowLeft = arg

    @property
    def arg(self) -> LogicElement:
        """The arrow expression this operator was created with."""
        return self.arrow

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate the target and rename one column in its column list.

        The SQL of the target is returned unchanged: only the column list
        is updated, because the attribute name visible to the user is
        substituted at a later stage, so no wrapping SELECT is needed here.

        :param tables: known tables, passed through to the target.
        :return: the target's SQL string and its updated column list.
        """
        # translate the operand
        subquery, subcols = self.target.to_sql(tables)

        # presumably renames the column named on the arrow's right side to
        # the name on its left side -- confirm against RenamableColumnList.rename
        right_name = str(self.arrow.right)
        left_name = str(self.arrow.left)
        subcols.rename(right_name, left_name)

        return subquery, subcols
|
|
@@ -13,7 +13,7 @@ class Projection(RAUnaryOperator):
|
|
|
13
13
|
def symbols() -> Tuple[str, ...]:
|
|
14
14
|
return 'Π', 'π', 'pi'
|
|
15
15
|
|
|
16
|
-
def __init__(self, target: RAElement, arg:
|
|
16
|
+
def __init__(self, target: RAElement, arg: LogicElement):
|
|
17
17
|
if not isinstance(arg, LogicOperand):
|
|
18
18
|
raise AssertionError('only argument lists allowed as parameter')
|
|
19
19
|
|
|
@@ -1,39 +1,93 @@
|
|
|
1
1
|
from typing import Tuple, Dict
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from .. import RARelationReference
|
|
4
4
|
from ..LogicElement import LogicElement
|
|
5
5
|
from ..RAElement import RAElement
|
|
6
6
|
from ..RAUnaryOperator import RAUnaryOperator
|
|
7
|
-
from
|
|
7
|
+
from ...ParserError import RAParserError
|
|
8
|
+
from ...tokenizer import Token
|
|
8
9
|
from ...util.RenamableColumnList import RenamableColumnList
|
|
10
|
+
from ....db import Table
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
class Rename(RAUnaryOperator):
    """Unary relational algebra operator that renames a relation, its attributes, or both.

    What is renamed depends on which parts of the ``RARelationReference``
    parameter are set (``relation`` and/or ``attributes``).
    """

    @staticmethod
    def symbols() -> Tuple[str, ...]:
        """Return the spellings accepted for this operator."""
        return 'ρ', 'ϱ', 'rho'

    @classmethod
    def parse_args(cls: type[RAUnaryOperator], *tokens: Token, depth: int):
        """Parse the operator's parameter tokens via ``RARelationReference.parse_tokens``."""
        # NOTE(review): imported inside the method, presumably to avoid a
        # circular import -- confirm before moving it to module level.
        from .. import RARelationReference
        return RARelationReference.parse_tokens(cls, *tokens, depth=depth)

    def __init__(self, target: RAElement, arg: RARelationReference):
        """Store the relation reference that describes the new names.

        :param target: the expression to rename.
        :param arg: reference holding the new relation and/or attribute names.
        """
        super().__init__(target)
        self.reference: RARelationReference = arg

    @property
    def arg(self) -> LogicElement:
        """The relation reference this operator was created with."""
        return self.reference

    def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
        """Translate the target and apply the requested renaming to its columns.

        :param tables: known tables, passed through to the target.
        :return: the target's SQL string and its updated column list.
        :raises RAParserError: raised by the helper that performs the renaming.
        """
        # translate the operand
        subquery, subcols = self.target.to_sql(tables)

        reference = self.reference
        has_relation = reference.relation is not None
        has_attributes = reference.attributes is not None

        # only attribute names given
        if has_attributes and not has_relation:
            return self._to_sql_with_renamed_attributes(tables, subquery, subcols)

        # only a relation name given
        if has_relation and not has_attributes:
            return self._to_sql_with_renamed_relation(tables, subquery, subcols)

        # relation and attribute names given
        # NOTE(review): this branch is also reached when both are None --
        # confirm that parse_tokens never produces such a reference.
        return self._to_sql_with_renamed_relation_and_attributes(tables, subquery, subcols)

    def _to_sql_with_renamed_relation(self,
                                      tables: Dict[str, Table],
                                      subquery: str,
                                      subcols: RenamableColumnList) -> Tuple[str, RenamableColumnList]:
        """Assign all columns to a new table named after the reference's relation.

        :raises RAParserError: if two columns share the same name.
        """
        # every pair of columns must have distinct names
        count = len(subcols)
        for first in range(count):
            for second in range(first + 1, count):
                first_col = subcols[first]
                second_col = subcols[second]

                if first_col.name == second_col.name:
                    raise RAParserError(
                        f'attribute {first_col.name} is present in both {first_col.table.name} and {second_col.table.name}',
                        depth=0
                    )

        # create the new table; it is not registered in `tables`
        renamed_table = Table(self.reference.relation)

        # attach every column to the new table
        for column in subcols:
            column.table = renamed_table

        return subquery, subcols

    def _to_sql_with_renamed_attributes(self,
                                        tables: Dict[str, Table],
                                        subquery: str,
                                        subcols: RenamableColumnList) -> Tuple[str, RenamableColumnList]:
        """Assign the reference's attribute names to the columns in order.

        :raises RAParserError: if more names than columns were given.
        """
        new_names = self.reference.attributes

        if len(new_names) > len(subcols):
            raise RAParserError('more names than attributes', 0)

        # zip stops at the shorter sequence, so surplus columns keep their names
        for column, name in zip(subcols, new_names):
            column.name = name

        return subquery, subcols

    def _to_sql_with_renamed_relation_and_attributes(self,
                                                     tables: Dict[str, Table],
                                                     subquery: str,
                                                     subcols: RenamableColumnList) -> Tuple[str, RenamableColumnList]:
        """Rename the attributes first and the relation afterwards."""
        renamed = self._to_sql_with_renamed_attributes(tables, subquery, subcols)
        return self._to_sql_with_renamed_relation(tables, *renamed)
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from .AttributeRename import AttributeRename
|
|
1
2
|
from .Not import Not
|
|
2
3
|
from .Projection import Projection
|
|
3
4
|
from .Rename import Rename
|
|
@@ -10,6 +11,7 @@ LOGIC_UNARY_OPERATORS = [
|
|
|
10
11
|
]
|
|
11
12
|
|
|
12
13
|
RA_UNARY_OPERATORS = [
|
|
14
|
+
AttributeRename,
|
|
13
15
|
Projection,
|
|
14
16
|
Rename,
|
|
15
17
|
Selection
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from uuid import uuid4
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
class Token(str):
|
|
5
|
-
def __new__(cls, value: str):
|
|
6
|
+
def __new__(cls, value: str, constant: 'Token' = None):
|
|
6
7
|
while True:
|
|
7
8
|
# strip whitespaces
|
|
8
9
|
value = value.strip()
|
|
@@ -38,20 +39,40 @@ class Token(str):
|
|
|
38
39
|
|
|
39
40
|
return super().__new__(cls, value)
|
|
40
41
|
|
|
42
|
+
def __init__(self, value: str, constant: 'Token' = None):
|
|
43
|
+
self.constant: Optional[Token] = constant
|
|
44
|
+
|
|
41
45
|
@staticmethod
|
|
42
|
-
def random() -> 'Token':
|
|
43
|
-
return Token('__' + str(uuid4()).replace('-', '_'))
|
|
46
|
+
def random(constant: 'Token' = None) -> 'Token':
|
|
47
|
+
return Token('__' + str(uuid4()).replace('-', '_'), constant)
|
|
44
48
|
|
|
45
49
|
@property
|
|
46
50
|
def empty(self) -> bool:
|
|
47
51
|
return len(self) == 0
|
|
48
52
|
|
|
53
|
+
@property
|
|
54
|
+
def is_temporary(self) -> bool:
|
|
55
|
+
return self.startswith('__')
|
|
56
|
+
|
|
49
57
|
@property
|
|
50
58
|
def is_constant(self) -> bool:
|
|
51
59
|
return ((self[0] == '"' and self[-1] == '"') or
|
|
52
60
|
(self[0] == "'" and self[-1] == "'") or
|
|
53
61
|
self.replace('.', '', 1).isnumeric())
|
|
54
62
|
|
|
63
|
+
@property
|
|
64
|
+
def no_quotes(self) -> str:
|
|
65
|
+
quotes = ('"', "'")
|
|
66
|
+
|
|
67
|
+
if self[0] in quotes and self[-1] in quotes:
|
|
68
|
+
return self[1:-1]
|
|
69
|
+
if self[0] in quotes:
|
|
70
|
+
return self[1:]
|
|
71
|
+
if self[-1] in quotes:
|
|
72
|
+
return self[:-1]
|
|
73
|
+
else:
|
|
74
|
+
return self
|
|
75
|
+
|
|
55
76
|
@property
|
|
56
77
|
def single_quotes(self) -> str:
|
|
57
78
|
# TODO Is this comparison useless because tokens are cleaned automatically?
|