jupyter-duckdb 1.2.0.0__py3-none-any.whl → 1.4.111__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (63)
  1. duckdb_kernel/db/Connection.py +3 -0
  2. duckdb_kernel/db/Table.py +8 -0
  3. duckdb_kernel/db/implementation/duckdb/Connection.py +27 -13
  4. duckdb_kernel/db/implementation/postgres/Connection.py +27 -12
  5. duckdb_kernel/db/implementation/sqlite/Connection.py +9 -3
  6. duckdb_kernel/kernel.py +407 -200
  7. duckdb_kernel/magics/MagicCommand.py +34 -10
  8. duckdb_kernel/magics/MagicCommandCallback.py +11 -7
  9. duckdb_kernel/magics/MagicCommandHandler.py +58 -9
  10. duckdb_kernel/magics/MagicState.py +11 -0
  11. duckdb_kernel/magics/__init__.py +1 -0
  12. duckdb_kernel/parser/DCParser.py +17 -7
  13. duckdb_kernel/parser/LogicParser.py +6 -6
  14. duckdb_kernel/parser/ParserError.py +18 -0
  15. duckdb_kernel/parser/RAParser.py +29 -21
  16. duckdb_kernel/parser/__init__.py +1 -0
  17. duckdb_kernel/parser/elements/DCOperand.py +7 -4
  18. duckdb_kernel/parser/elements/LogicElement.py +0 -2
  19. duckdb_kernel/parser/elements/RAElement.py +4 -1
  20. duckdb_kernel/parser/elements/RARelationReference.py +86 -0
  21. duckdb_kernel/parser/elements/RAUnaryOperator.py +6 -0
  22. duckdb_kernel/parser/elements/__init__.py +2 -1
  23. duckdb_kernel/parser/elements/binary/And.py +1 -1
  24. duckdb_kernel/parser/elements/binary/ConditionalSet.py +37 -10
  25. duckdb_kernel/parser/elements/binary/Cross.py +2 -2
  26. duckdb_kernel/parser/elements/binary/Difference.py +1 -1
  27. duckdb_kernel/parser/elements/binary/Divide.py +1 -1
  28. duckdb_kernel/parser/elements/binary/Division.py +0 -4
  29. duckdb_kernel/parser/elements/binary/FullOuterJoin.py +40 -0
  30. duckdb_kernel/parser/elements/binary/Join.py +4 -1
  31. duckdb_kernel/parser/elements/binary/LeftOuterJoin.py +27 -0
  32. duckdb_kernel/parser/elements/binary/LeftSemiJoin.py +27 -0
  33. duckdb_kernel/parser/elements/binary/RightOuterJoin.py +27 -0
  34. duckdb_kernel/parser/elements/binary/RightSemiJoin.py +27 -0
  35. duckdb_kernel/parser/elements/binary/__init__.py +21 -6
  36. duckdb_kernel/parser/elements/unary/AttributeRename.py +39 -0
  37. duckdb_kernel/parser/elements/unary/Projection.py +1 -1
  38. duckdb_kernel/parser/elements/unary/Rename.py +68 -14
  39. duckdb_kernel/parser/elements/unary/__init__.py +2 -0
  40. duckdb_kernel/parser/tokenizer/Token.py +24 -3
  41. duckdb_kernel/parser/util/QuerySplitter.py +87 -0
  42. duckdb_kernel/parser/util/RenamableColumn.py +1 -1
  43. duckdb_kernel/parser/util/RenamableColumnList.py +10 -2
  44. duckdb_kernel/tests/__init__.py +76 -0
  45. duckdb_kernel/tests/test_dc.py +483 -0
  46. duckdb_kernel/tests/test_ra.py +1966 -0
  47. duckdb_kernel/tests/test_result_comparison.py +173 -0
  48. duckdb_kernel/tests/test_sql.py +48 -0
  49. duckdb_kernel/util/ResultSetComparator.py +22 -4
  50. duckdb_kernel/util/SQL.py +6 -0
  51. duckdb_kernel/util/TestError.py +4 -0
  52. duckdb_kernel/visualization/Plotly.py +144 -0
  53. duckdb_kernel/visualization/RATreeDrawer.py +34 -2
  54. duckdb_kernel/visualization/__init__.py +1 -0
  55. duckdb_kernel/visualization/lib/__init__.py +53 -0
  56. duckdb_kernel/visualization/lib/plotly-3.0.1.min.js +3879 -0
  57. duckdb_kernel/visualization/lib/ra.css +3 -0
  58. duckdb_kernel/visualization/lib/ra.js +55 -0
  59. {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/METADATA +53 -19
  60. jupyter_duckdb-1.4.111.dist-info/RECORD +104 -0
  61. {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/WHEEL +1 -1
  62. jupyter_duckdb-1.2.0.0.dist-info/RECORD +0 -82
  63. {jupyter_duckdb-1.2.0.0.dist-info → jupyter_duckdb-1.4.111.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@ from ..LogicElement import LogicElement
8
8
  from ..LogicOperand import LogicOperand
9
9
  from ..LogicOperator import LogicOperator
10
10
  from ..unary import Not
11
+ from ...ParserError import DCParserError
11
12
  from ...tokenizer import Token
12
13
  from ...util.RenamableColumnList import RenamableColumnList
13
14
  from ....db import Table
@@ -42,8 +43,11 @@ class ConditionalSet:
42
43
 
43
44
  # If a constant was found, we store the value and replace it with a random attribute name.
44
45
  constant = le.names[i]
45
- new_token = Token.random()
46
- new_operand = DCOperand(le.relation, le.names[:i] + (new_token,) + le.names[i + 1:], skip_comma=True)
46
+ new_token = Token.random(constant)
47
+ new_operand = DCOperand(le.relation,
48
+ le.names[:i] + (new_token,) + le.names[i + 1:],
49
+ skip_comma=True,
50
+ depth=le.depth)
47
51
 
48
52
  # We now need an equality comparison to ensure the introduced attribute is equal to the constant.
49
53
  equality = Equal(
@@ -103,7 +107,7 @@ class ConditionalSet:
103
107
  # The default case is to return the LogicElement with not DCOperands.
104
108
  return le, []
105
109
 
106
- def to_sql(self, tables: Dict[str, Table]) -> str:
110
+ def to_sql_with_renamed_columns(self, tables: Dict[str, Table]) -> Tuple[str, Dict[str, str]]:
107
111
  # First we have to find and remove all DCOperands from the operator tree.
108
112
  condition, dc_operands = self.split_tree(self.condition)
109
113
 
@@ -116,12 +120,13 @@ class ConditionalSet:
116
120
  underscore_regex = re.compile(r'_{1,}')
117
121
 
118
122
  for operand_i, operand in enumerate(dc_operands):
119
- source_columns = tables[operand.relation].columns
123
+ source_columns = tables[Table.normalize_name(operand.relation)].columns
120
124
 
121
125
  # Raise an exception if the given number of operands does not match
122
126
  # the number of attributes in the relation.
123
127
  if len(source_columns) != len(operand.names):
124
- raise AssertionError(f'invalid number of attributes for relation {operand.relation}')
128
+ raise DCParserError(f'invalid number of attributes for relation {operand.relation}',
129
+ depth=operand.depth)
125
130
 
126
131
  # Create a column list for this operand.
127
132
  rcl: RenamableColumnList = RenamableColumnList.from_iter(source_columns)
@@ -215,7 +220,8 @@ class ConditionalSet:
215
220
  if left_name != right_name:
216
221
  break
217
222
  else:
218
- raise AssertionError(f'could not build join for relation {left_name}')
223
+ raise DCParserError(f'could not build join for relation {left_name}',
224
+ depth=left_op.depth)
219
225
 
220
226
  join_tuple = min(left_name, right_name), max(left_name, right_name)
221
227
 
@@ -253,7 +259,7 @@ class ConditionalSet:
253
259
 
254
260
  # If no joins were discovered using this table, an exception is raised.
255
261
  if discovered_joins == 0:
256
- raise AssertionError('no common attributes found for join')
262
+ raise DCParserError('no common attributes found for join', depth=right_op.depth)
257
263
 
258
264
  # The joins have to be sorted in a topologic order starting from t0.
259
265
  used_relations: Set[str] = {'t0'}
@@ -287,7 +293,8 @@ class ConditionalSet:
287
293
  break
288
294
 
289
295
  else:
290
- raise AssertionError('no valid topologic order found for positive joins')
296
+ raise DCParserError('no valid topologic order found for positive joins',
297
+ depth=min(op.depth for _, _, op in relevant_positive))
291
298
 
292
299
  all_negative_conditions: Dict[str, List[str]] = {}
293
300
  all_negative_filters: Dict[str, List[str]] = {}
@@ -317,7 +324,8 @@ class ConditionalSet:
317
324
  used_relations.add(target_name)
318
325
  break
319
326
  else:
320
- raise AssertionError('no valid topologic order found for negative joins')
327
+ raise DCParserError('no valid topologic order found for negative joins',
328
+ depth=min(op.depth for _, _, op in relevant_negative))
321
329
 
322
330
  # Build the SQL statement.
323
331
  sql_select = ', '.join(select_columns[col] if col in select_columns else col
@@ -340,4 +348,23 @@ class ConditionalSet:
340
348
 
341
349
  sql_condition = condition.to_sql(joined_columns) if condition is not None else '1=1'
342
350
 
343
- return f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition})'
351
+ if self.projection == ('*',):
352
+ sql_order = ', '.join(f'{rc.name} ASC' for rcl in rcls for rc in rcl)
353
+ else:
354
+ sql_order = ', '.join(f'{col} ASC' for col in self.projection)
355
+
356
+ sql_query = f'SELECT DISTINCT {sql_select} FROM {sql_tables} WHERE ({sql_join_filters}) AND ({sql_condition}) ORDER BY {sql_order}'
357
+
358
+ # Create a mapping from intermediate column names to constant values.
359
+ column_name_mapping = {
360
+ p: p.constant
361
+ for o in dc_operands
362
+ for p in o.names
363
+ if p.constant is not None
364
+ }
365
+
366
+ return sql_query, column_name_mapping
367
+
368
+ def to_sql(self, tables: Dict[str, Table]) -> str:
369
+ sql, _ = self.to_sql_with_renamed_columns(tables)
370
+ return sql
@@ -8,7 +8,7 @@ from ...util.RenamableColumnList import RenamableColumnList
8
8
  class Cross(RABinaryOperator):
9
9
  @staticmethod
10
10
  def symbols() -> Tuple[str, ...]:
11
- return chr(215), 'x'
11
+ return chr(215), 'x', 'times'
12
12
 
13
13
  def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
14
14
  # execute subqueries
@@ -19,4 +19,4 @@ class Cross(RABinaryOperator):
19
19
  cols = lcols.merge(rcols)
20
20
 
21
21
  # create statement
22
- return f'SELECT {cols.list} FROM ({lq}) {self._name()} CROSS JOIN ({rq}) {self._name()}', cols
22
+ return f'SELECT DISTINCT {cols.list} FROM ({lq}) {self._name()} CROSS JOIN ({rq}) {self._name()}', cols
@@ -8,7 +8,7 @@ from ...util.RenamableColumnList import RenamableColumnList
8
8
  class Difference(RABinaryOperator):
9
9
  @staticmethod
10
10
  def symbols() -> Tuple[str, ...]:
11
- return '\\',
11
+ return '-', '\\'
12
12
 
13
13
  def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
14
14
  # execute subqueries
@@ -8,7 +8,7 @@ class Divide(LogicOperator):
8
8
 
9
9
  @staticmethod
10
10
  def symbols() -> Tuple[str, ...]:
11
- return '÷', '/'
11
+ return '/', '÷'
12
12
 
13
13
  @property
14
14
  def sql_symbol(self) -> str:
@@ -27,10 +27,6 @@ class Division(RABinaryOperator):
27
27
  # inter_name_left = ', '.join(l.current_name for l, _ in inter_cols)
28
28
  inter_name_right = ', '.join(r.current_name for _, r in inter_cols)
29
29
 
30
- print('-', diff_name)
31
- print(inter_name)
32
- print(inter_name_right)
33
-
34
30
  # create sql
35
31
  return f'''
36
32
  SELECT {diff_name}
@@ -0,0 +1,40 @@
1
+ from typing import Optional
2
+ from typing import Tuple, Dict
3
+
4
+ from duckdb_kernel.db import Table
5
+ from ..RABinaryOperator import RABinaryOperator
6
+ from ...ParserError import RAParserError
7
+ from ...util.RenamableColumn import RenamableColumn
8
+ from ...util.RenamableColumnList import RenamableColumnList
9
+
10
+
11
+ class FullOuterJoin(RABinaryOperator):
12
+ @staticmethod
13
+ def symbols() -> Tuple[str, ...]:
14
+ return chr(10199), 'fjoin', 'ojoin'
15
+
16
+ @staticmethod
17
+ def _coalesce(c1: RenamableColumn, c2: Optional[RenamableColumn]) -> str:
18
+ if c2 is not None:
19
+ return f'COALESCE({c1.current_name}, {c2.current_name}) AS {c1.current_name}'
20
+ else:
21
+ return c1.current_name
22
+
23
+ def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
24
+ # execute subqueries
25
+ lq, lcols = self.left.to_sql(tables)
26
+ rq, rcols = self.right.to_sql(tables)
27
+
28
+ # find matching columns
29
+ join_cols, all_cols = lcols.intersect(rcols)
30
+ if len(join_cols) == 0:
31
+ raise RAParserError('no common attributes found for full outer join', 0)
32
+
33
+ replacements = {c1: c2 for c1, c2 in join_cols}
34
+ select_cols = [self._coalesce(c, replacements.get(c)) for c in all_cols]
35
+ select_clause = ', '.join(select_cols)
36
+
37
+ on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
38
+
39
+ # create sql
40
+ return f'SELECT DISTINCT {select_clause} FROM ({lq}) {self._name()} FULL OUTER JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
@@ -2,6 +2,7 @@ from typing import Tuple, Dict
2
2
 
3
3
  from duckdb_kernel.db import Table
4
4
  from ..RABinaryOperator import RABinaryOperator
5
+ from ...ParserError import RAParserError
5
6
  from ...util.RenamableColumnList import RenamableColumnList
6
7
 
7
8
 
@@ -17,8 +18,10 @@ class Join(RABinaryOperator):
17
18
 
18
19
  # find matching columns
19
20
  join_cols, all_cols = lcols.intersect(rcols)
21
+ if len(join_cols) == 0:
22
+ raise RAParserError('no common attributes found for join', 0)
20
23
 
21
24
  on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
22
25
 
23
26
  # create sql
24
- return f'SELECT {all_cols.list} FROM ({lq}) {self._name()} JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
27
+ return f'SELECT DISTINCT {all_cols.list} FROM ({lq}) {self._name()} JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
@@ -0,0 +1,27 @@
1
+ from typing import Tuple, Dict
2
+
3
+ from duckdb_kernel.db import Table
4
+ from ..RABinaryOperator import RABinaryOperator
5
+ from ...ParserError import RAParserError
6
+ from ...util.RenamableColumnList import RenamableColumnList
7
+
8
+
9
+ class LeftOuterJoin(RABinaryOperator):
10
+ @staticmethod
11
+ def symbols() -> Tuple[str, ...]:
12
+ return chr(10197), 'ljoin'
13
+
14
+ def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
15
+ # execute subqueries
16
+ lq, lcols = self.left.to_sql(tables)
17
+ rq, rcols = self.right.to_sql(tables)
18
+
19
+ # find matching columns
20
+ join_cols, all_cols = lcols.intersect(rcols)
21
+ if len(join_cols) == 0:
22
+ raise RAParserError('no common attributes found for left outer join', 0)
23
+
24
+ on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
25
+
26
+ # create sql
27
+ return f'SELECT DISTINCT {all_cols.list} FROM ({lq}) {self._name()} LEFT OUTER JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
@@ -0,0 +1,27 @@
1
+ from typing import Tuple, Dict
2
+
3
+ from duckdb_kernel.db import Table
4
+ from ..RABinaryOperator import RABinaryOperator
5
+ from ...ParserError import RAParserError
6
+ from ...util.RenamableColumnList import RenamableColumnList
7
+
8
+
9
+ class LeftSemiJoin(RABinaryOperator):
10
+ @staticmethod
11
+ def symbols() -> Tuple[str, ...]:
12
+ return chr(8905), 'lsjoin'
13
+
14
+ def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
15
+ # execute subqueries
16
+ lq, lcols = self.left.to_sql(tables)
17
+ rq, rcols = self.right.to_sql(tables)
18
+
19
+ # find matching columns
20
+ join_cols, all_cols = lcols.intersect(rcols)
21
+ if len(join_cols) == 0:
22
+ raise RAParserError('no common attributes found for left semi join', 0)
23
+
24
+ on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
25
+
26
+ # create sql
27
+ return f'SELECT DISTINCT {lcols.list} FROM ({lq}) {self._name()} JOIN ({rq}) {self._name()} ON {on_clause}', lcols
@@ -0,0 +1,27 @@
1
+ from typing import Tuple, Dict
2
+
3
+ from duckdb_kernel.db import Table
4
+ from ..RABinaryOperator import RABinaryOperator
5
+ from ...ParserError import RAParserError
6
+ from ...util.RenamableColumnList import RenamableColumnList
7
+
8
+
9
+ class RightOuterJoin(RABinaryOperator):
10
+ @staticmethod
11
+ def symbols() -> Tuple[str, ...]:
12
+ return chr(10198), 'rjoin'
13
+
14
+ def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
15
+ # execute subqueries
16
+ lq, lcols = self.left.to_sql(tables)
17
+ rq, rcols = self.right.to_sql(tables)
18
+
19
+ # find matching columns
20
+ join_cols, all_cols = lcols.intersect(rcols, prefer_right=True)
21
+ if len(join_cols) == 0:
22
+ raise RAParserError('no common attributes found for right outer join', 0)
23
+
24
+ on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
25
+
26
+ # create sql
27
+ return f'SELECT DISTINCT {all_cols.list} FROM ({lq}) {self._name()} RIGHT OUTER JOIN ({rq}) {self._name()} ON {on_clause}', all_cols
@@ -0,0 +1,27 @@
1
+ from typing import Tuple, Dict
2
+
3
+ from duckdb_kernel.db import Table
4
+ from ..RABinaryOperator import RABinaryOperator
5
+ from ...ParserError import RAParserError
6
+ from ...util.RenamableColumnList import RenamableColumnList
7
+
8
+
9
+ class RightSemiJoin(RABinaryOperator):
10
+ @staticmethod
11
+ def symbols() -> Tuple[str, ...]:
12
+ return chr(8906), 'rsjoin'
13
+
14
+ def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
15
+ # execute subqueries
16
+ lq, lcols = self.left.to_sql(tables)
17
+ rq, rcols = self.right.to_sql(tables)
18
+
19
+ # find matching columns
20
+ join_cols, all_cols = lcols.intersect(rcols, prefer_right=True)
21
+ if len(join_cols) == 0:
22
+ raise RAParserError('no common attributes found for right semi join', 0)
23
+
24
+ on_clause = ' AND '.join(f'{l.current_name} = {r.current_name}' for l, r in join_cols)
25
+
26
+ # create sql
27
+ return f'SELECT DISTINCT {rcols.list} FROM ({lq}) {self._name()} JOIN ({rq}) {self._name()} ON {on_clause}', rcols
@@ -2,6 +2,11 @@ from .Cross import Cross
2
2
  from .Difference import Difference
3
3
  from .Intersection import Intersection
4
4
  from .Join import Join
5
+ from .LeftOuterJoin import LeftOuterJoin
6
+ from .LeftSemiJoin import LeftSemiJoin
7
+ from .RightOuterJoin import RightOuterJoin
8
+ from .RightSemiJoin import RightSemiJoin
9
+ from .FullOuterJoin import FullOuterJoin
5
10
  from .Union import Union
6
11
 
7
12
  from .Add import Add
@@ -30,12 +35,22 @@ LOGIC_BINARY_OPERATORS = sorted([
30
35
  ], key=lambda x: x.order, reverse=True)
31
36
 
32
37
  RA_BINARY_OPERATORS = [
33
- Difference,
34
- Union,
35
- Intersection,
36
- Join,
37
- Cross,
38
- Division
38
+ [Difference],
39
+ [Union],
40
+ [Intersection],
41
+ [Join],
42
+ [LeftOuterJoin, RightOuterJoin, FullOuterJoin, LeftSemiJoin, RightSemiJoin],
43
+ [Cross],
44
+ [Division]
45
+ ]
46
+
47
+ RA_BINARY_SYMBOLS = [
48
+ {
49
+ symbol: operator
50
+ for operator in level
51
+ for symbol in operator.symbols()
52
+ }
53
+ for level in RA_BINARY_OPERATORS
39
54
  ]
40
55
 
41
56
  DC_SET = ConditionalSet
@@ -0,0 +1,39 @@
1
+ from typing import Dict, Tuple
2
+
3
+ from ..LogicElement import LogicElement
4
+ from ..RAElement import RAElement
5
+ from ..RAUnaryOperator import RAUnaryOperator
6
+ from ..binary import ArrowLeft
7
+ from ...util.RenamableColumnList import RenamableColumnList
8
+ from ....db import Table
9
+
10
+
11
+ class AttributeRename(RAUnaryOperator):
12
+ @staticmethod
13
+ def symbols() -> Tuple[str, ...]:
14
+ return 'β', 'beta'
15
+
16
+ def __init__(self, target: RAElement, arg: LogicElement):
17
+ if not isinstance(arg, ArrowLeft):
18
+ raise AssertionError('only arrow statements allowed as parameter')
19
+
20
+ super().__init__(target)
21
+ self.arrow: ArrowLeft = arg
22
+
23
+ @property
24
+ def arg(self) -> LogicElement:
25
+ return self.arrow
26
+
27
+ def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
28
+ # execute subquery
29
+ subquery, subcols = self.target.to_sql(tables)
30
+
31
+ # find and rename column
32
+ subcols.rename(str(self.arrow.right), str(self.arrow.left))
33
+
34
+ # return sql statement
35
+ return subquery, subcols
36
+
37
+ # We replace the "real" attribute name later anyway,
38
+ # so we do not need to change the sql statement here.
39
+ # return f'SELECT DISTINCT {subcols.list} FROM ({subquery}) {self._name()}', subcols
@@ -13,7 +13,7 @@ class Projection(RAUnaryOperator):
13
13
  def symbols() -> Tuple[str, ...]:
14
14
  return 'Π', 'π', 'pi'
15
15
 
16
- def __init__(self, target: RAElement, arg: LogicOperand):
16
+ def __init__(self, target: RAElement, arg: LogicElement):
17
17
  if not isinstance(arg, LogicOperand):
18
18
  raise AssertionError('only argument lists allowed as parameter')
19
19
 
@@ -1,39 +1,93 @@
1
1
  from typing import Tuple, Dict
2
2
 
3
- from duckdb_kernel.db import Table
3
+ from .. import RARelationReference
4
4
  from ..LogicElement import LogicElement
5
5
  from ..RAElement import RAElement
6
6
  from ..RAUnaryOperator import RAUnaryOperator
7
- from ..binary import ArrowLeft
7
+ from ...ParserError import RAParserError
8
+ from ...tokenizer import Token
8
9
  from ...util.RenamableColumnList import RenamableColumnList
10
+ from ....db import Table
9
11
 
10
12
 
11
13
  class Rename(RAUnaryOperator):
12
14
  @staticmethod
13
15
  def symbols() -> Tuple[str, ...]:
14
- return 'β', 'beta'
16
+ return 'ρ', 'ϱ', 'rho'
15
17
 
16
- def __init__(self, target: RAElement, arg: ArrowLeft):
17
- if not isinstance(arg, ArrowLeft):
18
- raise AssertionError('only arrow statements allowed as parameter')
18
+ @classmethod
19
+ def parse_args(cls: type[RAUnaryOperator], *tokens: Token, depth: int):
20
+ from .. import RARelationReference
21
+ return RARelationReference.parse_tokens(cls, *tokens, depth=depth)
19
22
 
23
+ def __init__(self, target: RAElement, arg: RARelationReference):
20
24
  super().__init__(target)
21
- self.arrow: ArrowLeft = arg
25
+ self.reference: RARelationReference = arg
22
26
 
23
27
  @property
24
28
  def arg(self) -> LogicElement:
25
- return self.arrow
29
+ return self.reference
26
30
 
27
31
  def to_sql(self, tables: Dict[str, Table]) -> Tuple[str, RenamableColumnList]:
28
32
  # execute subquery
29
33
  subquery, subcols = self.target.to_sql(tables)
30
34
 
31
- # find and rename column
32
- subcols.rename(str(self.arrow.right), str(self.arrow.left))
35
+ # rename attributes
36
+ if self.reference.relation is None and self.reference.attributes is not None:
37
+ return self._to_sql_with_renamed_attributes(tables, subquery, subcols)
33
38
 
34
- # return sql statement
39
+ # rename relation
40
+ elif self.reference.relation is not None and self.reference.attributes is None:
41
+ return self._to_sql_with_renamed_relation(tables, subquery, subcols)
42
+
43
+ # rename relation and attributes
44
+ else:
45
+ return self._to_sql_with_renamed_relation_and_attributes(tables, subquery, subcols)
46
+
47
+ def _to_sql_with_renamed_relation(self,
48
+ tables: Dict[str, Table],
49
+ subquery: str,
50
+ subcols: RenamableColumnList) -> Tuple[str, RenamableColumnList]:
51
+ # check if there are two columns with the same name
52
+ for i in range(len(subcols)):
53
+ for k in range(i + 1, len(subcols)):
54
+ if subcols[i].name == subcols[k].name:
55
+ raise RAParserError(
56
+ f'attribute {subcols[i].name} is present in both {subcols[i].table.name} and {subcols[k].table.name}',
57
+ depth=0
58
+ )
59
+
60
+ # add new table
61
+ table = Table(self.reference.relation)
62
+ # tables[self.reference.relation] = table
63
+
64
+ # set table for all attributes
65
+ for col in subcols:
66
+ col.table = table
67
+
68
+ # return
35
69
  return subquery, subcols
36
70
 
37
- # We replace the "real" attribute name later anyway,
38
- # so we do not need to change the sql statement here.
39
- # return f'SELECT DISTINCT {subcols.list} FROM ({subquery}) {self._name()}', subcols
71
+ def _to_sql_with_renamed_attributes(self,
72
+ tables: Dict[str, Table],
73
+ subquery: str,
74
+ subcols: RenamableColumnList) -> Tuple[str, RenamableColumnList]:
75
+ # check if there are more names than subcols
76
+ if len(self.reference.attributes) > len(subcols):
77
+ raise RAParserError('more names than attributes', 0)
78
+
79
+ # rename columns
80
+ for col, new_name in zip(subcols, self.reference.attributes):
81
+ col.name = new_name
82
+
83
+ # return
84
+ return subquery, subcols
85
+
86
+ def _to_sql_with_renamed_relation_and_attributes(self,
87
+ tables: Dict[str, Table],
88
+ subquery: str,
89
+ subcols: RenamableColumnList) -> Tuple[str, RenamableColumnList]:
90
+ subquery, subcols = self._to_sql_with_renamed_attributes(tables, subquery, subcols)
91
+ subquery, subcols = self._to_sql_with_renamed_relation(tables, subquery, subcols)
92
+
93
+ return subquery, subcols
@@ -1,3 +1,4 @@
1
+ from .AttributeRename import AttributeRename
1
2
  from .Not import Not
2
3
  from .Projection import Projection
3
4
  from .Rename import Rename
@@ -10,6 +11,7 @@ LOGIC_UNARY_OPERATORS = [
10
11
  ]
11
12
 
12
13
  RA_UNARY_OPERATORS = [
14
+ AttributeRename,
13
15
  Projection,
14
16
  Rename,
15
17
  Selection
@@ -1,8 +1,9 @@
1
+ from typing import Optional
1
2
  from uuid import uuid4
2
3
 
3
4
 
4
5
  class Token(str):
5
- def __new__(cls, value: str):
6
+ def __new__(cls, value: str, constant: 'Token' = None):
6
7
  while True:
7
8
  # strip whitespaces
8
9
  value = value.strip()
@@ -38,20 +39,40 @@ class Token(str):
38
39
 
39
40
  return super().__new__(cls, value)
40
41
 
42
+ def __init__(self, value: str, constant: 'Token' = None):
43
+ self.constant: Optional[Token] = constant
44
+
41
45
  @staticmethod
42
- def random() -> 'Token':
43
- return Token('__' + str(uuid4()).replace('-', '_'))
46
+ def random(constant: 'Token' = None) -> 'Token':
47
+ return Token('__' + str(uuid4()).replace('-', '_'), constant)
44
48
 
45
49
  @property
46
50
  def empty(self) -> bool:
47
51
  return len(self) == 0
48
52
 
53
+ @property
54
+ def is_temporary(self) -> bool:
55
+ return self.startswith('__')
56
+
49
57
  @property
50
58
  def is_constant(self) -> bool:
51
59
  return ((self[0] == '"' and self[-1] == '"') or
52
60
  (self[0] == "'" and self[-1] == "'") or
53
61
  self.replace('.', '', 1).isnumeric())
54
62
 
63
+ @property
64
+ def no_quotes(self) -> str:
65
+ quotes = ('"', "'")
66
+
67
+ if self[0] in quotes and self[-1] in quotes:
68
+ return self[1:-1]
69
+ if self[0] in quotes:
70
+ return self[1:]
71
+ if self[-1] in quotes:
72
+ return self[:-1]
73
+ else:
74
+ return self
75
+
55
76
  @property
56
77
  def single_quotes(self) -> str:
57
78
  # TODO Is this comparison useless because tokens are cleaned automatically?