rtgl 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rtgl/__init__.py +9 -0
- rtgl/base/__init__.py +6 -0
- rtgl/base/database.py +57 -0
- rtgl/base/table.py +65 -0
- rtgl/converter/__init__.py +7 -0
- rtgl/converter/converter.py +484 -0
- rtgl/converter/static_converter.py +250 -0
- rtgl/converter/temporal_converter.py +565 -0
- rtgl/converter/utils.py +117 -0
- rtgl/parser/.antlr/LexerRTGL.interp +156 -0
- rtgl/parser/.antlr/LexerRTGL.java +670 -0
- rtgl/parser/.antlr/LexerRTGL.tokens +50 -0
- rtgl/parser/.antlr/ParserRTGL.interp +121 -0
- rtgl/parser/.antlr/ParserRTGL.java +1743 -0
- rtgl/parser/.antlr/ParserRTGL.tokens +50 -0
- rtgl/parser/.antlr/ParserRTGLBaseListener.java +303 -0
- rtgl/parser/.antlr/ParserRTGLListener.java +229 -0
- rtgl/parser/LexerRTGL.g4 +252 -0
- rtgl/parser/ParserRTGL.g4 +134 -0
- rtgl/parser/__init__.py +7 -0
- rtgl/parser/gen/LexerRTGL.interp +156 -0
- rtgl/parser/gen/LexerRTGL.py +415 -0
- rtgl/parser/gen/LexerRTGL.tokens +50 -0
- rtgl/parser/gen/ParserRTGL.interp +121 -0
- rtgl/parser/gen/ParserRTGL.py +1911 -0
- rtgl/parser/gen/ParserRTGL.tokens +50 -0
- rtgl/parser/gen/ParserRTGLListener.py +210 -0
- rtgl/parser/gen/ParserRTGLVisitor.py +123 -0
- rtgl/validator/__init__.py +8 -0
- rtgl/validator/error.py +124 -0
- rtgl/validator/static_validator.py +132 -0
- rtgl/validator/temporal_validator.py +229 -0
- rtgl/validator/validator.py +458 -0
- rtgl/visitor/__init__.py +6 -0
- rtgl/visitor/parsed_value.py +32 -0
- rtgl/visitor/visitor.py +531 -0
- rtgl-0.0.3.dist-info/METADATA +227 -0
- rtgl-0.0.3.dist-info/RECORD +40 -0
- rtgl-0.0.3.dist-info/WHEEL +4 -0
- rtgl-0.0.3.dist-info/licenses/LICENSE +21 -0
rtgl/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""RTGL: A Framework for Relational Task Generation Language."""
|
|
2
|
+
|
|
3
|
+
from rtgl import base
|
|
4
|
+
from rtgl import converter
|
|
5
|
+
from rtgl import parser
|
|
6
|
+
from rtgl import validator
|
|
7
|
+
from rtgl import visitor
|
|
8
|
+
|
|
9
|
+
__all__ = ["base", "converter", "parser", "validator", "visitor"]
|
rtgl/base/__init__.py
ADDED
rtgl/base/database.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Database class to hold multiple tables."""
|
|
2
|
+
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from rtgl.base.table import Table
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Database:
    r"""Represents a database containing multiple related tables.

    The *`Database`* class stores a collection of *`Table`* objects keyed by
    table name, provides a representation method for displaying all tables,
    and exposes the earliest / latest timestamp found across the tables that
    declare a time column.

    Note:
        *`min_timestamp`* and *`max_timestamp`* are cached on first access,
        so later changes to *`table_dict`* are not reflected in them.
    """

    def __init__(self, table_dict: dict[str, Table]) -> None:
        r"""Initializes *`Database`* with a dictionary of tables.

        Args:
            table_dict (dict[str, Table]): Dictionary where keys are table
                names and values are Table objects.

        Returns:
            out (None):
        """
        self.table_dict = table_dict

    def __repr__(self) -> str:
        r"""Returns a string representation of the database.

        Returns:
            out (str): Formatted string showing all tables in the database.
        """
        banner = "================= Database ================\n"
        sections = [f"Table Name: {name}\n{table}\n" for name, table in self.table_dict.items()]
        return banner + "".join(sections)

    def _timestamp_bound(self, *, latest: bool) -> pd.Timestamp | None:
        r"""Computes the earliest or latest timestamp over all temporal tables.

        Args:
            latest (bool): If True the maximum is computed, otherwise the minimum.

        Returns:
            out (pd.Timestamp | None): The requested bound, or None if no table
                declares a time column.
        """
        attr = "max_timestamp" if latest else "min_timestamp"
        candidates = [getattr(table, attr) for table in self.table_dict.values() if table.time_col]
        if not candidates:
            return None

        # An empty or all-null time column yields NaT. Every comparison with NaT
        # is False, so feeding NaT to min()/max() makes the result depend on the
        # order of the tables. Only real timestamps take part in the reduction.
        valid = [ts for ts in candidates if pd.notna(ts)]
        if not valid:
            # no usable timestamp at all -> keep returning the missing-value marker
            return candidates[0]

        return max(valid) if latest else min(valid)

    @cached_property
    def min_timestamp(self) -> pd.Timestamp | None:
        r"""Returns the minimum timestamp across all tables in the database, if any time columns exist.

        Returns:
            min_timestamp (pd.Timestamp | None): Minimum timestamp across all tables, or None if no time columns.
        """
        return self._timestamp_bound(latest=False)

    @cached_property
    def max_timestamp(self) -> pd.Timestamp | None:
        r"""Returns the maximum timestamp across all tables in the database, if any time columns exist.

        Returns:
            max_timestamp (pd.Timestamp | None): Maximum timestamp across all tables, or None if no time columns.
        """
        return self._timestamp_bound(latest=True)
|
rtgl/base/table.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Table class representing a database table with metadata."""
|
|
2
|
+
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Table:
    r"""Represents a database table with its data and relational metadata.

    A *`Table`* bundles a pandas *`DataFrame`* with the name of its primary key
    column, a mapping from foreign key columns to the tables they reference,
    and the name of an optional timestamp column.
    """

    def __init__(
        self, df: pd.DataFrame, fkey_col_to_pkey_table: dict[str, str] = None, pkey_col: str = None, time_col: str = None
    ) -> None:
        r"""Initializes *`Table`* with data and metadata.

        Args:
            df (pd.DataFrame): The table data.
            fkey_col_to_pkey_table (dict, optional): Dictionary mapping foreign key column names to parent table names.
                Default = None.
            pkey_col (str, optional): Primary key column name.
                Default = None.
            time_col (str, optional): Timestamp column name for temporal tables.
                Default = None.
        """
        self.df = df
        self.pkey_col = pkey_col
        self.time_col = time_col
        # stored exactly as given (may be None)
        self.fkey_col_to_pkey_table = fkey_col_to_pkey_table

    def __repr__(self) -> str:
        r"""Returns a string representation of the table.

        Returns:
            out (str): Formatted string showing *`DataFrame`* and all metadata.
        """
        pieces = (
            "------------------ Table ------------------\n",
            f"DataFrame:\n{self.df}\n",
            f"Foreign Key Columns to Primary Key Tables: {self.fkey_col_to_pkey_table}\n",
            f"Primary Key Column: {self.pkey_col}\n",
            f"Time Column: {self.time_col}\n",
            "-------------------------------------------",
        )
        return "".join(pieces)

    @cached_property
    def min_timestamp(self) -> pd.Timestamp | None:
        r"""Returns the minimum timestamp in the time column, if it exists.

        The value is computed once and cached on the instance.

        Returns:
            min_timestamp (pd.Timestamp | None): Minimum timestamp in the time column, or None if no time column.
        """
        if not self.time_col:
            return None
        return self.df[self.time_col].min()

    @cached_property
    def max_timestamp(self) -> pd.Timestamp | None:
        r"""Returns the maximum timestamp in the time column, if it exists.

        The value is computed once and cached on the instance.

        Returns:
            max_timestamp (pd.Timestamp | None): Maximum timestamp in the time column, or None if no time column.
        """
        if not self.time_col:
            return None
        return self.df[self.time_col].max()
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Converter modules for RTGL to SQL translation."""
|
|
2
|
+
|
|
3
|
+
from rtgl.converter.converter import Converter
|
|
4
|
+
from rtgl.converter.static_converter import SConverter
|
|
5
|
+
from rtgl.converter.temporal_converter import TConverter
|
|
6
|
+
|
|
7
|
+
__all__ = ["Converter", "SConverter", "TConverter"]
|
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
"""Base RTGL converter class."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
|
|
6
|
+
import duckdb
|
|
7
|
+
from antlr4 import CommonTokenStream, InputStream
|
|
8
|
+
|
|
9
|
+
from rtgl.base import Database, Table
|
|
10
|
+
from rtgl.converter.utils import (
|
|
11
|
+
build_aggr_func,
|
|
12
|
+
build_null_condition,
|
|
13
|
+
build_num_condition,
|
|
14
|
+
build_str_condition,
|
|
15
|
+
get_div_line,
|
|
16
|
+
)
|
|
17
|
+
from rtgl.parser import LexerRTGL, ParserRTGL
|
|
18
|
+
from rtgl.validator import ErrorCollector, Validator
|
|
19
|
+
from rtgl.visitor import Visitor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Converter(ABC):
|
|
23
|
+
r"""Base abstract RTGL converter class for conversion RTGL -> SQL.
|
|
24
|
+
|
|
25
|
+
Provides shared functionality for temporal and static RTGL converters.
|
|
26
|
+
Some methods are abstract and must be implemented by concrete subclasses,
|
|
27
|
+
but others provide common logic used in both static and temporal conversion.
|
|
28
|
+
|
|
29
|
+
Attributes:
|
|
30
|
+
validator (Validator): Validator instance for semantic validation of parsed queries.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
# validator instance for semantic validation of parsed queries
|
|
34
|
+
# set in concrete subclasses
|
|
35
|
+
validator: Validator
|
|
36
|
+
|
|
37
|
+
def __init__(self, db: Database) -> None:
|
|
38
|
+
r"""Base constructor.
|
|
39
|
+
|
|
40
|
+
Initializes *`Database`* instance, *`Visitor`* instance, and *`ErrorCollector`* instance
|
|
41
|
+
for storing validation errors.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
db (Database): *`Database`* instance containing the schema and data tables to be queried.
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
out (None):
|
|
48
|
+
"""
|
|
49
|
+
self.conn = None
|
|
50
|
+
self.db = db
|
|
51
|
+
self.visitor = Visitor()
|
|
52
|
+
self.collector = ErrorCollector()
|
|
53
|
+
|
|
54
|
+
@abstractmethod
|
|
55
|
+
def convert(self, rtgl_query: str, execute: bool = False) -> str | Table:
|
|
56
|
+
r"""Abstract conversion method.
|
|
57
|
+
|
|
58
|
+
Main entry point.
|
|
59
|
+
|
|
60
|
+
Note:
|
|
61
|
+
For explanation of the conversion process, see concrete subclasses.
|
|
62
|
+
"""
|
|
63
|
+
pass
|
|
64
|
+
|
|
65
|
+
@abstractmethod
|
|
66
|
+
def build_for_each(self, for_each_dict: dict) -> tuple[str, str, str]:
|
|
67
|
+
r"""Abstrac method to build the SQL query for the for each part of the RTGL query.
|
|
68
|
+
|
|
69
|
+
Note:
|
|
70
|
+
For explanation of the building process, see concrete subclasses.
|
|
71
|
+
"""
|
|
72
|
+
pass
|
|
73
|
+
|
|
74
|
+
@abstractmethod
|
|
75
|
+
def build_predict(self, predict_dict: dict, ptable: str, ppk: str, for_each_query: str) -> str:
|
|
76
|
+
r"""Abstract method to build the SQL query for the predict part of the RTGL query.
|
|
77
|
+
|
|
78
|
+
Note:
|
|
79
|
+
For explanation of the building process, see concrete subclasses.
|
|
80
|
+
"""
|
|
81
|
+
pass
|
|
82
|
+
|
|
83
|
+
@abstractmethod
|
|
84
|
+
def build_expr(self, expr_dict: dict, ptable: str, ppk: str) -> str:
|
|
85
|
+
r"""Abstract method to build the SQL query for the expression part of the RTGL query.
|
|
86
|
+
|
|
87
|
+
Note:
|
|
88
|
+
For explanation of the building process, see concrete subclasses.
|
|
89
|
+
"""
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
@abstractmethod
|
|
93
|
+
def build_aggregation(self, aggr_dict: dict, ptable: str, ppk: str) -> str:
|
|
94
|
+
r"""Abstract method to build the SQL query for the aggregation part of the RTGL query.
|
|
95
|
+
|
|
96
|
+
Note:
|
|
97
|
+
For explanation of the building process, see concrete subclasses.
|
|
98
|
+
"""
|
|
99
|
+
pass
|
|
100
|
+
|
|
101
|
+
def parse_query(self, rtgl_query: str) -> dict:
|
|
102
|
+
r"""Parses the RTGL query string into a dictionary representation.
|
|
103
|
+
|
|
104
|
+
Validates a dictionary representation, prints all errors on stderr
|
|
105
|
+
and exit the program if any errors were found.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
rtgl_query (str): The RTGL query string to be parsed.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
query_dict (dict): Dictionary representation of the parsed RTGL query.
|
|
112
|
+
"""
|
|
113
|
+
input_stream = InputStream(rtgl_query)
|
|
114
|
+
lexer = LexerRTGL(input_stream)
|
|
115
|
+
token_stream = CommonTokenStream(lexer)
|
|
116
|
+
|
|
117
|
+
parser = ParserRTGL(token_stream)
|
|
118
|
+
parser.removeErrorListeners()
|
|
119
|
+
parser.addErrorListener(self.collector)
|
|
120
|
+
tree = parser.query()
|
|
121
|
+
|
|
122
|
+
query_dict = self.visitor.visit(tree)
|
|
123
|
+
|
|
124
|
+
if self.validator:
|
|
125
|
+
self.validator.validate(query_dict)
|
|
126
|
+
|
|
127
|
+
if len(self.collector) > 0:
|
|
128
|
+
print(self.collector, file=sys.stderr)
|
|
129
|
+
self.collector.clear()
|
|
130
|
+
sys.exit(1)
|
|
131
|
+
|
|
132
|
+
return query_dict
|
|
133
|
+
|
|
134
|
+
def build_stat_where(self, where_dict: dict, ptable: str, ppk: str) -> str:
|
|
135
|
+
r"""Builds the SQL query for the static WHERE part of the RTGL query.
|
|
136
|
+
|
|
137
|
+
Filters a ptable before using.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
where_dict (dict): Dictionary representation of the WHERE part of the RTGL query.
|
|
141
|
+
ptable (str): Name of the parent table.
|
|
142
|
+
ppk (str): Name of the primary key column in the parent table.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
where_query (str): SQL query string representing the filtered ptable.
|
|
146
|
+
"""
|
|
147
|
+
# create division markers for formatted output
|
|
148
|
+
div_line_expr1 = get_div_line("STAT_WHERE_START")
|
|
149
|
+
div_line_expr2 = get_div_line("STAT_WHERE_END")
|
|
150
|
+
|
|
151
|
+
expr_query = self.build_stat_expr(where_dict["Expr"].value, ptable, ppk)
|
|
152
|
+
expr_query = expr_query.replace("\n", "\n" + 4 * " ") + "\n"
|
|
153
|
+
|
|
154
|
+
where_query = (
|
|
155
|
+
f"{div_line_expr1}\n"
|
|
156
|
+
"SELECT\n"
|
|
157
|
+
" *\n"
|
|
158
|
+
"FROM\n"
|
|
159
|
+
f" {ptable} __UNSORTED_AGGR_TBL__\n"
|
|
160
|
+
f"JOIN\n"
|
|
161
|
+
f" ({expr_query}) __EXPR__\n"
|
|
162
|
+
"ON\n"
|
|
163
|
+
f" __UNSORTED_AGGR_TBL__.{ppk} = __EXPR__.fk\n"
|
|
164
|
+
f"{div_line_expr2}"
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
return where_query
|
|
168
|
+
|
|
169
|
+
def build_stat_expr(self, expr_dict: dict, ptable: str, ppk: str) -> str:
|
|
170
|
+
r"""Builds a SQL query for the static expression part of the RTGL query.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
expr_dict (dict): Dictionary representation of the expr part of the RTGL query.
|
|
174
|
+
ptable (str): Name of the parent table.
|
|
175
|
+
ppk (str): Name of the primary key column in the parent table.
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
expr_query (str): SQL query string representing the expr part of the RTGL query.
|
|
179
|
+
"""
|
|
180
|
+
# create division markers for formatted output
|
|
181
|
+
div_line_expr1 = get_div_line("STAT_EXPR_START")
|
|
182
|
+
div_line_expr2 = get_div_line("STAT_EXPR_END")
|
|
183
|
+
|
|
184
|
+
# if expression is composite (AND/OR) -> recursively build left and right sub-expressions
|
|
185
|
+
# otherwise -> build single condition expression
|
|
186
|
+
if isinstance(expr_dict, dict) and "Op" in expr_dict:
|
|
187
|
+
# build left expession
|
|
188
|
+
left_expr = self.build_stat_expr(expr_dict["LeftExpr"], ptable, ppk)
|
|
189
|
+
left_expr = left_expr.replace("\n", "\n" + 4 * " ") + "\n"
|
|
190
|
+
# build right expression
|
|
191
|
+
right_expr = self.build_stat_expr(expr_dict["RightExpr"], ptable, ppk)
|
|
192
|
+
right_expr = right_expr.replace("\n", "\n" + 4 * " ") + "\n"
|
|
193
|
+
|
|
194
|
+
# check operation and convert to SQL format for tables
|
|
195
|
+
op = expr_dict["Op"].value.lower()
|
|
196
|
+
if op == "and":
|
|
197
|
+
filt = "INTERSECT"
|
|
198
|
+
elif op == "or":
|
|
199
|
+
filt = "UNION"
|
|
200
|
+
else:
|
|
201
|
+
pass
|
|
202
|
+
|
|
203
|
+
expr_query = (
|
|
204
|
+
f"{div_line_expr1}\n"
|
|
205
|
+
"SELECT\n"
|
|
206
|
+
" fk,\n"
|
|
207
|
+
"FROM\n"
|
|
208
|
+
f" ({left_expr}) __LEFT_EXPR__\n"
|
|
209
|
+
f"{filt}\n"
|
|
210
|
+
"SELECT\n"
|
|
211
|
+
" fk,\n"
|
|
212
|
+
f"FROM ({right_expr}) __RIGHT_EXPR__\n"
|
|
213
|
+
f"{div_line_expr2}"
|
|
214
|
+
)
|
|
215
|
+
else:
|
|
216
|
+
expr_query = self.build_condition(expr_dict.value, ptable, ppk, stat=True)
|
|
217
|
+
|
|
218
|
+
return expr_query
|
|
219
|
+
|
|
220
|
+
def build_condition(self, cond_dict: dict, ptable: str, ppk: str, stat: bool = False) -> str:
|
|
221
|
+
r"""Builds a SQL query for a condition part of the RTGL query.
|
|
222
|
+
|
|
223
|
+
Args:
|
|
224
|
+
cond_dict (dict): Dictionary representation of the condition part of the RTGL query.
|
|
225
|
+
ptable (str): Name of the parent table.
|
|
226
|
+
ppk (str): Name of the primary key column in the parent table.
|
|
227
|
+
stat (bool): Flag indicating whether the condition is part of a static expression or not.
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
res_query (str): SQL query string representing the condition part of the RTGL query.
|
|
231
|
+
"""
|
|
232
|
+
# check condition type and build main query for condition accordingly
|
|
233
|
+
# aggregation / id_dot_id
|
|
234
|
+
cond_type = cond_dict["CondType"]
|
|
235
|
+
match cond_type:
|
|
236
|
+
case "aggregation":
|
|
237
|
+
if stat:
|
|
238
|
+
main_query = self.build_stat_aggregation(cond_dict["Aggregation"].value, ptable, ppk)
|
|
239
|
+
else:
|
|
240
|
+
main_query = self.build_aggregation(cond_dict["Aggregation"].value, ptable, ppk)
|
|
241
|
+
case "id_dot_id":
|
|
242
|
+
main_query = self.build_id_dot_id(cond_dict, ptable, ppk)
|
|
243
|
+
case _:
|
|
244
|
+
pass
|
|
245
|
+
main_query = main_query.replace("\n", "\n" + 4 * " ") + "\n"
|
|
246
|
+
|
|
247
|
+
# column to compare in condition
|
|
248
|
+
comp_col = "comp_col"
|
|
249
|
+
|
|
250
|
+
# check value condition type and build condition accordingly
|
|
251
|
+
# num / str / null
|
|
252
|
+
ctype = cond_dict["CType"]
|
|
253
|
+
match ctype:
|
|
254
|
+
case "num":
|
|
255
|
+
cond = build_num_condition(cond_dict)
|
|
256
|
+
case "str":
|
|
257
|
+
cond = build_str_condition(cond_dict)
|
|
258
|
+
case "null":
|
|
259
|
+
cond = build_null_condition(cond_dict)
|
|
260
|
+
case _:
|
|
261
|
+
pass
|
|
262
|
+
|
|
263
|
+
# handle NOT operator
|
|
264
|
+
not_op = "NOT " if cond_dict["NOT"] else ""
|
|
265
|
+
|
|
266
|
+
# create division markers for formatted output
|
|
267
|
+
div_line1 = get_div_line("CONDITION_START")
|
|
268
|
+
div_line2 = get_div_line("CONDITION_END")
|
|
269
|
+
|
|
270
|
+
# build final condition query
|
|
271
|
+
res_query = (
|
|
272
|
+
f"{div_line1}\nSELECT\n *\nFROM\n ({main_query})\nWHERE\n {not_op}{cond(comp_col)}\n{div_line2}"
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
return res_query
|
|
276
|
+
|
|
277
|
+
def build_stat_aggregation(self, aggr_dict: dict, ptable: str, ppk: str) -> str:
|
|
278
|
+
r"""Builds a SQL query for a static RTGL aggregation.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
aggr_dict (dict): Parsed aggregation dictionary containing 'Table', 'Column', 'Where'(optional) keys.
|
|
282
|
+
ptable (str): Name of the parent table.
|
|
283
|
+
ppk (str): Name of the primary key column in the parent table.
|
|
284
|
+
|
|
285
|
+
Returns:
|
|
286
|
+
aggr_query (str): SQL query returning pairs (fk, comp_col).
|
|
287
|
+
"""
|
|
288
|
+
# extract aggregation table
|
|
289
|
+
aggr_table = aggr_dict["Table"].value
|
|
290
|
+
|
|
291
|
+
# find foreign key column in the aggregation table that links to parent table
|
|
292
|
+
fk = self._find_fk(aggr_table, ptable, ppk)
|
|
293
|
+
|
|
294
|
+
# build SQL aggregation function with proper column references
|
|
295
|
+
aggr_dict["Column"].value = self._find_column(aggr_table, aggr_dict["Column"].value)
|
|
296
|
+
aggr_func = build_aggr_func(aggr_dict)
|
|
297
|
+
aggr = aggr_func("__AGGR_TBL__").replace("\n", "\n" + 4 * " ") + "\n"
|
|
298
|
+
|
|
299
|
+
# build static WHERE query if exists
|
|
300
|
+
if where := aggr_dict["Where"]:
|
|
301
|
+
aggr_ppk = self._find_pkey(aggr_table)
|
|
302
|
+
aggr_table = self.build_stat_where(where.value, aggr_table, aggr_ppk)
|
|
303
|
+
aggr_table = f"({aggr_table})"
|
|
304
|
+
|
|
305
|
+
# create division markers for formatted output
|
|
306
|
+
div_line_aggr1 = get_div_line("STAT_AGGREGATION_START")
|
|
307
|
+
div_line_aggr2 = get_div_line("STAT_AGGREGATION_END")
|
|
308
|
+
|
|
309
|
+
# build aggregation query
|
|
310
|
+
aggr_query = (
|
|
311
|
+
f"{div_line_aggr1}\n"
|
|
312
|
+
"SELECT\n"
|
|
313
|
+
f" __PARENT__.{ppk} AS fk,\n"
|
|
314
|
+
f" {aggr} AS comp_col,\n"
|
|
315
|
+
"FROM\n"
|
|
316
|
+
f" {ptable} __PARENT__\n"
|
|
317
|
+
"LEFT JOIN\n"
|
|
318
|
+
f" {aggr_table} __AGGR_TBL__\n"
|
|
319
|
+
"ON\n"
|
|
320
|
+
f" __AGGR_TBL__.{fk} = __PARENT__.{ppk}\n"
|
|
321
|
+
"GROUP BY\n"
|
|
322
|
+
f" __PARENT__.{ppk}\n"
|
|
323
|
+
f"{div_line_aggr2}"
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
return aggr_query
|
|
327
|
+
|
|
328
|
+
def build_id_dot_id(self, some_dict: dict, ptable: str, ppk: str) -> str:
|
|
329
|
+
r"""Builds the SQL query for a table.column(id_dot_id) part of the RTGL query.
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
some_dict (dict): Dictionary containing 'Table', 'Column' keys.
|
|
333
|
+
ptable (str): Name of the parent table.
|
|
334
|
+
ppk (str): Name of the primary key column in the parent table.
|
|
335
|
+
|
|
336
|
+
Returns:
|
|
337
|
+
res_query (str): SQL query string representing the id_dot_id part of the RTGL query.
|
|
338
|
+
"""
|
|
339
|
+
table = some_dict["Table"].value
|
|
340
|
+
column = self._find_column(table, some_dict["Column"].value)
|
|
341
|
+
|
|
342
|
+
# column to compare in condition
|
|
343
|
+
comp_col = "comp_col"
|
|
344
|
+
# find foreign key column in child table referencing parent table
|
|
345
|
+
fk = self._find_fk(table, ptable, ppk)
|
|
346
|
+
|
|
347
|
+
# create division markers for formatted output
|
|
348
|
+
div_line1 = get_div_line("ID_DOT_ID_START")
|
|
349
|
+
div_line2 = get_div_line("ID_DOT_ID_END")
|
|
350
|
+
|
|
351
|
+
# if foreign key exists -> build simple query on the child table
|
|
352
|
+
# otherwise -> try to find foreign key in the opposite direction and build left join query
|
|
353
|
+
if fk := self._find_fk(table, ptable, ppk):
|
|
354
|
+
res_query = (
|
|
355
|
+
f"{div_line1}\nSELECT\n {fk} AS fk,\n {column} AS {comp_col}\nFROM\n {table}\n{div_line2}"
|
|
356
|
+
)
|
|
357
|
+
elif fk := self._find_fk(ptable, table, ppk=None):
|
|
358
|
+
res_query = (
|
|
359
|
+
f"{div_line1}\n"
|
|
360
|
+
"SELECT\n"
|
|
361
|
+
f" __PT__.{ppk} AS fk,\n"
|
|
362
|
+
f" __T__.{column} AS {comp_col}\n"
|
|
363
|
+
"FROM\n"
|
|
364
|
+
f" {ptable} __PT__\n"
|
|
365
|
+
"LEFT JOIN\n"
|
|
366
|
+
f" {table} __T__\n"
|
|
367
|
+
"ON\n"
|
|
368
|
+
f" __PT__.{fk} = __T__.{self._find_pkey(table)}\n"
|
|
369
|
+
f"{div_line2}"
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
return res_query
|
|
373
|
+
|
|
374
|
+
################## Helper methods ##################
|
|
375
|
+
|
|
376
|
+
def _register_db(self) -> None:
|
|
377
|
+
"""Registers all tables from the *`Database`* instance in the *`DuckDB`* connection.
|
|
378
|
+
|
|
379
|
+
Returns:
|
|
380
|
+
out (None):
|
|
381
|
+
"""
|
|
382
|
+
if self.conn:
|
|
383
|
+
return
|
|
384
|
+
|
|
385
|
+
self.conn = duckdb.connect()
|
|
386
|
+
|
|
387
|
+
for name, table in self.db.table_dict.items():
|
|
388
|
+
self.conn.register(name, table.df)
|
|
389
|
+
|
|
390
|
+
def _find_table(self, table: str) -> tuple[str, Table] | None:
|
|
391
|
+
r"""Finds a *`Table`* object in the *`Database`* by its name (case-insensitive).
|
|
392
|
+
|
|
393
|
+
Args:
|
|
394
|
+
table (str): Name of the table to find.
|
|
395
|
+
|
|
396
|
+
Returns:
|
|
397
|
+
out (tuple[str, Table] | None): Tuple of the form (original_table_name, Table)
|
|
398
|
+
Returns None if no table with the given name was found.
|
|
399
|
+
"""
|
|
400
|
+
# k ... name of the table
|
|
401
|
+
# v ... Table object
|
|
402
|
+
for k, v in self.db.table_dict.items():
|
|
403
|
+
if k.lower() == table.lower():
|
|
404
|
+
return k, v
|
|
405
|
+
|
|
406
|
+
return None
|
|
407
|
+
|
|
408
|
+
def _find_column(self, table: str, column: str) -> str | None:
|
|
409
|
+
r"""Finds a column name in a table (case-insensitive).
|
|
410
|
+
|
|
411
|
+
Args:
|
|
412
|
+
table (str): Name of the table.
|
|
413
|
+
column (str): Name of the column to find.
|
|
414
|
+
|
|
415
|
+
Returns:
|
|
416
|
+
out (str | None): Original name of the column if found, None otherwise.
|
|
417
|
+
"""
|
|
418
|
+
_, table_obj = self._find_table(table)
|
|
419
|
+
if table_obj:
|
|
420
|
+
# if column is "*" -> return primary key column
|
|
421
|
+
if column == "*":
|
|
422
|
+
return table_obj.pkey_col
|
|
423
|
+
for col in table_obj.df.columns:
|
|
424
|
+
if col.lower() == column.lower():
|
|
425
|
+
return col
|
|
426
|
+
return None
|
|
427
|
+
|
|
428
|
+
def _find_ptable(self, table: str, fk: str) -> str | None:
|
|
429
|
+
r"""Finds the parent table name that a given table references through a given foreign key (case-insensitive).
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
table (str): Name of the child table.
|
|
433
|
+
fk (str): Name of the foreign key column in the child table.
|
|
434
|
+
|
|
435
|
+
Returns:
|
|
436
|
+
out (str | None): Name of the parent table that the child table references through the foreign key column
|
|
437
|
+
if found, None otherwise.
|
|
438
|
+
"""
|
|
439
|
+
_, table_obj = self._find_table(table)
|
|
440
|
+
|
|
441
|
+
# k ... name of foreign key column in child table
|
|
442
|
+
# v ... name of referenced parent table
|
|
443
|
+
for k, v in table_obj.fkey_col_to_pkey_table.items():
|
|
444
|
+
if k.lower() == fk.lower():
|
|
445
|
+
return v
|
|
446
|
+
|
|
447
|
+
return None
|
|
448
|
+
|
|
449
|
+
def _find_fk(self, ctable: str, ptable: str, ppk: str) -> str | None:
|
|
450
|
+
r"""Finds the foreign key column in a child table that references the parent table (case-insensitive).
|
|
451
|
+
|
|
452
|
+
Args:
|
|
453
|
+
ctable (str): Name of the child table.
|
|
454
|
+
ptable (str): Name of the parent table.
|
|
455
|
+
ppk (str): Name of the primary key column in the parent table.
|
|
456
|
+
|
|
457
|
+
Returns:
|
|
458
|
+
out (str | None): Name of the foreign key column in the child table if found, None otherwise.
|
|
459
|
+
"""
|
|
460
|
+
# if child and parent table are the same -> return primary key
|
|
461
|
+
if ctable.lower() == ptable.lower():
|
|
462
|
+
return ppk
|
|
463
|
+
|
|
464
|
+
_, ctable_obj = self._find_table(ctable)
|
|
465
|
+
|
|
466
|
+
# k ... name of foreign key column in child table
|
|
467
|
+
# v ... name of referenced parent table
|
|
468
|
+
for k, v in ctable_obj.fkey_col_to_pkey_table.items():
|
|
469
|
+
if v.lower() == ptable.lower():
|
|
470
|
+
return k
|
|
471
|
+
|
|
472
|
+
return None
|
|
473
|
+
|
|
474
|
+
def _find_pkey(self, table: str) -> str | None:
|
|
475
|
+
r"""Finds the primary key column of a table (case-insensitive).
|
|
476
|
+
|
|
477
|
+
Args:
|
|
478
|
+
table (str): Name of the table.
|
|
479
|
+
|
|
480
|
+
Returns:
|
|
481
|
+
out (str | None): Name of the primary key column of the table if found, None otherwise.
|
|
482
|
+
"""
|
|
483
|
+
_, table_obj = self._find_table(table)
|
|
484
|
+
return table_obj.pkey_col
|