pdql-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdql-0.1.0/.gitignore ADDED
@@ -0,0 +1,20 @@
+ # Environments
+ .venv/
+ venv/
+ ENV/
+
+ # Python cache
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ .pytest_cache/
+ .mypy_cache/
+ .ruff_cache/
+
+ # Build and distribution
+ dist/
+ build/
+ *.egg-info/
+
+ # OS files
+ .DS_Store
pdql-0.1.0/LICENSE.md ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 Marcin Zawalski
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
pdql-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,115 @@
+ Metadata-Version: 2.4
+ Name: pdql
+ Version: 0.1.0
+ Summary: A library to transpile Pandas syntax to SQL strings.
+ Project-URL: Homepage, https://github.com/marcinz606/pdql
+ Project-URL: Issues, https://github.com/marcinz606/pdql/issues
+ Author-email: Marcin Zawalski <zawalskimarcin@gmail.com>
+ License: MIT
+ License-File: LICENSE.md
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Requires-Python: >=3.8
+ Provides-Extra: dev
+ Requires-Dist: flake8; extra == 'dev'
+ Requires-Dist: mypy; extra == 'dev'
+ Requires-Dist: pytest; extra == 'dev'
+ Requires-Dist: ruff; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # pdql
+
+ pdql is a lightweight Python library that lets you write SQL queries using familiar Pandas syntax. It functions as a "lazy compiler," building a syntax tree from your operations and transpiling it into standard SQL strings without executing anything or requiring a database connection.
+
+ ## Installation
+
+ Clone the repository and set up the environment using the provided Makefile:
+
+ ```bash
+ git clone <repo-url>
+ cd pdql
+ make setup
+ ```
+
+ ## Usage
+
+ ### Persistent Dialect & Filtering
+
+ ```python
+ from pdql.dataframe import SQLDataFrame
+ from pdql.dialects import BigQueryDialect
+
+ # Initialize with a specific dialect
+ df = SQLDataFrame("my_table", dialect=BigQueryDialect())
+
+ # Filters use dialect-specific quoting (backticks for BigQuery)
+ query = df[df["age"] > 21]
+
+ print(query.to_sql())
+ # SELECT * FROM `my_table` WHERE (`my_table`.`age` > 21)
+ ```
+
+ ### Common Table Expressions (CTEs)
+
+ ```python
+ from pdql.dataframe import SQLDataFrame
+
+ # Define a subquery
+ sub = SQLDataFrame("raw_data")[["id", "val"]]
+ sub = sub[sub["val"] > 10]
+
+ # Use it as a source and define the CTE
+ df = SQLDataFrame("filtered").with_cte("filtered", sub)
+
+ print(df.to_sql())
+ # WITH "filtered" AS (SELECT "id", "val" FROM "raw_data" WHERE ("raw_data"."val" > 10)) SELECT * FROM "filtered"
+ ```
+
+ ### Subqueries & Aliasing
+
+ ```python
+ inner = SQLDataFrame("orders").groupby("user_id").agg({"amount": "sum"}).alias("totals")
+ outer = SQLDataFrame(inner)
+ query = outer[outer["amount_sum"] > 1000]
+
+ print(query.to_sql())
+ # SELECT * FROM (SELECT "user_id", SUM("amount") AS "amount_sum" FROM "orders" GROUP BY "user_id") AS "totals" WHERE ("totals"."amount_sum" > 1000)
+ ```
+
+ ### Ordering & Limits
+
+ ```python
+ from pdql.expressions import SQLFunction
+
+ # Order by columns or expressions/functions
+ query = df.sort_values(["created_at", SQLFunction("rand")], ascending=[False, True]).head(10)
+
+ print(query.to_sql())
+ # SELECT * FROM "my_table" ORDER BY "my_table"."created_at" DESC, RAND() ASC LIMIT 10
+ ```
+
+ ### DML Operations
+
+ ```python
+ df = SQLDataFrame("users")
+
+ # Generate INSERT
+ insert_sql = df.insert({"name": "Alice", "status": "active"})
+
+ # Generate DELETE based on current filters
+ delete_sql = df[df["status"] == "inactive"].delete()
+ ```
+
+ ## Development
+
+ Use the `Makefile` for standard tasks:
+
+ - **Run Tests:** `make test`
+ - **Format Code:** `make format`
+ - **Linting:** `make lint`
+ - **Build Package:** `make build`
+
+ ## License
+
+ [MIT](LICENSE.md)
pdql-0.1.0/README.md ADDED
@@ -0,0 +1,95 @@
+ # pdql
+
+ pdql is a lightweight Python library that lets you write SQL queries using familiar Pandas syntax. It functions as a "lazy compiler," building a syntax tree from your operations and transpiling it into standard SQL strings without executing anything or requiring a database connection.
+
+ ## Installation
+
+ Clone the repository and set up the environment using the provided Makefile:
+
+ ```bash
+ git clone <repo-url>
+ cd pdql
+ make setup
+ ```
+
+ ## Usage
+
+ ### Persistent Dialect & Filtering
+
+ ```python
+ from pdql.dataframe import SQLDataFrame
+ from pdql.dialects import BigQueryDialect
+
+ # Initialize with a specific dialect
+ df = SQLDataFrame("my_table", dialect=BigQueryDialect())
+
+ # Filters use dialect-specific quoting (backticks for BigQuery)
+ query = df[df["age"] > 21]
+
+ print(query.to_sql())
+ # SELECT * FROM `my_table` WHERE (`my_table`.`age` > 21)
+ ```
+
+ ### Common Table Expressions (CTEs)
+
+ ```python
+ from pdql.dataframe import SQLDataFrame
+
+ # Define a subquery
+ sub = SQLDataFrame("raw_data")[["id", "val"]]
+ sub = sub[sub["val"] > 10]
+
+ # Use it as a source and define the CTE
+ df = SQLDataFrame("filtered").with_cte("filtered", sub)
+
+ print(df.to_sql())
+ # WITH "filtered" AS (SELECT "id", "val" FROM "raw_data" WHERE ("raw_data"."val" > 10)) SELECT * FROM "filtered"
+ ```
+
+ ### Subqueries & Aliasing
+
+ ```python
+ inner = SQLDataFrame("orders").groupby("user_id").agg({"amount": "sum"}).alias("totals")
+ outer = SQLDataFrame(inner)
+ query = outer[outer["amount_sum"] > 1000]
+
+ print(query.to_sql())
+ # SELECT * FROM (SELECT "user_id", SUM("amount") AS "amount_sum" FROM "orders" GROUP BY "user_id") AS "totals" WHERE ("totals"."amount_sum" > 1000)
+ ```
+
+ ### Ordering & Limits
+
+ ```python
+ from pdql.expressions import SQLFunction
+
+ # Order by columns or expressions/functions
+ query = df.sort_values(["created_at", SQLFunction("rand")], ascending=[False, True]).head(10)
+
+ print(query.to_sql())
+ # SELECT * FROM "my_table" ORDER BY "my_table"."created_at" DESC, RAND() ASC LIMIT 10
+ ```
+
+ ### DML Operations
+
+ ```python
+ df = SQLDataFrame("users")
+
+ # Generate INSERT
+ insert_sql = df.insert({"name": "Alice", "status": "active"})
+
+ # Generate DELETE based on current filters
+ delete_sql = df[df["status"] == "inactive"].delete()
+ ```
+
+ ## Development
+
+ Use the `Makefile` for standard tasks:
+
+ - **Run Tests:** `make test`
+ - **Format Code:** `make format`
+ - **Linting:** `make lint`
+ - **Build Package:** `make build`
+
+ ## License
+
+ [MIT](LICENSE.md)
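A note on the DML example above: the README shows the calls but not the strings they return. Below is a minimal sketch with the output inferred from the `insert()` and `delete()` implementations further down in this diff (not taken from the package's own documentation), assuming the default `GenericDialect`:

```python
from pdql.dataframe import SQLDataFrame

df = SQLDataFrame("users")

# INSERT built from a dict of column -> value pairs
print(df.insert({"name": "Alice", "status": "active"}))
# INSERT INTO "users" ("name", "status") VALUES ('Alice', 'active')

# DELETE constrained by the filters applied to the frame
print(df[df["status"] == "inactive"].delete())
# DELETE FROM "users" WHERE ("users"."status" = 'inactive')
```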
pdql-0.1.0/pyproject.toml ADDED
@@ -0,0 +1,38 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "pdql"
+ version = "0.1.0"
+ description = "A library to transpile Pandas syntax to SQL strings."
+ readme = "README.md"
+ requires-python = ">=3.8"
+ license = {text = "MIT"}
+ authors = [
+     {name = "Marcin Zawalski", email = "zawalskimarcin@gmail.com"},
+ ]
+ classifiers = [
+     "Programming Language :: Python :: 3",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+ ]
+ dependencies = []
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest",
+     "flake8",
+     "mypy",
+     "ruff",
+ ]
+
+ [tool.hatch.build.targets.sdist]
+ include = ["src/pdql"]
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["src/pdql"]
+
+ [project.urls]
+ Homepage = "https://github.com/marcinz606/pdql"
+ Issues = "https://github.com/marcinz606/pdql/issues"
pdql-0.1.0/src/pdql/dataframe.py ADDED
@@ -0,0 +1,394 @@
+ import uuid
+ from typing import List, Optional, Union, Any, Dict, Tuple
+ from dataclasses import dataclass
+ from pdql.expressions import SQLColumn, SQLExpression, SQLNode, SQLFunction
+ from pdql.dialects import Dialect, GenericDialect
+
+
+ @dataclass
+ class Join:
+     table: Union[str, "SQLDataFrame"]
+     join_type: str
+     condition: SQLExpression
+     alias: Optional[str] = None
+
+
+ class SQLDataFrame:
+     """Immutable container representing a SQL query."""
+
+     def __init__(
+         self,
+         source: Union[str, "SQLDataFrame"],
+         select_cols: Optional[List[Union[str, SQLNode]]] = None,
+         where_conditions: Optional[List[SQLExpression]] = None,
+         joins: Optional[List[Join]] = None,
+         group_by_cols: Optional[List[Union[str, SQLNode]]] = None,
+         order_by: Optional[List[Tuple[SQLNode, bool]]] = None,
+         limit_count: Optional[int] = None,
+         alias_name: Optional[str] = None,
+         dialect: Optional[Dialect] = None,
+         ctes: Optional[Dict[str, "SQLDataFrame"]] = None,
+     ):
+         self.source = source
+         self.select_cols = select_cols or ["*"]
+         self.where_conditions = where_conditions or []
+         self.joins = joins or []
+         self.group_by_cols = group_by_cols or []
+         self.order_by = order_by or []
+         self.limit_count = limit_count
+         self.alias_name = alias_name
+         self.dialect = dialect
+         self.ctes = ctes or {}
+
+     @property
+     def identifier(self) -> str:
+         """Identifier used for table qualification."""
+         if self.alias_name:
+             return self.alias_name
+         if isinstance(self.source, str):
+             return self.source
+         return self.source.identifier
+
+     def is_simple(self) -> bool:
+         """True if the dataframe is a simple table reference."""
+         return (
+             self.select_cols == ["*"]
+             and not self.where_conditions
+             and not self.joins
+             and not self.group_by_cols
+             and not self.order_by
+             and self.limit_count is None
+             and isinstance(self.source, str)
+         )
+
+     def alias(self, name: str) -> "SQLDataFrame":
+         """Assign an alias for subquery usage."""
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=self.select_cols,
+             where_conditions=self.where_conditions,
+             joins=self.joins,
+             group_by_cols=self.group_by_cols,
+             order_by=self.order_by,
+             limit_count=self.limit_count,
+             alias_name=name,
+             dialect=self.dialect,
+             ctes=self.ctes,
+         )
+
+     def with_cte(self, name: str, dataframe: "SQLDataFrame") -> "SQLDataFrame":
+         """Add a Common Table Expression to the query."""
+         new_ctes = dict(self.ctes)
+         new_ctes[name] = dataframe
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=self.select_cols,
+             where_conditions=self.where_conditions,
+             joins=self.joins,
+             group_by_cols=self.group_by_cols,
+             order_by=self.order_by,
+             limit_count=self.limit_count,
+             alias_name=self.alias_name,
+             dialect=self.dialect,
+             ctes=new_ctes,
+         )
+
+     def __getitem__(self, item: Any) -> Union[SQLColumn, "SQLDataFrame"]:
+         if isinstance(item, str):
+             owner = self.identifier if not self.joins else None
+             return SQLColumn(item, owner=owner)
+
+         if isinstance(item, list):
+             return SQLDataFrame(
+                 source=self.source,
+                 select_cols=item,
+                 where_conditions=self.where_conditions,
+                 joins=self.joins,
+                 group_by_cols=self.group_by_cols,
+                 order_by=self.order_by,
+                 limit_count=self.limit_count,
+                 alias_name=self.alias_name,
+                 dialect=self.dialect,
+                 ctes=self.ctes,
+             )
+
+         if isinstance(item, SQLExpression):
+             new_conditions = self.where_conditions + [item]
+             return SQLDataFrame(
+                 source=self.source,
+                 select_cols=self.select_cols,
+                 where_conditions=new_conditions,
+                 joins=self.joins,
+                 group_by_cols=self.group_by_cols,
+                 order_by=self.order_by,
+                 limit_count=self.limit_count,
+                 alias_name=self.alias_name,
+                 dialect=self.dialect,
+                 ctes=self.ctes,
+             )
+
+         raise TypeError(f"Invalid argument type for __getitem__: {type(item)}")
+
+     def merge(
+         self,
+         right: "SQLDataFrame",
+         how: str = "inner",
+         on: Optional[str] = None,
+         left_on: Optional[str] = None,
+         right_on: Optional[str] = None,
+     ) -> "SQLDataFrame":
+         """Merge with another SQLDataFrame."""
+         if on:
+             left_col = self[on]
+             right_col = right[on]
+             condition = left_col == right_col
+         elif left_on and right_on:
+             left_col = self[left_on]
+             right_col = right[right_on]
+             condition = left_col == right_col
+         else:
+             raise ValueError("Must specify 'on' or 'left_on' and 'right_on'")
+
+         join_map = {
+             "inner": "JOIN",
+             "left": "LEFT JOIN",
+             "right": "RIGHT JOIN",
+             "outer": "FULL OUTER JOIN",
+         }
+         join_type = join_map.get(how, "JOIN")
+
+         new_join = Join(
+             table=right,
+             join_type=join_type,
+             condition=condition,
+             alias=right.alias_name,
+         )
+
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=self.select_cols,
+             where_conditions=self.where_conditions,
+             joins=self.joins + [new_join],
+             group_by_cols=self.group_by_cols,
+             order_by=self.order_by,
+             limit_count=self.limit_count,
+             alias_name=self.alias_name,
+             dialect=self.dialect,
+             ctes=self.ctes,
+         )
+
+     def groupby(
+         self, by: Union[str, List[str], SQLNode, List[SQLNode]]
+     ) -> "SQLDataFrame":
+         if not isinstance(by, list):
+             by = [by]
+
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=self.select_cols,
+             where_conditions=self.where_conditions,
+             joins=self.joins,
+             group_by_cols=by,
+             order_by=self.order_by,
+             limit_count=self.limit_count,
+             alias_name=self.alias_name,
+             dialect=self.dialect,
+             ctes=self.ctes,
+         )
+
+     def agg(self, func_map: Dict[str, str]) -> "SQLDataFrame":
+         new_selects = []
+         for grp in self.group_by_cols:
+             if isinstance(grp, str):
+                 new_selects.append(self[grp])
+             else:
+                 new_selects.append(grp)
+
+         for col_name, func_name in func_map.items():
+             col = self[col_name]
+             alias = f"{col_name}_{func_name}"
+             func_node = SQLFunction(func_name, col, alias=alias)
+             new_selects.append(func_node)
+
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=new_selects,
+             where_conditions=self.where_conditions,
+             joins=self.joins,
+             group_by_cols=self.group_by_cols,
+             order_by=self.order_by,
+             limit_count=self.limit_count,
+             alias_name=self.alias_name,
+             dialect=self.dialect,
+             ctes=self.ctes,
+         )
+
+     def sort_values(
+         self,
+         by: Union[str, SQLNode, List[Union[str, SQLNode]]],
+         ascending: Union[bool, List[bool]] = True,
+     ) -> "SQLDataFrame":
+         if not isinstance(by, list):
+             by = [by]
+         if not isinstance(ascending, list):
+             ascending = [ascending] * len(by)
+
+         new_order_by = list(self.order_by)
+         for item, asc in zip(by, ascending):
+             if isinstance(item, str):
+                 node = self[item]
+             else:
+                 node = item
+             new_order_by.append((node, asc))
+
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=self.select_cols,
+             where_conditions=self.where_conditions,
+             joins=self.joins,
+             group_by_cols=self.group_by_cols,
+             order_by=new_order_by,
+             limit_count=self.limit_count,
+             alias_name=self.alias_name,
+             dialect=self.dialect,
+             ctes=self.ctes,
+         )
+
+     def head(self, n: int = 5) -> "SQLDataFrame":
+         return SQLDataFrame(
+             source=self.source,
+             select_cols=self.select_cols,
+             where_conditions=self.where_conditions,
+             joins=self.joins,
+             group_by_cols=self.group_by_cols,
+             order_by=self.order_by,
+             limit_count=n,
+             alias_name=self.alias_name,
+             dialect=self.dialect,
+             ctes=self.ctes,
+         )
+
+     def insert(self, data: Union[Dict[str, Any], List[Dict[str, Any]]]) -> str:
+         if not isinstance(self.source, str):
+             raise ValueError("Can only insert into a table (string source)")
+
+         if isinstance(data, dict):
+             data = [data]
+
+         if not data:
+             raise ValueError("No data provided for insert")
+
+         dialect = self.dialect or GenericDialect()
+         columns = list(data[0].keys())
+         quoted_table = dialect.quote_identifier(self.source)
+         quoted_cols = ", ".join(dialect.quote_identifier(c) for c in columns)
+
+         all_values = []
+         for record in data:
+             vals = ", ".join(dialect.format_value(record[c]) for c in columns)
+             all_values.append(f"({vals})")
+
+         values_str = ", ".join(all_values)
+         return f"INSERT INTO {quoted_table} ({quoted_cols}) VALUES {values_str}"
+
+     def delete(self) -> str:
+         if not isinstance(self.source, str):
+             raise ValueError("Can only delete from a table (string source)")
+
+         dialect = self.dialect or GenericDialect()
+         quoted_table = dialect.quote_identifier(self.source)
+         sql = f"DELETE FROM {quoted_table}"
+
+         if self.where_conditions:
+             conditions = [cond.to_sql(dialect) for cond in self.where_conditions]
+             where_clause = " AND ".join(conditions)
+             sql += f" WHERE {where_clause}"
+
+         return sql
+
+     def to_sql(self, dialect: Optional[Dialect] = None) -> str:
+         """Generate SQL query string."""
+         if dialect is None:
+             dialect = self.dialect or GenericDialect()
+
+         if self.select_cols == ["*"]:
+             select_clause = "*"
+         else:
+             quoted_cols = []
+             for col in self.select_cols:
+                 if isinstance(col, SQLNode):
+                     quoted_cols.append(col.to_sql(dialect))
+                 else:
+                     quoted_cols.append(dialect.quote_identifier(col))
+             select_clause = ", ".join(quoted_cols)
+
+         if isinstance(self.source, str):
+             from_clause = dialect.quote_identifier(self.source)
+         elif self.source.is_simple() and not self.alias_name:
+             from_clause = dialect.quote_identifier(self.source.source)  # type: ignore
+         else:
+             subquery_sql = self.source.to_sql(dialect)
+             alias = dialect.quote_identifier(self.alias_name or self.identifier)
+             from_clause = f"({subquery_sql}) AS {alias}"
+
+         join_clauses = []
+         for join in self.joins:
+             if isinstance(join.table, str):
+                 table_sql = dialect.quote_identifier(join.table)
+             elif join.table.is_simple() and not join.alias:
+                 table_sql = dialect.quote_identifier(join.table.source)  # type: ignore
+             else:
+                 inner_sql = join.table.to_sql(dialect)
+                 alias = dialect.quote_identifier(join.alias or join.table.identifier)
+                 table_sql = f"({inner_sql}) AS {alias}"
+
+             condition = join.condition.to_sql(dialect)
+             join_clauses.append(f"{join.join_type} {table_sql} ON {condition}")
+
+         full_from = f"{from_clause}"
+         if join_clauses:
+             full_from += " " + " ".join(join_clauses)
+
+         sql = f"SELECT {select_clause} FROM {full_from}"
+
+         if self.where_conditions:
+             conditions = [cond.to_sql(dialect) for cond in self.where_conditions]
+             where_clause = " AND ".join(conditions)
+             sql += f" WHERE {where_clause}"
+
+         if self.group_by_cols:
+             group_items = []
+             for g in self.group_by_cols:
+                 if isinstance(g, SQLNode):
+                     group_items.append(g.to_sql(dialect))
+                 else:
+                     owner = self.alias_name or (
+                         self.source
+                         if isinstance(self.source, str)
+                         else self.source.identifier
+                     )
+                     quoted_owner = dialect.quote_identifier(owner)
+                     quoted_col = dialect.quote_identifier(g)
+                     group_items.append(f"{quoted_owner}.{quoted_col}")
+             group_clause = ", ".join(group_items)
+             sql += f" GROUP BY {group_clause}"
+
+         if self.order_by:
+             order_items = []
+             for node, asc in self.order_by:
+                 direction = "ASC" if asc else "DESC"
+                 order_items.append(f"{node.to_sql(dialect)} {direction}")
+             sql += f" ORDER BY {', '.join(order_items)}"
+
+         if self.limit_count is not None:
+             sql += f" LIMIT {self.limit_count}"
+
+         if self.ctes:
+             cte_parts = []
+             for name, cte_df in self.ctes.items():
+                 cte_parts.append(
+                     f"{dialect.quote_identifier(name)} AS ({cte_df.to_sql(dialect)})"
+                 )
+             sql = f"WITH {', '.join(cte_parts)} {sql}"
+
+         return sql
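The `merge()` method defined above is not demonstrated in the README. Below is a minimal sketch of how it appears to compose, with the expected SQL inferred from `to_sql()` under the default `GenericDialect` (the table and column names are illustrative only, not from the package docs):

```python
from pdql.dataframe import SQLDataFrame

users = SQLDataFrame("users")
orders = SQLDataFrame("orders")

# Pandas-style merge translates to a JOIN clause on the shared key
joined = users.merge(orders, how="left", on="user_id")
query = joined[["name", "amount"]]

print(query.to_sql())
# SELECT "name", "amount" FROM "users" LEFT JOIN "orders" ON ("users"."user_id" = "orders"."user_id")
```

Note that plain-string column selections appear to be quoted but not table-qualified in the generated SELECT list, so ambiguous names would need to be disambiguated by the caller.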
pdql-0.1.0/src/pdql/dialects.py ADDED
@@ -0,0 +1,62 @@
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+
+ class Dialect(ABC):
+     """Abstract base class for SQL dialects."""
+
+     @abstractmethod
+     def quote_identifier(self, name: str) -> str:
+         pass
+
+     def format_value(self, value: Any) -> str:
+         if isinstance(value, str):
+             escaped = value.replace("'", "''")
+             return f"'{escaped}'"
+         if value is None:
+             return "NULL"
+         if isinstance(value, bool):
+             return "TRUE" if value else "FALSE"
+         return str(value)
+
+     def translate_function(self, name: str) -> str:
+         mapping = {
+             "mean": "AVG",
+             "sum": "SUM",
+             "count": "COUNT",
+             "min": "MIN",
+             "max": "MAX",
+         }
+         return mapping.get(name.lower(), name.upper())
+
+     def translate_op(self, op: str) -> str:
+         mapping = {
+             "eq": "=",
+             "ne": "!=",
+             "gt": ">",
+             "lt": "<",
+             "ge": ">=",
+             "le": "<=",
+             "add": "+",
+             "sub": "-",
+             "mul": "*",
+             "div": "/",
+             "and": "AND",
+             "or": "OR",
+         }
+         return mapping.get(op, op)
+
+
+ class GenericDialect(Dialect):
+     def quote_identifier(self, name: str) -> str:
+         return f'"{name}"'
+
+
+ class PostgresDialect(Dialect):
+     def quote_identifier(self, name: str) -> str:
+         return f'"{name}"'
+
+
+ class BigQueryDialect(Dialect):
+     def quote_identifier(self, name: str) -> str:
+         return f"`{name}`"
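A short sketch of how the `Dialect` hooks above are apparently meant to be used. `MySQLDialect` here is a hypothetical subclass written for illustration, not part of the package; only the quoting rule needs to be overridden, since value formatting and name translation are inherited from the base class:

```python
from pdql.dialects import Dialect, GenericDialect

class MySQLDialect(Dialect):
    # Hypothetical dialect: backtick-quoted identifiers, everything else inherited
    def quote_identifier(self, name: str) -> str:
        return f"`{name}`"

d = GenericDialect()
print(d.translate_function("mean"))   # AVG  (pandas-style name mapped to SQL)
print(d.translate_op("ge"))           # >=
print(d.format_value("O'Brien"))      # 'O''Brien'  (embedded quotes doubled)
print(d.format_value(None))           # NULL
print(MySQLDialect().quote_identifier("users"))  # `users`
```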
pdql-0.1.0/src/pdql/expressions.py ADDED
@@ -0,0 +1,184 @@
+ from typing import Any, Union, Optional, List
+ from pdql.dialects import Dialect
+
+
+ class SQLNode:
+     """Base class for SQL syntax tree nodes."""
+
+     def to_sql(self, dialect: Dialect) -> str:
+         raise NotImplementedError
+
+     def _op(self, op: str, other: Any) -> "SQLExpression":
+         return SQLExpression(self, op, other)
+
+     def __eq__(self, other: Any) -> "SQLExpression": # type: ignore
+         return self._op("eq", other)
+
+     def __ne__(self, other: Any) -> "SQLExpression": # type: ignore
+         return self._op("ne", other)
+
+     def __lt__(self, other: Any) -> "SQLExpression":
+         return self._op("lt", other)
+
+     def __le__(self, other: Any) -> "SQLExpression":
+         return self._op("le", other)
+
+     def __gt__(self, other: Any) -> "SQLExpression":
+         return self._op("gt", other)
+
+     def __ge__(self, other: Any) -> "SQLExpression":
+         return self._op("ge", other)
+
+     def __add__(self, other: Any) -> "SQLExpression":
+         return self._op("add", other)
+
+     def __sub__(self, other: Any) -> "SQLExpression":
+         return self._op("sub", other)
+
+     def __mul__(self, other: Any) -> "SQLExpression":
+         return self._op("mul", other)
+
+     def __truediv__(self, other: Any) -> "SQLExpression":
+         return self._op("div", other)
+
+     def __and__(self, other: Any) -> "SQLExpression":
+         return self._op("and", other)
+
+     def __or__(self, other: Any) -> "SQLExpression":
+         return self._op("or", other)
+
+
+ class SQLExpression(SQLNode):
+     """Represents a binary operation in SQL."""
+
+     def __init__(self, left: Union[SQLNode, Any], op: str, right: Union[SQLNode, Any]):
+         self.left = left
+         self.op = op
+         self.right = right
+
+     def to_sql(self, dialect: Dialect) -> str:
+         left_sql = (
+             self.left.to_sql(dialect)
+             if isinstance(self.left, SQLNode)
+             else dialect.format_value(self.left)
+         )
+         right_sql = (
+             self.right.to_sql(dialect)
+             if isinstance(self.right, SQLNode)
+             else dialect.format_value(self.right)
+         )
+         operator = dialect.translate_op(self.op)
+         return f"({left_sql} {operator} {right_sql})"
+
+
+ class SQLColumn(SQLNode):
+     """Represents a column in a SQL table."""
+
+     def __init__(self, name: str, owner: Optional[str] = None):
+         self.name = name
+         self.owner = owner
+
+     def to_sql(self, dialect: Dialect) -> str:
+         col = dialect.quote_identifier(self.name)
+         if self.owner:
+             owner = dialect.quote_identifier(self.owner)
+             return f"{owner}.{col}"
+         return col
+
+
+ class SQLFunction(SQLNode):
+     """Represents a SQL function."""
+
+     def __init__(
+         self,
+         name: str,
+         args: Optional[Union[List[Any], Any]] = None,
+         alias: Optional[str] = None,
+     ):
+         self.name = name
+         if args is None:
+             self.args = []
+         elif isinstance(args, list):
+             self.args = args
+         else:
+             self.args = [args]
+         self.alias = alias
+
+     def to_sql(self, dialect: Dialect) -> str:
+         func_name = dialect.translate_function(self.name)
+         arg_sqls = []
+         for arg in self.args:
+             if isinstance(arg, SQLNode):
+                 arg_sqls.append(arg.to_sql(dialect))
+             elif arg == "*":
+                 arg_sqls.append("*")
+             else:
+                 arg_sqls.append(dialect.format_value(arg))
+
+         args_str = ", ".join(arg_sqls)
+         sql = f"{func_name}({args_str})"
+
+         if self.alias:
+             return f"{sql} AS {dialect.quote_identifier(self.alias)}"
+         return sql
+
+     def over(self, partition_by=None, order_by=None) -> "SQLWindowFunction":
+         return SQLWindowFunction(self, partition_by=partition_by, order_by=order_by)
+
+
+ class SQLWindowFunction(SQLNode):
+     """Represents a Window Function."""
+
+     def __init__(
+         self,
+         func: SQLFunction,
+         partition_by: Optional[Union[List[Any], Any]] = None,
+         order_by: Optional[Union[List[Any], Any]] = None,
+     ):
+         self.func = func
+
+         if partition_by is None:
+             self.partition_by = []
+         elif isinstance(partition_by, list):
+             self.partition_by = partition_by
+         else:
+             self.partition_by = [partition_by]
+
+         if order_by is None:
+             self.order_by = []
+         elif isinstance(order_by, list):
+             self.order_by = order_by
+         else:
+             self.order_by = [order_by]
+
+         self.alias = func.alias
+         self.func.alias = None
+
+     def to_sql(self, dialect: Dialect) -> str:
+         func_sql = self.func.to_sql(dialect)
+
+         parts = []
+         if self.partition_by:
+             p_sqls = []
+             for p in self.partition_by:
+                 if isinstance(p, SQLNode):
+                     p_sqls.append(p.to_sql(dialect))
+                 else:
+                     p_sqls.append(dialect.quote_identifier(str(p)))
+             parts.append(f"PARTITION BY {', '.join(p_sqls)}")
+
+         if self.order_by:
+             o_sqls = []
+             for o in self.order_by:
+                 if isinstance(o, SQLNode):
+                     o_sqls.append(o.to_sql(dialect))
+                 else:
+                     o_sqls.append(dialect.quote_identifier(str(o)))
+             parts.append(f"ORDER BY {', '.join(o_sqls)}")
+
+         over_clause = " ".join(parts)
+         sql = f"{func_sql} OVER ({over_clause})"
+
+         if self.alias:
+             return f"{sql} AS {dialect.quote_identifier(self.alias)}"
+         return sql
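`SQLWindowFunction` and `SQLFunction.over()` are not covered by the README examples. A minimal sketch of the intended usage, with the output inferred from the `to_sql()` implementations above (column and alias names are illustrative):

```python
from pdql.expressions import SQLColumn, SQLFunction
from pdql.dialects import GenericDialect

# SUM(...) OVER (PARTITION BY ... ORDER BY ...), aliased as a running total
amount = SQLColumn("amount", owner="orders")
win = SQLFunction("sum", amount, alias="running_total").over(
    partition_by="user_id", order_by="created_at"
)

print(win.to_sql(GenericDialect()))
# SUM("orders"."amount") OVER (PARTITION BY "user_id" ORDER BY "created_at") AS "running_total"
```

Since `SQLDataFrame` accepts `SQLNode` objects in a column list, such a node can apparently be selected directly, e.g. `SQLDataFrame("orders")[["user_id", win]]`.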