PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/db/mysql.py ADDED
@@ -0,0 +1,1195 @@
1
+ """Contains the MySQL implementation of the Database interface.
2
+
3
+ The current implementation has a number of limitations. Some are caused by fundamental restrictions of how MySQL
4
+ optimizes and executes queries, while others are caused by the sheer implementation effort that would have to be
5
+ invested to implement the corresponding feature in MySQL.
6
+
7
+ The most important restrictions are as follows:
8
+
9
+ No support for parsing EXPLAIN ANALYZE plans. Calling the corresponding MysqlOptimizer.analyze_plan method raises a
10
+ ``NotImplementedError``. This is because MySQL currently (i.e. as of version 8.0) only provides EXPLAIN ANALYZE plans
11
+ in TREE output format, which is not exhaustively documented and appears fairly irregular. This makes parsing the
12
+ output fairly hard.
13
+
14
+ Restrictions of the query hint generation: query execution in MySQL differs fundamentally from the way queries are
15
+ executed in more traditional systems such as PostgreSQL or Oracle. MySQL makes heavy usage of clustered indexes,
16
+ meaning that all tuples in a table are automatically stored in a B-Tree according to the primary key index. As a
17
+ consequence, MySQL strongly favors the usage of (Index-) Nested Loop Joins during query execution and rarely resorts to
18
+ other operators. In fact, the only fundamentally different join operator available is the Hash Join. This operator is
19
+ only used if an equality join should be executed between columns that do not have an index available. Therefore, it is
20
+ not possible to disable Nested Loop Joins entirely, nor can the usage of Hash Joins be enforced. Instead, query hints
21
+ can only disable the usage of Hash Joins, or *recommend* their usage. But whether or not they are actually applied is
22
+ up to the MySQL query optimizer. A similar thing happens for the join order: although MySQL provides a number of hints
23
+ related to the join order optimization, these hints are not always enforced. More specifically, to the best of our
24
+ knowledge, it is not possible to enforce the branches in the join order and MySQL heavily favors left-deep query plans.
25
+ Therefore, the generation of join order hints only works for linear join orders for now.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import configparser
31
+ import dataclasses
32
+ import json
33
+ import math
34
+ import numbers
35
+ import os
36
+ import textwrap
37
+ import warnings
38
+ from collections.abc import Iterable, Sequence
39
+ from typing import Any, Optional
40
+
41
+ import mysql.connector
42
+
43
+ from .. import qal, util
44
+ from .._core import (
45
+ Cardinality,
46
+ JoinOperator,
47
+ PhysicalOperator,
48
+ ScanOperator,
49
+ UnboundColumnError,
50
+ VirtualTableError,
51
+ )
52
+ from .._hints import (
53
+ HintType,
54
+ PhysicalOperatorAssignment,
55
+ PlanParameterization,
56
+ operators_from_plan,
57
+ )
58
+ from .._jointree import JoinTree, jointree_from_plan, parameters_from_plan
59
+ from .._qep import QueryPlan
60
+ from ..qal import transform
61
+ from ..qal._qal import (
62
+ CastExpression,
63
+ ColumnReference,
64
+ Explain,
65
+ Hint,
66
+ SqlExpression,
67
+ SqlQuery,
68
+ StaticValueExpression,
69
+ TableReference,
70
+ )
71
+ from ..util import Version
72
+ from ._db import (
73
+ Cursor,
74
+ Database,
75
+ DatabasePool,
76
+ DatabaseSchema,
77
+ DatabaseStatistics,
78
+ HintService,
79
+ OptimizerInterface,
80
+ UnsupportedDatabaseFeatureError,
81
+ )
82
+
83
+
84
@dataclasses.dataclass(frozen=True)
class MysqlConnectionArguments:
    """Bundles the settings that control how the connection to a MySQL instance is established.

    Only the connecting user and the name of the target database are mandatory, all other settings have defaults.
    See [1]_ for the different parameters' meaning.

    References
    ----------

    .. [1] https://dev.mysql.com/doc/connector-python/en/connector-python-connectargs.html
    """

    user: str
    database: str
    password: str = ""
    host: str = "127.0.0.1"
    port: int = 3306
    use_unicode: bool = True
    charset: str = "utf8mb4"
    autocommit: bool = True
    sql_mode: str = "ANSI"

    def parameters(self) -> dict[str, str | int | bool]:
        """Collects all connection settings in a single ``dict``.

        Returns
        -------
        dict[str, str | int | bool]
            A mapping from parameter name to parameter value, in field declaration order.
        """
        return {
            field.name: getattr(self, field.name) for field in dataclasses.fields(self)
        }
117
+
118
+
119
class MysqlInterface(Database):
    """MySQL-specific implementation of the general `Database` interface.

    The interface owns one connection and one buffered cursor. The cursor is handed out by `cursor` and is used for
    all statements issued by this class.
    """

    def __init__(
        self,
        connection_args: MysqlConnectionArguments,
        system_name: str = "MySQL",
        *,
        cache_enabled: bool = True,
    ) -> None:
        """Generates a new database interface and establishes a connection to the specified database server.

        Parameters
        ----------
        connection_args : MysqlConnectionArguments
            Configuration and required information to establish a connection to some MySQL instance.
        system_name : str, optional
            The name of the current database. Typically, this can be used to query the `DatabasePool` for this very
            instance. Defaults to ``"MySQL"``.
        cache_enabled : bool, optional
            Whether or not caching of complicated database queries should be enabled by default. Defaults to ``True``.
        """
        self.connection_args = connection_args
        self._cnx = mysql.connector.connect(**connection_args.parameters())
        self._cur = self._cnx.cursor(buffered=True)

        self._db_schema = MysqlSchemaInterface(self)
        self._db_stats = MysqlStatisticsInterface(self)
        super().__init__(system_name, cache_enabled=cache_enabled)

    def schema(self) -> MysqlSchemaInterface:
        """Provides the schema interface that was created for this connection."""
        return self._db_schema

    def statistics(self) -> MysqlStatisticsInterface:
        """Provides the statistics interface that was created for this connection."""
        return self._db_stats

    def hinting(self) -> HintService:
        """Provides a new hint service that is bound to this database."""
        return MysqlHintService(self)

    def execute_query(
        self,
        query: SqlQuery | str,
        *,
        cache_enabled: Optional[bool] = None,
        raw: bool = False,
    ) -> Any:
        """Runs a query and provides its (simplified) result set.

        Parameters
        ----------
        query : SqlQuery | str
            The query to execute. Preparatory statements of `SqlQuery` instances are executed beforehand.
        cache_enabled : Optional[bool], optional
            Whether the query cache should be used for this query. If ``None``, the instance-wide setting is used.
        raw : bool, optional
            Whether the result rows should be returned exactly as provided by the cursor. Defaults to ``False``.

        Returns
        -------
        Any
            With `raw`, the list of result tuples. Otherwise an empty list for an empty result, and the maximally
            unwrapped result for everything else (see comments below).
        """
        # An explicit True/False wins, None falls back to the instance default
        cache_enabled = cache_enabled or (cache_enabled is None and self._cache_enabled)
        query = self._prepare_query_execution(query)

        if cache_enabled and query in self._query_cache:
            query_result = self._query_cache[query]
        else:
            self._cur.execute(query)
            query_result = self._cur.fetchall()
            if cache_enabled:
                self._inflate_query_cache()
                self._query_cache[query] = query_result

        if raw:
            return query_result

        # simplify the query result as much as possible: [(42, 24)] becomes (42, 24) and [(1,), (2,)] becomes [1, 2]
        # [(42, 24), (4.2, 2.4)] is left as-is
        if not query_result:
            return []
        result_structure = query_result[0]  # what do the result tuples look like?
        if len(result_structure) == 1:  # do we have just one column?
            # if it is just one column, unwrap it
            query_result = [row[0] for row in query_result]
        # if it is just one row, unwrap it
        return query_result if len(query_result) > 1 else query_result[0]

    def optimizer(self) -> OptimizerInterface:
        """Provides a new optimizer interface that is bound to this database."""
        return MysqlOptimizer(self)

    def database_name(self) -> str:
        """Provides the result of ``SELECT DATABASE()`` for the current connection."""
        self._cur.execute("SELECT DATABASE();")
        db_name = self._cur.fetchone()[0]
        return db_name

    def database_system_version(self) -> Version:
        """Provides the result of ``SELECT VERSION()``, wrapped in a `Version`."""
        self._cur.execute("SELECT VERSION();")
        version = self._cur.fetchone()[0]
        return Version(version)

    def server_mode(self) -> str:
        """Provides the current settings in the ``sql_mode`` MySQL variable.

        Returns
        -------
        str
            The ``sql_mode`` value, exactly as it is returned by the server. Typically, this is a list of
            comma-separated features.
        """
        self._cur.execute("SELECT @@session.sql_mode")
        return self._cur.fetchone()[0]

    def describe(self) -> dict:
        """Collects system name, version, database name, statistics settings and all server variables."""
        base_info = {
            "system_name": self.database_system_name(),
            "system_version": self.database_system_version(),
            "database": self.database_name(),
            "statistics_settings": {
                "emulated": self._db_stats.emulated,
                "cache_enabled": self._db_stats.cache_enabled,
            },
        }
        self._cur.execute("SHOW VARIABLES")
        system_config = self._cur.fetchall()
        base_info["system_settings"] = dict(system_config)
        return base_info

    def reset_connection(self) -> None:
        """Resets the session state of the connection and replaces the cursor."""
        self._cur.close()
        self._cnx.cmd_reset_connection()
        # The replacement cursor has to be buffered just like the one created in __init__. Otherwise, statements
        # whose results are only partially consumed (e.g. via fetchone) leave unread rows on the connection.
        self._cur = self._cnx.cursor(buffered=True)

    def cursor(self) -> Cursor:
        """Provides the cursor that is shared by all operations of this interface."""
        return self._cur

    def close(self) -> None:
        """Closes the cursor and the underlying connection."""
        self._cur.close()
        self._cnx.close()

    def _prepare_query_execution(
        self, query: SqlQuery | str, *, drop_explain: bool = False
    ) -> str:
        """Provides the query in a unified format, taking care of preparatory statements as necessary.

        `drop_explain` can be used to remove any EXPLAIN clauses from the query. Note that all actions that require
        the "semantics" of the query to be known (e.g. EXPLAIN modifications or query hints) are only executed for
        instances of the qal queries. Plain strings are returned unmodified.
        """
        if not isinstance(query, SqlQuery):
            return query

        if drop_explain:
            query = transform.drop_clause(query, Explain)
        if query.hints and query.hints.preparatory_statements:
            # NOTE(review): the preparatory statements are sent as a single string. If they contain more than one
            # statement, this might require multi-statement support of the connector - confirm.
            self._cur.execute(query.hints.preparatory_statements)
            query = transform.drop_hints(query, preparatory_statements_only=True)
        return str(query)

    def _obtain_query_plan(self, query: str) -> dict:
        """Requests the JSON-formatted EXPLAIN plan for the query and provides it as a parsed document."""
        if not query.startswith("EXPLAIN FORMAT = JSON"):
            query = "EXPLAIN FORMAT = JSON " + query
        self._cur.execute(query)
        result = self._cur.fetchone()[0]
        return json.loads(result)
270
+
271
+
272
class MysqlSchemaInterface(DatabaseSchema):
    """MySQL-specific schema lookups, answered from the ``information_schema`` tables.

    NOTE(review): none of the catalog queries filters on the schema/database name, so tables with the same name in
    different databases of the same server are not distinguished - confirm whether this is intended.
    """

    def __init__(self, mysql_db: MysqlInterface):
        super().__init__(mysql_db)

    @staticmethod
    def _require_physical_column(column: ColumnReference) -> None:
        """Ensures that the column is bound to a table and that this table is not virtual."""
        if not column.table:
            raise UnboundColumnError(column)
        if column.table.virtual:
            raise VirtualTableError(column.table)

    def lookup_column(
        self,
        column: ColumnReference | str,
        candidate_tables: list[TableReference],
        *,
        expect_match: bool = False,
    ) -> Optional[TableReference]:
        """Searches for the first candidate table that contains a column with the given name.

        Parameters
        ----------
        column : ColumnReference | str
            The column to search for. For column references, only the name is used.
        candidate_tables : list[TableReference]
            The tables to check, in order.
        expect_match : bool, optional
            Whether a missing column should be treated as an error. Defaults to ``False``.

        Returns
        -------
        Optional[TableReference]
            The first matching table, or ``None`` if there is no match and `expect_match` is not set.

        Raises
        ------
        ValueError
            If no candidate contains the column and `expect_match` is set.
        """
        column = column.name if isinstance(column, ColumnReference) else column

        for table in candidate_tables:
            table_columns = self._fetch_columns(table)
            if column in table_columns:
                return table

        if not expect_match:
            return None
        candidate_tables = [tab.full_name for tab in candidate_tables]
        raise ValueError(
            f"Column {column} not found in candidate tables {candidate_tables}"
        )

    def is_primary_key(self, column: ColumnReference) -> bool:
        """Checks whether the column is marked as (part of) the primary key of its table."""
        self._require_physical_column(column)
        index_map = self._fetch_indexes(column.table)
        return index_map.get(column.name, False)

    def has_secondary_index(self, column: ColumnReference) -> bool:
        """Checks whether the column is indexed, but is not the primary key."""
        self._require_physical_column(column)
        index_map = self._fetch_indexes(column.table)

        # The index map contains an entry for each attribute that actually has an index. The value is True, if the
        # attribute (which is known to be indexed), is even the Primary Key
        # Our method should return False in two cases: 1) the attribute is not indexed at all; and 2) the attribute
        # actually is the Primary key. Therefore, by assuming it is the PK in case of absence, we get the correct
        # value.
        return not index_map.get(column.name, True)

    def indexes_on(self, column: ColumnReference) -> set[str]:
        """Provides the names of all indexes that contain the column."""
        self._require_physical_column(column)
        query_template = (
            "SELECT index_name FROM information_schema.statistics "
            "WHERE table_name = %s AND column_name = %s"
        )
        self._db.cursor().execute(query_template, (column.table.full_name, column.name))
        result_set = self._db.cursor().fetchall()
        return {index[0] for index in result_set}

    def foreign_keys_on(self, column: ColumnReference) -> set[ColumnReference]:
        """Provides all columns that are referenced by the given column via foreign key constraints."""
        self._require_physical_column(column)
        query_template = (
            "SELECT referenced_table_name, referenced_column_name "
            "FROM information_schema.key_column_usage "
            "WHERE table_name = %s AND column_name = %s AND referenced_column_name IS NOT NULL"
        )
        self._db.cursor().execute(query_template, (column.table.full_name, column.name))
        result_set = self._db.cursor().fetchall()
        return {
            ColumnReference(table=TableReference(name=table), name=col)
            for table, col in result_set
        }

    def datatype(self, column: ColumnReference) -> str:
        """Provides the ``column_type`` that the catalog stores for the column."""
        self._require_physical_column(column)
        query_template = (
            "SELECT column_type FROM information_schema.columns "
            "WHERE table_name = %s AND column_name = %s"
        )
        self._db.cursor().execute(query_template, (column.table.full_name, column.name))
        result_set = self._db.cursor().fetchone()
        return str(result_set[0])

    def is_nullable(self, column: ColumnReference) -> bool:
        """Checks whether the catalog lists the column as nullable."""
        self._require_physical_column(column)
        query_template = (
            "SELECT is_nullable FROM information_schema.columns "
            "WHERE table_name = %s AND column_name = %s"
        )
        self._db.cursor().execute(query_template, (column.table.full_name, column.name))
        result_set = self._db.cursor().fetchone()
        return result_set[0] == "YES"

    def _fetch_columns(self, table: TableReference) -> list[str]:
        """Provides the names of all columns of the table."""
        query_template = (
            "SELECT column_name FROM information_schema.columns WHERE table_name = %s"
        )
        self._db.cursor().execute(query_template, (table.full_name,))
        result_set = self._db.cursor().fetchall()
        return [col[0] for col in result_set]

    def _fetch_indexes(self, table: TableReference) -> dict[str, bool]:
        """Provides all indexed columns of the table, mapped to whether they are the primary key."""
        index_query = textwrap.dedent("""
                                      SELECT column_name, column_key = 'PRI'
                                      FROM information_schema.columns
                                      WHERE table_name = %s AND column_key <> ''
                                      """)
        # Query parameters have to be supplied as a sequence, a bare string is not a valid parameter container
        self._db.cursor().execute(index_query, (table.full_name,))
        result_set = self._db.cursor().fetchall()
        # The comparison is returned as 0/1 by the server, normalize it to a proper boolean
        index_map = {col: bool(is_primary) for col, is_primary in result_set}
        return index_map
393
+
394
+
395
class MysqlStatisticsInterface(DatabaseStatistics):
    """MySQL-specific statistics lookups, answered from the ``information_schema`` tables where possible."""

    def __init__(self, mysql_db: MysqlInterface):
        super().__init__(mysql_db)

    def _retrieve_total_rows_from_stats(self, table: TableReference) -> Optional[int]:
        """Provides the ``table_rows`` entry of the table, or ``None`` if the catalog has no entry for it."""
        count_query = (
            "SELECT table_rows FROM information_schema.tables WHERE table_name = %s"
        )
        # Query parameters have to be supplied as a sequence, a bare string is not a valid parameter container
        self._db.cursor().execute(count_query, (table.full_name,))
        row = self._db.cursor().fetchone()
        return row[0] if row else None

    def _retrieve_distinct_values_from_stats(
        self, column: ColumnReference
    ) -> Optional[int]:
        """Provides the index cardinality of the column as an estimate of its number of distinct values.

        If the catalog has no (non-NULL) entry for the column, the value is calculated if the emulation fallback is
        enabled. Otherwise, ``None`` is returned.
        """
        stats_query = (
            "SELECT cardinality FROM information_schema.statistics "
            "WHERE table_name = %s AND column_name = %s"
        )
        self._db.cursor().execute(stats_query, (column.table.full_name, column.name))
        # fetchone provides the entire row (or None), the actual value has to be unwrapped
        row = self._db.cursor().fetchone()
        distinct_vals: Optional[int] = row[0] if row else None
        if distinct_vals is None and self.enable_emulation_fallback:
            return self._calculate_distinct_values(column, cache_enabled=True)
        return distinct_vals

    def _retrieve_min_max_values_from_stats(
        self, column: ColumnReference
    ) -> Optional[tuple[Any, Any]]:
        """Calculates the min/max values if the emulation fallback is enabled and raises an error otherwise."""
        if not self.enable_emulation_fallback:
            raise UnsupportedDatabaseFeatureError(self._db, "min/max value statistics")
        return self._calculate_min_max_values(column, cache_enabled=True)

    def _retrieve_most_common_values_from_stats(
        self, column: ColumnReference, k: int
    ) -> Sequence[tuple[Any, int]]:
        """Calculates the most common values if the emulation fallback is enabled and raises an error otherwise."""
        if not self.enable_emulation_fallback:
            raise UnsupportedDatabaseFeatureError(
                self._db, "most common values statistics"
            )
        return self._calculate_most_common_values(column, k=k, cache_enabled=True)
438
+
439
+
440
# Join operators that are reported as supported hints by MysqlHintService.supports_hint
MysqlJoinHints = {
    JoinOperator.NestedLoopJoin,
    JoinOperator.HashJoin,
}

# Scan operators that are reported as supported hints by MysqlHintService.supports_hint
MysqlScanHints = {
    ScanOperator.SequentialScan,
    ScanOperator.IndexScan,
}

# Plan-level hint types that are reported as supported hints by MysqlHintService.supports_hint
MysqlPlanHints = {
    HintType.Operator,
    HintType.LinearJoinOrder,
}
443
+
444
+
445
class _MysqlExplainClause(Explain):
    """EXPLAIN clause that renders as ``EXPLAIN [ANALYZE] [FORMAT=<format>]``."""

    def __init__(self, original_clause: Explain):
        super().__init__(original_clause.analyze, original_clause.target_format)

    def __str__(self) -> str:
        parts = ["EXPLAIN"]
        if self.analyze:
            parts.append(" ANALYZE")
        if self.target_format:
            parts.append(f" FORMAT={self.target_format}")
        return "".join(parts)
456
+
457
+
458
class _MysqlStaticValueExpression(StaticValueExpression):
    """Static value that renders numbers as-is and wraps all other values in double quotes."""

    def __init__(self, original_expression: StaticValueExpression) -> None:
        super().__init__(original_expression.value)

    def __str__(self) -> str:
        if isinstance(self.value, numbers.Number):
            return f"{self.value}"
        return f'"{self.value}"'
468
+
469
+
470
class _MysqlCastExpression(CastExpression):
    """Cast expression that renders using the ``CAST(<expression> AS <type>)`` function syntax."""

    def __init__(self, original_expression: CastExpression) -> None:
        expression = original_expression.casted_expression
        target = original_expression.target_type
        super().__init__(expression, target)

    def __str__(self) -> str:
        return "CAST({} AS {})".format(self.casted_expression, self.target_type)
478
+
479
+
480
def _replace_static_vals(e: SqlExpression) -> SqlExpression:
    """Wraps static value expressions in their MySQL-specific counterpart, all other expressions are passed through."""
    if isinstance(e, StaticValueExpression):
        return _MysqlStaticValueExpression(e)
    return e
482
+
483
+
484
def _replace_casts(e: SqlExpression) -> SqlExpression:
    """Wraps cast expressions in their MySQL-specific counterpart, all other expressions are passed through."""
    if isinstance(e, CastExpression):
        return _MysqlCastExpression(e)
    return e
486
+
487
+
488
def _generate_join_order_hint(join_order: Optional[JoinTree]) -> str:
    """Renders a ``JOIN_ORDER`` hint that lists the tables in the order in which the join tree iterates them.

    An empty string is returned if there is no join order.
    """
    if join_order:
        identifiers = [table.identifier() for table in join_order.itertables()]
        return " JOIN_ORDER(" + ", ".join(identifiers) + ")"
    return ""
494
+
495
+
496
# Maps each physical operator to the name of the optimizer hint that is generated for it.
MysqlOptimizerHints = {
    # join operators
    JoinOperator.NestedLoopJoin: "NO_BNL",
    JoinOperator.HashJoin: "BNL",
    # scan operators
    ScanOperator.SequentialScan: "NO_INDEX",
    ScanOperator.IndexScan: "INDEX",
    ScanOperator.IndexOnlyScan: "INDEX",
    ScanOperator.BitmapScan: "INDEX_MERGE",
}
"""See https://dev.mysql.com/doc/refman/8.0/en/optimizer-hints.html"""
505
+
506
+
507
def _generate_operator_hints(
    physical_operators: Optional[PhysicalOperatorAssignment],
) -> str:
    """Renders one optimizer hint per scan assignment and per join assignment, separated by newlines.

    Scan hints are emitted before join hints. Intermediate operators cannot be expressed and only trigger a warning.
    An empty string is returned if there are no operator assignments.
    """
    if not physical_operators:
        return ""

    def render(assignment, tables) -> str:
        # hint arguments are the identifiers of all affected tables
        hint_key = ", ".join(tab.identifier() for tab in tables)
        hint_name = MysqlOptimizerHints[assignment.operator]
        return f" {hint_name}({hint_key})"

    hints = [
        render(scan_assignment, (table,))
        for table, scan_assignment in physical_operators.scan_operators.items()
    ]
    hints.extend(
        render(join_assignment, join)
        for join, join_assignment in physical_operators.join_operators.items()
    )

    if physical_operators.intermediate_operators:
        warnings.warn("Cannot generate intermediate operator hints for MySQL.")

    return "\n".join(hints)
528
+
529
+
530
# Maps operators to the optimizer_switch flag that is toggled for them in the preparatory statements.
MysqlSwitchableOptimizations = {
    JoinOperator.HashJoin: "block_nested_loop",
}
"""See https://dev.mysql.com/doc/refman/8.0/en/switchable-optimizations.html"""
532
+
533
+
534
def _escape_setting(setting) -> str:
    """Transforms the setting variable into a string that can be used in an SQL query.

    Booleans become ``TRUE``/``FALSE``, other numbers are used as-is and all remaining values are wrapped in single
    quotes.

    NOTE(review): quote characters within string values are not escaped.
    """
    # bool has to be checked first: it is a subclass of int and would otherwise be rendered as "True"/"False"
    if isinstance(setting, bool):
        return "TRUE" if setting else "FALSE"
    if isinstance(setting, (float, int)):
        return str(setting)
    return f"'{setting}'"
541
+
542
+
543
def _generate_prep_statements(
    physical_operators: Optional[PhysicalOperatorAssignment],
    plan_parameters: Optional[PlanParameterization],
) -> str:
    """Renders the ``SET`` statements that have to be executed before the actual query.

    The global operator settings are combined into a single ``optimizer_switch`` assignment, followed by one ``SET``
    statement per system setting of the plan parameters. Statements are separated by newlines. If nothing has to be
    set, an empty string is returned.
    """
    statements = []

    if physical_operators:
        switches = [
            f"{MysqlSwitchableOptimizations[operator]}={'on' if enabled else 'off'}"
            for operator, enabled in physical_operators.global_settings.items()
        ]
        if switches:
            statements.append("SET @@optimizer_switch='" + ",".join(switches) + "';")

    if plan_parameters:
        statements.extend(
            f"SET {setting}={_escape_setting(value)};"
            for setting, value in plan_parameters.system_settings.items()
        )

    return "\n".join(statements)
564
+
565
+
566
class MysqlHintService(HintService):
    """Generates MySQL optimizer hints and MySQL-compatible query strings."""

    def __init__(self, mysql_instance: MysqlInterface) -> None:
        super().__init__()
        self._mysql_instance = mysql_instance

    def generate_hints(
        self,
        query: SqlQuery,
        plan: Optional[QueryPlan] = None,
        *,
        join_order: Optional[JoinTree] = None,
        physical_operators: Optional[PhysicalOperatorAssignment] = None,
        plan_parameters: Optional[PlanParameterization] = None,
    ) -> SqlQuery:
        """Adds a hint clause to the query that encodes the given optimization decisions.

        Parameters
        ----------
        query : SqlQuery
            The query to augment.
        plan : Optional[QueryPlan], optional
            A complete plan. If given, join order, operators and parameters are all derived from this plan and the
            other arguments are overwritten.
        join_order : Optional[JoinTree], optional
            The join order to enforce. Only linear join trees are supported.
        physical_operators : Optional[PhysicalOperatorAssignment], optional
            The operators to enforce.
        plan_parameters : Optional[PlanParameterization], optional
            Additional settings, used to generate preparatory statements.

        Returns
        -------
        SqlQuery
            The query with its hint clause, or the unmodified query if there is nothing to encode.

        Raises
        ------
        UnsupportedDatabaseFeatureError
            If the supplied `join_order` is not linear.
        """
        if join_order and not join_order.is_linear():
            raise UnsupportedDatabaseFeatureError(
                self._mysql_instance,
                "Can only enforce join order for linear join trees for now",
            )

        if plan is not None:
            join_order = jointree_from_plan(plan)
            physical_operators = operators_from_plan(plan)
            plan_parameters = parameters_from_plan(plan)

        join_order_hint = _generate_join_order_hint(join_order)
        operator_hint = _generate_operator_hints(physical_operators)
        prep_statements = _generate_prep_statements(physical_operators, plan_parameters)

        inline_hints = [hint for hint in (join_order_hint, operator_hint) if hint]
        # Preparatory statements have to be retained even if there are no inline hints. Otherwise, global operator
        # settings and system settings would be dropped silently.
        if not inline_hints and not prep_statements:
            return query

        final_hint_block = (
            ("/*+\n" + "\n".join(inline_hints) + "\n*/") if inline_hints else ""
        )
        hint_clause = Hint(prep_statements, final_hint_block)
        return transform.add_clause(query, hint_clause)

    def format_query(self, query: SqlQuery) -> str:
        """Provides the query text with MySQL-specific rendering of EXPLAIN clauses, static values and casts."""
        updated_query = query

        if updated_query.is_explain():
            # the transformation produces a new query, so its result must be kept
            updated_query = transform.replace_clause(
                updated_query, _MysqlExplainClause(updated_query.explain)
            )

        if "ANSI_QUOTES" not in self._mysql_instance.server_mode():
            updated_query = transform.replace_expressions(
                updated_query, _replace_static_vals
            )
        updated_query = transform.replace_expressions(updated_query, _replace_casts)

        return qal.format_quick(updated_query, inline_hint_block=True)

    def supports_hint(self, hint: PhysicalOperator | HintType) -> bool:
        """Checks whether the hint is part of the supported join, scan or plan hints."""
        return hint in MysqlJoinHints | MysqlScanHints | MysqlPlanHints
622
+
623
+
624
class MysqlOptimizer(OptimizerInterface):
    """Provides access to the plans and estimates of the MySQL query optimizer."""

    def __init__(self, mysql_instance: MysqlInterface) -> None:
        self._mysql_instance = mysql_instance

    def query_plan(self, query: SqlQuery | str) -> QueryPlan:
        """Obtains the EXPLAIN plan of the query and converts it into a `QueryPlan`.

        For `SqlQuery` instances, existing EXPLAIN clauses are dropped and preparatory statements are executed
        beforehand. Plain strings are passed to the database as-is.
        """
        is_qal_query = isinstance(query, SqlQuery)
        if is_qal_query:
            prepared_query = self._mysql_instance._prepare_query_execution(
                query, drop_explain=True
            )
        else:
            prepared_query = query
        # the parsed query is only available to the plan parser if we received one
        query_for_plan = query if is_qal_query else None

        raw_query_plan = self._mysql_instance._obtain_query_plan(prepared_query)
        return parse_mysql_explain_plan(query_for_plan, raw_query_plan).as_qep()

    def analyze_plan(self, query: SqlQuery) -> QueryPlan:
        """Not supported, always raises a ``NotImplementedError``."""
        raise NotImplementedError("MySQL interface does not support ANALYZE plans yet")

    def cardinality_estimate(self, query: SqlQuery | str) -> Cardinality:
        """Provides the estimated cardinality of the query's plan."""
        plan = self.query_plan(query)
        return plan.estimated_cardinality

    def cost_estimate(self, query: SqlQuery | str) -> float:
        """Provides the cost of the query's plan."""
        plan = self.query_plan(query)
        return plan.cost
649
+
650
+
651
def _parse_mysql_connection(config_file: str) -> MysqlConnectionArguments:
    """Reads the connection arguments from the ``[MYSQL]`` section of an INI-style config file.

    Parameters
    ----------
    config_file : str
        Path to the config file. ``User`` and ``Database`` keys are required, ``Password``, ``Host``, ``Port``,
        ``UseUnicode``, ``Charset``, ``AutoCommit`` and ``SqlMode`` are optional.

    Returns
    -------
    MysqlConnectionArguments
        The parsed arguments. Settings that are not contained in the file use their defaults.

    Raises
    ------
    ValueError
        If the section or a required key is missing, or if a numeric/boolean setting cannot be parsed.
    """
    config = configparser.ConfigParser()
    config.read(config_file)
    if "MYSQL" not in config:
        raise ValueError("Malformed MySQL config file: no [MYSQL] section found.")
    mysql_config = config["MYSQL"]

    if "User" not in mysql_config or "Database" not in mysql_config:
        raise ValueError(
            "Malformed MySQL config file: "
            "'User' and 'Database' keys are required in the [MYSQL] section."
        )
    user = mysql_config["User"]
    database = mysql_config["Database"]

    # Config values are always read as strings. Therefore, the non-string settings have to be converted explicitly -
    # otherwise a value such as "False" would be treated as a truthy string.
    # The target field names are spelled out because they do not follow a uniform naming scheme (e.g. the field for
    # the AutoCommit key is called autocommit).
    optional_keys = {
        "Password": ("password", mysql_config.get),
        "Host": ("host", mysql_config.get),
        "Port": ("port", mysql_config.getint),
        "UseUnicode": ("use_unicode", mysql_config.getboolean),
        "Charset": ("charset", mysql_config.get),
        "AutoCommit": ("autocommit", mysql_config.getboolean),
        "SqlMode": ("sql_mode", mysql_config.get),
    }
    optional_settings = {
        field: read_value(key)
        for key, (field, read_value) in optional_keys.items()
        if key in mysql_config
    }
    return MysqlConnectionArguments(user, database, **optional_settings)
680
+
681
+
682
def connect(
    *,
    name: str = "mysql",
    connection_args: Optional[MysqlConnectionArguments] = None,
    config_file: str = ".mysql_connection.config",
    cache_enabled: Optional[bool] = None,
    private: bool = False,
) -> MysqlInterface:
    """Opens a connection to a MySQL database.

    Parameters
    ----------
    name : str, optional
        The system name of the new interface. It is also the key under which the interface is registered in the
        `DatabasePool`. Defaults to ``"mysql"``.
    connection_args : Optional[MysqlConnectionArguments], optional
        The connection settings. If given, they take precedence over the config file.
    config_file : str, optional
        Path to a config file from which the connection settings are read if no `connection_args` are given.
    cache_enabled : Optional[bool], optional
        Passed through to the `MysqlInterface`.
    private : bool, optional
        Whether registration in the `DatabasePool` should be skipped. Defaults to ``False``.

    Returns
    -------
    MysqlInterface
        The new database interface.

    Raises
    ------
    ValueError
        If neither connection arguments nor a config file are available, or if the config file does not exist.
    """
    db_pool = DatabasePool.get_instance()

    if not connection_args:
        if not config_file:
            raise ValueError(
                "Connect string or config file are required to connect to MySQL"
            )
        if not os.path.exists(config_file):
            raise ValueError(
                "Config file was given, but does not exist: " + config_file
            )
        connection_args = _parse_mysql_connection(config_file)

    mysql_db = MysqlInterface(
        connection_args, system_name=name, cache_enabled=cache_enabled
    )
    if private:
        return mysql_db

    db_pool.register_database(name, mysql_db)
    return mysql_db
708
+
709
+
710
+ # The next several functions are concerned with MySQL EXPLAIN query plans. Although in theory MySQL offers some great
711
+ # tools to inspect query plans produced by the optimizer (having 3 different output formats: tabular, human-readable
712
+ # plan trees and JSON data), these output formats differ in the information they provide. Only the JSON format provides
713
+ # all the details that we are interested in (and makes them harder to access than when using the tree output for
714
+ # example).
715
+ # Sadly, the JSON output is not available when using EXPLAIN ANALYZE to match the optimizer's expectation
716
+ # with the reality encountered upon query execution. Since parsing the EXPLAIN trees is quite difficult, we restrict
717
+ # ourselves to plain EXPLAIN plans for now and maybe integrate EXPLAIN ANALYZE plans in the future along with a
718
+ # dedicated parser for its structure.
719
+ # What makes the situation with the JSON-formatted EXPLAIN plans pretty bad is the fact that the structure of the
720
+ # provided JSON document is barely documented and seems inconsistent at best (see
721
+ # https://mariadb.com/kb/en/explain-format-json/ for example). Therefore, our JSON-based parser strongly follows a
722
+ # similar implementation, namely the "visual explain" feature of the MySQL Workbench. They also need to traverse the
723
+ # JSON-based EXPLAIN plans, but this time to generate a graphical representation of the information. Still, the
724
+ # traversal and attribute access logic can be re-used by a great deal. It is even implemented in Python! Nevertheless,
725
+ # the code there is often barely documented so a lot of guesswork is still left for us to do. See
726
+ # https://github.com/mysql/mysql-workbench/blob/8.0/plugins/wb.query.analysis/explain_renderer.py for the Workbench
727
+ # implementation that our code is based on. The best explanation of how the different attributes in the JSON document
728
+ # should be interpreted is contained in the MySQL worklog entry to implement parts of the JSON output:
729
+ # https://dev.mysql.com/worklog/task/?id=6510
730
+
731
+
732
def _lookup_table(
    alias: str, candidate_tables: Iterable[TableReference]
) -> TableReference:
    """Searches for a specific table in a list of candidate tables.

    If no candidate table has the given `alias`, the full names are used instead. If still no table matches, a
    `KeyError` is raised.

    This function is necessary, because MySQL does not contain the complete table names in the output. If that were
    the case, we could construct our `TableReference` objects directly based on this information. Instead, MySQL
    provides the "identifier" of the tables, i.e. the alias if the table was aliased or the full name otherwise. In
    order to build the correct `TableReference` objects that also line up with the tables contained in the `SqlQuery`
    object for the same query, we need to take this detour and lookup the correct tables.

    Parameters
    ----------
    alias : str
        The table alias to search for. This does not have to be an alias, but could be a full table name just as well.
    candidate_tables : Iterable[TableReference]
        The tables that could potentially have the given alias. `_lookup_table` assumes that at least one of the
        candidates matches. Arbitrary iterables (including generators) are supported.

    Returns
    -------
    TableReference
        The table with the given alias or full name.

    Raises
    ------
    KeyError
        If none of the candidates matches.
    """
    # The candidates are traversed twice, so one-shot iterables have to be materialized first
    candidates = list(candidate_tables)
    table_map = {tab.full_name: tab for tab in candidates}

    # alias takes precedence over full_name in case of conflicts, tables without an alias are only available via
    # their full name
    table_map |= {tab.alias: tab for tab in candidates if tab.alias}
    return table_map[alias]
764
+
765
+
766
+ _MysqlExplainNodeTypes = {
767
+ "nested_loop",
768
+ "table",
769
+ "optimized_away_subqueries",
770
+ "grouping_operation",
771
+ "ordering_operation",
772
+ "duplicate_removal",
773
+ "union_result",
774
+ "buffer_result",
775
+ "select_list_subqueries",
776
+ }
777
+ """The different nodes that can occurr in the MySQL EXPLAIN output which correspond to actual operators.
778
+
779
+ Derived from ExplainContext.handle_query_block in mysql_renderer.py
780
+ """
781
+
782
+ _MysqlMetadataNodes = {
783
+ "cost_info",
784
+ "rows_examined_per_scan",
785
+ "rows_produced_per_join",
786
+ "filtered",
787
+ }
788
+ """The metadata contained in the MySQL EXPLAIN output that we are interested in.
789
+
790
+ For some reason, the MySQL authors decided that it was a good idea to merge this information with the normal operator
791
+ nodes and not denote the operator tree in any special way.
792
+ """
793
+
794
+ _Cost, _IdxLookup, _IdxMerge, _TabScan = (
795
+ "Const",
796
+ "Index Lookup",
797
+ "Index Merge",
798
+ "Table Scan",
799
+ )
800
+
801
+ _MysqlJoinSourceTypes = {
802
+ "system": _Cost,
803
+ "const": _Cost,
804
+ "eq_ref": _IdxLookup,
805
+ "ref": _IdxLookup,
806
+ "fulltext": _IdxLookup,
807
+ "ref_or_null": _IdxLookup,
808
+ "index_merge": _IdxMerge,
809
+ "unique_subquery": _IdxLookup,
810
+ "index_subquery": _IdxLookup,
811
+ "range": _IdxLookup,
812
+ "index": _IdxLookup,
813
+ "ALL": _TabScan,
814
+ }
815
+ """The different ways (Nested Loop) joins can be executed with a single input table.
816
+
817
+ See https://dev.mysql.com/doc/refman/8.0/en/explain-output.html#explain-join-types for details
818
+ """
819
+
820
+
821
+ _MysqlJoinTypes = {
822
+ "Block Nested Loop": "Block Nested Loop",
823
+ "Batched Key Access": "Batched Key Access",
824
+ "Batched Key Access (unique)": "Batched Key Access",
825
+ "hash join": "Hash Join", # the lower-case is intentional and not a bug..
826
+ }
827
+ """The different join algorithms supported by MySQL.
828
+
829
+ See https://dev.mysql.com/doc/refman/8.0/en/explain-output.html#explain-extra-information for the listing.
830
+ """
831
+
832
+
833
+ def _parse_cost_info(explain_data: dict) -> tuple[float, float]:
834
+ """Extracts the relevant cost information from a MySQL EXPLAIN node.
835
+
836
+ Parameters
837
+ ----------
838
+ explain_data : dict
839
+ The current EXPLAIN node. Nodes without cost information are handled gracefully.
840
+
841
+ Returns
842
+ -------
843
+ tuple[float, float]
844
+ A tuple of ``(scan cost, join cost)``. Remember that MySQL merges join nodes and scan nodes in the JSON-based
845
+ EXPLAIN output. If the node does not contain any cost information, a ``NaN`` tuple will be returned instead.
846
+ """
847
+ if "cost_info" not in explain_data:
848
+ return math.nan, math.nan
849
+ cost_info: dict = explain_data["cost_info"]
850
+
851
+ read_cost = cost_info.get("read_cost", "")
852
+ read_cost = float(read_cost) if read_cost else 0
853
+
854
+ eval_cost = cost_info.get("eval_cost", "")
855
+ eval_cost = float(eval_cost) if eval_cost else 0
856
+
857
+ scan_cost = read_cost + eval_cost
858
+ scan_cost = scan_cost if scan_cost else math.nan
859
+
860
+ join_cost = cost_info.get("prefix_cost", "")
861
+ join_cost = float(join_cost) if join_cost else math.nan
862
+ return scan_cost, join_cost
863
+
864
+
865
+ def _parse_cardinality_info(explain_data: dict) -> tuple[float, float]:
866
+ """Extracts the relevant cardinality information from a MySQL EXPLAIN node.
867
+
868
+ Parameters
869
+ ----------
870
+ explain_data : dict
871
+ The current EXPLAIN node. Nodes without cardinality information are handled gracefully.
872
+
873
+ Returns
874
+ -------
875
+ tuple[float, float]
876
+ A tuple of ``(scan cardinality, join cardinality)``. Remember that MySQL merges join nodes and scan nodes in
877
+ the JSON-based EXPLAIN output. The scan cardinality accounts for all filter predicates. If no scan or join
878
+ cardinality can be determined, a ``NaN`` is used instead.
879
+ """
880
+ table_cardinality = explain_data.get("rows_examined_per_scan", "")
881
+ table_cardinality = float(table_cardinality) if table_cardinality else math.nan
882
+
883
+ filtered = explain_data.get("filtered")
884
+ filtered = float(filtered) if filtered else math.nan
885
+ selectivity = filtered / 100
886
+ scan_cardinality = selectivity * table_cardinality
887
+
888
+ join_cardinality = explain_data.get("rows_produced_per_join", "")
889
+ join_cardinality = float(join_cardinality) if join_cardinality else math.nan
890
+ return scan_cardinality, join_cardinality
891
+
892
+
893
+ def _determine_join_type(explain_data: dict) -> str:
894
+ if "using_join_buffer" not in explain_data:
895
+ return "Nested Loop"
896
+ return _MysqlJoinTypes[explain_data["using_join_buffer"]]
897
+
898
+
899
def _parse_mysql_join_node(
    query: Optional[SqlQuery], node_name: str, explain_data: list
) -> Optional[MysqlExplainNode]:
    """Parses the entries of a ``nested_loop`` node into a chain of EXPLAIN nodes.

    Parameters
    ----------
    query : Optional[SqlQuery]
        The query that is being explained. It is passed on to the parsers of the individual entries.
    node_name : str
        The key of the current node in the JSON document. Currently not used.
    explain_data : list
        The entries of the nested loop. Each entry is parsed into its own node.

    Returns
    -------
    Optional[MysqlExplainNode]
        The node of the first entry. The nodes of the remaining entries are linked via `next_node`, in the order in
        which they appear in `explain_data`.
    """
    head_entry, *tail_entries = explain_data
    head_node = _parse_next_mysql_explain_node(query, head_entry)

    # append each further entry to the end of the chain
    chain_end = head_node
    for entry in tail_entries:
        chain_end.next_node = _parse_next_mysql_explain_node(query, entry)
        chain_end = chain_end.next_node

    return head_node
910
+
911
+
912
def _parse_mysql_table_node(
    query: Optional[SqlQuery], node_name: str, explain_data: dict
) -> Optional[MysqlExplainNode]:
    """Parses a ``table`` node of the EXPLAIN output.

    Parameters
    ----------
    query : Optional[SqlQuery]
        The query that is being explained. If it is given, the ``table_name`` entry of the node is resolved to one of
        the tables of the query. Otherwise, the created node does not reference a table.
    node_name : str
        The key of the current node in the JSON document. Currently not used.
    explain_data : dict
        The contents of the table node. It must provide an ``access_type`` entry. If a query is given, a
        ``table_name`` entry is required as well.

    Returns
    -------
    Optional[MysqlExplainNode]
        The node for the table, including cost and cardinality estimates for the scan as well as for the join. If the
        table is ``materialized_from_subquery``, the parsed subquery is attached to the node.

    Raises
    ------
    KeyError
        If the access type is not contained in `_MysqlJoinSourceTypes`.
    """
    scanned_table = None
    if query is not None:
        scanned_table = _lookup_table(explain_data["table_name"], query.tables())

    # tables are mostly scanned as part of a join
    access_type = explain_data["access_type"]
    scan_type = _MysqlJoinSourceTypes[access_type]
    join_type = _determine_join_type(explain_data)

    scan_cost, join_cost = _parse_cost_info(explain_data)
    scan_card, join_card = _parse_cardinality_info(explain_data)

    subquery = None
    if "materialized_from_subquery" in explain_data:
        subquery = _parse_next_mysql_explain_node(
            query, explain_data["materialized_from_subquery"]
        )

    return MysqlExplainNode(
        scan_type,
        join_type,
        table=scanned_table,
        scan_cost=scan_cost,
        join_cost=join_cost,
        scan_cardinality_estimate=scan_card,
        join_cardinality_estimate=join_card,
        subquery_node=subquery,
    )
945
+
946
+
947
def _parse_mysql_wrapper_node(
    query: Optional[SqlQuery], node_name: str, explain_data: dict
) -> Optional[MysqlExplainNode]:
    """Parses an operator node that wraps another operator, e.g. a grouping or an ordering operation.

    Parameters
    ----------
    query : Optional[SqlQuery]
        The query that is being explained. It is passed on to the parser of the wrapped operator.
    node_name : str
        The key of the current node in the JSON document. It determines the node type of the created node.
    explain_data : dict
        The contents of the wrapper node. They must contain the wrapped operator.

    Returns
    -------
    Optional[MysqlExplainNode]
        A node whose type is the prettified `node_name`. The wrapped operator is available as its subquery.
    """
    scan_cost, join_cost = _parse_cost_info(explain_data)
    scan_card, join_card = _parse_cardinality_info(explain_data)
    wrapped_node = _parse_next_mysql_explain_node(query, explain_data)

    # "grouping_operation" -> "Grouping Operation"
    readable_name = " ".join(node_name.split("_")).title()

    return MysqlExplainNode(
        node_type=readable_name,
        subquery_node=wrapped_node,
        scan_cost=scan_cost,
        scan_cardinality_estimate=scan_card,
        join_cost=join_cost,
        join_cardinality_estimate=join_card,
    )
964
+
965
+
966
def _parse_mysql_explain_node(
    query: Optional[SqlQuery], node_name: str, explain_data: dict | list
) -> Optional[MysqlExplainNode]:
    """Delegates the parsing of an EXPLAIN node to the parser that matches its type.

    Parameters
    ----------
    query : Optional[SqlQuery]
        The query that is being explained.
    node_name : str
        The key of the node in the JSON document. ``nested_loop`` and ``table`` nodes have dedicated parsers, all
        other nodes are parsed as wrappers around another operator.
    explain_data : dict | list
        The contents of the node. ``nested_loop`` nodes must provide a list, ``table`` nodes a dictionary.

    Returns
    -------
    Optional[MysqlExplainNode]
        The parsed node, or ``None`` if the node does not have any contents.
    """
    if not explain_data:
        return None

    if node_name == "nested_loop":
        assert isinstance(explain_data, list)
        return _parse_mysql_join_node(query, node_name, explain_data)

    if node_name == "table":
        assert isinstance(explain_data, dict)
        return _parse_mysql_table_node(query, node_name, explain_data)

    # wrapper nodes can nest their actual contents in a dedicated query block
    if "query_block" in explain_data:
        explain_data = explain_data["query_block"]
    return _parse_mysql_wrapper_node(query, node_name, explain_data)
985
+
986
+
987
def _parse_next_mysql_explain_node(
    query: Optional[SqlQuery], explain_data: dict
) -> Optional[MysqlExplainNode]:
    """Searches for the first operator in an EXPLAIN node and parses it.

    Parameters
    ----------
    query : Optional[SqlQuery]
        The query that is being explained.
    explain_data : dict
        The contents of the current EXPLAIN node. Its keys are checked against `_MysqlExplainNodeTypes` in iteration
        order and the first match is parsed. All other entries are ignored.

    Returns
    -------
    Optional[MysqlExplainNode]
        The parsed operator. This can be ``None`` if the operator does not have any contents.

    Raises
    ------
    ValueError
        If none of the keys is a known operator.
    """
    operator_entry = next(
        (
            (info_key, node_data)
            for info_key, node_data in explain_data.items()
            if info_key in _MysqlExplainNodeTypes
        ),
        None,
    )
    if operator_entry is None:
        raise ValueError(f"No known node found: {explain_data!s}")

    operator_name, operator_data = operator_entry
    return _parse_mysql_explain_node(query, operator_name, operator_data)
994
+
995
+
996
def parse_mysql_explain_plan(
    query: Optional[SqlQuery], explain_data: dict
) -> MysqlExplainPlan:
    """Parses the JSON-formatted EXPLAIN output of MySQL into an EXPLAIN plan.

    Parameters
    ----------
    query : Optional[SqlQuery]
        The query that is being explained. If it is given, the tables in the plan are resolved to the tables of the
        query.
    explain_data : dict
        The decoded JSON document. It must contain a ``query_block`` entry at the top level.

    Returns
    -------
    MysqlExplainPlan
        The parsed plan. Its total cost is the ``query_cost`` of the query block as a float, or ``NaN`` if the
        document does not provide it.

    Raises
    ------
    ValueError
        If the query block does not contain any known operator, or if the operator does not have any contents.
    """
    explain_data = explain_data["query_block"]

    # the cost is converted explicitly, since the plan expects a float rather than the raw JSON value
    raw_query_cost = explain_data.get("cost_info", {}).get("query_cost")
    if raw_query_cost is None or raw_query_cost == "":
        query_cost = math.nan
    else:
        query_cost = float(raw_query_cost)

    # the EXPLAIN plan should only have a single root node, but we do not know which operator it is (the JSON document
    # contains the nodes directly as keys, not under a normalized name, remember?). Therefore, we simply iterate over
    # all entries in the JSON document and check if the current key is a valid operator name. This is exactly, what
    # _parse_next_mysql_explain_node does.
    plan_root = _parse_next_mysql_explain_node(query, explain_data)
    if plan_root is None:
        # an explicit check instead of an assertion, because assertions are removed when running with -O
        raise ValueError("No root node found: " + str(explain_data))
    return MysqlExplainPlan(plan_root, query_cost)
1009
+
1010
+
1011
# Maps the scan labels of the EXPLAIN nodes to the scan operators of the query plan.
# There is no entry for the "Const" label (`_Cost`). Such scans end up without an operator, because the mapping is
# queried via `get` in `MysqlExplainNode._make_qep_node_for_scan`.
_MysqlExplainScanNodes = {
    _IdxLookup: ScanOperator.IndexScan,
    _IdxMerge: ScanOperator.BitmapScan,
    _TabScan: ScanOperator.SequentialScan,
}


# Maps the join algorithm names (the values of `_MysqlJoinTypes`) to the join operators of the query plan.
# The plain "Nested Loop" is not listed here. `_node_sequence_to_qep` uses `JoinOperator.NestedLoopJoin` as the
# default for all names without an entry.
_MysqlExplainJoinNodes = {
    "Block Nested Loop": JoinOperator.NestedLoopJoin,
    "Batched Key Access": JoinOperator.NestedLoopJoin,
    "Hash Join": JoinOperator.HashJoin,
}
1023
+
1024
+
1025
+ def _node_sequence_to_qep(nodes: Sequence[MysqlExplainNode]) -> QueryPlan:
1026
+ assert nodes
1027
+ if len(nodes) == 1:
1028
+ return nodes[0]._make_qep_node_for_scan()
1029
+
1030
+ if len(nodes) == 2:
1031
+ final_table, first_table = nodes
1032
+ final_qep = final_table._make_qep_node_for_scan()
1033
+ first_qep = first_table._make_qep_node_for_scan()
1034
+ join_operator = _MysqlExplainJoinNodes.get(
1035
+ final_table.join_type, JoinOperator.NestedLoopJoin
1036
+ )
1037
+ join_node = QueryPlan(
1038
+ final_table.join_type,
1039
+ operator=join_operator,
1040
+ children=[first_qep, final_qep],
1041
+ estimated_cost=final_table.join_cost,
1042
+ estimated_cardinality=Cardinality(final_table.join_cardinality_estimate),
1043
+ )
1044
+ return join_node
1045
+
1046
+ if len(nodes) > 2:
1047
+ final_table, *former_tables = nodes
1048
+ former_qep = _node_sequence_to_qep(former_tables)
1049
+ final_qep = final_table._make_qep_node_for_scan()
1050
+
1051
+ join_operator = _MysqlExplainJoinNodes.get(
1052
+ final_table.join_type, JoinOperator.NestedLoopJoin
1053
+ )
1054
+ join_node = QueryPlan(
1055
+ final_table.join_type,
1056
+ operator=join_operator,
1057
+ children=[former_qep, final_qep],
1058
+ estimated_cost=final_table.join_cost,
1059
+ estimated_cardinality=Cardinality(final_table.join_cardinality_estimate),
1060
+ )
1061
+ return join_node
1062
+
1063
+
1064
class MysqlExplainNode:
    """A single node of a MySQL EXPLAIN plan.

    A node either describes the access to a table, including the way it is joined with the preceding tables (in this
    case `node_type` is ``None``), or it is a wrapper around another operator (in this case `node_type` is the name
    of the wrapper).

    Parameters
    ----------
    scan_type : str, optional
        The way the table of the node is accessed.
    join_type : str, optional
        The algorithm that is used to join the table of the node.
    next_node : Optional[MysqlExplainNode], optional
        The node of the table that is joined after this node.
    node_type : Optional[str], optional
        The name of the operator, if the node is a wrapper.
    table : Optional[TableReference], optional
        The table that is accessed by the node.
    scan_cost : float, optional
        The estimated cost of the scan. Defaults to ``NaN``.
    join_cost : float, optional
        The estimated cost of the join. Defaults to ``NaN``.
    scan_cardinality_estimate : float, optional
        The estimated cardinality of the scan. Defaults to ``NaN``.
    join_cardinality_estimate : float, optional
        The estimated cardinality of the join. Defaults to ``NaN``.
    subquery_node : Optional[MysqlExplainNode], optional
        The input of the node. It is stored in the `subquery` attribute.
    """

    def __init__(
        self,
        scan_type: str = "",
        join_type: str = "",
        next_node: Optional[MysqlExplainNode] = None,
        *,
        node_type: Optional[str] = None,
        table: Optional[TableReference] = None,
        scan_cost: float = math.nan,
        join_cost: float = math.nan,
        scan_cardinality_estimate: float = math.nan,
        join_cardinality_estimate: float = math.nan,
        subquery_node: Optional[MysqlExplainNode] = None,
    ) -> None:
        # what kind of node is this?
        self.node_type = node_type
        self.scan_type = scan_type
        self.join_type = join_type
        self.table = table

        # optimizer estimates
        self.scan_cost = scan_cost
        self.scan_cardinality_estimate = scan_cardinality_estimate
        self.join_cost = join_cost
        self.join_cardinality_estimate = join_cardinality_estimate

        # connections to other nodes
        self.next_node = next_node
        self.subquery = subquery_node

    def as_qep(self) -> QueryPlan:
        """Converts the node and all nodes that are reachable from it into a query plan.

        Returns
        -------
        QueryPlan
            Wrapper nodes become a plan node with the plan of the subquery as their only child. Nodes without a
            successor become a scan node. All other nodes are combined with their successors into a join tree.
        """
        if self.node_type is None:
            if not self.next_node:
                return self._make_qep_node_for_scan()
            return _node_sequence_to_qep(self._collect_node_sequence())

        child_plans = [] if self.subquery is None else [self.subquery.as_qep()]
        return QueryPlan(
            self.node_type,
            base_table=self.table,
            children=child_plans,
            estimated_cost=self.join_cost,
            estimated_cardinality=Cardinality(self.join_cardinality_estimate),
        )

    def inspect(self, *, _indendation: int = 0) -> str:
        """Provides a multi-line, human-readable representation of the node and the nodes below it.

        Parameters
        ----------
        _indendation : int, optional
            The current indentation level. This is an internal parameter to handle the recursion.

        Returns
        -------
        str
            One line per node. If the node has a subquery, the subquery is printed at the same level and the
            successor is not printed. Otherwise, the successor is printed two levels deeper.
        """
        if _indendation:
            lines = [" " * _indendation + "-> " + str(self)]
        else:
            # nodes at the top level are described by their scan only
            lines = [self._scan_str()]

        if self.subquery is not None:
            lines.append(self.subquery.inspect(_indendation=_indendation))
        elif self.next_node is not None:
            lines.append(self.next_node.inspect(_indendation=_indendation + 2))
        return "\n".join(lines)

    def _collect_node_sequence(self) -> list[MysqlExplainNode]:
        """Provides the node and all of its successors, starting with the last successor and ending with the node."""
        chain = [self]
        while chain[-1].next_node:
            chain.append(chain[-1].next_node)
        chain.reverse()
        return chain

    def _make_qep_node_for_scan(self) -> QueryPlan:
        """Creates the plan node for the scan of the node's table. Successors and subqueries are not considered."""
        scan_operator = _MysqlExplainScanNodes.get(self.scan_type)
        cardinality = Cardinality(self.scan_cardinality_estimate)
        return QueryPlan(
            self.scan_type,
            base_table=self.table,
            operator=scan_operator,
            estimated_cost=self.scan_cost,
            estimated_cardinality=cardinality,
        )

    def _join_str(self) -> str:
        """Describes the join part of the node. This is empty for wrapper nodes without any join estimates."""
        if self.node_type is None:
            return f"{self.join_type} [cost={self.join_cost}, cardinality={self.join_cardinality_estimate}]"

        if math.isnan(self.join_cost) and math.isnan(self.join_cardinality_estimate):
            return ""
        return f"Join[cost={self.join_cost}, cardinality={self.join_cardinality_estimate}]"

    def _scan_str(self) -> str:
        """Describes the scan part of the node, including the scanned table if there is one."""
        if self.node_type is None:
            description = f"{self.scan_type} [cost={self.scan_cost}, cardinality={self.scan_cardinality_estimate}]"
        elif math.isnan(self.scan_cost) and math.isnan(self.scan_cardinality_estimate):
            description = ""
        else:
            description = f"Scan[cost={self.scan_cost}, cardinality={self.scan_cardinality_estimate}]"

        if self.table is None:
            return description
        return description + f" ON {self.table}"

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        join_part, scan_part = self._join_str(), self._scan_str()
        if self.node_type is None:
            return f"{join_part} USING {scan_part}"

        # wrappers only show those parts for which they actually have estimates
        available_parts = [part for part in (join_part, scan_part) if part]
        return " ".join([str(self.node_type), *available_parts])
1178
+
1179
+
1180
class MysqlExplainPlan:
    """An EXPLAIN plan of MySQL, consisting of the root of the operator tree and the total cost of the plan.

    Parameters
    ----------
    root : MysqlExplainNode
        The root node of the plan.
    total_cost : float
        The cost of the entire plan.
    """

    def __init__(self, root: MysqlExplainNode, total_cost: float) -> None:
        self.total_cost = total_cost
        self.root = root

    def as_qep(self) -> QueryPlan:
        """Converts the plan into a query plan. This is the query plan of the root node."""
        root_node = self.root
        return root_node.as_qep()

    def inspect(self) -> str:
        """Provides a human-readable representation of the plan. This is the representation of the root node."""
        root_node = self.root
        return root_node.inspect()

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return "Plan cost={}, Root={}".format(self.total_cost, self.root)