pytrilogy 0.0.2.1__tar.gz → 0.0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (105) hide show
  1. {pytrilogy-0.0.2.1/pytrilogy.egg-info → pytrilogy-0.0.2.3}/PKG-INFO +3 -3
  2. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/README.md +2 -2
  3. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3/pytrilogy.egg-info}/PKG-INFO +3 -3
  4. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_parsing.py +93 -0
  5. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_statements.py +19 -0
  6. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/__init__.py +1 -1
  7. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/enums.py +1 -0
  8. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/env_processor.py +5 -0
  9. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/functions.py +30 -5
  10. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/models.py +71 -8
  11. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/optimization.py +46 -31
  12. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/optimizations/predicate_pushdown.py +74 -8
  13. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/concept_strategies_v3.py +9 -0
  14. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/basic_node.py +1 -1
  15. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/node_merge_node.py +46 -108
  16. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/select_node.py +2 -3
  17. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/utility.py +10 -32
  18. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/query_processor.py +3 -1
  19. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/base.py +32 -83
  20. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/presto.py +1 -0
  21. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/common.py +4 -1
  22. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/parse_engine.py +25 -5
  23. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/render.py +7 -3
  24. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/trilogy.lark +6 -3
  25. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/LICENSE.md +0 -0
  26. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/pyproject.toml +0 -0
  27. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/pytrilogy.egg-info/SOURCES.txt +0 -0
  28. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/pytrilogy.egg-info/dependency_links.txt +0 -0
  29. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/pytrilogy.egg-info/entry_points.txt +0 -0
  30. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/pytrilogy.egg-info/requires.txt +0 -0
  31. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/pytrilogy.egg-info/top_level.txt +0 -0
  32. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/setup.cfg +0 -0
  33. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/setup.py +0 -0
  34. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_datatypes.py +0 -0
  35. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_declarations.py +0 -0
  36. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_derived_concepts.py +0 -0
  37. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_discovery_nodes.py +0 -0
  38. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_environment.py +0 -0
  39. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_functions.py +0 -0
  40. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_imports.py +0 -0
  41. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_metadata.py +0 -0
  42. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_models.py +0 -0
  43. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_multi_join_assignments.py +0 -0
  44. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_partial_handling.py +0 -0
  45. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_query_processing.py +0 -0
  46. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_select.py +0 -0
  47. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_undefined_concept.py +0 -0
  48. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/tests/test_where_clause.py +0 -0
  49. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/compiler.py +0 -0
  50. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/constants.py +0 -0
  51. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/__init__.py +0 -0
  52. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/constants.py +0 -0
  53. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/environment_helpers.py +0 -0
  54. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/ergonomics.py +0 -0
  55. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/exceptions.py +0 -0
  56. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/graph_models.py +0 -0
  57. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/internal.py +0 -0
  58. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/optimizations/__init__.py +0 -0
  59. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/optimizations/base_optimization.py +0 -0
  60. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/optimizations/inline_constant.py +0 -0
  61. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/optimizations/inline_datasource.py +0 -0
  62. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/__init__.py +0 -0
  63. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/graph_utils.py +0 -0
  64. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/__init__.py +0 -0
  65. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/common.py +0 -0
  66. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/filter_node.py +0 -0
  67. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/group_node.py +0 -0
  68. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
  69. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
  70. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
  71. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
  72. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/node_generators/window_node.py +0 -0
  73. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/__init__.py +0 -0
  74. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/base_node.py +0 -0
  75. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/filter_node.py +0 -0
  76. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/group_node.py +0 -0
  77. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/merge_node.py +0 -0
  78. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
  79. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/unnest_node.py +0 -0
  80. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/core/processing/nodes/window_node.py +0 -0
  81. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/__init__.py +0 -0
  82. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/bigquery.py +0 -0
  83. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/common.py +0 -0
  84. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/config.py +0 -0
  85. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/duckdb.py +0 -0
  86. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/enums.py +0 -0
  87. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/postgres.py +0 -0
  88. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/snowflake.py +0 -0
  89. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/dialect/sql_server.py +0 -0
  90. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/engine.py +0 -0
  91. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/executor.py +0 -0
  92. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/hooks/__init__.py +0 -0
  93. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/hooks/base_hook.py +0 -0
  94. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/hooks/graph_hook.py +0 -0
  95. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/hooks/query_debugger.py +0 -0
  96. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/metadata/__init__.py +0 -0
  97. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parser.py +0 -0
  98. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/__init__.py +0 -0
  99. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/config.py +0 -0
  100. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/exceptions.py +0 -0
  101. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/parsing/helpers.py +0 -0
  102. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/py.typed +0 -0
  103. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/scripts/__init__.py +0 -0
  104. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/scripts/trilogy.py +0 -0
  105. {pytrilogy-0.0.2.1 → pytrilogy-0.0.2.3}/trilogy/utility.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.1
3
+ Version: 0.0.2.3
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -247,7 +247,7 @@ N/A, only supports default auth. In python you can pass in a custom client.
247
247
 
248
248
 
249
249
  > [!TIP]
250
- > The CLI can also be used for formatting. PreQL has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
250
+ > The CLI can also be used for formatting. Trilogy has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
251
251
 
252
252
 
253
253
  ## More Examples
@@ -284,7 +284,7 @@ but all are worth checking out. Please open PRs/comment for anything missed!
284
284
 
285
285
  #### CONCEPT
286
286
 
287
- Types: `string | int | float | bool | date | datetime | time | timestamp | interval`;
287
+ Types: `string | int | float | bool | date | datetime | time | numeric(scale, precision) | timestamp | interval`;
288
288
 
289
289
  Key:
290
290
  `key <name> <type>;`
@@ -218,7 +218,7 @@ N/A, only supports default auth. In python you can pass in a custom client.
218
218
 
219
219
 
220
220
  > [!TIP]
221
- > The CLI can also be used for formatting. PreQL has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
221
+ > The CLI can also be used for formatting. Trilogy has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
222
222
 
223
223
 
224
224
  ## More Examples
@@ -255,7 +255,7 @@ but all are worth checking out. Please open PRs/comment for anything missed!
255
255
 
256
256
  #### CONCEPT
257
257
 
258
- Types: `string | int | float | bool | date | datetime | time | timestamp | interval`;
258
+ Types: `string | int | float | bool | date | datetime | time | numeric(scale, precision) | timestamp | interval`;
259
259
 
260
260
  Key:
261
261
  `key <name> <type>;`
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.1
3
+ Version: 0.0.2.3
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -247,7 +247,7 @@ N/A, only supports default auth. In python you can pass in a custom client.
247
247
 
248
248
 
249
249
  > [!TIP]
250
- > The CLI can also be used for formatting. PreQL has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
250
+ > The CLI can also be used for formatting. Trilogy has a default formatting style that should always be adhered to. `trilogy fmt <path to trilogy file>`
251
251
 
252
252
 
253
253
  ## More Examples
@@ -284,7 +284,7 @@ but all are worth checking out. Please open PRs/comment for anything missed!
284
284
 
285
285
  #### CONCEPT
286
286
 
287
- Types: `string | int | float | bool | date | datetime | time | timestamp | interval`;
287
+ Types: `string | int | float | bool | date | datetime | time | numeric(scale, precision) | timestamp | interval`;
288
288
 
289
289
  Key:
290
290
  `key <name> <type>;`
@@ -16,6 +16,7 @@ from trilogy.parsing.parse_engine import (
16
16
  from trilogy.constants import MagicConstants
17
17
  from trilogy.dialect.base import BaseDialect
18
18
  from trilogy.core.enums import BooleanOperator
19
+ from trilogy import Dialects
19
20
 
20
21
 
21
22
  def test_in():
@@ -314,3 +315,95 @@ select 1 as test;
314
315
  """
315
316
  )
316
317
  assert parsed[0].text == "select 1"
318
+
319
+
320
+ def test_circular_aliasing():
321
+ from trilogy.hooks.query_debugger import DebuggingHook
322
+
323
+ executor = Dialects.DUCK_DB.default_executor(hooks=[DebuggingHook()])
324
+ test_case = """key composite_id string;
325
+
326
+ property composite_id.first <- split(composite_id, '-')[1];
327
+ property composite_id.second <- split(composite_id, '-')[2];
328
+
329
+ key composite_id_alt <- concat(first, '-', second);
330
+
331
+ merge composite_id_alt into composite_id;
332
+
333
+ datasource random (
334
+ first:first,
335
+ second:second
336
+ )
337
+ grain (composite_id)
338
+ query '''
339
+ select '123' as first, 'abc' as second
340
+ '''
341
+ ;
342
+
343
+ datasource metrics (
344
+ composite_id: composite_id,
345
+ )
346
+ grain (composite_id)
347
+ query '''
348
+ select '123-abc' as composite_id
349
+ '''
350
+ ;
351
+
352
+
353
+ select first, second;
354
+
355
+
356
+ """
357
+ executor.parse_text(test_case)
358
+
359
+ results = executor.execute_text(test_case)[0].fetchall()
360
+
361
+ assert results == [("123", "abc")]
362
+
363
+
364
+ def test_circular_aliasing_inverse():
365
+ from trilogy.hooks.query_debugger import DebuggingHook
366
+
367
+ executor = Dialects.DUCK_DB.default_executor(hooks=[DebuggingHook()])
368
+ test_case = """key composite_id string;
369
+
370
+ property composite_id.first <- split(composite_id, '-')[1];
371
+ property composite_id.second <- split(composite_id, '-')[2];
372
+
373
+ key composite_id_alt <- concat(first, '-', second);
374
+
375
+ merge composite_id_alt into composite_id;
376
+
377
+
378
+ datasource metrics (
379
+ first:first,
380
+ second:second
381
+ )
382
+ grain (composite_id)
383
+ query '''
384
+ select '123' as first, 'abc' as second
385
+ '''
386
+ ;
387
+
388
+
389
+ select composite_id;
390
+
391
+
392
+ """
393
+ executor.parse_text(test_case)
394
+
395
+ results = executor.execute_text(test_case)[0].fetchall()
396
+
397
+ assert results == [("123-abc",)]
398
+
399
+
400
+ def test_map_definition():
401
+ env, parsed = parse_text(
402
+ """
403
+ key id int;
404
+ property id.labels map<string, int>;
405
+
406
+ """
407
+ )
408
+ assert env.concepts["labels"].datatype.key_type == DataType.STRING
409
+ assert env.concepts["labels"].datatype.value_type == DataType.INTEGER
@@ -41,3 +41,22 @@ datasource posts (
41
41
  ;
42
42
  """
43
43
  parse(text)
44
+
45
+
46
+ def test_datasource_where():
47
+ text = """key user_id int metadata(description="the description");
48
+ property user_id.display_name string metadata(description="The display name ");
49
+ property user_id.about_me string metadata(description="User provided description");
50
+ key post_id int;
51
+
52
+
53
+ datasource x_posts (
54
+ user_id: user_id,
55
+ id: post_id
56
+ )
57
+ grain (post_id)
58
+ address bigquery-public-data.stackoverflow.post_history
59
+ where post_id = 2
60
+ ;
61
+ """
62
+ parse(text)
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.2.1"
7
+ __version__ = "0.0.2.3"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -119,6 +119,7 @@ class FunctionType(Enum):
119
119
 
120
120
  # COMPLEX
121
121
  INDEX_ACCESS = "index_access"
122
+ MAP_ACCESS = "map_access"
122
123
  ATTR_ACCESS = "attr_access"
123
124
 
124
125
  # TEXT AND MAYBE MORE
@@ -17,6 +17,11 @@ def add_concept(concept: Concept, g: ReferenceGraph):
17
17
  for _, pseudonym in concept.pseudonyms.items():
18
18
  pseudonym = pseudonym.with_default_grain()
19
19
  pseudonym_node = concept_to_node(pseudonym)
20
+ if (pseudonym_node, node_name) in g.edges and (
21
+ node_name,
22
+ pseudonym_node,
23
+ ) in g.edges:
24
+ continue
20
25
  if pseudonym_node.split("@")[0] == node_name.split("@")[0]:
21
26
  continue
22
27
  g.add_edge(pseudonym_node, node_name, pseudonym=True)
@@ -202,15 +202,21 @@ def Split(args: list[Concept]) -> Function:
202
202
  )
203
203
 
204
204
 
205
+ def get_index_output_type(
206
+ arg: Concept,
207
+ ) -> DataType | StructType | MapType | ListType | NumericType:
208
+ if isinstance(arg.datatype, ListType):
209
+ return arg.datatype.value_data_type
210
+ elif isinstance(arg.datatype, MapType):
211
+ return arg.datatype.value_data_type
212
+ return arg.datatype
213
+
214
+
205
215
  def IndexAccess(args: list[Concept]):
206
216
  return Function(
207
217
  operator=FunctionType.INDEX_ACCESS,
208
218
  arguments=args,
209
- output_datatype=(
210
- args[0].datatype.value_data_type
211
- if isinstance(args[0].datatype, ListType)
212
- else args[0].datatype
213
- ),
219
+ output_datatype=get_index_output_type(args[0]),
214
220
  output_purpose=Purpose.PROPERTY,
215
221
  valid_inputs=[
216
222
  {
@@ -226,6 +232,25 @@ def IndexAccess(args: list[Concept]):
226
232
  )
227
233
 
228
234
 
235
+ def MapAccess(args: list[Concept]):
236
+ return Function(
237
+ operator=FunctionType.MAP_ACCESS,
238
+ arguments=args,
239
+ output_datatype=get_index_output_type(args[0]),
240
+ output_purpose=Purpose.PROPERTY,
241
+ valid_inputs=[
242
+ {
243
+ DataType.MAP,
244
+ },
245
+ {
246
+ DataType.INTEGER,
247
+ DataType.STRING,
248
+ },
249
+ ],
250
+ arg_count=2,
251
+ )
252
+
253
+
229
254
  def AttrAccess(args: list[Concept]):
230
255
  return Function(
231
256
  operator=FunctionType.ATTR_ACCESS,
@@ -67,7 +67,7 @@ from trilogy.core.enums import (
67
67
  )
68
68
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
69
69
  from trilogy.utility import unique
70
- from collections import UserList
70
+ from collections import UserList, UserDict
71
71
  from functools import cached_property
72
72
  from abc import ABC
73
73
 
@@ -267,7 +267,7 @@ class ListType(BaseModel):
267
267
 
268
268
  class MapType(BaseModel):
269
269
  key_type: DataType
270
- content_type: ALL_TYPES
270
+ value_type: ALL_TYPES
271
271
 
272
272
  @property
273
273
  def data_type(self):
@@ -277,6 +277,22 @@ class MapType(BaseModel):
277
277
  def value(self):
278
278
  return self.data_type.value
279
279
 
280
+ @property
281
+ def value_data_type(
282
+ self,
283
+ ) -> DataType | StructType | MapType | ListType | NumericType:
284
+ if isinstance(self.value_type, Concept):
285
+ return self.value_type.datatype
286
+ return self.value_type
287
+
288
+ @property
289
+ def key_data_type(
290
+ self,
291
+ ) -> DataType | StructType | MapType | ListType | NumericType:
292
+ if isinstance(self.key_type, Concept):
293
+ return self.key_type.datatype
294
+ return self.key_type
295
+
280
296
 
281
297
  class StructType(BaseModel):
282
298
  fields: List[ALL_TYPES]
@@ -314,6 +330,34 @@ class ListWrapper(Generic[VT], UserList):
314
330
  return cls(v, type=arg_to_datatype(v[0]))
315
331
 
316
332
 
333
+ class MapWrapper(Generic[KT, VT], UserDict):
334
+ """Used to distinguish parsed map objects from other dicts"""
335
+
336
+ def __init__(self, *args, key_type: DataType, value_type: DataType, **kwargs):
337
+ super().__init__(*args, **kwargs)
338
+ self.key_type = key_type
339
+ self.value_type = value_type
340
+
341
+ @classmethod
342
+ def __get_pydantic_core_schema__(
343
+ cls, source_type: Any, handler: Callable[[Any], core_schema.CoreSchema]
344
+ ) -> core_schema.CoreSchema:
345
+ args = get_args(source_type)
346
+ if args:
347
+ schema = handler(Dict[args]) # type: ignore
348
+ else:
349
+ schema = handler(Dict)
350
+ return core_schema.no_info_after_validator_function(cls.validate, schema)
351
+
352
+ @classmethod
353
+ def validate(cls, v):
354
+ return cls(
355
+ v,
356
+ key_type=arg_to_datatype(list(v.keys()).pop()),
357
+ value_type=arg_to_datatype(list(v.values()).pop()),
358
+ )
359
+
360
+
317
361
  class Metadata(BaseModel):
318
362
  """Metadata container object.
319
363
  TODO: support arbitrary tags"""
@@ -949,8 +993,8 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
949
993
  output_purpose: Purpose
950
994
  valid_inputs: Optional[
951
995
  Union[
952
- Set[DataType | ListType | StructType | NumericType],
953
- List[Set[DataType | ListType | StructType] | NumericType],
996
+ Set[DataType | ListType | StructType | MapType | NumericType],
997
+ List[Set[DataType | ListType | StructType | MapType | NumericType]],
954
998
  ]
955
999
  ] = None
956
1000
  arguments: Sequence[
@@ -961,17 +1005,17 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
961
1005
  int,
962
1006
  float,
963
1007
  str,
1008
+ MapWrapper[Any, Any],
964
1009
  DataType,
965
1010
  ListType,
1011
+ MapType,
966
1012
  NumericType,
967
1013
  DatePart,
968
1014
  "Parenthetical",
969
1015
  CaseWhen,
970
1016
  "CaseElse",
971
1017
  list,
972
- ListWrapper[int],
973
- ListWrapper[str],
974
- ListWrapper[float],
1018
+ ListWrapper[Any],
975
1019
  ]
976
1020
  ]
977
1021
 
@@ -1822,6 +1866,7 @@ class Datasource(Namespaced, BaseModel):
1822
1866
  metadata: DatasourceMetadata = Field(
1823
1867
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
1824
1868
  )
1869
+ where: Optional[WhereClause] = None
1825
1870
 
1826
1871
  def merge_concept(
1827
1872
  self, source: Concept, target: Concept, modifiers: List[Modifier]
@@ -1837,6 +1882,9 @@ class Datasource(Namespaced, BaseModel):
1837
1882
  for c in self.columns
1838
1883
  ] + original
1839
1884
  self.grain = self.grain.with_merge(source, target, modifiers)
1885
+ self.where = (
1886
+ self.where.with_merge(source, target, modifiers) if self.where else None
1887
+ )
1840
1888
  del self.output_lcl
1841
1889
 
1842
1890
  @property
@@ -1929,6 +1977,7 @@ class Datasource(Namespaced, BaseModel):
1929
1977
  grain=self.grain.with_namespace(namespace),
1930
1978
  address=self.address,
1931
1979
  columns=[c.with_namespace(namespace) for c in self.columns],
1980
+ where=self.where.with_namespace(namespace) if self.where else None,
1932
1981
  )
1933
1982
 
1934
1983
  @cached_property
@@ -2337,7 +2386,6 @@ class CTE(BaseModel):
2337
2386
  hidden_concepts: List[Concept] = Field(default_factory=list)
2338
2387
  order_by: Optional[OrderBy] = None
2339
2388
  limit: Optional[int] = None
2340
- requires_nesting: bool = True
2341
2389
  base_name_override: Optional[str] = None
2342
2390
  base_alias_override: Optional[str] = None
2343
2391
 
@@ -2806,6 +2854,9 @@ class EnvironmentDatasourceDict(dict):
2806
2854
  return self.__getitem__(key.split(".")[1])
2807
2855
  raise
2808
2856
 
2857
+ def values(self) -> ValuesView[Datasource]: # type: ignore
2858
+ return super().values()
2859
+
2809
2860
 
2810
2861
  class EnvironmentConceptDict(dict):
2811
2862
  def __init__(self, *args, **kwargs) -> None:
@@ -3133,6 +3184,7 @@ class Environment(BaseModel):
3133
3184
  v.pseudonyms[source.address] = source
3134
3185
  if v.address == source.address:
3135
3186
  replacements[k] = target
3187
+ v.pseudonyms[target.address] = target
3136
3188
  self.concepts.update(replacements)
3137
3189
 
3138
3190
  for k, ds in self.datasources.items():
@@ -4107,6 +4159,15 @@ def list_to_wrapper(args):
4107
4159
  return ListWrapper(args, type=types[0])
4108
4160
 
4109
4161
 
4162
+ def dict_to_map_wrapper(arg):
4163
+ key_types = [arg_to_datatype(arg) for arg in arg.keys()]
4164
+
4165
+ value_types = [arg_to_datatype(arg) for arg in arg.values()]
4166
+ assert len(set(key_types)) == 1
4167
+ assert len(set(key_types)) == 1
4168
+ return MapWrapper(arg, key_type=key_types[0], value_type=value_types[0])
4169
+
4170
+
4110
4171
  def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | NumericType:
4111
4172
  if isinstance(arg, Function):
4112
4173
  return arg.output_datatype
@@ -4135,5 +4196,7 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | Numeric
4135
4196
  elif isinstance(arg, list):
4136
4197
  wrapper = list_to_wrapper(arg)
4137
4198
  return ListType(type=wrapper.type)
4199
+ elif isinstance(arg, MapWrapper):
4200
+ return MapType(key_type=arg.key_type, value_type=arg.value_type)
4138
4201
  else:
4139
4202
  raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
@@ -1,10 +1,10 @@
1
1
  from trilogy.core.models import (
2
2
  CTE,
3
3
  SelectStatement,
4
- PersistStatement,
5
4
  MultiSelectStatement,
5
+ Conditional,
6
6
  )
7
- from trilogy.core.enums import PurposeLineage
7
+ from trilogy.core.enums import PurposeLineage, BooleanOperator
8
8
  from trilogy.constants import logger, CONFIG
9
9
  from trilogy.core.optimizations import (
10
10
  OptimizationRule,
@@ -42,34 +42,45 @@ def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
42
42
  return inverse_map
43
43
 
44
44
 
45
- def is_direct_return_eligible(
46
- cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
47
- ) -> bool:
48
- if isinstance(select, (PersistStatement, MultiSelectStatement)):
49
- return False
45
+ def is_direct_return_eligible(cte: CTE) -> CTE | None:
46
+ # if isinstance(select, (PersistStatement, MultiSelectStatement)):
47
+ # return False
48
+ if len(cte.parent_ctes) != 1:
49
+ return None
50
+ direct_parent = cte.parent_ctes[0]
51
+
52
+ output_addresses = set([x.address for x in cte.output_columns])
53
+ parent_output_addresses = set([x.address for x in direct_parent.output_columns])
54
+ if not output_addresses.issubset(parent_output_addresses):
55
+ return None
56
+ if not direct_parent.grain == cte.grain:
57
+ return None
50
58
  derived_concepts = [
51
59
  c
52
60
  for c in cte.source.output_concepts + cte.source.hidden_concepts
53
61
  if c not in cte.source.input_concepts
54
62
  ]
55
- eligible = True
56
63
  conditions = (
57
- set(x.address for x in select.where_clause.concept_arguments)
58
- if select.where_clause
64
+ set(x.address for x in direct_parent.condition.concept_arguments)
65
+ if direct_parent.condition
59
66
  else set()
60
67
  )
61
68
  for x in derived_concepts:
62
69
  if x.derivation == PurposeLineage.WINDOW:
63
- return False
70
+ return None
64
71
  if x.derivation == PurposeLineage.UNNEST:
65
- return False
72
+ return None
66
73
  if x.derivation == PurposeLineage.AGGREGATE:
67
74
  if x.address in conditions:
68
- return False
75
+ return None
76
+ # handling top level nodes that require unpacking
77
+ for x in cte.output_columns:
78
+ if x.derivation == PurposeLineage.UNNEST:
79
+ return None
69
80
  logger.info(
70
- f"[Optimization][EarlyReturn] Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
81
+ f"[Optimization][EarlyReturn] Removing redundant output CTE with derived_concepts {[x.address for x in derived_concepts]}"
71
82
  )
72
- return eligible
83
+ return direct_parent
73
84
 
74
85
 
75
86
  def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
@@ -90,23 +101,27 @@ def optimize_ctes(
90
101
  input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
91
102
  ) -> list[CTE]:
92
103
 
93
- if CONFIG.optimizations.direct_return and is_direct_return_eligible(
94
- root_cte, select
104
+ direct_parent: CTE | None = root_cte
105
+ while CONFIG.optimizations.direct_return and (
106
+ direct_parent := is_direct_return_eligible(root_cte)
95
107
  ):
96
- root_cte.order_by = select.order_by
97
- root_cte.limit = select.limit
98
- # if select.where_clause:
99
-
100
- # if root_cte.condition:
101
- # root_cte.condition = Conditional(
102
- # left=root_cte.condition,
103
- # operator=BooleanOperator.AND,
104
- # right=select.where_clause.conditional,
105
- # )
106
- # else:
107
- # root_cte.condition = select.where_clause.conditional
108
- root_cte.requires_nesting = False
109
- sort_select_output(root_cte, select)
108
+ direct_parent.order_by = root_cte.order_by
109
+ direct_parent.limit = root_cte.limit
110
+ direct_parent.hidden_concepts = (
111
+ root_cte.hidden_concepts + direct_parent.hidden_concepts
112
+ )
113
+ if root_cte.condition:
114
+ if direct_parent.condition:
115
+ direct_parent.condition = Conditional(
116
+ left=direct_parent.condition,
117
+ operator=BooleanOperator.AND,
118
+ right=root_cte.condition,
119
+ )
120
+ else:
121
+ direct_parent.condition = root_cte.condition
122
+ root_cte = direct_parent
123
+
124
+ sort_select_output(root_cte, select)
110
125
 
111
126
  REGISTERED_RULES: list["OptimizationRule"] = []
112
127
  if CONFIG.optimizations.constant_inlining:
@@ -3,18 +3,43 @@ from trilogy.core.models import (
3
3
  Conditional,
4
4
  BooleanOperator,
5
5
  Datasource,
6
+ SubselectComparison,
7
+ Comparison,
8
+ Parenthetical,
9
+ Function,
10
+ FilterItem,
11
+ MagicConstants,
12
+ Concept,
13
+ WindowItem,
14
+ AggregateWrapper,
15
+ DataType,
6
16
  )
7
17
  from trilogy.core.optimizations.base_optimization import OptimizationRule
18
+ from trilogy.core.enums import FunctionClass
8
19
 
9
20
 
10
- def decompose_condition(conditional: Conditional):
11
- chunks = []
21
+ def decompose_condition(
22
+ conditional: Conditional,
23
+ ) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
24
+ chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
12
25
  if conditional.operator == BooleanOperator.AND:
13
- for val in [conditional.left, conditional.right]:
14
- if isinstance(val, Conditional):
15
- chunks.extend(decompose_condition(val))
16
- else:
17
- chunks.append(val)
26
+ if not (
27
+ isinstance(
28
+ conditional.left,
29
+ (SubselectComparison, Comparison, Conditional, Parenthetical),
30
+ )
31
+ and isinstance(
32
+ conditional.right,
33
+ (SubselectComparison, Comparison, Conditional, Parenthetical),
34
+ )
35
+ ):
36
+ chunks.append(conditional)
37
+ else:
38
+ for val in [conditional.left, conditional.right]:
39
+ if isinstance(val, Conditional):
40
+ chunks.extend(decompose_condition(val))
41
+ else:
42
+ chunks.append(val)
18
43
  else:
19
44
  chunks.append(conditional)
20
45
  return chunks
@@ -31,6 +56,40 @@ def is_child_of(a, comparison):
31
56
  return base
32
57
 
33
58
 
59
+ def is_basic(
60
+ element: (
61
+ int
62
+ | str
63
+ | float
64
+ | list
65
+ | WindowItem
66
+ | FilterItem
67
+ | Concept
68
+ | Comparison
69
+ | Conditional
70
+ | Parenthetical
71
+ | Function
72
+ | AggregateWrapper
73
+ | MagicConstants
74
+ | DataType
75
+ ),
76
+ ) -> bool:
77
+ if isinstance(element, Parenthetical):
78
+ return is_basic(element.content)
79
+ elif isinstance(element, SubselectComparison):
80
+ return True
81
+ elif isinstance(element, Comparison):
82
+ return is_basic(element.left) and is_basic(element.right)
83
+ elif isinstance(element, Function):
84
+ if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
85
+ return False
86
+ elif isinstance(element, AggregateWrapper):
87
+ return is_basic(element.function)
88
+ elif isinstance(element, Conditional):
89
+ return is_basic(element.left) and is_basic(element.right)
90
+ return True
91
+
92
+
34
93
  class PredicatePushdown(OptimizationRule):
35
94
 
36
95
  def __init__(self, *args, **kwargs) -> None:
@@ -128,7 +187,14 @@ class PredicatePushdown(OptimizationRule):
128
187
  )
129
188
  optimized = False
130
189
  for candidate in candidates:
131
- self.debug(f"Checking candidate {candidate}")
190
+ if not is_basic(candidate):
191
+ self.debug(
192
+ f"Skipping {candidate} as not a basic [no aggregate, etc] condition"
193
+ )
194
+ continue
195
+ self.log(
196
+ f"Checking candidate {candidate}, {type(candidate)}, {is_basic(candidate)}"
197
+ )
132
198
  for parent_cte in cte.parent_ctes:
133
199
  local_pushdown = self._check_parent(
134
200
  parent_cte=parent_cte, candidate=candidate, inverse_map=inverse_map