pytrilogy 0.0.2.25__tar.gz → 0.0.2.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (108)
  1. {pytrilogy-0.0.2.25/pytrilogy.egg-info → pytrilogy-0.0.2.26}/PKG-INFO +1 -1
  2. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26/pytrilogy.egg-info}/PKG-INFO +1 -1
  3. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_models.py +7 -22
  4. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/__init__.py +1 -1
  5. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/constants.py +1 -1
  6. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/models.py +106 -67
  7. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/common.py +0 -1
  8. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/select_merge_node.py +49 -22
  9. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/merge_node.py +2 -2
  10. pytrilogy-0.0.2.26/trilogy/core/processing/utility.py +533 -0
  11. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/query_processor.py +47 -39
  12. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/base.py +1 -0
  13. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/common.py +4 -25
  14. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/executor.py +12 -3
  15. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/common.py +4 -6
  16. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/parse_engine.py +3 -2
  17. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/render.py +41 -17
  18. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/trilogy.lark +2 -2
  19. pytrilogy-0.0.2.25/trilogy/core/processing/utility.py +0 -554
  20. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/LICENSE.md +0 -0
  21. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/README.md +0 -0
  22. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/pyproject.toml +0 -0
  23. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/pytrilogy.egg-info/SOURCES.txt +0 -0
  24. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/pytrilogy.egg-info/dependency_links.txt +0 -0
  25. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/pytrilogy.egg-info/entry_points.txt +0 -0
  26. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/pytrilogy.egg-info/requires.txt +0 -0
  27. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/pytrilogy.egg-info/top_level.txt +0 -0
  28. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/setup.cfg +0 -0
  29. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/setup.py +0 -0
  30. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_datatypes.py +0 -0
  31. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_declarations.py +0 -0
  32. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_derived_concepts.py +0 -0
  33. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_discovery_nodes.py +0 -0
  34. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_environment.py +0 -0
  35. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_functions.py +0 -0
  36. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_imports.py +0 -0
  37. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_metadata.py +0 -0
  38. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_multi_join_assignments.py +0 -0
  39. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_parsing.py +0 -0
  40. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_partial_handling.py +0 -0
  41. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_query_processing.py +0 -0
  42. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_select.py +0 -0
  43. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_show.py +0 -0
  44. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_statements.py +0 -0
  45. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_undefined_concept.py +0 -0
  46. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/tests/test_where_clause.py +0 -0
  47. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/compiler.py +0 -0
  48. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/__init__.py +0 -0
  49. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/constants.py +0 -0
  50. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/enums.py +0 -0
  51. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/env_processor.py +0 -0
  52. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/environment_helpers.py +0 -0
  53. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/ergonomics.py +0 -0
  54. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/exceptions.py +0 -0
  55. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/functions.py +0 -0
  56. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/graph_models.py +0 -0
  57. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/internal.py +0 -0
  58. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/optimization.py +0 -0
  59. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/optimizations/__init__.py +0 -0
  60. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/optimizations/base_optimization.py +0 -0
  61. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/optimizations/inline_constant.py +0 -0
  62. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/optimizations/inline_datasource.py +0 -0
  63. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/optimizations/predicate_pushdown.py +0 -0
  64. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/__init__.py +0 -0
  65. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/concept_strategies_v3.py +0 -0
  66. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/graph_utils.py +0 -0
  67. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/__init__.py +0 -0
  68. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/basic_node.py +0 -0
  69. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/filter_node.py +0 -0
  70. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/group_node.py +0 -0
  71. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
  72. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
  73. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/node_merge_node.py +0 -0
  74. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
  75. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/select_node.py +0 -0
  76. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
  77. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/node_generators/window_node.py +0 -0
  78. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/__init__.py +0 -0
  79. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/base_node.py +0 -0
  80. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/filter_node.py +0 -0
  81. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/group_node.py +0 -0
  82. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
  83. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/unnest_node.py +0 -0
  84. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/core/processing/nodes/window_node.py +0 -0
  85. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/__init__.py +0 -0
  86. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/bigquery.py +0 -0
  87. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/config.py +0 -0
  88. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/duckdb.py +0 -0
  89. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/enums.py +0 -0
  90. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/postgres.py +0 -0
  91. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/presto.py +0 -0
  92. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/snowflake.py +0 -0
  93. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/dialect/sql_server.py +0 -0
  94. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/engine.py +0 -0
  95. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/hooks/__init__.py +0 -0
  96. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/hooks/base_hook.py +0 -0
  97. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/hooks/graph_hook.py +0 -0
  98. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/hooks/query_debugger.py +0 -0
  99. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/metadata/__init__.py +0 -0
  100. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parser.py +0 -0
  101. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/__init__.py +0 -0
  102. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/config.py +0 -0
  103. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/exceptions.py +0 -0
  104. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/parsing/helpers.py +0 -0
  105. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/py.typed +0 -0
  106. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/scripts/__init__.py +0 -0
  107. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/scripts/trilogy.py +0 -0
  108. {pytrilogy-0.0.2.25 → pytrilogy-0.0.2.26}/trilogy/utility.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.25
3
+ Version: 0.0.2.26
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.25
3
+ Version: 0.0.2.26
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -11,7 +11,7 @@ from trilogy.core.models import (
11
11
  BaseJoin,
12
12
  Comparison,
13
13
  Join,
14
- JoinKey,
14
+ CTEConceptPair,
15
15
  Concept,
16
16
  AggregateWrapper,
17
17
  RowsetItem,
@@ -190,31 +190,13 @@ def test_base_join(test_environment: Environment):
190
190
  right_datasource=test_environment.datasources["products"],
191
191
  concepts=[
192
192
  test_environment.concepts["product_id"],
193
- test_environment.concepts["category_name"],
193
+ # test_environment.concepts["category_name"],
194
194
  ],
195
195
  join_type=JoinType.RIGHT_OUTER,
196
- filter_to_mutual=True,
197
196
  )
198
197
 
199
198
  assert x.concepts == [test_environment.concepts["product_id"]]
200
199
 
201
- exc3: SyntaxError | None = None
202
- try:
203
- x = BaseJoin(
204
- left_datasource=test_environment.datasources["revenue"],
205
- right_datasource=test_environment.datasources["category"],
206
- concepts=[
207
- test_environment.concepts["product_id"],
208
- test_environment.concepts["category_name"],
209
- ],
210
- join_type=JoinType.RIGHT_OUTER,
211
- filter_to_mutual=True,
212
- )
213
- except Exception as exc4:
214
- exc3 = exc4
215
- pass
216
- assert isinstance(exc3, SyntaxError)
217
-
218
200
 
219
201
  def test_comparison():
220
202
  try:
@@ -264,13 +246,16 @@ def test_join(test_environment: Environment):
264
246
  test = Join(
265
247
  left_cte=a,
266
248
  right_cte=b,
267
- joinkeys=[JoinKey(concept=x) for x in outputs],
249
+ joinkey_pairs=[
250
+ CTEConceptPair(left=x, right=x, existing_datasource=a.source, cte=a)
251
+ for x in outputs
252
+ ],
268
253
  jointype=JoinType.RIGHT_OUTER,
269
254
  )
270
255
 
271
256
  assert (
272
257
  str(test)
273
- == "right outer JOIN test and testb on local.product_id<local.product_id>,local.category_id<local.category_id>"
258
+ == "right outer join testb on test.local.product_id=local.product_id,test.local.category_id=local.category_id"
274
259
  ), str(test)
275
260
 
276
261
 
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.2.25"
7
+ __version__ = "0.0.2.26"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -7,7 +7,7 @@ logger = getLogger("trilogy")
7
7
 
8
8
  DEFAULT_NAMESPACE = "local"
9
9
 
10
- VIRTUAL_CONCEPT_PREFIX = "_virtual"
10
+ VIRTUAL_CONCEPT_PREFIX = "_virt"
11
11
 
12
12
  ENV_CACHE_NAME = ".preql_cache.json"
13
13
 
@@ -73,6 +73,7 @@ from collections import UserList, UserDict
73
73
  from functools import cached_property
74
74
  from abc import ABC
75
75
  from collections import defaultdict
76
+ import hashlib
76
77
 
77
78
  LOGGER_PREFIX = "[MODELS]"
78
79
 
@@ -190,6 +191,13 @@ class ConstantInlineable(ABC):
190
191
  raise NotImplementedError
191
192
 
192
193
 
194
+ class HasUUID(ABC):
195
+
196
+ @property
197
+ def uuid(self) -> str:
198
+ return hashlib.md5(str(self).encode()).hexdigest()
199
+
200
+
193
201
  class SelectTypeMixin(BaseModel):
194
202
  where_clause: Union["WhereClause", None] = Field(default=None)
195
203
  having_clause: Union["HavingClause", None] = Field(default=None)
@@ -1606,7 +1614,7 @@ class RawSQLStatement(BaseModel):
1606
1614
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1607
1615
 
1608
1616
 
1609
- class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1617
+ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1610
1618
  selection: List[SelectItem]
1611
1619
  order_by: Optional[OrderBy] = None
1612
1620
  limit: Optional[int] = None
@@ -1724,19 +1732,34 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1724
1732
  # if the concept is a locally derived concept, it cannot ever be partial
1725
1733
  # but if it's a concept pulled in from upstream and we have a where clause, it should be partial
1726
1734
  ColumnAssignment(
1727
- alias=c.address.replace(".", "_"),
1735
+ alias=(
1736
+ c.name.replace(".", "_")
1737
+ if c.namespace == DEFAULT_NAMESPACE
1738
+ else c.address.replace(".", "_")
1739
+ ),
1728
1740
  concept=c,
1729
1741
  modifiers=modifiers if c.address not in self.locally_derived else [],
1730
1742
  )
1731
1743
  for c in self.output_components
1732
1744
  ]
1733
1745
 
1746
+ condition = None
1747
+ if self.where_clause:
1748
+ condition = self.where_clause.conditional
1749
+ if self.having_clause:
1750
+ if condition:
1751
+ condition = self.having_clause.conditional + condition
1752
+ else:
1753
+ condition = self.having_clause.conditional
1754
+
1734
1755
  new_datasource = Datasource(
1735
1756
  identifier=identifier,
1736
1757
  address=address,
1737
1758
  grain=grain or self.grain,
1738
1759
  columns=columns,
1739
1760
  namespace=namespace,
1761
+ where=WhereClause(conditional=condition) if condition else None,
1762
+ non_partial_for=WhereClause(conditional=condition) if condition else None,
1740
1763
  )
1741
1764
  for column in columns:
1742
1765
  column.concept = column.concept.with_grain(new_datasource.grain)
@@ -1859,7 +1882,7 @@ class AlignClause(Namespaced, BaseModel):
1859
1882
  return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
1860
1883
 
1861
1884
 
1862
- class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1885
+ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1863
1886
  selects: List[SelectStatement]
1864
1887
  align: AlignClause
1865
1888
  namespace: str
@@ -2021,7 +2044,7 @@ class DatasourceMetadata(BaseModel):
2021
2044
  line_no: int | None = None
2022
2045
 
2023
2046
 
2024
- class MergeStatementV2(Namespaced, BaseModel):
2047
+ class MergeStatementV2(HasUUID, Namespaced, BaseModel):
2025
2048
  source: Concept
2026
2049
  target: Concept
2027
2050
  modifiers: List[Modifier] = Field(default_factory=list)
@@ -2035,7 +2058,7 @@ class MergeStatementV2(Namespaced, BaseModel):
2035
2058
  return new
2036
2059
 
2037
2060
 
2038
- class Datasource(Namespaced, BaseModel):
2061
+ class Datasource(HasUUID, Namespaced, BaseModel):
2039
2062
  identifier: str
2040
2063
  columns: List[ColumnAssignment]
2041
2064
  address: Union[Address, str]
@@ -2047,6 +2070,7 @@ class Datasource(Namespaced, BaseModel):
2047
2070
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
2048
2071
  )
2049
2072
  where: Optional[WhereClause] = None
2073
+ non_partial_for: Optional[WhereClause] = None
2050
2074
 
2051
2075
  def merge_concept(
2052
2076
  self, source: Concept, target: Concept, modifiers: List[Modifier]
@@ -2247,6 +2271,7 @@ class InstantiatedUnnestJoin(BaseModel):
2247
2271
  class ConceptPair(BaseModel):
2248
2272
  left: Concept
2249
2273
  right: Concept
2274
+ existing_datasource: Union[Datasource, "QueryDatasource"]
2250
2275
  modifiers: List[Modifier] = Field(default_factory=list)
2251
2276
 
2252
2277
  @property
@@ -2258,17 +2283,23 @@ class ConceptPair(BaseModel):
2258
2283
  return Modifier.NULLABLE in self.modifiers
2259
2284
 
2260
2285
 
2286
+ class CTEConceptPair(ConceptPair):
2287
+ cte: CTE
2288
+
2289
+
2261
2290
  class BaseJoin(BaseModel):
2262
- left_datasource: Union[Datasource, "QueryDatasource"]
2263
2291
  right_datasource: Union[Datasource, "QueryDatasource"]
2264
- concepts: List[Concept]
2265
2292
  join_type: JoinType
2266
- filter_to_mutual: bool = False
2293
+ concepts: Optional[List[Concept]] = None
2294
+ left_datasource: Optional[Union[Datasource, "QueryDatasource"]] = None
2267
2295
  concept_pairs: list[ConceptPair] | None = None
2268
2296
 
2269
2297
  def __init__(self, **data: Any):
2270
2298
  super().__init__(**data)
2271
- if self.left_datasource.full_name == self.right_datasource.full_name:
2299
+ if (
2300
+ self.left_datasource
2301
+ and self.left_datasource.full_name == self.right_datasource.full_name
2302
+ ):
2272
2303
  raise SyntaxError(
2273
2304
  f"Cannot join a dataself to itself, joining {self.left_datasource} and"
2274
2305
  f" {self.right_datasource}"
@@ -2278,8 +2309,10 @@ class BaseJoin(BaseModel):
2278
2309
  # if we have a list of concept pairs
2279
2310
  if self.concept_pairs:
2280
2311
  return
2281
-
2282
- for concept in self.concepts:
2312
+ if self.concepts == []:
2313
+ return
2314
+ assert self.left_datasource and self.right_datasource
2315
+ for concept in self.concepts or []:
2283
2316
  include = True
2284
2317
  for ds in [self.left_datasource, self.right_datasource]:
2285
2318
  synonyms = []
@@ -2289,13 +2322,10 @@ class BaseJoin(BaseModel):
2289
2322
  concept.address not in [c.address for c in ds.output_concepts]
2290
2323
  and concept.address not in synonyms
2291
2324
  ):
2292
- if self.filter_to_mutual:
2293
- include = False
2294
- else:
2295
- raise SyntaxError(
2296
- f"Invalid join, missing {concept} on {ds.name}, have"
2297
- f" {[c.address for c in ds.output_concepts]}"
2298
- )
2325
+ raise SyntaxError(
2326
+ f"Invalid join, missing {concept} on {ds.name}, have"
2327
+ f" {[c.address for c in ds.output_concepts]}"
2328
+ )
2299
2329
  if include:
2300
2330
  final_concepts.append(concept)
2301
2331
  if not final_concepts and self.concepts:
@@ -2312,7 +2342,7 @@ class BaseJoin(BaseModel):
2312
2342
  self.concepts = []
2313
2343
  return
2314
2344
  # if everything is at abstract grain, we can skip joins
2315
- if all([c.grain == Grain() for c in ds.output_concepts]):
2345
+ if all([c.grain.abstract for c in ds.output_concepts]):
2316
2346
  self.concepts = []
2317
2347
  return
2318
2348
 
@@ -2330,21 +2360,27 @@ class BaseJoin(BaseModel):
2330
2360
 
2331
2361
  @property
2332
2362
  def unique_id(self) -> str:
2333
- # TODO: include join type?
2334
- return (
2335
- self.left_datasource.name
2336
- + self.right_datasource.name
2337
- + self.join_type.value
2338
- )
2363
+ return str(self)
2364
+
2365
+ @property
2366
+ def input_concepts(self) -> List[Concept]:
2367
+ base = []
2368
+ if self.concept_pairs:
2369
+ for pair in self.concept_pairs:
2370
+ base += [pair.left, pair.right]
2371
+ elif self.concepts:
2372
+ base += self.concepts
2373
+ return base
2339
2374
 
2340
2375
  def __str__(self):
2341
2376
  if self.concept_pairs:
2342
2377
  return (
2343
- f"{self.join_type.value} on"
2344
- f" {','.join([str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
2378
+ f"{self.join_type.value} {self.right_datasource.name} on"
2379
+ f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
2345
2380
  )
2346
2381
  return (
2347
- f"{self.join_type.value} on" f" {','.join([str(k) for k in self.concepts])}"
2382
+ f"{self.join_type.value} {self.right_datasource.name} on"
2383
+ f" {','.join([str(k) for k in self.concepts])}"
2348
2384
  )
2349
2385
 
2350
2386
 
@@ -2389,19 +2425,9 @@ class QueryDatasource(BaseModel):
2389
2425
  for join in v:
2390
2426
  if not isinstance(join, BaseJoin):
2391
2427
  continue
2392
- if join.left_datasource.identifier == join.right_datasource.identifier:
2393
- raise SyntaxError(
2394
- f"Cannot join a datasource to itself, joining {join.left_datasource}"
2395
- )
2396
- pairing = "".join(
2397
- sorted(
2398
- [join.left_datasource.identifier, join.right_datasource.identifier]
2399
- )
2400
- )
2428
+ pairing = str(join)
2401
2429
  if pairing in unique_pairs:
2402
- raise SyntaxError(
2403
- f"Duplicate join {join.left_datasource.identifier} and {join.right_datasource.identifier}"
2404
- )
2430
+ raise SyntaxError(f"Duplicate join {str(join)}")
2405
2431
  unique_pairs.add(pairing)
2406
2432
  return v
2407
2433
 
@@ -2666,7 +2692,12 @@ class CTE(BaseModel):
2666
2692
  isinstance(join, Join)
2667
2693
  and (
2668
2694
  join.right_cte.name != removed_cte
2669
- and join.left_cte.name != removed_cte
2695
+ and any(
2696
+ [
2697
+ x.cte.name != removed_cte
2698
+ for x in (join.joinkey_pairs or [])
2699
+ ]
2700
+ )
2670
2701
  )
2671
2702
  )
2672
2703
  ]
@@ -2737,8 +2768,12 @@ class CTE(BaseModel):
2737
2768
  for join in self.joins:
2738
2769
  if isinstance(join, InstantiatedUnnestJoin):
2739
2770
  continue
2740
- if join.left_cte.name == parent.name:
2771
+ if join.left_cte and join.left_cte.name == parent.name:
2741
2772
  join.inline_cte(parent)
2773
+ if join.joinkey_pairs:
2774
+ for pair in join.joinkey_pairs:
2775
+ if pair.cte and pair.cte.name == parent.name:
2776
+ join.inline_cte(parent)
2742
2777
  if join.right_cte.name == parent.name:
2743
2778
  join.inline_cte(parent)
2744
2779
  for k, v in self.source_map.items():
@@ -2961,21 +2996,26 @@ class JoinKey(BaseModel):
2961
2996
 
2962
2997
 
2963
2998
  class Join(BaseModel):
2964
- left_cte: CTE
2999
+
2965
3000
  right_cte: CTE
2966
3001
  jointype: JoinType
2967
- joinkeys: List[JoinKey]
2968
- joinkey_pairs: List[ConceptPair] | None = None
3002
+ left_cte: CTE | None = None
3003
+ joinkey_pairs: List[CTEConceptPair] | None = None
2969
3004
  inlined_ctes: set[str] = Field(default_factory=set)
2970
3005
 
2971
3006
  def inline_cte(self, cte: CTE):
2972
3007
  self.inlined_ctes.add(cte.name)
2973
3008
 
2974
- @property
2975
- def left_name(self) -> str:
2976
- if self.left_cte.name in self.inlined_ctes:
2977
- return self.left_cte.source.datasources[0].identifier
2978
- return self.left_cte.name
3009
+ # @property
3010
+ # def left_name(self) -> str:
3011
+ # if self.left_cte.name in self.inlined_ctes:
3012
+ # return self.left_cte.source.datasources[0].identifier
3013
+ # return self.left_cte.name
3014
+
3015
+ def get_name(self, cte: CTE):
3016
+ if cte.name in self.inlined_ctes:
3017
+ return cte.source.datasources[0].identifier
3018
+ return cte.name
2979
3019
 
2980
3020
  @property
2981
3021
  def right_name(self) -> str:
@@ -2983,12 +3023,6 @@ class Join(BaseModel):
2983
3023
  return self.right_cte.source.datasources[0].identifier
2984
3024
  return self.right_cte.name
2985
3025
 
2986
- @property
2987
- def left_ref(self) -> str:
2988
- if self.left_cte.name in self.inlined_ctes:
2989
- return f"{self.left_cte.source.datasources[0].safe_location} as {self.left_cte.source.datasources[0].identifier}"
2990
- return self.left_cte.name
2991
-
2992
3026
  @property
2993
3027
  def right_ref(self) -> str:
2994
3028
  if self.right_cte.name in self.inlined_ctes:
@@ -2997,19 +3031,21 @@ class Join(BaseModel):
2997
3031
 
2998
3032
  @property
2999
3033
  def unique_id(self) -> str:
3000
- return self.left_name + self.right_name + self.jointype.value
3034
+ return str(self)
3001
3035
 
3002
3036
  def __str__(self):
3003
3037
  if self.joinkey_pairs:
3004
3038
  return (
3005
- f"{self.jointype.value} JOIN {self.left_name} and"
3039
+ f"{self.jointype.value} join"
3006
3040
  f" {self.right_name} on"
3007
- f" {','.join([str(k.left)+'='+str(k.right)+str(k.modifiers) for k in self.joinkey_pairs])}"
3041
+ f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
3008
3042
  )
3009
- return (
3010
- f"{self.jointype.value} JOIN {self.left_name} and"
3011
- f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
3012
- )
3043
+ elif self.left_cte:
3044
+ return (
3045
+ f"{self.jointype.value} JOIN {self.left_cte.name} and"
3046
+ f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
3047
+ )
3048
+ return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
3013
3049
 
3014
3050
 
3015
3051
  class UndefinedConcept(Concept, Mergeable, Namespaced):
@@ -3227,7 +3263,7 @@ class EnvironmentConceptDict(dict):
3227
3263
  return super().items()
3228
3264
 
3229
3265
 
3230
- class ImportStatement(BaseModel):
3266
+ class ImportStatement(HasUUID, BaseModel):
3231
3267
  alias: str
3232
3268
  path: Path
3233
3269
  environment: Union["Environment", None] = None
@@ -4223,6 +4259,9 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
4223
4259
  class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4224
4260
  conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
4225
4261
 
4262
+ def __repr__(self):
4263
+ return str(self.conditional)
4264
+
4226
4265
  @property
4227
4266
  def input(self) -> List[Concept]:
4228
4267
  return self.conditional.input
@@ -4341,7 +4380,7 @@ class Limit(BaseModel):
4341
4380
  count: int
4342
4381
 
4343
4382
 
4344
- class ConceptDeclarationStatement(BaseModel):
4383
+ class ConceptDeclarationStatement(HasUUID, BaseModel):
4345
4384
  concept: Concept
4346
4385
 
4347
4386
 
@@ -4349,7 +4388,7 @@ class ConceptDerivation(BaseModel):
4349
4388
  concept: Concept
4350
4389
 
4351
4390
 
4352
- class RowsetDerivationStatement(Namespaced, BaseModel):
4391
+ class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
4353
4392
  name: str
4354
4393
  select: SelectStatement | MultiSelectStatement
4355
4394
  namespace: str
@@ -4614,7 +4653,7 @@ class TupleWrapper(Generic[VT], tuple):
4614
4653
  return cls(v, type=arg_to_datatype(v[0]))
4615
4654
 
4616
4655
 
4617
- class PersistStatement(BaseModel):
4656
+ class PersistStatement(HasUUID, BaseModel):
4618
4657
  datasource: Datasource
4619
4658
  select: SelectStatement
4620
4659
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1,6 +1,5 @@
1
1
  from typing import List, Tuple, Callable
2
2
 
3
-
4
3
  from trilogy.core.enums import PurposeLineage, Purpose
5
4
  from trilogy.core.models import (
6
5
  Concept,
@@ -28,14 +28,18 @@ def extract_address(node: str):
28
28
  return node.split("~")[1].split("@")[0]
29
29
 
30
30
 
31
- def get_graph_partial_nodes(g: nx.DiGraph) -> dict[str, list[str]]:
31
+ def get_graph_partial_nodes(
32
+ g: nx.DiGraph, conditions: WhereClause | None
33
+ ) -> dict[str, list[str]]:
32
34
  datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
33
35
  partial: dict[str, list[str]] = {}
34
36
  for node in g.nodes:
35
37
  if node in datasources:
36
- partial[node] = [
37
- concept_to_node(c) for c in datasources[node].partial_concepts
38
- ]
38
+ ds = datasources[node]
39
+ partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
40
+ if ds.non_partial_for and conditions == ds.non_partial_for:
41
+ partial[node] = []
42
+
39
43
  return partial
40
44
 
41
45
 
@@ -49,7 +53,10 @@ def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
49
53
 
50
54
 
51
55
  def create_pruned_concept_graph(
52
- g: nx.DiGraph, all_concepts: List[Concept], accept_partial: bool = False
56
+ g: nx.DiGraph,
57
+ all_concepts: List[Concept],
58
+ accept_partial: bool = False,
59
+ conditions: WhereClause | None = None,
53
60
  ) -> nx.DiGraph:
54
61
  orig_g = g
55
62
  g = g.copy()
@@ -66,11 +73,7 @@ def create_pruned_concept_graph(
66
73
  relevent_datasets: list[str] = []
67
74
  if not accept_partial:
68
75
  partial = {}
69
- for node in g.nodes:
70
- if node in datasources:
71
- partial[node] = [
72
- concept_to_node(c) for c in datasources[node].partial_concepts
73
- ]
76
+ partial = get_graph_partial_nodes(g, conditions)
74
77
  to_remove = []
75
78
  for edge in g.edges:
76
79
  if (
@@ -133,31 +136,53 @@ def create_pruned_concept_graph(
133
136
  return g
134
137
 
135
138
 
136
- def resolve_subgraphs(g: nx.DiGraph) -> dict[str, list[str]]:
139
+ def resolve_subgraphs(
140
+ g: nx.DiGraph, conditions: WhereClause | None
141
+ ) -> dict[str, list[str]]:
137
142
  datasources = [n for n in g.nodes if n.startswith("ds~")]
138
- subgraphs = {ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources}
139
- partial_map = get_graph_partial_nodes(g)
143
+ subgraphs: dict[str, list[str]] = {
144
+ ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
145
+ }
146
+ partial_map = get_graph_partial_nodes(g, conditions)
140
147
  grain_length = get_graph_grain_length(g)
141
- non_partial = {
142
- ds: [c for c in subgraphs[ds] if c not in partial_map[ds]] for ds in datasources
148
+ concepts: dict[str, Concept] = nx.get_node_attributes(g, "concept")
149
+ non_partial_map = {
150
+ ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
151
+ for ds in datasources
152
+ }
153
+ concept_map = {
154
+ ds: [concepts[c].address for c in subgraphs[ds]] for ds in datasources
143
155
  }
144
156
  pruned_subgraphs = {}
145
- for key, value in subgraphs.items():
157
+ for key, nodes in subgraphs.items():
158
+ value = non_partial_map[key]
159
+ all_concepts = concept_map[key]
146
160
  is_subset = False
147
161
  matches = set()
148
162
  # Compare current list with other lists
149
- for other_key, other_value in non_partial.items():
150
- if key != other_key and set(value).issubset(set(other_value)):
163
+ for other_key, other_all_concepts in concept_map.items():
164
+ other_value = non_partial_map[other_key]
165
+ # needs to be a subset of non partial and a subset of all
166
+ if (
167
+ key != other_key
168
+ and set(value).issubset(set(other_value))
169
+ and set(all_concepts).issubset(set(other_all_concepts))
170
+ ):
151
171
  if len(value) < len(other_value):
152
172
  is_subset = True
173
+ logger.debug(
174
+ f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
175
+ )
153
176
  break
154
- elif len(value) == len(other_value):
177
+ elif len(value) == len(other_value) and len(all_concepts) == len(
178
+ other_all_concepts
179
+ ):
155
180
  matches.add(other_key)
156
181
  matches.add(key)
157
182
  if matches:
158
183
  is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
159
184
  if not is_subset:
160
- pruned_subgraphs[key] = value
185
+ pruned_subgraphs[key] = nodes
161
186
  return pruned_subgraphs
162
187
 
163
188
 
@@ -261,7 +286,9 @@ def gen_select_merge_node(
261
286
  force_group=False,
262
287
  )
263
288
  for attempt in [False, True]:
264
- pruned_concept_graph = create_pruned_concept_graph(g, non_constant, attempt)
289
+ pruned_concept_graph = create_pruned_concept_graph(
290
+ g, non_constant, attempt, conditions
291
+ )
265
292
  if pruned_concept_graph:
266
293
  logger.info(
267
294
  f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
@@ -274,7 +301,7 @@ def gen_select_merge_node(
274
301
  )
275
302
  return None
276
303
 
277
- sub_nodes = resolve_subgraphs(pruned_concept_graph)
304
+ sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
278
305
 
279
306
  logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
280
307
  parents = [
@@ -215,7 +215,7 @@ class MergeNode(StrategyNode):
215
215
  logger.info(
216
216
  f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
217
217
  )
218
- joins = get_node_joins(dataset_list, grain.components, environment)
218
+ joins = get_node_joins(dataset_list, environment=environment)
219
219
  elif final_joins:
220
220
  logger.info(
221
221
  f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
@@ -314,7 +314,7 @@ class MergeNode(StrategyNode):
314
314
  full_join_concepts = []
315
315
  for join in joins:
316
316
  if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
317
- full_join_concepts += join.concepts
317
+ full_join_concepts += join.input_concepts
318
318
  if self.whole_grain:
319
319
  force_group = False
320
320
  elif self.force_group is False: