pytrilogy 0.0.2.25__py3-none-any.whl → 0.0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

trilogy/core/models.py CHANGED
@@ -73,6 +73,7 @@ from collections import UserList, UserDict
73
73
  from functools import cached_property
74
74
  from abc import ABC
75
75
  from collections import defaultdict
76
+ import hashlib
76
77
 
77
78
  LOGGER_PREFIX = "[MODELS]"
78
79
 
@@ -190,6 +191,13 @@ class ConstantInlineable(ABC):
190
191
  raise NotImplementedError
191
192
 
192
193
 
194
+ class HasUUID(ABC):
195
+
196
+ @property
197
+ def uuid(self) -> str:
198
+ return hashlib.md5(str(self).encode()).hexdigest()
199
+
200
+
193
201
  class SelectTypeMixin(BaseModel):
194
202
  where_clause: Union["WhereClause", None] = Field(default=None)
195
203
  having_clause: Union["HavingClause", None] = Field(default=None)
@@ -1606,7 +1614,7 @@ class RawSQLStatement(BaseModel):
1606
1614
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1607
1615
 
1608
1616
 
1609
- class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1617
+ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1610
1618
  selection: List[SelectItem]
1611
1619
  order_by: Optional[OrderBy] = None
1612
1620
  limit: Optional[int] = None
@@ -1711,7 +1719,7 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1711
1719
  def to_datasource(
1712
1720
  self,
1713
1721
  namespace: str,
1714
- identifier: str,
1722
+ name: str,
1715
1723
  address: Address,
1716
1724
  grain: Grain | None = None,
1717
1725
  ) -> Datasource:
@@ -1724,19 +1732,34 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1724
1732
  # if the concept is a locally derived concept, it cannot ever be partial
1725
1733
  # but if it's a concept pulled in from upstream and we have a where clause, it should be partial
1726
1734
  ColumnAssignment(
1727
- alias=c.address.replace(".", "_"),
1735
+ alias=(
1736
+ c.name.replace(".", "_")
1737
+ if c.namespace == DEFAULT_NAMESPACE
1738
+ else c.address.replace(".", "_")
1739
+ ),
1728
1740
  concept=c,
1729
1741
  modifiers=modifiers if c.address not in self.locally_derived else [],
1730
1742
  )
1731
1743
  for c in self.output_components
1732
1744
  ]
1733
1745
 
1746
+ condition = None
1747
+ if self.where_clause:
1748
+ condition = self.where_clause.conditional
1749
+ if self.having_clause:
1750
+ if condition:
1751
+ condition = self.having_clause.conditional + condition
1752
+ else:
1753
+ condition = self.having_clause.conditional
1754
+
1734
1755
  new_datasource = Datasource(
1735
- identifier=identifier,
1756
+ name=name,
1736
1757
  address=address,
1737
1758
  grain=grain or self.grain,
1738
1759
  columns=columns,
1739
1760
  namespace=namespace,
1761
+ where=WhereClause(conditional=condition) if condition else None,
1762
+ non_partial_for=WhereClause(conditional=condition) if condition else None,
1740
1763
  )
1741
1764
  for column in columns:
1742
1765
  column.concept = column.concept.with_grain(new_datasource.grain)
@@ -1859,7 +1882,7 @@ class AlignClause(Namespaced, BaseModel):
1859
1882
  return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
1860
1883
 
1861
1884
 
1862
- class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1885
+ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1863
1886
  selects: List[SelectStatement]
1864
1887
  align: AlignClause
1865
1888
  namespace: str
@@ -2021,7 +2044,7 @@ class DatasourceMetadata(BaseModel):
2021
2044
  line_no: int | None = None
2022
2045
 
2023
2046
 
2024
- class MergeStatementV2(Namespaced, BaseModel):
2047
+ class MergeStatementV2(HasUUID, Namespaced, BaseModel):
2025
2048
  source: Concept
2026
2049
  target: Concept
2027
2050
  modifiers: List[Modifier] = Field(default_factory=list)
@@ -2035,8 +2058,8 @@ class MergeStatementV2(Namespaced, BaseModel):
2035
2058
  return new
2036
2059
 
2037
2060
 
2038
- class Datasource(Namespaced, BaseModel):
2039
- identifier: str
2061
+ class Datasource(HasUUID, Namespaced, BaseModel):
2062
+ name: str
2040
2063
  columns: List[ColumnAssignment]
2041
2064
  address: Union[Address, str]
2042
2065
  grain: Grain = Field(
@@ -2047,6 +2070,7 @@ class Datasource(Namespaced, BaseModel):
2047
2070
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
2048
2071
  )
2049
2072
  where: Optional[WhereClause] = None
2073
+ non_partial_for: Optional[WhereClause] = None
2050
2074
 
2051
2075
  def merge_concept(
2052
2076
  self, source: Concept, target: Concept, modifiers: List[Modifier]
@@ -2070,10 +2094,14 @@ class Datasource(Namespaced, BaseModel):
2070
2094
  self.add_column(target, original[0].alias, modifiers)
2071
2095
 
2072
2096
  @property
2073
- def env_label(self) -> str:
2097
+ def identifier(self) -> str:
2074
2098
  if not self.namespace or self.namespace == DEFAULT_NAMESPACE:
2075
- return self.identifier
2076
- return f"{self.namespace}.{self.identifier}"
2099
+ return self.name
2100
+ return f"{self.namespace}.{self.name}"
2101
+
2102
+ @property
2103
+ def safe_identifier(self) -> str:
2104
+ return self.identifier.replace(".", "_")
2077
2105
 
2078
2106
  @property
2079
2107
  def condition(self):
@@ -2142,13 +2170,13 @@ class Datasource(Namespaced, BaseModel):
2142
2170
  return self
2143
2171
 
2144
2172
  def __repr__(self):
2145
- return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
2173
+ return f"Datasource<{self.identifier}@<{self.grain}>"
2146
2174
 
2147
2175
  def __str__(self):
2148
2176
  return self.__repr__()
2149
2177
 
2150
2178
  def __hash__(self):
2151
- return self.full_name.__hash__()
2179
+ return self.identifier.__hash__()
2152
2180
 
2153
2181
  def with_namespace(self, namespace: str):
2154
2182
  new_namespace = (
@@ -2157,7 +2185,7 @@ class Datasource(Namespaced, BaseModel):
2157
2185
  else namespace
2158
2186
  )
2159
2187
  return Datasource(
2160
- identifier=self.identifier,
2188
+ name=self.name,
2161
2189
  namespace=new_namespace,
2162
2190
  grain=self.grain.with_namespace(namespace),
2163
2191
  address=self.address,
@@ -2207,19 +2235,6 @@ class Datasource(Namespaced, BaseModel):
2207
2235
  f" {existing}."
2208
2236
  )
2209
2237
 
2210
- @property
2211
- def name(self) -> str:
2212
- return self.identifier
2213
- # TODO: namespace all references
2214
- # return f'{self.namespace}_{self.identifier}'
2215
-
2216
- @property
2217
- def full_name(self) -> str:
2218
- if not self.namespace:
2219
- return self.identifier
2220
- namespace = self.namespace.replace(".", "_") if self.namespace else ""
2221
- return f"{namespace}_{self.identifier}"
2222
-
2223
2238
  @property
2224
2239
  def safe_location(self) -> str:
2225
2240
  if isinstance(self.address, Address):
@@ -2247,6 +2262,7 @@ class InstantiatedUnnestJoin(BaseModel):
2247
2262
  class ConceptPair(BaseModel):
2248
2263
  left: Concept
2249
2264
  right: Concept
2265
+ existing_datasource: Union[Datasource, "QueryDatasource"]
2250
2266
  modifiers: List[Modifier] = Field(default_factory=list)
2251
2267
 
2252
2268
  @property
@@ -2258,17 +2274,23 @@ class ConceptPair(BaseModel):
2258
2274
  return Modifier.NULLABLE in self.modifiers
2259
2275
 
2260
2276
 
2277
+ class CTEConceptPair(ConceptPair):
2278
+ cte: CTE
2279
+
2280
+
2261
2281
  class BaseJoin(BaseModel):
2262
- left_datasource: Union[Datasource, "QueryDatasource"]
2263
2282
  right_datasource: Union[Datasource, "QueryDatasource"]
2264
- concepts: List[Concept]
2265
2283
  join_type: JoinType
2266
- filter_to_mutual: bool = False
2284
+ concepts: Optional[List[Concept]] = None
2285
+ left_datasource: Optional[Union[Datasource, "QueryDatasource"]] = None
2267
2286
  concept_pairs: list[ConceptPair] | None = None
2268
2287
 
2269
2288
  def __init__(self, **data: Any):
2270
2289
  super().__init__(**data)
2271
- if self.left_datasource.full_name == self.right_datasource.full_name:
2290
+ if (
2291
+ self.left_datasource
2292
+ and self.left_datasource.identifier == self.right_datasource.identifier
2293
+ ):
2272
2294
  raise SyntaxError(
2273
2295
  f"Cannot join a dataself to itself, joining {self.left_datasource} and"
2274
2296
  f" {self.right_datasource}"
@@ -2278,8 +2300,10 @@ class BaseJoin(BaseModel):
2278
2300
  # if we have a list of concept pairs
2279
2301
  if self.concept_pairs:
2280
2302
  return
2281
-
2282
- for concept in self.concepts:
2303
+ if self.concepts == []:
2304
+ return
2305
+ assert self.left_datasource and self.right_datasource
2306
+ for concept in self.concepts or []:
2283
2307
  include = True
2284
2308
  for ds in [self.left_datasource, self.right_datasource]:
2285
2309
  synonyms = []
@@ -2289,13 +2313,10 @@ class BaseJoin(BaseModel):
2289
2313
  concept.address not in [c.address for c in ds.output_concepts]
2290
2314
  and concept.address not in synonyms
2291
2315
  ):
2292
- if self.filter_to_mutual:
2293
- include = False
2294
- else:
2295
- raise SyntaxError(
2296
- f"Invalid join, missing {concept} on {ds.name}, have"
2297
- f" {[c.address for c in ds.output_concepts]}"
2298
- )
2316
+ raise SyntaxError(
2317
+ f"Invalid join, missing {concept} on {ds.name}, have"
2318
+ f" {[c.address for c in ds.output_concepts]}"
2319
+ )
2299
2320
  if include:
2300
2321
  final_concepts.append(concept)
2301
2322
  if not final_concepts and self.concepts:
@@ -2312,7 +2333,7 @@ class BaseJoin(BaseModel):
2312
2333
  self.concepts = []
2313
2334
  return
2314
2335
  # if everything is at abstract grain, we can skip joins
2315
- if all([c.grain == Grain() for c in ds.output_concepts]):
2336
+ if all([c.grain.abstract for c in ds.output_concepts]):
2316
2337
  self.concepts = []
2317
2338
  return
2318
2339
 
@@ -2330,21 +2351,27 @@ class BaseJoin(BaseModel):
2330
2351
 
2331
2352
  @property
2332
2353
  def unique_id(self) -> str:
2333
- # TODO: include join type?
2334
- return (
2335
- self.left_datasource.name
2336
- + self.right_datasource.name
2337
- + self.join_type.value
2338
- )
2354
+ return str(self)
2355
+
2356
+ @property
2357
+ def input_concepts(self) -> List[Concept]:
2358
+ base = []
2359
+ if self.concept_pairs:
2360
+ for pair in self.concept_pairs:
2361
+ base += [pair.left, pair.right]
2362
+ elif self.concepts:
2363
+ base += self.concepts
2364
+ return base
2339
2365
 
2340
2366
  def __str__(self):
2341
2367
  if self.concept_pairs:
2342
2368
  return (
2343
- f"{self.join_type.value} on"
2344
- f" {','.join([str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
2369
+ f"{self.join_type.value} {self.right_datasource.name} on"
2370
+ f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
2345
2371
  )
2346
2372
  return (
2347
- f"{self.join_type.value} on" f" {','.join([str(k) for k in self.concepts])}"
2373
+ f"{self.join_type.value} {self.right_datasource.name} on"
2374
+ f" {','.join([str(k) for k in self.concepts])}"
2348
2375
  )
2349
2376
 
2350
2377
 
@@ -2374,6 +2401,10 @@ class QueryDatasource(BaseModel):
2374
2401
  def __repr__(self):
2375
2402
  return f"{self.identifier}@<{self.grain}>"
2376
2403
 
2404
+ @property
2405
+ def safe_identifier(self):
2406
+ return self.identifier.replace(".", "_")
2407
+
2377
2408
  @property
2378
2409
  def non_partial_concept_addresses(self) -> List[str]:
2379
2410
  return [
@@ -2389,19 +2420,9 @@ class QueryDatasource(BaseModel):
2389
2420
  for join in v:
2390
2421
  if not isinstance(join, BaseJoin):
2391
2422
  continue
2392
- if join.left_datasource.identifier == join.right_datasource.identifier:
2393
- raise SyntaxError(
2394
- f"Cannot join a datasource to itself, joining {join.left_datasource}"
2395
- )
2396
- pairing = "".join(
2397
- sorted(
2398
- [join.left_datasource.identifier, join.right_datasource.identifier]
2399
- )
2400
- )
2423
+ pairing = str(join)
2401
2424
  if pairing in unique_pairs:
2402
- raise SyntaxError(
2403
- f"Duplicate join {join.left_datasource.identifier} and {join.right_datasource.identifier}"
2404
- )
2425
+ raise SyntaxError(f"Duplicate join {str(join)}")
2405
2426
  unique_pairs.add(pairing)
2406
2427
  return v
2407
2428
 
@@ -2448,10 +2469,6 @@ class QueryDatasource(BaseModel):
2448
2469
  def name(self):
2449
2470
  return self.identifier
2450
2471
 
2451
- @property
2452
- def full_name(self):
2453
- return self.identifier
2454
-
2455
2472
  @property
2456
2473
  def group_required(self) -> bool:
2457
2474
  if self.force_group is True:
@@ -2498,10 +2515,12 @@ class QueryDatasource(BaseModel):
2498
2515
  merged_datasources = {}
2499
2516
 
2500
2517
  for ds in [*self.datasources, *other.datasources]:
2501
- if ds.full_name in merged_datasources:
2502
- merged_datasources[ds.full_name] = merged_datasources[ds.full_name] + ds
2518
+ if ds.safe_identifier in merged_datasources:
2519
+ merged_datasources[ds.safe_identifier] = (
2520
+ merged_datasources[ds.safe_identifier] + ds
2521
+ )
2503
2522
  else:
2504
- merged_datasources[ds.full_name] = ds
2523
+ merged_datasources[ds.safe_identifier] = ds
2505
2524
 
2506
2525
  final_source_map = defaultdict(set)
2507
2526
  for key in self.source_map:
@@ -2512,7 +2531,9 @@ class QueryDatasource(BaseModel):
2512
2531
  if key not in final_source_map:
2513
2532
  final_source_map[key] = other.source_map[key]
2514
2533
  for k, v in final_source_map.items():
2515
- final_source_map[k] = set(merged_datasources[x.full_name] for x in list(v))
2534
+ final_source_map[k] = set(
2535
+ merged_datasources[x.safe_identifier] for x in list(v)
2536
+ )
2516
2537
  self_hidden = self.hidden_concepts or []
2517
2538
  other_hidden = other.hidden_concepts or []
2518
2539
  hidden = [x for x in self_hidden if x.address in other_hidden]
@@ -2552,7 +2573,7 @@ class QueryDatasource(BaseModel):
2552
2573
  )
2553
2574
  # partial = "_".join([str(c.address).replace(".", "_") for c in self.partial_concepts])
2554
2575
  return (
2555
- "_join_".join([d.full_name for d in self.datasources])
2576
+ "_join_".join([d.identifier for d in self.datasources])
2556
2577
  + (f"_at_{grain}" if grain else "_at_abstract")
2557
2578
  + (f"_filtered_by_{filters}" if filters else "")
2558
2579
  # + (f"_partial_{partial}" if partial else "")
@@ -2568,8 +2589,9 @@ class QueryDatasource(BaseModel):
2568
2589
  for x in self.datasources:
2569
2590
  # query datasources should be referenced by their alias, always
2570
2591
  force_alias = isinstance(x, QueryDatasource)
2592
+ #
2571
2593
  use_raw_name = isinstance(x, Datasource) and not force_alias
2572
- if source and x.identifier != source:
2594
+ if source and x.safe_identifier != source:
2573
2595
  continue
2574
2596
  try:
2575
2597
  return x.get_alias(
@@ -2623,6 +2645,14 @@ class CTE(BaseModel):
2623
2645
  base_name_override: Optional[str] = None
2624
2646
  base_alias_override: Optional[str] = None
2625
2647
 
2648
+ @property
2649
+ def identifier(self):
2650
+ return self.name
2651
+
2652
+ @property
2653
+ def safe_identifier(self):
2654
+ return self.name
2655
+
2626
2656
  @computed_field # type: ignore
2627
2657
  @property
2628
2658
  def output_lcl(self) -> LooseConceptList:
@@ -2666,7 +2696,12 @@ class CTE(BaseModel):
2666
2696
  isinstance(join, Join)
2667
2697
  and (
2668
2698
  join.right_cte.name != removed_cte
2669
- and join.left_cte.name != removed_cte
2699
+ and any(
2700
+ [
2701
+ x.cte.name != removed_cte
2702
+ for x in (join.joinkey_pairs or [])
2703
+ ]
2704
+ )
2670
2705
  )
2671
2706
  )
2672
2707
  ]
@@ -2715,7 +2750,7 @@ class CTE(BaseModel):
2715
2750
  return False
2716
2751
  if any(
2717
2752
  [
2718
- x.identifier == ds_being_inlined.identifier
2753
+ x.safe_identifier == ds_being_inlined.safe_identifier
2719
2754
  for x in self.source.datasources
2720
2755
  ]
2721
2756
  ):
@@ -2726,35 +2761,49 @@ class CTE(BaseModel):
2726
2761
  *[
2727
2762
  x
2728
2763
  for x in self.source.datasources
2729
- if x.identifier != qds_being_inlined.identifier
2764
+ if x.safe_identifier != qds_being_inlined.safe_identifier
2730
2765
  ],
2731
2766
  ]
2732
2767
  # need to identify this before updating joins
2733
2768
  if self.base_name == parent.name:
2734
2769
  self.base_name_override = ds_being_inlined.safe_location
2735
- self.base_alias_override = ds_being_inlined.identifier
2770
+ self.base_alias_override = ds_being_inlined.safe_identifier
2736
2771
 
2737
2772
  for join in self.joins:
2738
2773
  if isinstance(join, InstantiatedUnnestJoin):
2739
2774
  continue
2740
- if join.left_cte.name == parent.name:
2775
+ if (
2776
+ join.left_cte
2777
+ and join.left_cte.safe_identifier == parent.safe_identifier
2778
+ ):
2741
2779
  join.inline_cte(parent)
2742
- if join.right_cte.name == parent.name:
2780
+ if join.joinkey_pairs:
2781
+ for pair in join.joinkey_pairs:
2782
+ if pair.cte and pair.cte.safe_identifier == parent.safe_identifier:
2783
+ join.inline_cte(parent)
2784
+ if join.right_cte.safe_identifier == parent.safe_identifier:
2743
2785
  join.inline_cte(parent)
2744
2786
  for k, v in self.source_map.items():
2745
2787
  if isinstance(v, list):
2746
2788
  self.source_map[k] = [
2747
- ds_being_inlined.name if x == parent.name else x for x in v
2789
+ (
2790
+ ds_being_inlined.safe_identifier
2791
+ if x == parent.safe_identifier
2792
+ else x
2793
+ )
2794
+ for x in v
2748
2795
  ]
2749
- elif v == parent.name:
2750
- self.source_map[k] = [ds_being_inlined.name]
2796
+ elif v == parent.safe_identifier:
2797
+ self.source_map[k] = [ds_being_inlined.safe_identifier]
2751
2798
 
2752
2799
  # zip in any required values for lookups
2753
2800
  for k in ds_being_inlined.output_lcl.addresses:
2754
2801
  if k in self.source_map and self.source_map[k]:
2755
2802
  continue
2756
- self.source_map[k] = [ds_being_inlined.name]
2757
- self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2803
+ self.source_map[k] = [ds_being_inlined.safe_identifier]
2804
+ self.parent_ctes = [
2805
+ x for x in self.parent_ctes if x.safe_identifier != parent.safe_identifier
2806
+ ]
2758
2807
  if force_group:
2759
2808
  self.group_to_grain = True
2760
2809
  return True
@@ -2961,55 +3010,50 @@ class JoinKey(BaseModel):
2961
3010
 
2962
3011
 
2963
3012
  class Join(BaseModel):
2964
- left_cte: CTE
3013
+
2965
3014
  right_cte: CTE
2966
3015
  jointype: JoinType
2967
- joinkeys: List[JoinKey]
2968
- joinkey_pairs: List[ConceptPair] | None = None
3016
+ left_cte: CTE | None = None
3017
+ joinkey_pairs: List[CTEConceptPair] | None = None
2969
3018
  inlined_ctes: set[str] = Field(default_factory=set)
2970
3019
 
2971
3020
  def inline_cte(self, cte: CTE):
2972
3021
  self.inlined_ctes.add(cte.name)
2973
3022
 
2974
- @property
2975
- def left_name(self) -> str:
2976
- if self.left_cte.name in self.inlined_ctes:
2977
- return self.left_cte.source.datasources[0].identifier
2978
- return self.left_cte.name
3023
+ def get_name(self, cte: CTE):
3024
+ if cte.identifier in self.inlined_ctes:
3025
+ return cte.source.datasources[0].safe_identifier
3026
+ return cte.safe_identifier
2979
3027
 
2980
3028
  @property
2981
3029
  def right_name(self) -> str:
2982
- if self.right_cte.name in self.inlined_ctes:
2983
- return self.right_cte.source.datasources[0].identifier
2984
- return self.right_cte.name
2985
-
2986
- @property
2987
- def left_ref(self) -> str:
2988
- if self.left_cte.name in self.inlined_ctes:
2989
- return f"{self.left_cte.source.datasources[0].safe_location} as {self.left_cte.source.datasources[0].identifier}"
2990
- return self.left_cte.name
3030
+ if self.right_cte.identifier in self.inlined_ctes:
3031
+ return self.right_cte.source.datasources[0].safe_identifier
3032
+ return self.right_cte.safe_identifier
2991
3033
 
2992
3034
  @property
2993
3035
  def right_ref(self) -> str:
2994
- if self.right_cte.name in self.inlined_ctes:
2995
- return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].identifier}"
2996
- return self.right_cte.name
3036
+ if self.right_cte.identifier in self.inlined_ctes:
3037
+ return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
3038
+ return self.right_cte.safe_identifier
2997
3039
 
2998
3040
  @property
2999
3041
  def unique_id(self) -> str:
3000
- return self.left_name + self.right_name + self.jointype.value
3042
+ return str(self)
3001
3043
 
3002
3044
  def __str__(self):
3003
3045
  if self.joinkey_pairs:
3004
3046
  return (
3005
- f"{self.jointype.value} JOIN {self.left_name} and"
3047
+ f"{self.jointype.value} join"
3006
3048
  f" {self.right_name} on"
3007
- f" {','.join([str(k.left)+'='+str(k.right)+str(k.modifiers) for k in self.joinkey_pairs])}"
3049
+ f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
3008
3050
  )
3009
- return (
3010
- f"{self.jointype.value} JOIN {self.left_name} and"
3011
- f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
3012
- )
3051
+ elif self.left_cte:
3052
+ return (
3053
+ f"{self.jointype.value} JOIN {self.left_cte.name} and"
3054
+ f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
3055
+ )
3056
+ return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
3013
3057
 
3014
3058
 
3015
3059
  class UndefinedConcept(Concept, Mergeable, Namespaced):
@@ -3227,7 +3271,7 @@ class EnvironmentConceptDict(dict):
3227
3271
  return super().items()
3228
3272
 
3229
3273
 
3230
- class ImportStatement(BaseModel):
3274
+ class ImportStatement(HasUUID, BaseModel):
3231
3275
  alias: str
3232
3276
  path: Path
3233
3277
  environment: Union["Environment", None] = None
@@ -3270,7 +3314,9 @@ class Environment(BaseModel):
3270
3314
  ] = Field(default_factory=EnvironmentDatasourceDict)
3271
3315
  functions: Dict[str, Function] = Field(default_factory=dict)
3272
3316
  data_types: Dict[str, DataType] = Field(default_factory=dict)
3273
- imports: Dict[str, ImportStatement] = Field(default_factory=dict)
3317
+ imports: Dict[str, list[ImportStatement]] = Field(
3318
+ default_factory=lambda: defaultdict(list)
3319
+ )
3274
3320
  namespace: str = DEFAULT_NAMESPACE
3275
3321
  working_path: str | Path = Field(default_factory=lambda: os.getcwd())
3276
3322
  environment_config: EnvironmentOptions = Field(default_factory=EnvironmentOptions)
@@ -3384,14 +3430,28 @@ class Environment(BaseModel):
3384
3430
  f"Assignment to concept '{lookup}' is a duplicate declaration;"
3385
3431
  )
3386
3432
 
3387
- def add_import(self, alias: str, environment: Environment):
3388
- self.imports[alias] = ImportStatement(
3389
- alias=alias, path=Path(environment.working_path)
3390
- )
3391
- for key, concept in environment.concepts.items():
3392
- self.concepts[f"{alias}.{key}"] = concept.with_namespace(alias)
3393
- for key, datasource in environment.datasources.items():
3394
- self.datasources[f"{alias}.{key}"] = datasource.with_namespace(alias)
3433
+ def add_import(
3434
+ self, alias: str, source: Environment, imp_stm: ImportStatement | None = None
3435
+ ):
3436
+ exists = False
3437
+ existing = self.imports[alias]
3438
+ if imp_stm:
3439
+ if any([x.path == imp_stm.path for x in existing]):
3440
+ exists = True
3441
+
3442
+ else:
3443
+ if any([x.path == source.working_path for x in existing]):
3444
+ exists = True
3445
+ imp_stm = ImportStatement(alias=alias, path=Path(source.working_path))
3446
+
3447
+ if not exists:
3448
+ self.imports[alias].append(imp_stm)
3449
+
3450
+ for _, concept in source.concepts.items():
3451
+ self.add_concept(concept.with_namespace(alias), _ignore_cache=True)
3452
+
3453
+ for _, datasource in source.datasources.items():
3454
+ self.add_datasource(datasource.with_namespace(alias), _ignore_cache=True)
3395
3455
  self.gen_concept_list_caches()
3396
3456
  return self
3397
3457
 
@@ -3402,18 +3462,15 @@ class Environment(BaseModel):
3402
3462
  apath[-1] = apath[-1] + ".preql"
3403
3463
 
3404
3464
  target: Path = Path(self.working_path, *apath)
3465
+ if alias in self.imports:
3466
+ imports = self.imports[alias]
3467
+ for x in imports:
3468
+ if x.path == target:
3469
+ return imports
3405
3470
  if env:
3406
- self.imports[alias] = ImportStatement(
3407
- alias=alias, path=target, environment=env
3471
+ self.imports[alias].append(
3472
+ ImportStatement(alias=alias, path=target, environment=env)
3408
3473
  )
3409
-
3410
- elif alias in self.imports:
3411
- current = self.imports[alias]
3412
- env = self.imports[alias].environment
3413
- if current.path != target:
3414
- raise ImportError(
3415
- f"Attempted to import {target} with alias {alias} but {alias} is already imported from {current.path}"
3416
- )
3417
3474
  else:
3418
3475
  try:
3419
3476
  with open(target, "r", encoding="utf-8") as f:
@@ -3432,14 +3489,13 @@ class Environment(BaseModel):
3432
3489
  f"Unable to import file {target.parent}, parsing error: {e}"
3433
3490
  )
3434
3491
  env = nparser.environment
3435
- if env:
3436
- for _, concept in env.concepts.items():
3437
- self.add_concept(concept.with_namespace(alias))
3492
+ for _, concept in env.concepts.items():
3493
+ self.add_concept(concept.with_namespace(alias))
3438
3494
 
3439
- for _, datasource in env.datasources.items():
3440
- self.add_datasource(datasource.with_namespace(alias))
3495
+ for _, datasource in env.datasources.items():
3496
+ self.add_datasource(datasource.with_namespace(alias))
3441
3497
  imps = ImportStatement(alias=alias, path=target, environment=env)
3442
- self.imports[alias] = imps
3498
+ self.imports[alias].append(imps)
3443
3499
  return imps
3444
3500
 
3445
3501
  def parse(
@@ -3502,8 +3558,14 @@ class Environment(BaseModel):
3502
3558
  meta: Meta | None = None,
3503
3559
  _ignore_cache: bool = False,
3504
3560
  ):
3505
- self.datasources[datasource.env_label] = datasource
3561
+ self.datasources[datasource.identifier] = datasource
3562
+
3563
+ eligible_to_promote_roots = datasource.non_partial_for is None
3564
+ # mark this as canonical source
3506
3565
  for current_concept in datasource.output_concepts:
3566
+ if not eligible_to_promote_roots:
3567
+ continue
3568
+
3507
3569
  current_derivation = current_concept.derivation
3508
3570
  # TODO: refine this section;
3509
3571
  # too hacky for maintainability
@@ -4223,6 +4285,9 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
4223
4285
  class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4224
4286
  conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
4225
4287
 
4288
+ def __repr__(self):
4289
+ return str(self.conditional)
4290
+
4226
4291
  @property
4227
4292
  def input(self) -> List[Concept]:
4228
4293
  return self.conditional.input
@@ -4341,7 +4406,7 @@ class Limit(BaseModel):
4341
4406
  count: int
4342
4407
 
4343
4408
 
4344
- class ConceptDeclarationStatement(BaseModel):
4409
+ class ConceptDeclarationStatement(HasUUID, BaseModel):
4345
4410
  concept: Concept
4346
4411
 
4347
4412
 
@@ -4349,7 +4414,7 @@ class ConceptDerivation(BaseModel):
4349
4414
  concept: Concept
4350
4415
 
4351
4416
 
4352
- class RowsetDerivationStatement(Namespaced, BaseModel):
4417
+ class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
4353
4418
  name: str
4354
4419
  select: SelectStatement | MultiSelectStatement
4355
4420
  namespace: str
@@ -4614,7 +4679,7 @@ class TupleWrapper(Generic[VT], tuple):
4614
4679
  return cls(v, type=arg_to_datatype(v[0]))
4615
4680
 
4616
4681
 
4617
- class PersistStatement(BaseModel):
4682
+ class PersistStatement(HasUUID, BaseModel):
4618
4683
  datasource: Datasource
4619
4684
  select: SelectStatement
4620
4685
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())