pytrilogy-0.3.149-cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
trilogy/execution/state/state_store.py
@@ -0,0 +1,406 @@
+from dataclasses import dataclass, field
+from datetime import date
+from typing import Callable
+
+from trilogy import Executor
+from trilogy.core.enums import Purpose
+from trilogy.core.models.build import Factory
+from trilogy.core.models.datasource import (
+    Address,
+    ColumnAssignment,
+    Datasource,
+    RawColumnExpr,
+    UpdateKey,
+    UpdateKeys,
+    UpdateKeyType,
+)
+from trilogy.core.models.environment import Environment
+from trilogy.core.models.execute import CTE
+from trilogy.execution.state.exceptions import is_missing_source_error
+
+
+@dataclass
+class DatasourceWatermark:
+    keys: dict[str, UpdateKey]
+
+
+@dataclass
+class StaleAsset:
+    """Represents an asset that needs to be refreshed."""
+
+    datasource_id: str
+    reason: str
+    filters: UpdateKeys = field(default_factory=UpdateKeys)
+
+
+def _compare_watermark_values(
+    a: str | int | float | date, b: str | int | float | date
+) -> int:
+    """Compare two watermark values, returning -1, 0, or 1.
+
+    Handles type mismatches by comparing string representations.
+    """
+    if type(a) is type(b):
+        if a < b:  # type: ignore[operator]
+            return -1
+        elif a > b:  # type: ignore[operator]
+            return 1
+        return 0
+    # Different types: compare as strings
+    sa, sb = str(a), str(b)
+    if sa < sb:
+        return -1
+    elif sa > sb:
+        return 1
+    return 0
+
+
+def get_last_update_time_watermarks(
+    datasource: Datasource, executor: Executor
+) -> DatasourceWatermark:
+    update_time = executor.generator.get_table_last_modified(
+        executor, datasource.safe_address
+    )
+    return DatasourceWatermark(
+        keys={
+            "update_time": UpdateKey(
+                concept_name="update_time",
+                type=UpdateKeyType.UPDATE_TIME,
+                value=update_time,
+            )
+        }
+    )
+
+
+def get_unique_key_hash_watermarks(
+    datasource: Datasource, executor: Executor
+) -> DatasourceWatermark:
+    key_columns: list[ColumnAssignment] = []
+    for col_assignment in datasource.columns:
+        concrete = executor.environment.concepts[col_assignment.concept.address]
+        if concrete.purpose == Purpose.KEY:
+            key_columns.append(col_assignment)
+
+    if not key_columns:
+        return DatasourceWatermark(keys={})
+
+    if isinstance(datasource.address, Address):
+        table_ref = executor.generator.render_source(datasource.address)
+    else:
+        table_ref = datasource.safe_address
+
+    dialect = executor.generator
+    watermarks = {}
+    for col in key_columns:
+        if isinstance(col.alias, str):
+            column_name = col.alias
+        elif isinstance(col.alias, RawColumnExpr):
+            column_name = col.alias.text
+        else:
+            # Function - use rendered expression
+            column_name = str(col.alias)
+        hash_expr = dialect.hash_column_value(column_name)
+        checksum_expr = dialect.aggregate_checksum(hash_expr)
+        query = f"SELECT {checksum_expr} as checksum FROM {table_ref}"
+
+        try:
+            result = executor.execute_raw_sql(query).fetchone()
+            checksum_value = result[0] if result else None
+        except Exception as e:
+            if is_missing_source_error(e, dialect):
+                checksum_value = None
+                executor.connection.rollback()
+            else:
+                raise
+
+        watermarks[col.concept.address] = UpdateKey(
+            concept_name=col.concept.address,
+            type=UpdateKeyType.KEY_HASH,
+            value=checksum_value,
+        )
+
+    return DatasourceWatermark(keys=watermarks)
+
+
+def get_incremental_key_watermarks(
+    datasource: Datasource, executor: Executor
+) -> DatasourceWatermark:
+    if not datasource.incremental_by:
+        return DatasourceWatermark(keys={})
+
+    if isinstance(datasource.address, Address):
+        table_ref = executor.generator.render_source(datasource.address)
+    else:
+        table_ref = datasource.safe_address
+
+    watermarks = {}
+    factory = Factory(environment=executor.environment)
+
+    dialect = executor.generator
+    for concept_ref in datasource.incremental_by:
+        concept = executor.environment.concepts[concept_ref.address]
+        build_concept = factory.build(concept)
+        build_datasource = factory.build(datasource)
+        cte: CTE = CTE.from_datasource(build_datasource)
+        # Check if concept is in output_concepts by comparing addresses
+        output_addresses = {c.address for c in datasource.output_concepts}
+        if concept.address in output_addresses:
+            query = f"SELECT MAX({dialect.render_concept_sql(build_concept, cte=cte, alias=False)}) as max_value FROM {table_ref} as {dialect.quote(cte.base_alias)}"
+        else:
+            query = f"SELECT MAX({dialect.render_expr(build_concept.lineage, cte=cte)}) as max_value FROM {table_ref} as {dialect.quote(cte.base_alias)}"
+
+        try:
+            result = executor.execute_raw_sql(query).fetchone()
+            max_value = result[0] if result else None
+        except Exception as e:
+            if is_missing_source_error(e, dialect):
+                max_value = None
+                executor.connection.rollback()
+            else:
+                raise
+
+        watermarks[concept.name] = UpdateKey(
+            concept_name=concept.name,
+            type=UpdateKeyType.INCREMENTAL_KEY,
+            value=max_value,
+        )
+
+    return DatasourceWatermark(keys=watermarks)
+
+
+def get_freshness_watermarks(
+    datasource: Datasource, executor: Executor
+) -> DatasourceWatermark:
+    if not datasource.freshness_by:
+        return DatasourceWatermark(keys={})
+
+    if isinstance(datasource.address, Address):
+        table_ref = executor.generator.render_source(datasource.address)
+    else:
+        table_ref = datasource.safe_address
+
+    watermarks = {}
+    factory = Factory(environment=executor.environment)
+
+    dialect = executor.generator
+    for concept_ref in datasource.freshness_by:
+        concept = executor.environment.concepts[concept_ref.address]
+        build_concept = factory.build(concept)
+        build_datasource = factory.build(datasource)
+        cte: CTE = CTE.from_datasource(build_datasource)
+        output_addresses = {c.address for c in datasource.output_concepts}
+        if concept.address in output_addresses:
+            query = f"SELECT MAX({dialect.render_concept_sql(build_concept, cte=cte, alias=False)}) as max_value FROM {table_ref} as {dialect.quote(cte.base_alias)}"
+        else:
+            query = f"SELECT MAX({dialect.render_expr(build_concept.lineage, cte=cte)}) as max_value FROM {table_ref} as {dialect.quote(cte.base_alias)}"
+
+        try:
+            result = executor.execute_raw_sql(query).fetchone()
+            max_value = result[0] if result else None
+        except Exception as e:
+            if is_missing_source_error(e, dialect):
+                max_value = None
+                executor.connection.rollback()
+            else:
+                raise
+
+        watermarks[concept.name] = UpdateKey(
+            concept_name=concept.name,
+            type=UpdateKeyType.UPDATE_TIME,
+            value=max_value,
+        )
+
+    return DatasourceWatermark(keys=watermarks)
+
+
+class BaseStateStore:
+
+    def __init__(self) -> None:
+        self.watermarks: dict[str, DatasourceWatermark] = {}
+
+    def watermark_asset(
+        self, datasource: Datasource, executor: Executor
+    ) -> DatasourceWatermark:
+        if datasource.freshness_by:
+            watermarks = get_freshness_watermarks(datasource, executor)
+        elif datasource.incremental_by:
+            watermarks = get_incremental_key_watermarks(datasource, executor)
+        else:
+            key_columns = [
+                col
+                for col in datasource.columns
+                if executor.environment.concepts[col.concept.address].purpose
+                == Purpose.KEY
+            ]
+            if key_columns:
+                watermarks = get_unique_key_hash_watermarks(datasource, executor)
+            else:
+                watermarks = get_last_update_time_watermarks(datasource, executor)
+
+        self.watermarks[datasource.identifier] = watermarks
+        return watermarks
+
+    def get_datasource_watermarks(
+        self, datasource: Datasource
+    ) -> DatasourceWatermark | None:
+        return self.watermarks.get(datasource.identifier)
+
+    def check_datasource_state(self, datasource: Datasource) -> bool:
+        return datasource.identifier in self.watermarks
+
+    def watermark_all_assets(
+        self, env: Environment, executor: Executor
+    ) -> dict[str, DatasourceWatermark]:
+        """Watermark all datasources in the environment."""
+        for ds in env.datasources.values():
+            self.watermark_asset(ds, executor)
+        return self.watermarks
+
+    def get_stale_assets(
+        self,
+        env: Environment,
+        executor: Executor,
+        root_assets: set[str] | None = None,
+    ) -> list[StaleAsset]:
+        """Find all assets that are stale and need refresh.
+
+        Args:
+            env: The environment containing datasources
+            executor: Executor for querying current state
+            root_assets: Optional set of datasource identifiers that are "source of truth"
+                and should not be marked stale. If None, uses datasources marked
+                with is_root=True in the model.
+
+        Returns:
+            List of StaleAsset objects describing what needs refresh and why.
+        """
+        if root_assets is None:
+            root_assets = {
+                ds.identifier for ds in env.datasources.values() if ds.is_root
+            }
+        stale: list[StaleAsset] = []
+
+        # First pass: watermark all assets to get current state
+        self.watermark_all_assets(env, executor)
+
+        # Build map of concept -> max watermark across root assets
+        concept_max_watermarks: dict[str, UpdateKey] = {}
+        for ds_id, watermark in self.watermarks.items():
+            if ds_id in root_assets:
+                for key, val in watermark.keys.items():
+                    if (
+                        val.type
+                        in (UpdateKeyType.INCREMENTAL_KEY, UpdateKeyType.UPDATE_TIME)
+                        and val.value is not None
+                    ):
+                        existing = concept_max_watermarks.get(key)
+                        if existing is None or (
+                            existing.value is not None
+                            and _compare_watermark_values(val.value, existing.value) > 0
+                        ):
+                            concept_max_watermarks[key] = val
+
+        # Second pass: check non-root assets against max watermarks
+        for ds_id, watermark in self.watermarks.items():
+            if ds_id in root_assets:
+                continue
+
+            for key, val in watermark.keys.items():
+                if val.type == UpdateKeyType.INCREMENTAL_KEY:
+                    max_val = concept_max_watermarks.get(key)
+                    if max_val and max_val.value is not None:
+                        if (
+                            val.value is None
+                            or _compare_watermark_values(val.value, max_val.value) < 0
+                        ):
+                            filters = (
+                                UpdateKeys(keys={key: val})
+                                if val.value
+                                else UpdateKeys()
+                            )
+                            stale.append(
+                                StaleAsset(
+                                    datasource_id=ds_id,
+                                    reason=f"incremental key '{key}' behind: {val.value} < {max_val.value}",
+                                    filters=filters,
+                                )
+                            )
+                            break
+
+                elif val.type == UpdateKeyType.UPDATE_TIME:
+                    max_val = concept_max_watermarks.get(key)
+                    if max_val and max_val.value is not None:
+                        if (
+                            val.value is None
+                            or _compare_watermark_values(val.value, max_val.value) < 0
+                        ):
+                            stale.append(
+                                StaleAsset(
+                                    datasource_id=ds_id,
+                                    reason=f"freshness '{key}' behind: {val.value} < {max_val.value}",
+                                    filters=UpdateKeys(),
+                                )
+                            )
+                            break
+
+                elif val.type == UpdateKeyType.KEY_HASH:
+                    pass
+
+        return stale
+
+
+@dataclass
+class RefreshResult:
+    """Result of refreshing stale assets."""
+
+    stale_count: int
+    refreshed_count: int
+    root_assets: int
+    all_assets: int
+
+    @property
+    def had_stale(self) -> bool:
+        return self.stale_count > 0
+
+
+def refresh_stale_assets(
+    executor: "Executor",
+    on_stale_found: Callable[[int, int, int], None] | None = None,
+    on_refresh: Callable[[str, str], None] | None = None,
+    on_watermarks: Callable[[dict[str, DatasourceWatermark]], None] | None = None,
+) -> RefreshResult:
+    """Find and refresh stale assets.
+
+    Args:
+        executor: The executor with parsed environment
+        on_stale_found: Optional callback(stale_count, root_assets, all_assets)
+        on_refresh: Optional callback(asset_id, reason) called before each refresh
+        on_watermarks: Optional callback(watermarks_dict) called after collecting watermarks
+    """
+    state_store = BaseStateStore()
+    stale_assets = state_store.get_stale_assets(executor.environment, executor)
+
+    if on_watermarks:
+        on_watermarks(state_store.watermarks)
+    root_assets = sum(
+        1 for asset in executor.environment.datasources.values() if asset.is_root
+    )
+    all_assets = len(executor.environment.datasources)
+
+    if on_stale_found:
+        on_stale_found(len(stale_assets), root_assets, all_assets)
+
+    refreshed = 0
+    for asset in stale_assets:
+        if on_refresh:
+            on_refresh(asset.datasource_id, asset.reason)
+        datasource = executor.environment.datasources[asset.datasource_id]
+        executor.update_datasource(datasource)
+        refreshed += 1
+
+    return RefreshResult(
+        stale_count=len(stale_assets),
+        refreshed_count=refreshed,
+        root_assets=root_assets,
+        all_assets=all_assets,
+    )
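
For orientation, the sketch below shows one way the new refresh entry point added in this file might be called. It is not part of the packaged wheel: it assumes the diff above is the new trilogy/execution/state/state_store.py module, and that `executor` is an already-configured Trilogy Executor whose parsed model marks source-of-truth datasources with is_root. The callback names (report_stale, report_refresh) are illustrative only.

# Illustrative usage sketch (not part of the packaged wheel).
# Assumes `executor` is a pre-built trilogy Executor with a parsed environment.
from trilogy.execution.state.state_store import refresh_stale_assets


def report_stale(stale_count: int, root_assets: int, all_assets: int) -> None:
    # Called once after staleness detection with summary counts.
    print(f"{stale_count} stale of {all_assets} datasources ({root_assets} roots)")


def report_refresh(asset_id: str, reason: str) -> None:
    # Called before each individual datasource refresh.
    print(f"refreshing {asset_id}: {reason}")


result = refresh_stale_assets(
    executor,
    on_stale_found=report_stale,
    on_refresh=report_refresh,
)
if result.had_stale:
    print(f"refreshed {result.refreshed_count}/{result.stale_count} stale assets")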