fabricks 3.0.18__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. fabricks/api/context.py +15 -3
  2. fabricks/api/notebooks/schedule.py +2 -3
  3. fabricks/api/parsers.py +2 -1
  4. fabricks/api/utils.py +3 -1
  5. fabricks/cdc/__init__.py +1 -2
  6. fabricks/cdc/base/__init__.py +1 -2
  7. fabricks/cdc/base/_types.py +5 -3
  8. fabricks/cdc/base/configurator.py +5 -0
  9. fabricks/cdc/base/generator.py +7 -3
  10. fabricks/cdc/base/merger.py +2 -0
  11. fabricks/cdc/base/processor.py +15 -0
  12. fabricks/cdc/templates/README.md +490 -0
  13. fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
  14. fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
  15. fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
  16. fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
  17. fabricks/cdc/templates/queries/context.sql.jinja +104 -96
  18. fabricks/cdc/templates/query.sql.jinja +1 -1
  19. fabricks/context/__init__.py +13 -1
  20. fabricks/context/config.py +13 -122
  21. fabricks/context/log.py +92 -1
  22. fabricks/context/runtime.py +35 -69
  23. fabricks/context/spark_session.py +8 -7
  24. fabricks/context/utils.py +26 -39
  25. fabricks/core/__init__.py +2 -2
  26. fabricks/core/dags/base.py +5 -5
  27. fabricks/core/dags/processor.py +2 -3
  28. fabricks/core/extenders.py +1 -1
  29. fabricks/core/job_schema.py +26 -16
  30. fabricks/core/jobs/__init__.py +1 -7
  31. fabricks/core/jobs/base/README.md +1545 -0
  32. fabricks/core/jobs/base/__init__.py +1 -8
  33. fabricks/core/jobs/base/checker.py +7 -7
  34. fabricks/core/jobs/base/configurator.py +142 -63
  35. fabricks/core/jobs/base/generator.py +38 -34
  36. fabricks/core/jobs/base/invoker.py +48 -63
  37. fabricks/core/jobs/base/processor.py +13 -28
  38. fabricks/core/jobs/bronze.py +88 -38
  39. fabricks/core/jobs/get_job.py +3 -6
  40. fabricks/core/jobs/get_job_conf.py +19 -68
  41. fabricks/core/jobs/get_jobs.py +10 -11
  42. fabricks/core/jobs/get_schedules.py +3 -17
  43. fabricks/core/jobs/gold.py +96 -43
  44. fabricks/core/jobs/silver.py +42 -22
  45. fabricks/core/masks.py +11 -8
  46. fabricks/core/parsers/__init__.py +0 -2
  47. fabricks/core/parsers/base.py +10 -10
  48. fabricks/core/parsers/decorator.py +1 -1
  49. fabricks/core/parsers/get_parser.py +4 -5
  50. fabricks/core/schedules/process.py +1 -4
  51. fabricks/core/steps/base.py +27 -17
  52. fabricks/core/steps/get_step.py +2 -4
  53. fabricks/core/steps/get_step_conf.py +3 -7
  54. fabricks/core/udfs.py +9 -8
  55. fabricks/core/views.py +2 -2
  56. fabricks/deploy/__init__.py +27 -16
  57. fabricks/deploy/masks.py +1 -1
  58. fabricks/deploy/notebooks.py +19 -16
  59. fabricks/deploy/schedules.py +1 -1
  60. fabricks/deploy/tables.py +66 -49
  61. fabricks/deploy/udfs.py +2 -2
  62. fabricks/deploy/views.py +15 -16
  63. fabricks/metastore/database.py +3 -3
  64. fabricks/metastore/table.py +103 -68
  65. fabricks/models/__init__.py +125 -0
  66. fabricks/models/common.py +79 -0
  67. fabricks/models/config.py +225 -0
  68. fabricks/models/dependency.py +50 -0
  69. fabricks/models/job.py +157 -0
  70. fabricks/models/path.py +17 -0
  71. fabricks/models/runtime.py +182 -0
  72. fabricks/models/schedule.py +21 -0
  73. fabricks/models/step.py +103 -0
  74. fabricks/models/table.py +77 -0
  75. fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
  76. fabricks/utils/helpers.py +6 -5
  77. fabricks/utils/log.py +25 -6
  78. fabricks/utils/path.py +269 -102
  79. fabricks/utils/pip.py +7 -7
  80. fabricks/utils/read/read.py +23 -22
  81. fabricks/utils/read/read_yaml.py +2 -2
  82. fabricks/utils/write/delta.py +4 -4
  83. fabricks/utils/write/stream.py +2 -2
  84. {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
  85. {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
  86. fabricks/context/_types.py +0 -137
  87. fabricks/context/helpers.py +0 -63
  88. fabricks/core/jobs/base/_types.py +0 -284
  89. fabricks/core/parsers/_types.py +0 -6
  90. fabricks/utils/fdict.py +0 -240
  91. fabricks/utils/pydantic.py +0 -94
  92. fabricks/utils/schema/__init__.py +0 -7
  93. fabricks/utils/schema/get_json_schema_for_type.py +0 -161
  94. fabricks/utils/schema/get_schema_for_type.py +0 -99
  95. {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,490 @@
1
+ # CDC Templates Documentation
2
+
3
+ This directory contains Jinja2 SQL templates used by the Fabricks CDC (Change Data Capture) system to generate queries for handling data changes across different CDC strategies (NoCDC, SCD1, SCD2).
4
+
5
+ ## Directory Structure
6
+
7
+ ```
8
+ templates/
9
+ ├── ctes/ # Common Table Expression templates
10
+ ├── filters/ # Filter logic for slicing and updating data
11
+ ├── macros/ # Reusable Jinja2 macros
12
+ ├── merges/ # MERGE statement templates for each CDC type
13
+ ├── queries/ # Query assembly templates for each CDC type
14
+ ├── filter.sql.jinja # Main filter orchestration
15
+ ├── merge.sql.jinja # Main merge orchestration
16
+ └── query.sql.jinja # Main query orchestration
17
+ ```
18
+
19
+ ## Main Templates
20
+
21
+ ### query.sql.jinja
22
+ Main template that orchestrates the complete query generation process. It assembles various CTEs and query components based on the CDC type and configuration.
23
+
24
+ **Included Components:**
25
+ - Context information (via `queries/context.sql.jinja`)
26
+ - Base CTE (via `ctes/base.sql.jinja`)
27
+ - Optional slice CTE (via `ctes/slice.sql.jinja`)
28
+ - Optional deduplication CTEs (key and hash)
29
+ - Optional current state CTE (for update mode)
30
+ - Optional rectification CTE
31
+ - CDC-specific query logic (NoCDC, SCD1, or SCD2)
32
+ - Final output CTE
33
+
34
+ **Parameters:**
35
+ - `slice`: Type of slice filter ("update" or "latest")
36
+ - `deduplicate_key`: Enable key-based deduplication
37
+ - `deduplicate_hash`: Enable hash-based deduplication
38
+ - `mode`: Operation mode ("update" or "complete")
39
+ - `has_rows`: Whether target table has existing rows
40
+ - `rectify`: Enable data rectification
41
+ - `cdc`: CDC type ("nocdc", "scd1", or "scd2")
42
+
43
+ ### merge.sql.jinja
44
+ Main template for generating MERGE statements to apply changes to target tables.
45
+
46
+ **Included Components:**
47
+ - `merges/scd1.sql.jinja` - For SCD Type 1 merges
48
+ - `merges/scd2.sql.jinja` - For SCD Type 2 merges
49
+ - `merges/nocdc.sql.jinja` - For tables without CDC
50
+
51
+ **Parameters:**
52
+ - `cdc`: CDC type determining which merge template to use
53
+
54
+ ### filter.sql.jinja
55
+ Main template for generating filter queries to determine which data slices to process.
56
+
57
+ **Included Components:**
58
+ - Base CTE (via `ctes/base.sql.jinja`)
59
+ - Update filter (via `filters/update.sql.jinja`)
60
+ - Latest filter (via `filters/latest.sql.jinja`)
61
+ - Final aggregation (via `filters/final.sql.jinja`)
62
+
63
+ **Parameters:**
64
+ - `slice`: Type of slice ("update" or "latest")
65
+
66
+ ## CTEs (Common Table Expressions)
67
+
68
+ ### ctes/base.sql.jinja
69
+ Creates the base CTE that reads from the source and prepares the data with necessary transformations.
70
+
71
+ **Features:**
72
+ - Handles multiple source formats (query, table, global_temp_view, dataframe)
73
+ - Applies column casting
74
+ - Adds calculated columns
75
+ - Adds system columns (`__timestamp`, `__operation`, `__last_updated`, `__source`, `__hash`, `__key`, `__metadata`)
76
+ - Supports column overwriting
77
+ - Applies WHERE filter
78
+
79
+ **Parameters:**
80
+ - `format`: Source format type
81
+ - `src`: Source reference
82
+ - `cast`: Dictionary of columns to cast with target types
83
+ - `overwrite`: List of columns to overwrite
84
+ - `add_calculated_columns`: List of calculated column expressions
85
+ - `add_timestamp`, `add_operation`, `add_last_updated`, `add_source`, `add_hash`, `add_key`, `add_metadata`: Flags to add system columns
86
+ - `hashes`: List of columns to include in hash calculation
87
+ - `keys`: List of columns to include in key calculation
88
+ - `filter_where`: WHERE clause filter
89
+
90
+ ### ctes/slice.sql.jinja
91
+ Filters data based on timestamp and source slices.
92
+
93
+ **Parameters:**
94
+ - `parent_slice`: Parent CTE name to slice from
95
+ - `slices`: Slice condition expression
96
+ - `has_source`: Whether source tracking is enabled
97
+
98
+ ### ctes/deduplicate_key.sql.jinja
99
+ Removes duplicate records based on key and timestamp, keeping the most relevant record based on priority.
100
+
101
+ **Features:**
102
+ - Prioritizes delete operations over upserts
103
+ - Supports custom ordering for tie-breaking
104
+ - Advanced mode adds explicit row numbering; simple mode uses QUALIFY
105
+
106
+ **Parameters:**
107
+ - `parent_deduplicate_key`: Parent CTE to deduplicate
108
+ - `has_source`: Whether source tracking is enabled
109
+ - `advanced_deduplication`: Use advanced deduplication logic
110
+ - `has_order_by`: Whether custom ordering is specified
111
+ - `order_duplicate_by`: List of columns for ordering duplicates
112
+
113
+ ### ctes/deduplicate_hash.sql.jinja
114
+ Removes consecutive duplicate records based on hash value changes.
115
+
116
+ **Features:**
117
+ - Detects when hash or operation changes from previous record
118
+ - Keeps only records whose hash or operation differs from the previous record
119
+ - Advanced mode uses explicit LAG; simple mode uses QUALIFY
120
+
121
+ **Parameters:**
122
+ - `parent_deduplicate_hash`: Parent CTE to deduplicate
123
+ - `has_source`: Whether source tracking is enabled
124
+ - `advanced_deduplication`: Use advanced deduplication logic
125
+
126
+ ### ctes/current.sql.jinja
127
+ Retrieves the current state from the target table for update operations.
128
+
129
+ **Features:**
130
+ - Handles different timestamp columns per CDC type (SCD2 uses __valid_from)
131
+ - Refreshes __timestamp, __last_updated, __hash, __key as needed
132
+ - Filters by __is_current flag for SCD1/SCD2
133
+ - Applies source and update filters
134
+
135
+ **Parameters:**
136
+ - `intermediates`: List of intermediate columns to select
137
+ - `tgt`: Target table name
138
+ - `cdc`: CDC type
139
+ - `add_timestamp`, `add_last_updated`, `add_hash`, `add_key`: Flags for column refresh
140
+ - `has_no_data`: When set, current rows are emitted with a `delete` operation instead of `current`
141
+ - `soft_delete`: Whether soft delete is enabled
142
+ - `sources`: Source filter condition
143
+ - `update_where`: Additional WHERE clause
144
+
145
+ ### ctes/rectify.sql.jinja
146
+ Corrects historical data inconsistencies, particularly handling deleted records that reappear in subsequent reloads.
147
+
148
+ **Features:**
149
+ - Detects records deleted before reloads but present in later reloads
150
+ - Generates synthetic delete operations to maintain consistency
151
+ - Handles cross-reload data validation
152
+ - Filters out redundant current operations in update mode
153
+
154
+ **Logic Flow:**
155
+ 1. Combines base records with current state (update mode)
156
+ 2. Identifies next operation for each record
157
+ 3. Tracks reload timestamps
158
+ 4. Determines whether each record is deleted before the next reload or missing from the next reload
159
+ 5. Generates appropriate delete operations
160
+
161
+ **Parameters:**
162
+ - `mode`: Operation mode ("update" or "complete")
163
+ - `parent_rectify`: Parent CTE to rectify
164
+ - `intermediates`: List of intermediate columns
165
+ - `has_rows`: Whether target has existing rows
166
+ - `has_source`: Whether source tracking is enabled
167
+
168
+ ## Filters
169
+
170
+ ### filters/update.sql.jinja
171
+ Generates filter conditions to select only new or updated records since the last load.
172
+
173
+ **Features:**
174
+ - Determines maximum timestamp from target table
175
+ - Generates slice conditions for records newer than max timestamp
176
+ - Handles different timestamp columns per CDC type
177
+ - Supports multi-source filtering
178
+
179
+ **Parameters:**
180
+ - `parent_slice`: Parent CTE name
181
+ - `tgt`: Target table name
182
+ - `cdc`: CDC type
183
+ - `has_source`: Whether source tracking is enabled
184
+
185
+ ### filters/latest.sql.jinja
186
+ Generates filter conditions to select only the most recent timestamp per source.
187
+
188
+ **Features:**
189
+ - Finds maximum timestamp per source
190
+ - Creates slice conditions for latest data only
191
+
192
+ **Parameters:**
193
+ - `parent_slice`: Parent CTE name
194
+ - `has_source`: Whether source tracking is enabled
195
+
196
+ ### filters/final.sql.jinja
197
+ Aggregates slice and source filter conditions using OR logic.
198
+
199
+ **Parameters:**
200
+ - `has_source`: Whether source tracking is enabled
201
+
202
+ ## Macros
203
+
204
+ ### macros/hash.sql.jinja
205
+ Defines macros for generating hash and key values.
206
+
207
+ **Macros:**
208
+ - `add_hash(fields)`: Creates MD5 hash from specified fields, treating __operation specially (deletes get different hash)
209
+ - `add_key(fields)`: Creates MD5 hash for key columns
210
+
211
+ **Features:**
212
+ - Uses array concatenation with '*' delimiter and '-1' null replacement
213
+ - Casts all fields to string
214
+ - Special handling for __operation field in hashes
215
+
216
+ ### macros/backtick.sql.jinja
217
+ Simple macro to wrap field names in backticks for proper SQL escaping.
218
+
219
+ ## Merge Templates
220
+
221
+ ### merges/nocdc.sql.jinja
222
+ Generates MERGE statement for tables without CDC tracking.
223
+
224
+ **Features:**
225
+ - Matches on key columns
226
+ - Supports upsert and delete operations
227
+ - No historical tracking
228
+
229
+ **Parameters:**
230
+ - `format`: Source format ("dataframe" or "view")
231
+ - `tgt`: Target table name
232
+ - `src`: Source reference
233
+ - `has_key`: Whether to use __key column for matching
234
+ - `keys`: List of key columns for matching
235
+ - `has_source`: Whether source tracking is enabled
236
+ - `update_where`: Additional WHERE clause
237
+ - `columns`: List of all columns to merge
238
+
239
+ ### merges/scd1.sql.jinja
240
+ Generates MERGE statement for SCD Type 1 (overwrite) tracking.
241
+
242
+ **Features:**
243
+ - Updates records in place
244
+ - Maintains __is_current and __is_deleted flags
245
+ - Supports soft delete option
246
+ - Updates metadata timestamps
247
+
248
+ **Parameters:**
249
+ - `format`: Source format
250
+ - `tgt`, `src`: Target and source references
251
+ - `has_key`, `keys`, `has_source`: Matching configuration
252
+ - `fields`: Data fields to update
253
+ - `has_timestamp`, `has_last_updated`, `has_metadata`, `has_hash`, `has_rescued_data`: System column flags
254
+ - `soft_delete`: Enable soft delete instead of physical delete
255
+ - `columns`: All columns for insert
256
+
257
+ ### merges/scd2.sql.jinja
258
+ Generates MERGE statement for SCD Type 2 (versioned history) tracking.
259
+
260
+ **Features:**
261
+ - Closes current records by setting __valid_to
262
+ - Inserts new versions with __valid_from
263
+ - Maintains __is_current and __is_deleted flags
264
+ - Updates only matched records that are flagged as current
265
+
266
+ **Operations:**
267
+ - `update`: Close current version and insert new version
268
+ - `delete`: Close current version and mark as deleted
269
+ - `insert`: Insert new version
270
+
271
+ **Parameters:**
272
+ - `format`: Source format
273
+ - `tgt`, `src`: Target and source references
274
+ - `has_key`, `keys`, `has_source`: Matching configuration
275
+ - `soft_delete`: Enable soft delete marking
276
+ - `has_metadata`, `has_last_updated`: System column flags
277
+ - `columns`: All columns for insert
278
+
279
+ ## Query Templates
280
+
281
+ ### queries/context.sql.jinja
282
+ Generates a SQL comment block documenting the query configuration and parameters.
283
+
284
+ **Sections:**
285
+ - ⚙️ BASE: CDC type and mode
286
+ - 🎯 SOURCE & TARGET: Format and references
287
+ - 📊 CTE's: Which CTEs are included
288
+ - 🔪 FILTERING: Filter conditions
289
+ - 🗑️ DELETES: Delete handling options
290
+ - ✅ DATA VALIDATION: Data state flags
291
+ - 🏷️ HAS FIELDS: Which system fields are present
292
+ - ➕ ADD COLUMNS: Which columns to add
293
+ - 🔄 EXTRA COLUMN OPERATIONS: Column transformations
294
+ - 👨‍👩‍👧 PARENTS: Parent CTE references
295
+ - 📦 LAYOUT: Column lists
296
+
297
+ ### queries/final.sql.jinja
298
+ Final SELECT that outputs the result, excluding specified columns.
299
+
300
+ **Parameters:**
301
+ - `all_except`: List of columns to exclude from output
302
+
303
+ ### queries/scd1.sql.jinja
304
+ Implements SCD Type 1 logic that maintains only current state.
305
+
306
+ **Features:**
307
+ - Takes latest record per key
308
+ - Marks deleted records
309
+ - Handles first delete when no upserts exist (update mode)
310
+ - Filters out fake updates (records matching current hash)
311
+ - Generates merge conditions for upsert/delete operations
312
+
313
+ **Parameters:**
314
+ - `parent_cdc`: Parent CTE name
315
+ - `mode`: "complete" or "update"
316
+ - `has_source`, `has_rows`: Configuration flags
317
+ - `soft_delete`: Enable soft delete
318
+ - `rectify`: Whether rectification was applied
319
+ - `outputs`: Output columns
320
+
321
+ ### queries/scd2.sql.jinja
322
+ Implements SCD Type 2 logic that maintains full version history.
323
+
324
+ **Features:**
325
+ - Creates __valid_from and __valid_to temporal columns
326
+ - Assigns validity periods based on next timestamp
327
+ - Marks current records (__is_current)
328
+ - Identifies deleted records
329
+ - Generates merge conditions (insert/update/delete)
330
+ - Filters out fake updates
331
+ - Optional __valid_from correction to use 1900-01-01 for earliest records
332
+
333
+ **Parameters:**
334
+ - `parent_cdc`: Parent CTE name
335
+ - `mode`: "complete" or "update"
336
+ - `has_source`, `has_rows`: Configuration flags
337
+ - `correct_valid_from`: Correct the earliest `__valid_from` to 1900-01-01
338
+ - `rectify`: Whether rectification was applied
339
+ - `outputs`: Output columns
340
+
341
+ ### queries/nocdc/complete.sql.jinja
342
+ Generates complete load query for NoCDC mode.
343
+
344
+ **Features:**
345
+ - Selects all output columns
346
+ - Filters out 'current' operations if filter enabled
347
+
348
+ **Parameters:**
349
+ - `parent_cdc`: Parent CTE name
350
+ - `filter`: Enable operation filtering
351
+ - `outputs`: Output columns
352
+
353
+ ### queries/nocdc/update.sql.jinja
354
+ Generates incremental update query for NoCDC mode.
355
+
356
+ **Features:**
357
+ - Identifies records to upsert (not matching current hash)
358
+ - Optional delete missing records
359
+ - Filters out 'current' operations if filter enabled
360
+
361
+ **Parameters:**
362
+ - `parent_cdc`: Parent CTE name
363
+ - `has_rows`: Whether target has existing rows
364
+ - `delete_missing`: Enable delete for missing records
365
+ - `has_source`: Whether source tracking is enabled
366
+ - `filter`: Enable operation filtering
367
+ - `outputs`: Output columns
368
+
369
+ ## Usage Example
370
+
371
+ The templates are typically invoked through the CDC classes (NoCDC, SCD1, SCD2), which populate the template variables and render the appropriate templates for the requested operation:
372
+
373
+ ```python
374
+ from fabricks.cdc import SCD2
375
+
376
+ cdc = SCD2(
377
+     src="source_table",
377
+     tgt="target_table",
378
+     keys=["id"],
379
+     mode="update"
381
+ )
382
+
383
+ # Generates query using query.sql.jinja and dependencies
384
+ query = cdc.render_query()
385
+
386
+ # Generates merge using merge.sql.jinja and dependencies
387
+ merge = cdc.render_merge()
388
+ ```
389
+
390
+ ## Template Parameters Reference
391
+
392
+ ### Common Parameters
393
+
394
+ - `cdc`: CDC type ("nocdc", "scd1", "scd2")
395
+ - `mode`: Operation mode ("complete", "update")
396
+ - `format`: Source format ("query", "table", "global_temp_view", "dataframe")
397
+ - `src`: Source table/query reference
398
+ - `tgt`: Target table name
399
+ - `keys`: List of key columns for matching records
400
+ - `hashes`: List of columns to include in hash calculation
401
+
402
+ ### System Column Flags
403
+
404
+ - `has_timestamp`, `add_timestamp`: Timestamp tracking
405
+ - `has_last_updated`, `add_last_updated`: Last updated timestamp
406
+ - `has_operation`, `add_operation`: Operation type (upsert/delete/reload)
407
+ - `has_source`, `add_source`: Source system tracking
408
+ - `has_hash`, `add_hash`: Row hash for change detection
409
+ - `has_key`, `add_key`: Composite key hash
410
+ - `has_metadata`, `add_metadata`: Metadata struct (inserted/updated times)
411
+ - `has_identity`: Identity column present
412
+ - `has_rescued_data`: Rescued data column present
413
+
414
+ ### Processing Options
415
+
416
+ - `deduplicate_key`: Enable key-based deduplication
417
+ - `deduplicate_hash`: Enable hash-based deduplication
418
+ - `advanced_deduplication`: Use explicit window functions
419
+ - `rectify`: Enable data rectification
420
+ - `soft_delete`: Enable soft delete (mark as deleted vs physical delete)
421
+ - `delete_missing`: Delete records not in source
422
+ - `slice`: Slice type ("update", "latest")
423
+ - `filter`: Enable operation filtering
424
+
425
+ ### Data State Flags
426
+
427
+ - `has_rows`: Target table has existing rows
428
+ - `has_no_data`: Treat the load as empty, so current rows are emitted as `delete` operations
429
+ - `has_order_by`: Custom ordering specified
430
+
431
+ ### Column Lists
432
+
433
+ - `columns`: All columns in target table
434
+ - `inputs`: Input columns from source
435
+ - `intermediates`: Intermediate processing columns
436
+ - `outputs`: Final output columns
437
+ - `fields`: Data fields (non-system columns)
438
+ - `order_duplicate_by`: Columns for ordering duplicates
439
+ - `all_except`: Columns to exclude from output
440
+ - `all_overwrite`: Columns to overwrite
441
+ - `overwrite`: Columns to overwrite (subset)
442
+ - `cast`: Dictionary of column type casts
443
+ - `add_calculated_columns`: Calculated column expressions
444
+
445
+ ### Filter Conditions
446
+
447
+ - `filter_where`: WHERE clause for base data
448
+ - `update_where`: WHERE clause for update operations
449
+ - `slices`: Slice condition expression
450
+ - `sources`: Source filter condition
451
+
452
+ ### Parent CTE References
453
+
454
+ - `parent_slice`: Parent CTE for slicing
455
+ - `parent_rectify`: Parent CTE for rectification
456
+ - `parent_deduplicate_key`: Parent CTE for key deduplication
457
+ - `parent_deduplicate_hash`: Parent CTE for hash deduplication
458
+ - `parent_cdc`: Parent CTE for CDC logic
459
+ - `parent_final`: Parent CTE for final output
460
+
461
+ ### SCD2-Specific
462
+
463
+ - `correct_valid_from`: Correct earliest __valid_from to 1900-01-01
464
+
465
+ ## Template Rendering Flow
466
+
467
+ ### Query Rendering (query.sql.jinja)
468
+
469
+ 1. **Context** - Document configuration in SQL comment
470
+ 2. **Base CTE** - Load and transform source data
471
+ 3. **Slice CTE** (optional) - Filter to specific time slices
472
+ 4. **Deduplicate Key CTE** (optional) - Remove key duplicates
473
+ 5. **Current CTE** (optional, update mode) - Load existing target state
474
+ 6. **Rectify CTE** (optional) - Fix historical inconsistencies
475
+ 7. **Deduplicate Hash CTE** (optional) - Remove hash duplicates
476
+ 8. **CDC Logic CTE** - Apply NoCDC/SCD1/SCD2 logic
477
+ 9. **Final CTE** - Select output columns
478
+
479
+ ### Merge Rendering (merge.sql.jinja)
480
+
481
+ 1. Select appropriate merge template based on CDC type
482
+ 2. Generate MERGE statement with ON clause
483
+ 3. Define WHEN MATCHED and WHEN NOT MATCHED clauses
484
+ 4. Specify UPDATE, DELETE, and INSERT operations
485
+
486
+ ### Filter Rendering (filter.sql.jinja)
487
+
488
+ 1. **Base CTE** - Load source metadata
489
+ 2. **Update/Latest Filter CTE** - Determine slice conditions
490
+ 3. **Final CTE** - Aggregate filter expressions
@@ -16,6 +16,7 @@ with
16
16
  {% if add_calculated_columns %} {% for c in add_calculated_columns %} {{ c }}, {% endfor %} {% endif %}
17
17
  {% if add_timestamp %} cast(current_date() as timestamp) as __timestamp, {% endif %}
18
18
  {% if add_operation %} cast('{{ add_operation }}' as string) as __operation, {% endif %}
19
+ {% if add_last_updated %} cast(current_timestamp() as timestamp) as __last_updated, {% endif %}
19
20
  {% if add_source %} cast('{{ add_source }}' as string) as __source, {% endif %}
20
21
  {% if add_hash %} {{ h.add_hash(fields=hashes) }} as __hash, {% endif %}
21
22
  {% if add_key %} {{ h.add_hash(fields=keys) }} as __key, {% endif %}
@@ -9,6 +9,10 @@ __current as (
9
9
  {% elif cdc == "scd1" %} __timestamp,
10
10
  {% elif cdc == "scd2" %} __valid_from as __timestamp,
11
11
  {% endif %}
12
+ {% elif i == "__last_updated" %}
13
+ {% if add_last_updated %} cast(current_timestamp() as timestamp) as __last_updated,
14
+ {% else %} __last_updated,
15
+ {% endif %}
12
16
  {% elif i == "__operation" %}
13
17
  {% if has_no_data %} 'delete' as __operation, {% else %} 'current' as __operation, {% endif %}
14
18
  {% elif i == "__hash" %}
@@ -26,6 +26,9 @@
26
26
  {% if has_timestamp %}
27
27
  __timestamp = s.__timestamp,
28
28
  {% endif %}
29
+ {% if has_last_updated %}
30
+ __last_updated = s.__last_updated,
31
+ {% endif %}
29
32
  {% if has_metadata %}
30
33
  __metadata.updated = cast(current_timestamp() as timestamp),
31
34
  {% endif %}
@@ -50,6 +53,9 @@
50
53
  {% if has_timestamp %}
51
54
  __timestamp = s.__timestamp,
52
55
  {% endif %}
56
+ {% if has_last_updated %}
57
+ __last_updated = s.__last_updated,
58
+ {% endif %}
53
59
  {% if has_metadata %}
54
60
  __metadata.updated = cast(current_timestamp() as timestamp),
55
61
  {% endif %}
@@ -27,6 +27,9 @@
27
27
  {% endif %}
28
28
  {% if has_metadata %}
29
29
  __metadata.updated = cast(current_timestamp() as timestamp),
30
+ {% endif %}
31
+ {% if has_last_updated %}
32
+ __last_updated = s.__last_updated,
30
33
  {% endif %}
31
34
  when matched
32
35
  and __merge_condition == 'delete' then
@@ -39,6 +42,9 @@
39
42
  {% endif %}
40
43
  {% if has_metadata %}
41
44
  __metadata.updated = cast(current_timestamp() as timestamp),
45
+ {% endif %}
46
+ {% if has_last_updated %}
47
+ __last_updated = s.__last_updated,
42
48
  {% endif %}
43
49
  when not matched
44
50
  and __merge_condition == 'insert' then