@sylix/coworker 2.0.11 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/dist/commands/slash/config.d.ts.map +1 -1
  2. package/dist/commands/slash/config.js +22 -4
  3. package/dist/commands/slash/config.js.map +1 -1
  4. package/dist/core/CoWorkerAgent.d.ts.map +1 -1
  5. package/dist/core/CoWorkerAgent.js +6 -3
  6. package/dist/core/CoWorkerAgent.js.map +1 -1
  7. package/dist/skills/defaults/accessibility/screen-reader-testing.md +545 -0
  8. package/dist/skills/defaults/accessibility/wcag-audit-patterns.md +555 -0
  9. package/dist/skills/defaults/ai-ml/rag.md +276 -0
  10. package/dist/skills/defaults/backend-development/api-design-principles.md +528 -0
  11. package/dist/skills/defaults/backend-development/api-design.md +285 -0
  12. package/dist/skills/defaults/backend-development/architecture-patterns.md +494 -0
  13. package/dist/skills/defaults/backend-development/async-python.md +237 -0
  14. package/dist/skills/defaults/backend-development/auth-implementation-patterns.md +638 -0
  15. package/dist/skills/defaults/backend-development/bazel-build-optimization.md +387 -0
  16. package/dist/skills/defaults/backend-development/billing-automation/SKILL.md +566 -0
  17. package/dist/skills/defaults/backend-development/code-review-excellence.md +538 -0
  18. package/dist/skills/defaults/backend-development/cqrs-implementation.md +554 -0
  19. package/dist/skills/defaults/backend-development/database-design.md +305 -0
  20. package/dist/skills/defaults/backend-development/debugging-strategies.md +536 -0
  21. package/dist/skills/defaults/backend-development/e2e-testing-patterns.md +544 -0
  22. package/dist/skills/defaults/backend-development/error-handling-patterns.md +641 -0
  23. package/dist/skills/defaults/backend-development/fastapi-templates.md +559 -0
  24. package/dist/skills/defaults/backend-development/fastapi.md +309 -0
  25. package/dist/skills/defaults/backend-development/git-advanced-workflows.md +405 -0
  26. package/dist/skills/defaults/backend-development/microservices-patterns.md +595 -0
  27. package/dist/skills/defaults/backend-development/microservices.md +284 -0
  28. package/dist/skills/defaults/backend-development/monorepo-management.md +623 -0
  29. package/dist/skills/defaults/backend-development/nodejs-backend-patterns.md +1048 -0
  30. package/dist/skills/defaults/backend-development/nx-workspace-patterns.md +457 -0
  31. package/dist/skills/defaults/backend-development/paypal-integration/SKILL.md +478 -0
  32. package/dist/skills/defaults/backend-development/pci-compliance/SKILL.md +480 -0
  33. package/dist/skills/defaults/backend-development/python-anti-patterns.md +349 -0
  34. package/dist/skills/defaults/backend-development/python-background-jobs.md +364 -0
  35. package/dist/skills/defaults/backend-development/python-code-style.md +360 -0
  36. package/dist/skills/defaults/backend-development/python-configuration.md +368 -0
  37. package/dist/skills/defaults/backend-development/python-design-patterns.md +296 -0
  38. package/dist/skills/defaults/backend-development/python-error-handling.md +323 -0
  39. package/dist/skills/defaults/backend-development/python-packaging.md +887 -0
  40. package/dist/skills/defaults/backend-development/python-performance-optimization.md +874 -0
  41. package/dist/skills/defaults/backend-development/python-project-structure.md +252 -0
  42. package/dist/skills/defaults/backend-development/python-resilience.md +376 -0
  43. package/dist/skills/defaults/backend-development/python-resource-management.md +421 -0
  44. package/dist/skills/defaults/backend-development/python-type-safety.md +428 -0
  45. package/dist/skills/defaults/backend-development/sql-optimization-patterns.md +509 -0
  46. package/dist/skills/defaults/backend-development/stripe-integration/SKILL.md +522 -0
  47. package/dist/skills/defaults/backend-development/turborepo-caching.md +376 -0
  48. package/dist/skills/defaults/blockchain/defi-protocol-templates.md +430 -0
  49. package/dist/skills/defaults/blockchain/nft-standards.md +364 -0
  50. package/dist/skills/defaults/blockchain/solidity-security.md +514 -0
  51. package/dist/skills/defaults/blockchain/web3-testing.md +360 -0
  52. package/dist/skills/defaults/business/competitive-landscape/SKILL.md +527 -0
  53. package/dist/skills/defaults/business/market-sizing-analysis/SKILL.md +451 -0
  54. package/dist/skills/defaults/business/startup-financial-modeling/SKILL.md +494 -0
  55. package/dist/skills/defaults/business/startup-metrics-framework/SKILL.md +564 -0
  56. package/dist/skills/defaults/business/team-composition-analysis.md +437 -0
  57. package/dist/skills/defaults/compliance/employment-contract-templates/SKILL.md +527 -0
  58. package/dist/skills/defaults/compliance/gdpr-data-handling/SKILL.md +630 -0
  59. package/dist/skills/defaults/data-engineering/airflow-dag-patterns.md +436 -0
  60. package/dist/skills/defaults/data-engineering/airflow.md +519 -0
  61. package/dist/skills/defaults/data-engineering/data-quality.md +583 -0
  62. package/dist/skills/defaults/data-engineering/dbt-transformation-patterns.md +482 -0
  63. package/dist/skills/defaults/data-engineering/dbt.md +556 -0
  64. package/dist/skills/defaults/data-engineering/ml-pipeline-workflow/SKILL.md +247 -0
  65. package/dist/skills/defaults/data-engineering/spark-optimization.md +348 -0
  66. package/dist/skills/defaults/data-engineering/spark.md +411 -0
  67. package/dist/skills/defaults/database/postgresql.md +202 -0
  68. package/dist/skills/defaults/debugging/systematic-debugging.md +249 -0
  69. package/dist/skills/defaults/devops/architecture-decision-records.md +448 -0
  70. package/dist/skills/defaults/devops/changelog-automation.md +580 -0
  71. package/dist/skills/defaults/devops/cicd.md +314 -0
  72. package/dist/skills/defaults/devops/cloud.md +263 -0
  73. package/dist/skills/defaults/devops/code-review-excellence.md +299 -0
  74. package/dist/skills/defaults/devops/cost-optimization.md +295 -0
  75. package/dist/skills/defaults/devops/deployment-pipeline-design.md +356 -0
  76. package/dist/skills/defaults/devops/docker.md +281 -0
  77. package/dist/skills/defaults/devops/git-workflows.md +205 -0
  78. package/dist/skills/defaults/devops/github-actions.md +311 -0
  79. package/dist/skills/defaults/devops/gitlab-ci-patterns.md +266 -0
  80. package/dist/skills/defaults/devops/hybrid-cloud-networking.md +241 -0
  81. package/dist/skills/defaults/devops/istio-traffic-management.md +327 -0
  82. package/dist/skills/defaults/devops/kubernetes.md +339 -0
  83. package/dist/skills/defaults/devops/linkerd-patterns.md +311 -0
  84. package/dist/skills/defaults/devops/multi-cloud-architecture.md +181 -0
  85. package/dist/skills/defaults/devops/observability.md +243 -0
  86. package/dist/skills/defaults/devops/openapi-spec-generation.md +1024 -0
  87. package/dist/skills/defaults/devops/postmortem-writing.md +396 -0
  88. package/dist/skills/defaults/devops/prometheus-configuration.md +265 -0
  89. package/dist/skills/defaults/devops/secrets-management.md +341 -0
  90. package/dist/skills/defaults/devops/service-mesh-observability.md +385 -0
  91. package/dist/skills/defaults/devops/terraform-module-library.md +244 -0
  92. package/dist/skills/defaults/finance/backtesting-frameworks/SKILL.md +663 -0
  93. package/dist/skills/defaults/finance/risk-metrics-calculation/SKILL.md +557 -0
  94. package/dist/skills/defaults/frontend/accessibility-compliance.md +420 -0
  95. package/dist/skills/defaults/frontend/design-system-patterns.md +337 -0
  96. package/dist/skills/defaults/frontend/interaction-design.md +327 -0
  97. package/dist/skills/defaults/frontend/javascript.md +311 -0
  98. package/dist/skills/defaults/frontend/modern-javascript-patterns.md +927 -0
  99. package/dist/skills/defaults/frontend/react-native-design.md +440 -0
  100. package/dist/skills/defaults/frontend/react.md +345 -0
  101. package/dist/skills/defaults/frontend/responsive-design.md +472 -0
  102. package/dist/skills/defaults/frontend/tailwind-design-system.md +337 -0
  103. package/dist/skills/defaults/frontend/typescript-advanced-types.md +724 -0
  104. package/dist/skills/defaults/frontend/typescript.md +334 -0
  105. package/dist/skills/defaults/frontend/visual-design-foundations.md +326 -0
  106. package/dist/skills/defaults/frontend/web-component-design.md +279 -0
  107. package/dist/skills/defaults/game-development/godot-gdscript-patterns.md +188 -0
  108. package/dist/skills/defaults/game-development/unity-ecs-patterns.md +594 -0
  109. package/dist/skills/defaults/kubernetes/gitops-workflow.md +285 -0
  110. package/dist/skills/defaults/kubernetes/gitops.md +280 -0
  111. package/dist/skills/defaults/kubernetes/helm-chart-scaffolding.md +553 -0
  112. package/dist/skills/defaults/kubernetes/helm.md +343 -0
  113. package/dist/skills/defaults/kubernetes/k8s-manifest-generator.md +501 -0
  114. package/dist/skills/defaults/kubernetes/k8s-security-policies.md +342 -0
  115. package/dist/skills/defaults/kubernetes/manifests.md +330 -0
  116. package/dist/skills/defaults/kubernetes/security.md +337 -0
  117. package/dist/skills/defaults/llm-application/embedding-strategies.md +608 -0
  118. package/dist/skills/defaults/llm-application/hybrid-search-implementation.md +570 -0
  119. package/dist/skills/defaults/llm-application/hybrid-search.md +570 -0
  120. package/dist/skills/defaults/llm-application/langchain-architecture.md +666 -0
  121. package/dist/skills/defaults/llm-application/langchain.md +259 -0
  122. package/dist/skills/defaults/llm-application/llm-evaluation.md +695 -0
  123. package/dist/skills/defaults/llm-application/prompt-engineering-patterns.md +449 -0
  124. package/dist/skills/defaults/llm-application/prompt-engineering.md +219 -0
  125. package/dist/skills/defaults/llm-application/rag-implementation.md +434 -0
  126. package/dist/skills/defaults/llm-application/similarity-search-patterns.md +560 -0
  127. package/dist/skills/defaults/llm-application/similarity-search.md +560 -0
  128. package/dist/skills/defaults/llm-application/vector-index-tuning.md +523 -0
  129. package/dist/skills/defaults/mobile/mobile-android-design.md +440 -0
  130. package/dist/skills/defaults/mobile/mobile-ios-design.md +266 -0
  131. package/dist/skills/defaults/monitoring/distributed-tracing.md +436 -0
  132. package/dist/skills/defaults/monitoring/grafana-dashboards.md +370 -0
  133. package/dist/skills/defaults/monitoring/prometheus-configuration.md +379 -0
  134. package/dist/skills/defaults/monitoring/slo-implementation.md +323 -0
  135. package/dist/skills/defaults/refactoring/code-refactoring.md +349 -0
  136. package/dist/skills/defaults/security/anti-reversing-techniques/SKILL.md +559 -0
  137. package/dist/skills/defaults/security/auditor.md +168 -0
  138. package/dist/skills/defaults/security/binary-analysis-patterns/SKILL.md +438 -0
  139. package/dist/skills/defaults/security/memory-forensics/SKILL.md +483 -0
  140. package/dist/skills/defaults/security/mtls-configuration.md +349 -0
  141. package/dist/skills/defaults/security/protocol-reverse-engineering/SKILL.md +520 -0
  142. package/dist/skills/defaults/security/sast-configuration.md +182 -0
  143. package/dist/skills/defaults/security/security.md +313 -0
  144. package/dist/skills/defaults/security/stride-analysis.md +273 -0
  145. package/dist/skills/defaults/security/threat-mitigation-mapping.md +290 -0
  146. package/dist/skills/defaults/systems/bash-defensive-patterns/SKILL.md +539 -0
  147. package/dist/skills/defaults/systems/bats-testing-patterns/SKILL.md +631 -0
  148. package/dist/skills/defaults/systems/go-concurrency-patterns.md +657 -0
  149. package/dist/skills/defaults/systems/memory-safety-patterns.md +605 -0
  150. package/dist/skills/defaults/systems/rust-async-patterns.md +519 -0
  151. package/dist/skills/defaults/systems/shellcheck-configuration/SKILL.md +456 -0
  152. package/dist/skills/defaults/team-collaboration/multi-reviewer-patterns.md +126 -0
  153. package/dist/skills/defaults/team-collaboration/parallel-feature-development.md +151 -0
  154. package/dist/skills/defaults/testing/javascript-testing-patterns.md +1021 -0
  155. package/dist/skills/defaults/testing/python-testing-patterns.md +351 -0
  156. package/dist/skills/defaults/testing/testing.md +332 -0
  157. package/dist/skills/defaults/workflows/context-driven-development.md +384 -0
  158. package/dist/skills/defaults/workflows/track-management.md +592 -0
  159. package/dist/skills/defaults/workflows/workflow-patterns.md +622 -0
  160. package/dist/skills/index.d.ts +11 -0
  161. package/dist/skills/index.d.ts.map +1 -0
  162. package/dist/skills/index.js +129 -0
  163. package/dist/skills/index.js.map +1 -0
  164. package/dist/utils/character.js +4 -4
  165. package/dist/utils/character.js.map +1 -1
  166. package/dist/utils/inputbar.d.ts.map +1 -1
  167. package/dist/utils/inputbar.js +7 -0
  168. package/dist/utils/inputbar.js.map +1 -1
  169. package/package.json +1 -1
@@ -0,0 +1,583 @@
1
+ ---
2
+ name: data-quality-frameworks
3
+ description: Implement data quality validation with Great Expectations, dbt tests, and data contracts. Use when building data quality pipelines, implementing validation rules, or establishing data contracts.
4
+ ---
5
+
6
+ # Data Quality Frameworks
7
+
8
+ Production patterns for implementing data quality checks with Great Expectations, dbt tests, and data contracts to keep data pipelines reliable.
9
+
10
+ ## When to Use This Skill
11
+
12
+ - Implementing data quality checks in pipelines
13
+ - Setting up Great Expectations validation
14
+ - Building comprehensive dbt test suites
15
+ - Establishing data contracts between teams
16
+ - Monitoring data quality metrics
17
+ - Automating data validation in CI/CD
18
+
19
+ ## Core Concepts
20
+
21
+ ### 1. Data Quality Dimensions
22
+
23
+ | Dimension | Description | Example Check |
24
+ | ---------------- | ------------------------ | -------------------------------------------------- |
25
+ | **Completeness** | No missing values | `expect_column_values_to_not_be_null` |
26
+ | **Uniqueness** | No duplicates | `expect_column_values_to_be_unique` |
27
+ | **Validity** | Values in expected range | `expect_column_values_to_be_in_set` |
28
+ | **Accuracy** | Data matches reality | Cross-reference validation |
29
+ | **Consistency** | No contradictions | `expect_column_pair_values_A_to_be_greater_than_B` |
30
+ | **Timeliness** | Data is recent | `expect_column_max_to_be_between` |
31
+
32
+ ### 2. Testing Pyramid for Data
33
+
34
+ ```
35
+           /\
36
+          /  \     Integration Tests (cross-table)
37
+         /────\
38
+        /      \   Unit Tests (single column)
39
+       /────────\
40
+      /          \ Schema Tests (structure)
41
+     /────────────\
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ### Great Expectations Setup
47
+
48
+ ```bash
49
+ # Install
50
+ pip install great_expectations
51
+
52
+ # Initialize project
53
+ great_expectations init
54
+
55
+ # Create datasource
56
+ great_expectations datasource new
57
+ ```
58
+
59
+ ```python
60
+ # quickstart.py - build an expectation suite and run an existing checkpoint
61
+ import great_expectations as gx
62
+
63
+ # Create context
64
+ context = gx.get_context()
65
+
66
+ # Create expectation suite
67
+ suite = context.add_expectation_suite("orders_suite")
68
+
69
+ # Add expectations
70
+ suite.add_expectation(
71
+ gx.expectations.ExpectColumnValuesToNotBeNull(column="order_id")
72
+ )
73
+ suite.add_expectation(
74
+ gx.expectations.ExpectColumnValuesToBeUnique(column="order_id")
75
+ )
76
+
77
+ # Validate
78
+ results = context.run_checkpoint(checkpoint_name="daily_orders")
79
+ ```
80
+
81
+ ## Patterns
82
+
83
+ ### Pattern 1: Great Expectations Suite
84
+
85
+ ```python
86
+ # expectations/orders_suite.py
87
+ import great_expectations as gx
88
+ from great_expectations.core import ExpectationSuite
89
+ from great_expectations.core.expectation_configuration import ExpectationConfiguration
90
+
91
+ def build_orders_suite() -> ExpectationSuite:
92
+ """Build comprehensive orders expectation suite"""
93
+
94
+ suite = ExpectationSuite(expectation_suite_name="orders_suite")
95
+
96
+ # Schema expectations
97
+ suite.add_expectation(ExpectationConfiguration(
98
+ expectation_type="expect_table_columns_to_match_set",
99
+ kwargs={
100
+ "column_set": ["order_id", "customer_id", "amount", "status", "created_at"],
101
+ "exact_match": False # Allow additional columns
102
+ }
103
+ ))
104
+
105
+ # Primary key
106
+ suite.add_expectation(ExpectationConfiguration(
107
+ expectation_type="expect_column_values_to_not_be_null",
108
+ kwargs={"column": "order_id"}
109
+ ))
110
+ suite.add_expectation(ExpectationConfiguration(
111
+ expectation_type="expect_column_values_to_be_unique",
112
+ kwargs={"column": "order_id"}
113
+ ))
114
+
115
+ # Foreign key
116
+ suite.add_expectation(ExpectationConfiguration(
117
+ expectation_type="expect_column_values_to_not_be_null",
118
+ kwargs={"column": "customer_id"}
119
+ ))
120
+
121
+ # Categorical values
122
+ suite.add_expectation(ExpectationConfiguration(
123
+ expectation_type="expect_column_values_to_be_in_set",
124
+ kwargs={
125
+ "column": "status",
126
+ "value_set": ["pending", "processing", "shipped", "delivered", "cancelled"]
127
+ }
128
+ ))
129
+
130
+ # Numeric ranges
131
+ suite.add_expectation(ExpectationConfiguration(
132
+ expectation_type="expect_column_values_to_be_between",
133
+ kwargs={
134
+ "column": "amount",
135
+ "min_value": 0,
136
+ "max_value": 100000,
137
+ "strict_min": True # amount > 0
138
+ }
139
+ ))
140
+
141
+ # Date validity
142
+ suite.add_expectation(ExpectationConfiguration(
143
+ expectation_type="expect_column_values_to_be_dateutil_parseable",
144
+ kwargs={"column": "created_at"}
145
+ ))
146
+
147
+ # Freshness - data should be recent
148
+ suite.add_expectation(ExpectationConfiguration(
149
+ expectation_type="expect_column_max_to_be_between",
150
+ kwargs={
151
+ "column": "created_at",
152
+ "min_value": {"$PARAMETER": "now - timedelta(days=1)"},
153
+ "max_value": {"$PARAMETER": "now"}
154
+ }
155
+ ))
156
+
157
+ # Row count sanity
158
+ suite.add_expectation(ExpectationConfiguration(
159
+ expectation_type="expect_table_row_count_to_be_between",
160
+ kwargs={
161
+ "min_value": 1000, # Expect at least 1000 rows
162
+ "max_value": 10000000
163
+ }
164
+ ))
165
+
166
+ # Statistical expectations
167
+ suite.add_expectation(ExpectationConfiguration(
168
+ expectation_type="expect_column_mean_to_be_between",
169
+ kwargs={
170
+ "column": "amount",
171
+ "min_value": 50,
172
+ "max_value": 500
173
+ }
174
+ ))
175
+
176
+ return suite
177
+ ```
178
+
179
+ ### Pattern 2: Great Expectations Checkpoint
180
+
181
+ ```yaml
182
+ # great_expectations/checkpoints/orders_checkpoint.yml
183
+ name: orders_checkpoint
184
+ config_version: 1.0
185
+ class_name: Checkpoint
186
+ run_name_template: "%Y%m%d-%H%M%S-orders-validation"
187
+
188
+ validations:
189
+ - batch_request:
190
+ datasource_name: warehouse
191
+ data_connector_name: default_inferred_data_connector_name
192
+ data_asset_name: orders
193
+ data_connector_query:
194
+ index: -1 # Latest batch
195
+ expectation_suite_name: orders_suite
196
+
197
+ action_list:
198
+ - name: store_validation_result
199
+ action:
200
+ class_name: StoreValidationResultAction
201
+
202
+ - name: store_evaluation_parameters
203
+ action:
204
+ class_name: StoreEvaluationParametersAction
205
+
206
+ - name: update_data_docs
207
+ action:
208
+ class_name: UpdateDataDocsAction
209
+
210
+ # Slack notification on failure
211
+ - name: send_slack_notification
212
+ action:
213
+ class_name: SlackNotificationAction
214
+ slack_webhook: ${SLACK_WEBHOOK}
215
+ notify_on: failure
216
+ renderer:
217
+ module_name: great_expectations.render.renderer.slack_renderer
218
+ class_name: SlackRenderer
219
+ ```
220
+
221
+ ```python
222
+ # Run checkpoint
223
+ import great_expectations as gx
224
+
225
+ context = gx.get_context()
226
+ result = context.run_checkpoint(checkpoint_name="orders_checkpoint")
227
+
228
+ if not result.success:
229
+ failed_expectations = [
230
+ r for r in result.run_results.values()
231
+ if not r.success
232
+ ]
233
+ raise ValueError(f"Data quality check failed: {failed_expectations}")
234
+ ```
235
+
236
+ ### Pattern 3: dbt Data Tests
237
+
238
+ ```yaml
239
+ # models/marts/core/_core__models.yml
240
+ version: 2
241
+
242
+ models:
243
+ - name: fct_orders
244
+ description: Order fact table
245
+ tests:
246
+ # Table-level tests
247
+ - dbt_utils.recency:
248
+ datepart: day
249
+ field: created_at
250
+ interval: 1
251
+ - dbt_utils.at_least_one
252
+ - dbt_utils.expression_is_true:
253
+ expression: "total_amount >= 0"
254
+
255
+ columns:
256
+ - name: order_id
257
+ description: Primary key
258
+ tests:
259
+ - unique
260
+ - not_null
261
+
262
+ - name: customer_id
263
+ description: Foreign key to dim_customers
264
+ tests:
265
+ - not_null
266
+ - relationships:
267
+ to: ref('dim_customers')
268
+ field: customer_id
269
+
270
+ - name: order_status
271
+ tests:
272
+ - accepted_values:
273
+ values:
274
+ ["pending", "processing", "shipped", "delivered", "cancelled"]
275
+
276
+ - name: total_amount
277
+ tests:
278
+ - not_null
279
+ - dbt_utils.expression_is_true:
280
+ expression: ">= 0"
281
+
282
+ - name: created_at
283
+ tests:
284
+ - not_null
285
+ - dbt_utils.expression_is_true:
286
+ expression: "<= current_timestamp"
287
+
288
+ - name: dim_customers
289
+ columns:
290
+ - name: customer_id
291
+ tests:
292
+ - unique
293
+ - not_null
294
+
295
+ - name: email
296
+ tests:
297
+ - unique
298
+ - not_null
299
+ # Custom regex test
300
+ - dbt_utils.expression_is_true:
301
+ expression: "email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'"
302
+ ```
303
+
304
+ ### Pattern 4: Custom dbt Tests
305
+
306
+ ```sql
307
+ -- tests/generic/test_row_count_in_range.sql
308
+ {% test row_count_in_range(model, min_count, max_count) %}
309
+
310
+ with row_count as (
311
+ select count(*) as cnt from {{ model }}
312
+ )
313
+
314
+ select cnt
315
+ from row_count
316
+ where cnt < {{ min_count }} or cnt > {{ max_count }}
317
+
318
+ {% endtest %}
319
+
320
+ -- Usage in schema.yml:
321
+ -- tests:
322
+ -- - row_count_in_range:
323
+ -- min_count: 1000
324
+ -- max_count: 10000000
325
+ ```
326
+
327
+ ```sql
328
+ -- tests/generic/test_sequential_values.sql
329
+ {% test sequential_values(model, column_name, interval=1) %}
330
+
331
+ with lagged as (
332
+ select
333
+ {{ column_name }},
334
+ lag({{ column_name }}) over (order by {{ column_name }}) as prev_value
335
+ from {{ model }}
336
+ )
337
+
338
+ select *
339
+ from lagged
340
+ where {{ column_name }} - prev_value != {{ interval }}
341
+ and prev_value is not null
342
+
343
+ {% endtest %}
344
+ ```
345
+
346
+ ```sql
347
+ -- tests/singular/assert_orders_customers_match.sql
348
+ -- Singular test: specific business rule
349
+
350
+ with orders_customers as (
351
+ select distinct customer_id from {{ ref('fct_orders') }}
352
+ ),
353
+
354
+ dim_customers as (
355
+ select customer_id from {{ ref('dim_customers') }}
356
+ ),
357
+
358
+ orphaned_orders as (
359
+ select o.customer_id
360
+ from orders_customers o
361
+ left join dim_customers c using (customer_id)
362
+ where c.customer_id is null
363
+ )
364
+
365
+ select * from orphaned_orders
366
+ -- Test passes if this returns 0 rows
367
+ ```
368
+
369
+ ### Pattern 5: Data Contracts
370
+
371
+ ```yaml
372
+ # contracts/orders_contract.yaml
373
+ apiVersion: datacontract.com/v1.0.0
374
+ kind: DataContract
375
+ metadata:
376
+ name: orders
377
+ version: 1.0.0
378
+ owner: data-platform-team
379
+ contact: data-team@company.com
380
+
381
+ info:
382
+ title: Orders Data Contract
383
+ description: Contract for order event data from the ecommerce platform
384
+ purpose: Analytics, reporting, and ML features
385
+
386
+ servers:
387
+ production:
388
+ type: snowflake
389
+ account: company.us-east-1
390
+ database: ANALYTICS
391
+ schema: CORE
392
+
393
+ terms:
394
+ usage: Internal analytics only
395
+ limitations: PII must not be exposed in downstream marts
396
+ billing: Charged per query TB scanned
397
+
398
+ schema:
399
+ type: object
400
+ properties:
401
+ order_id:
402
+ type: string
403
+ format: uuid
404
+ description: Unique order identifier
405
+ required: true
406
+ unique: true
407
+ pii: false
408
+
409
+ customer_id:
410
+ type: string
411
+ format: uuid
412
+ description: Customer identifier
413
+ required: true
414
+ pii: true
415
+ piiClassification: indirect
416
+
417
+ total_amount:
418
+ type: number
419
+ minimum: 0
420
+ maximum: 100000
421
+ description: Order total in USD
422
+
423
+ created_at:
424
+ type: string
425
+ format: date-time
426
+ description: Order creation timestamp
427
+ required: true
428
+
429
+ status:
430
+ type: string
431
+ enum: [pending, processing, shipped, delivered, cancelled]
432
+ description: Current order status
433
+
434
+ quality:
435
+ type: SodaCL
436
+ specification:
437
+ checks for orders:
438
+ - row_count > 0
439
+ - missing_count(order_id) = 0
440
+ - duplicate_count(order_id) = 0
441
+ - invalid_count(status) = 0:
442
+ valid values: [pending, processing, shipped, delivered, cancelled]
443
+ - freshness(created_at) < 24h
444
+
445
+ sla:
446
+ availability: 99.9%
447
+ freshness: 1 hour
448
+ latency: 5 minutes
449
+ ```
450
+
451
+ ### Pattern 6: Automated Quality Pipeline
452
+
453
+ ```python
454
+ # quality_pipeline.py
455
+ from dataclasses import dataclass
456
+ from typing import List, Dict, Any
457
+ import great_expectations as gx
458
+ from datetime import datetime
459
+
460
+ @dataclass
461
+ class QualityResult:
462
+ table: str
463
+ passed: bool
464
+ total_expectations: int
465
+ failed_expectations: int
466
+ details: List[Dict[str, Any]]
467
+ timestamp: datetime
468
+
469
+ class DataQualityPipeline:
470
+ """Orchestrate data quality checks across tables"""
471
+
472
+ def __init__(self, context: gx.DataContext):
473
+ self.context = context
474
+ self.results: List[QualityResult] = []
475
+
476
+ def validate_table(self, table: str, suite: str) -> QualityResult:
477
+ """Validate a single table against expectation suite"""
478
+
479
+ checkpoint_config = {
480
+ "name": f"{table}_validation",
481
+ "config_version": 1.0,
482
+ "class_name": "Checkpoint",
483
+ "validations": [{
484
+ "batch_request": {
485
+ "datasource_name": "warehouse",
486
+ "data_asset_name": table,
487
+ },
488
+ "expectation_suite_name": suite,
489
+ }],
490
+ }
491
+
492
+ result = self.context.run_checkpoint(**checkpoint_config)
493
+
494
+ # Parse results
495
+ validation_result = list(result.run_results.values())[0]
496
+ results = validation_result.results
497
+
498
+ failed = [r for r in results if not r.success]
499
+
500
+ return QualityResult(
501
+ table=table,
502
+ passed=result.success,
503
+ total_expectations=len(results),
504
+ failed_expectations=len(failed),
505
+ details=[{
506
+ "expectation": r.expectation_config.expectation_type,
507
+ "success": r.success,
508
+ "observed_value": r.result.get("observed_value"),
509
+ } for r in results],
510
+ timestamp=datetime.now()
511
+ )
512
+
513
+ def run_all(self, tables: Dict[str, str]) -> Dict[str, QualityResult]:
514
+ """Run validation for all tables"""
515
+ results = {}
516
+
517
+ for table, suite in tables.items():
518
+ print(f"Validating {table}...")
519
+ results[table] = self.validate_table(table, suite)
520
+
521
+ return results
522
+
523
+ def generate_report(self, results: Dict[str, QualityResult]) -> str:
524
+ """Generate quality report"""
525
+ report = ["# Data Quality Report", f"Generated: {datetime.now()}", ""]
526
+
527
+ total_passed = sum(1 for r in results.values() if r.passed)
528
+ total_tables = len(results)
529
+
530
+ report.append(f"## Summary: {total_passed}/{total_tables} tables passed")
531
+ report.append("")
532
+
533
+ for table, result in results.items():
534
+ status = "PASS" if result.passed else "FAIL"
535
+ report.append(f"### {status} {table}")
536
+ report.append(f"- Expectations: {result.total_expectations}")
537
+ report.append(f"- Failed: {result.failed_expectations}")
538
+
539
+ if not result.passed:
540
+ report.append("- Failed checks:")
541
+ for detail in result.details:
542
+ if not detail["success"]:
543
+ report.append(f" - {detail['expectation']}: {detail['observed_value']}")
544
+ report.append("")
545
+
546
+ return "\n".join(report)
547
+
548
+ # Usage
549
+ context = gx.get_context()
550
+ pipeline = DataQualityPipeline(context)
551
+
552
+ tables_to_validate = {
553
+ "orders": "orders_suite",
554
+ "customers": "customers_suite",
555
+ "products": "products_suite",
556
+ }
557
+
558
+ results = pipeline.run_all(tables_to_validate)
559
+ report = pipeline.generate_report(results)
560
+
561
+ # Fail pipeline if any table failed
562
+ if not all(r.passed for r in results.values()):
563
+ print(report)
564
+ raise ValueError("Data quality checks failed!")
565
+ ```
566
+
567
+ ## Best Practices
568
+
569
+ ### Do's
570
+
571
+ - **Test early** - Validate source data before transformations
572
+ - **Test incrementally** - Add tests as you find issues
573
+ - **Document expectations** - Clear descriptions for each test
574
+ - **Alert on failures** - Integrate with monitoring
575
+ - **Version contracts** - Track schema changes
576
+
577
+ ### Don'ts
578
+
579
+ - **Don't test everything** - Focus on critical columns
580
+ - **Don't ignore warnings** - They often precede failures
581
+ - **Don't skip freshness** - Stale data is bad data
582
+ - **Don't hardcode thresholds** - Use dynamic baselines
583
+ - **Don't test in isolation** - Test relationships too