agentic-team-templates 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +280 -0
  2. package/bin/cli.js +5 -0
  3. package/package.json +47 -0
  4. package/src/index.js +521 -0
  5. package/templates/_shared/code-quality.md +162 -0
  6. package/templates/_shared/communication.md +114 -0
  7. package/templates/_shared/core-principles.md +62 -0
  8. package/templates/_shared/git-workflow.md +165 -0
  9. package/templates/_shared/security-fundamentals.md +173 -0
  10. package/templates/blockchain/.cursorrules/defi-patterns.md +520 -0
  11. package/templates/blockchain/.cursorrules/gas-optimization.md +339 -0
  12. package/templates/blockchain/.cursorrules/overview.md +130 -0
  13. package/templates/blockchain/.cursorrules/security.md +318 -0
  14. package/templates/blockchain/.cursorrules/smart-contracts.md +364 -0
  15. package/templates/blockchain/.cursorrules/testing.md +415 -0
  16. package/templates/blockchain/.cursorrules/web3-integration.md +538 -0
  17. package/templates/blockchain/CLAUDE.md +389 -0
  18. package/templates/cli-tools/.cursorrules/architecture.md +412 -0
  19. package/templates/cli-tools/.cursorrules/arguments.md +406 -0
  20. package/templates/cli-tools/.cursorrules/distribution.md +546 -0
  21. package/templates/cli-tools/.cursorrules/error-handling.md +455 -0
  22. package/templates/cli-tools/.cursorrules/overview.md +136 -0
  23. package/templates/cli-tools/.cursorrules/testing.md +537 -0
  24. package/templates/cli-tools/.cursorrules/user-experience.md +545 -0
  25. package/templates/cli-tools/CLAUDE.md +356 -0
  26. package/templates/data-engineering/.cursorrules/data-modeling.md +367 -0
  27. package/templates/data-engineering/.cursorrules/data-quality.md +455 -0
  28. package/templates/data-engineering/.cursorrules/overview.md +85 -0
  29. package/templates/data-engineering/.cursorrules/performance.md +339 -0
  30. package/templates/data-engineering/.cursorrules/pipeline-design.md +280 -0
  31. package/templates/data-engineering/.cursorrules/security.md +460 -0
  32. package/templates/data-engineering/.cursorrules/testing.md +452 -0
  33. package/templates/data-engineering/CLAUDE.md +974 -0
  34. package/templates/devops-sre/.cursorrules/capacity-planning.md +653 -0
  35. package/templates/devops-sre/.cursorrules/change-management.md +584 -0
  36. package/templates/devops-sre/.cursorrules/chaos-engineering.md +651 -0
  37. package/templates/devops-sre/.cursorrules/disaster-recovery.md +641 -0
  38. package/templates/devops-sre/.cursorrules/incident-management.md +565 -0
  39. package/templates/devops-sre/.cursorrules/observability.md +714 -0
  40. package/templates/devops-sre/.cursorrules/overview.md +230 -0
  41. package/templates/devops-sre/.cursorrules/postmortems.md +588 -0
  42. package/templates/devops-sre/.cursorrules/runbooks.md +760 -0
  43. package/templates/devops-sre/.cursorrules/slo-sli.md +617 -0
  44. package/templates/devops-sre/.cursorrules/toil-reduction.md +567 -0
  45. package/templates/devops-sre/CLAUDE.md +1007 -0
  46. package/templates/documentation/.cursorrules/adr.md +277 -0
  47. package/templates/documentation/.cursorrules/api-documentation.md +411 -0
  48. package/templates/documentation/.cursorrules/code-comments.md +253 -0
  49. package/templates/documentation/.cursorrules/maintenance.md +260 -0
  50. package/templates/documentation/.cursorrules/overview.md +82 -0
  51. package/templates/documentation/.cursorrules/readme-standards.md +306 -0
  52. package/templates/documentation/CLAUDE.md +120 -0
  53. package/templates/fullstack/.cursorrules/api-contracts.md +331 -0
  54. package/templates/fullstack/.cursorrules/architecture.md +298 -0
  55. package/templates/fullstack/.cursorrules/overview.md +109 -0
  56. package/templates/fullstack/.cursorrules/shared-types.md +348 -0
  57. package/templates/fullstack/.cursorrules/testing.md +386 -0
  58. package/templates/fullstack/CLAUDE.md +349 -0
  59. package/templates/ml-ai/.cursorrules/data-engineering.md +483 -0
  60. package/templates/ml-ai/.cursorrules/deployment.md +601 -0
  61. package/templates/ml-ai/.cursorrules/model-development.md +538 -0
  62. package/templates/ml-ai/.cursorrules/monitoring.md +658 -0
  63. package/templates/ml-ai/.cursorrules/overview.md +131 -0
  64. package/templates/ml-ai/.cursorrules/security.md +637 -0
  65. package/templates/ml-ai/.cursorrules/testing.md +678 -0
  66. package/templates/ml-ai/CLAUDE.md +1136 -0
  67. package/templates/mobile/.cursorrules/navigation.md +246 -0
  68. package/templates/mobile/.cursorrules/offline-first.md +302 -0
  69. package/templates/mobile/.cursorrules/overview.md +71 -0
  70. package/templates/mobile/.cursorrules/performance.md +345 -0
  71. package/templates/mobile/.cursorrules/testing.md +339 -0
  72. package/templates/mobile/CLAUDE.md +233 -0
  73. package/templates/platform-engineering/.cursorrules/ci-cd.md +778 -0
  74. package/templates/platform-engineering/.cursorrules/developer-experience.md +632 -0
  75. package/templates/platform-engineering/.cursorrules/infrastructure-as-code.md +600 -0
  76. package/templates/platform-engineering/.cursorrules/kubernetes.md +710 -0
  77. package/templates/platform-engineering/.cursorrules/observability.md +747 -0
  78. package/templates/platform-engineering/.cursorrules/overview.md +215 -0
  79. package/templates/platform-engineering/.cursorrules/security.md +855 -0
  80. package/templates/platform-engineering/.cursorrules/testing.md +878 -0
  81. package/templates/platform-engineering/CLAUDE.md +850 -0
  82. package/templates/utility-agent/.cursorrules/action-control.md +284 -0
  83. package/templates/utility-agent/.cursorrules/context-management.md +186 -0
  84. package/templates/utility-agent/.cursorrules/hallucination-prevention.md +253 -0
  85. package/templates/utility-agent/.cursorrules/overview.md +78 -0
  86. package/templates/utility-agent/.cursorrules/token-optimization.md +369 -0
  87. package/templates/utility-agent/CLAUDE.md +513 -0
  88. package/templates/web-backend/.cursorrules/api-design.md +255 -0
  89. package/templates/web-backend/.cursorrules/authentication.md +309 -0
  90. package/templates/web-backend/.cursorrules/database-patterns.md +298 -0
  91. package/templates/web-backend/.cursorrules/error-handling.md +366 -0
  92. package/templates/web-backend/.cursorrules/overview.md +69 -0
  93. package/templates/web-backend/.cursorrules/security.md +358 -0
  94. package/templates/web-backend/.cursorrules/testing.md +395 -0
  95. package/templates/web-backend/CLAUDE.md +366 -0
  96. package/templates/web-frontend/.cursorrules/accessibility.md +296 -0
  97. package/templates/web-frontend/.cursorrules/component-patterns.md +204 -0
  98. package/templates/web-frontend/.cursorrules/overview.md +72 -0
  99. package/templates/web-frontend/.cursorrules/performance.md +325 -0
  100. package/templates/web-frontend/.cursorrules/state-management.md +227 -0
  101. package/templates/web-frontend/.cursorrules/styling.md +271 -0
  102. package/templates/web-frontend/.cursorrules/testing.md +311 -0
  103. package/templates/web-frontend/CLAUDE.md +399 -0
package/templates/ml-ai/.cursorrules/data-engineering.md
@@ -0,0 +1,483 @@
# Data Engineering for ML

Guidelines for data pipelines, validation, feature engineering, and ensuring data quality throughout the ML lifecycle.

## Data Validation

### Schema Validation

Define explicit schemas for all data:

```python
import pandas as pd
import pandera as pa
from pandera.typing import Series, DataFrame

class TrainingDataSchema(pa.DataFrameModel):
    """Schema for training data validation."""

    user_id: Series[str] = pa.Field(nullable=False)
    timestamp: Series[pa.DateTime] = pa.Field(nullable=False)
    feature_numeric: Series[float] = pa.Field(ge=0, le=1)
    feature_categorical: Series[str] = pa.Field(isin=["A", "B", "C"])
    label: Series[int] = pa.Field(isin=[0, 1])

    class Config:
        strict = True  # Reject extra columns
        coerce = True  # Auto-convert types

@pa.check_types
def load_training_data(path: str) -> DataFrame[TrainingDataSchema]:
    """Load and validate training data."""
    df = pd.read_parquet(path)
    return df  # Validated against the schema by @pa.check_types
```
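
A minimal usage sketch (the file path is hypothetical): with `@pa.check_types`, validation failures surface as `pandera` exceptions, so callers can fail fast at load time.

```python
import pandera as pa

try:
    df = load_training_data("data/training/2024-01-01.parquet")  # hypothetical path
except pa.errors.SchemaError as exc:
    # The exception carries the failing check and the offending values
    print(f"Training data failed validation: {exc}")
    raise
```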

### Data Quality Checks

Use Great Expectations for comprehensive quality checks:

```python
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

def create_quality_suite() -> ExpectationSuite:
    suite = ExpectationSuite("training_data_quality")

    # Completeness
    suite.add_expectation(ExpectationConfiguration(
        expectation_type="expect_column_values_to_not_be_null",
        kwargs={"column": "user_id"},
    ))

    # Uniqueness
    suite.add_expectation(ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_unique",
        kwargs={"column": "transaction_id"},
    ))

    # Freshness
    suite.add_expectation(ExpectationConfiguration(
        expectation_type="expect_column_max_to_be_between",
        kwargs={
            "column": "timestamp",
            "min_value": "{{ yesterday }}",
            "parse_strings_as_datetimes": True,
        },
    ))

    # Distribution stability
    suite.add_expectation(ExpectationConfiguration(
        expectation_type="expect_column_mean_to_be_between",
        kwargs={"column": "amount", "min_value": 90, "max_value": 110},
    ))

    # Referential integrity (VALID_COUNTRIES is defined elsewhere)
    suite.add_expectation(ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "country_code", "value_set": VALID_COUNTRIES},
    ))

    return suite
```

### Validation Pipeline Integration

```python
import great_expectations as ge
import pandas as pd
from prefect import flow, task

class DataQualityError(Exception):
    """Raised when a dataset fails its expectation suite."""

@task
def validate_data(df: pd.DataFrame, suite_name: str):
    """Validate data against expectations."""
    context = ge.get_context()
    suite = context.get_expectation_suite(suite_name)

    result = context.run_validation_operator(
        "action_list_operator",
        assets_to_validate=[df],
        expectation_suite=suite,
    )

    if not result.success:
        failed_expectations = [
            exp for exp in result.results if not exp.success
        ]
        raise DataQualityError(f"Validation failed: {failed_expectations}")

    return result

@flow
def data_ingestion_pipeline(source: str):
    # extract_data, transform_data, and load_data are pipeline tasks defined elsewhere
    raw_data = extract_data(source)
    validate_data(raw_data, "raw_data_quality")

    processed_data = transform_data(raw_data)
    validate_data(processed_data, "processed_data_quality")

    load_data(processed_data)
```

## Feature Engineering

### Feature Store Integration

```python
from datetime import timedelta

import pandas as pd
from feast import FeatureStore, Entity, FeatureView, Field
from feast.types import Float32, Int64

# Define entities
user = Entity(name="user", join_keys=["user_id"], description="User entity")

# Define feature view (user_behavior_source is a Feast data source defined elsewhere)
user_behavior_features = FeatureView(
    name="user_behavior_features",
    entities=[user],
    schema=[
        Field(name="session_count_7d", dtype=Int64),
        Field(name="avg_session_duration_7d", dtype=Float32),
        Field(name="purchase_count_30d", dtype=Int64),
        Field(name="total_spend_30d", dtype=Float32),
    ],
    online=True,  # Enable online serving
    source=user_behavior_source,
    ttl=timedelta(days=1),
    tags={"team": "ml", "feature_group": "user_behavior"},
)

# Retrieve features for training
def get_training_features(entity_df: pd.DataFrame) -> pd.DataFrame:
    """Get historical features for training."""
    store = FeatureStore(repo_path="feature_repo/")

    training_df = store.get_historical_features(
        entity_df=entity_df,
        features=[
            "user_behavior_features:session_count_7d",
            "user_behavior_features:avg_session_duration_7d",
            "user_behavior_features:purchase_count_30d",
            "user_behavior_features:total_spend_30d",
        ],
    ).to_df()

    return training_df

# Retrieve features for online inference
def get_online_features(user_ids: list[str]) -> dict:
    """Get features for real-time inference."""
    store = FeatureStore(repo_path="feature_repo/")

    entity_rows = [{"user_id": uid} for uid in user_ids]

    features = store.get_online_features(
        features=[
            "user_behavior_features:session_count_7d",
            "user_behavior_features:avg_session_duration_7d",
        ],
        entity_rows=entity_rows,
    ).to_dict()

    return features
```

### Feature Transformations

Ensure identical transforms for training and serving:

```python
from dataclasses import dataclass, field

import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

@dataclass
class FeatureConfig:
    """Columns handled by the transformer."""
    numeric_cols: list[str] = field(default_factory=list)
    categorical_cols: list[str] = field(default_factory=list)

class FeatureTransformer:
    """Serializable feature transformer for training/serving parity."""

    def __init__(self, config: FeatureConfig):
        self.config = config
        self.numeric_transformer = StandardScaler()
        self.categorical_encoders: dict[str, LabelEncoder] = {}
        self._fitted = False

    def fit(self, df: pd.DataFrame) -> "FeatureTransformer":
        """Fit transformer on training data."""
        # Fit numeric scaler
        if self.config.numeric_cols:
            self.numeric_transformer.fit(df[self.config.numeric_cols])

        # Fit categorical encoders; include sentinel labels so missing
        # and unseen categories can still be encoded at serving time
        for col in self.config.categorical_cols:
            encoder = LabelEncoder()
            values = df[col].fillna("__MISSING__").tolist()
            encoder.fit(values + ["__MISSING__", "__UNKNOWN__"])
            self.categorical_encoders[col] = encoder

        self._fitted = True
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform features using fitted parameters."""
        if not self._fitted:
            raise ValueError("Transformer not fitted")

        result = df.copy()

        # Transform numeric
        if self.config.numeric_cols:
            result[self.config.numeric_cols] = self.numeric_transformer.transform(
                result[self.config.numeric_cols]
            )

        # Transform categorical
        for col, encoder in self.categorical_encoders.items():
            # Map missing and unseen categories to the fitted sentinels
            result[col] = result[col].fillna("__MISSING__")
            result[col] = result[col].apply(
                lambda x: x if x in encoder.classes_ else "__UNKNOWN__"
            )
            result[col] = encoder.transform(result[col])

        return result

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fit and transform in one step."""
        return self.fit(df).transform(df)

    def save(self, path: str) -> None:
        """Serialize transformer."""
        joblib.dump({
            "config": self.config,
            "numeric_transformer": self.numeric_transformer,
            "categorical_encoders": self.categorical_encoders,
        }, path)

    @classmethod
    def load(cls, path: str) -> "FeatureTransformer":
        """Load serialized transformer."""
        data = joblib.load(path)
        transformer = cls(data["config"])
        transformer.numeric_transformer = data["numeric_transformer"]
        transformer.categorical_encoders = data["categorical_encoders"]
        transformer._fitted = True
        return transformer
```
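
A short usage sketch (paths and DataFrames are hypothetical): fit the transformer once during training, serialize the artifact next to the model, and load the identical artifact in the serving process rather than re-fitting.

```python
# Training time: fit on the training split and persist the artifact
config = FeatureConfig(numeric_cols=["amount"], categorical_cols=["country_code"])
transformer = FeatureTransformer(config)
train_features = transformer.fit_transform(train_df)  # train_df loaded elsewhere
transformer.save("artifacts/feature_transformer.joblib")

# Serving time: load the same artifact, never re-fit
serving_transformer = FeatureTransformer.load("artifacts/feature_transformer.joblib")
request_features = serving_transformer.transform(request_df)  # request_df from the serving request
```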

## Data Versioning

### DVC for Data Version Control

```yaml
# dvc.yaml
stages:
  prepare_data:
    cmd: python src/data/prepare.py
    deps:
      - src/data/prepare.py
      - data/raw/
    outs:
      - data/processed/
    params:
      - prepare.split_ratio
      - prepare.random_seed

  extract_features:
    cmd: python src/features/extract.py
    deps:
      - src/features/extract.py
      - data/processed/
    outs:
      - data/features/
    params:
      - features.window_size
      - features.aggregations
```

```bash
# Track data versions
dvc add data/raw/dataset_v1.parquet
git add data/raw/dataset_v1.parquet.dvc
git commit -m "data: add dataset v1"

# Switch between versions
git checkout v1.0
dvc checkout
```
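
Pipelines can also pin a dataset revision programmatically via `dvc.api`; a minimal sketch, assuming the parquet file above is DVC-tracked and `v1.0` is a git tag:

```python
import io

import dvc.api
import pandas as pd

# Read the dataset exactly as it existed at tag v1.0
with dvc.api.open("data/raw/dataset_v1.parquet", rev="v1.0", mode="rb") as f:
    df = pd.read_parquet(io.BytesIO(f.read()))
```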

### Data Lineage Tracking

```python
import hashlib
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

import pandas as pd

@dataclass
class DataLineage:
    """Track data provenance."""
    source: str
    version: str
    created_at: datetime
    row_count: int
    column_count: int
    checksum: str
    schema_version: str
    transformations: list[str]
    parent_lineage: Optional["DataLineage"] = None

    @classmethod
    def from_dataframe(
        cls,
        df: pd.DataFrame,
        source: str,
        version: str,
        transformations: Optional[list[str]] = None,
        parent: Optional["DataLineage"] = None,
    ) -> "DataLineage":
        """Create lineage record from DataFrame."""
        checksum = hashlib.sha256(
            pd.util.hash_pandas_object(df).values
        ).hexdigest()

        return cls(
            source=source,
            version=version,
            created_at=datetime.utcnow(),
            row_count=len(df),
            column_count=len(df.columns),
            checksum=checksum,
            schema_version=get_schema_version(df),  # project-specific helper
            transformations=transformations or [],
            parent_lineage=parent,
        )

    def to_dict(self) -> dict:
        """Serialize for logging."""
        return {
            "source": self.source,
            "version": self.version,
            "created_at": self.created_at.isoformat(),
            "row_count": self.row_count,
            "column_count": self.column_count,
            "checksum": self.checksum,
            "schema_version": self.schema_version,
            "transformations": self.transformations,
            "parent_checksum": self.parent_lineage.checksum if self.parent_lineage else None,
        }
```
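
A brief usage sketch (`raw_df` and `build_features` are hypothetical): each transformation step records a child lineage pointing at its parent, so a training set can be traced back to the raw extract.

```python
raw_lineage = DataLineage.from_dataframe(
    raw_df, source="s3://raw/events", version="2024-01-01"
)

features_df = build_features(raw_df)  # hypothetical transform
feature_lineage = DataLineage.from_dataframe(
    features_df,
    source="feature_pipeline",
    version="2024-01-01",
    transformations=["fill_missing", "aggregate_7d"],
    parent=raw_lineage,
)

# Log alongside the dataset/model artifact
print(feature_lineage.to_dict())
```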

## Training/Serving Skew Prevention

### Common Causes

| Skew Type | Cause | Prevention |
|-----------|-------|------------|
| Data Processing | Different preprocessing code | Serialize transformers |
| Feature Computation | Time-dependent features computed differently | Use feature store timestamps |
| Data Distribution | Training on old data, serving on new | Monitor drift continuously |
| Missing Values | Different imputation strategies | Document and version imputation |

### Prevention Strategies

```python
import joblib
import pandas as pd

# 1. Single source of truth for transforms
class FeaturePipeline:
    """Unified pipeline for training and serving."""

    def __init__(self, config_path: str):
        self.config = load_config(config_path)
        self.transformer = None

    def fit(self, df: pd.DataFrame) -> None:
        """Fit on training data."""
        self.transformer = FeatureTransformer(self.config)
        self.transformer.fit(df)

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform for both training and serving."""
        return self.transformer.transform(df)

    def save(self, path: str) -> None:
        """Save entire pipeline."""
        joblib.dump({
            "config": self.config,
            "transformer": self.transformer,
        }, path)

    @classmethod
    def load(cls, path: str) -> "FeaturePipeline":
        """Load a saved pipeline (used by the parity test below)."""
        data = joblib.load(path)
        pipeline = cls.__new__(cls)  # skip __init__; config comes from the artifact
        pipeline.config = data["config"]
        pipeline.transformer = data["transformer"]
        return pipeline

# 2. Test for skew
def test_training_serving_parity():
    """Verify training and serving produce identical features."""
    training_pipeline = FeaturePipeline.load("training_pipeline.pkl")
    serving_pipeline = FeaturePipeline.load("serving_pipeline.pkl")

    test_data = load_test_data()

    training_features = training_pipeline.transform(test_data)
    serving_features = serving_pipeline.transform(test_data)

    pd.testing.assert_frame_equal(training_features, serving_features)
```

## Data Pipeline Patterns

### Idempotent Pipelines

```python
@task
def process_partition(date: str, force: bool = False) -> str:
    """Process a single date partition idempotently."""
    output_path = f"s3://processed/{date}/data.parquet"

    # Check if already processed (file_exists, get_checksum, and rename
    # are storage-layer helpers defined elsewhere)
    if not force and file_exists(output_path):
        checksum = get_checksum(output_path)
        logger.info(f"Partition {date} already exists: {checksum}")
        return output_path

    # Process
    raw_data = load_raw_data(date)
    processed = transform(raw_data)

    # Write atomically (write to temp, then rename)
    temp_path = f"{output_path}.tmp"
    processed.to_parquet(temp_path)
    rename(temp_path, output_path)

    return output_path
```

### Backfill Strategy

```python
import pandas as pd
from more_itertools import chunked  # any batching helper works here

@flow
def backfill_features(start_date: str, end_date: str, parallelism: int = 4):
    """Backfill features for a date range."""
    dates = pd.date_range(start_date, end_date, freq="D")

    # Process in parallel batches
    for batch in chunked(dates, parallelism):
        futures = [
            process_partition.submit(date.strftime("%Y-%m-%d"))
            for date in batch
        ]

        # Wait for batch to complete
        results = [f.result() for f in futures]

        # Validate results (validate_partition defined elsewhere)
        for date, result in zip(batch, results):
            validate_partition(result)
```

## Best Practices

### Do

- Define explicit schemas for all data
- Validate data at pipeline boundaries
- Version datasets alongside code
- Serialize feature transformers
- Test for training/serving skew
- Monitor data distributions continuously (see the drift-check sketch below)
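
A minimal drift-check sketch using a two-sample Kolmogorov-Smirnov test; the column name, threshold, and `alert` hook are illustrative:

```python
import pandas as pd
from scipy import stats

def check_drift(train: pd.Series, live: pd.Series, alpha: float = 0.01) -> bool:
    """Return True when the live distribution has drifted from training."""
    statistic, p_value = stats.ks_2samp(train, live)
    return p_value < alpha

if check_drift(train_df["amount"], live_df["amount"]):
    alert("amount distribution drifted; consider retraining")  # hypothetical alert hook
```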

### Don't

- Trust external data without validation
- Hardcode feature engineering parameters
- Use different preprocessing in training vs serving
- Ignore data freshness requirements
- Skip data quality checks in production
- Assume data distributions are stable