omgkit 2.13.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/README.md +129 -10
  2. package/package.json +2 -2
  3. package/plugin/agents/api-designer.md +5 -0
  4. package/plugin/agents/architect.md +8 -0
  5. package/plugin/agents/brainstormer.md +4 -0
  6. package/plugin/agents/cicd-manager.md +6 -0
  7. package/plugin/agents/code-reviewer.md +6 -0
  8. package/plugin/agents/copywriter.md +2 -0
  9. package/plugin/agents/data-engineer.md +255 -0
  10. package/plugin/agents/database-admin.md +10 -0
  11. package/plugin/agents/debugger.md +10 -0
  12. package/plugin/agents/devsecops.md +314 -0
  13. package/plugin/agents/docs-manager.md +4 -0
  14. package/plugin/agents/domain-decomposer.md +181 -0
  15. package/plugin/agents/embedded-systems.md +397 -0
  16. package/plugin/agents/fullstack-developer.md +12 -0
  17. package/plugin/agents/game-systems-designer.md +375 -0
  18. package/plugin/agents/git-manager.md +10 -0
  19. package/plugin/agents/journal-writer.md +2 -0
  20. package/plugin/agents/ml-engineer.md +284 -0
  21. package/plugin/agents/observability-engineer.md +353 -0
  22. package/plugin/agents/oracle.md +9 -0
  23. package/plugin/agents/performance-engineer.md +290 -0
  24. package/plugin/agents/pipeline-architect.md +6 -0
  25. package/plugin/agents/planner.md +12 -0
  26. package/plugin/agents/platform-engineer.md +325 -0
  27. package/plugin/agents/project-manager.md +3 -0
  28. package/plugin/agents/researcher.md +5 -0
  29. package/plugin/agents/scientific-computing.md +426 -0
  30. package/plugin/agents/scout.md +3 -0
  31. package/plugin/agents/security-auditor.md +7 -0
  32. package/plugin/agents/sprint-master.md +17 -0
  33. package/plugin/agents/tester.md +10 -0
  34. package/plugin/agents/ui-ux-designer.md +12 -0
  35. package/plugin/agents/vulnerability-scanner.md +6 -0
  36. package/plugin/commands/data/pipeline.md +47 -0
  37. package/plugin/commands/data/quality.md +49 -0
  38. package/plugin/commands/domain/analyze.md +34 -0
  39. package/plugin/commands/domain/map.md +41 -0
  40. package/plugin/commands/game/balance.md +56 -0
  41. package/plugin/commands/game/optimize.md +62 -0
  42. package/plugin/commands/iot/provision.md +58 -0
  43. package/plugin/commands/ml/evaluate.md +47 -0
  44. package/plugin/commands/ml/train.md +48 -0
  45. package/plugin/commands/perf/benchmark.md +54 -0
  46. package/plugin/commands/perf/profile.md +49 -0
  47. package/plugin/commands/platform/blueprint.md +56 -0
  48. package/plugin/commands/security/audit.md +54 -0
  49. package/plugin/commands/security/scan.md +55 -0
  50. package/plugin/commands/sre/dashboard.md +53 -0
  51. package/plugin/registry.yaml +787 -0
  52. package/plugin/skills/ai-ml/experiment-tracking/SKILL.md +338 -0
  53. package/plugin/skills/ai-ml/feature-stores/SKILL.md +340 -0
  54. package/plugin/skills/ai-ml/llm-ops/SKILL.md +454 -0
  55. package/plugin/skills/ai-ml/ml-pipelines/SKILL.md +390 -0
  56. package/plugin/skills/ai-ml/model-monitoring/SKILL.md +398 -0
  57. package/plugin/skills/ai-ml/model-serving/SKILL.md +386 -0
  58. package/plugin/skills/event-driven/cqrs-patterns/SKILL.md +348 -0
  59. package/plugin/skills/event-driven/event-sourcing/SKILL.md +334 -0
  60. package/plugin/skills/event-driven/kafka-deep/SKILL.md +252 -0
  61. package/plugin/skills/event-driven/saga-orchestration/SKILL.md +335 -0
  62. package/plugin/skills/event-driven/schema-registry/SKILL.md +328 -0
  63. package/plugin/skills/event-driven/stream-processing/SKILL.md +313 -0
  64. package/plugin/skills/game/game-audio/SKILL.md +446 -0
  65. package/plugin/skills/game/game-networking/SKILL.md +490 -0
  66. package/plugin/skills/game/godot-patterns/SKILL.md +413 -0
  67. package/plugin/skills/game/shader-programming/SKILL.md +492 -0
  68. package/plugin/skills/game/unity-patterns/SKILL.md +488 -0
  69. package/plugin/skills/iot/device-provisioning/SKILL.md +405 -0
  70. package/plugin/skills/iot/edge-computing/SKILL.md +369 -0
  71. package/plugin/skills/iot/industrial-protocols/SKILL.md +438 -0
  72. package/plugin/skills/iot/mqtt-deep/SKILL.md +418 -0
  73. package/plugin/skills/iot/ota-updates/SKILL.md +426 -0
  74. package/plugin/skills/microservices/api-gateway-patterns/SKILL.md +201 -0
  75. package/plugin/skills/microservices/circuit-breaker-patterns/SKILL.md +246 -0
  76. package/plugin/skills/microservices/contract-testing/SKILL.md +284 -0
  77. package/plugin/skills/microservices/distributed-tracing/SKILL.md +246 -0
  78. package/plugin/skills/microservices/service-discovery/SKILL.md +304 -0
  79. package/plugin/skills/microservices/service-mesh/SKILL.md +181 -0
  80. package/plugin/skills/mobile-advanced/mobile-ci-cd/SKILL.md +407 -0
  81. package/plugin/skills/mobile-advanced/mobile-security/SKILL.md +403 -0
  82. package/plugin/skills/mobile-advanced/offline-first/SKILL.md +473 -0
  83. package/plugin/skills/mobile-advanced/push-notifications/SKILL.md +494 -0
  84. package/plugin/skills/mobile-advanced/react-native-deep/SKILL.md +374 -0
  85. package/plugin/skills/simulation/numerical-methods/SKILL.md +434 -0
  86. package/plugin/skills/simulation/parallel-computing/SKILL.md +382 -0
  87. package/plugin/skills/simulation/physics-engines/SKILL.md +377 -0
  88. package/plugin/skills/simulation/validation-verification/SKILL.md +479 -0
  89. package/plugin/skills/simulation/visualization-scientific/SKILL.md +365 -0
  90. package/plugin/stdrules/ALIGNMENT_PRINCIPLE.md +240 -0
  91. package/plugin/workflows/ai-engineering/agent-development.md +3 -3
  92. package/plugin/workflows/ai-engineering/fine-tuning.md +3 -3
  93. package/plugin/workflows/ai-engineering/model-evaluation.md +3 -3
  94. package/plugin/workflows/ai-engineering/prompt-engineering.md +2 -2
  95. package/plugin/workflows/ai-engineering/rag-development.md +4 -4
  96. package/plugin/workflows/ai-ml/data-pipeline.md +188 -0
  97. package/plugin/workflows/ai-ml/experiment-cycle.md +203 -0
  98. package/plugin/workflows/ai-ml/feature-engineering.md +208 -0
  99. package/plugin/workflows/ai-ml/model-deployment.md +199 -0
  100. package/plugin/workflows/ai-ml/monitoring-setup.md +227 -0
  101. package/plugin/workflows/api/api-design.md +1 -1
  102. package/plugin/workflows/api/api-testing.md +2 -2
  103. package/plugin/workflows/content/technical-docs.md +1 -1
  104. package/plugin/workflows/database/migration.md +1 -1
  105. package/plugin/workflows/database/optimization.md +1 -1
  106. package/plugin/workflows/database/schema-design.md +3 -3
  107. package/plugin/workflows/development/bug-fix.md +3 -3
  108. package/plugin/workflows/development/code-review.md +2 -1
  109. package/plugin/workflows/development/feature.md +3 -3
  110. package/plugin/workflows/development/refactor.md +2 -2
  111. package/plugin/workflows/event-driven/consumer-groups.md +190 -0
  112. package/plugin/workflows/event-driven/event-storming.md +172 -0
  113. package/plugin/workflows/event-driven/replay-testing.md +186 -0
  114. package/plugin/workflows/event-driven/saga-implementation.md +206 -0
  115. package/plugin/workflows/event-driven/schema-evolution.md +173 -0
  116. package/plugin/workflows/fullstack/authentication.md +4 -4
  117. package/plugin/workflows/fullstack/full-feature.md +4 -4
  118. package/plugin/workflows/game-dev/content-pipeline.md +218 -0
  119. package/plugin/workflows/game-dev/platform-submission.md +263 -0
  120. package/plugin/workflows/game-dev/playtesting.md +237 -0
  121. package/plugin/workflows/game-dev/prototype-to-production.md +205 -0
  122. package/plugin/workflows/microservices/contract-first.md +151 -0
  123. package/plugin/workflows/microservices/distributed-tracing.md +166 -0
  124. package/plugin/workflows/microservices/domain-decomposition.md +123 -0
  125. package/plugin/workflows/microservices/integration-testing.md +149 -0
  126. package/plugin/workflows/microservices/service-mesh-setup.md +153 -0
  127. package/plugin/workflows/microservices/service-scaffolding.md +151 -0
  128. package/plugin/workflows/omega/1000x-innovation.md +2 -2
  129. package/plugin/workflows/omega/100x-architecture.md +2 -2
  130. package/plugin/workflows/omega/10x-improvement.md +2 -2
  131. package/plugin/workflows/quality/performance-optimization.md +2 -2
  132. package/plugin/workflows/research/best-practices.md +1 -1
  133. package/plugin/workflows/research/technology-research.md +1 -1
  134. package/plugin/workflows/security/penetration-testing.md +3 -3
  135. package/plugin/workflows/security/security-audit.md +3 -3
  136. package/plugin/workflows/sprint/sprint-execution.md +2 -2
  137. package/plugin/workflows/sprint/sprint-retrospective.md +1 -1
  138. package/plugin/workflows/sprint/sprint-setup.md +1 -1
@@ -0,0 +1,390 @@
1
+ # ML Pipelines
2
+
3
+ Kubeflow, Airflow ML, MLflow Pipelines, end-to-end workflow orchestration, and CI/CD for ML.
4
+
5
+ ## Overview
6
+
7
+ ML pipelines orchestrate the end-to-end machine learning workflow from data ingestion to model deployment, ensuring reproducibility and automation.
8
+
9
+ ## Core Concepts
10
+
11
+ ### Pipeline Components
12
+ - **Data Ingestion**: Load and validate data
13
+ - **Data Processing**: Transform and feature engineering
14
+ - **Training**: Model training and hyperparameter tuning
15
+ - **Evaluation**: Model validation and metrics
16
+ - **Deployment**: Model serving and monitoring
17
+
18
+ ### Pipeline Properties
19
+ - **Reproducibility**: Same inputs → same outputs
20
+ - **Versioning**: Track data, code, models
21
+ - **Orchestration**: Dependency management
22
+ - **Scalability**: Distributed execution
23
+
24
+ ## Kubeflow Pipelines
25
+
26
+ ### Pipeline Definition
27
+ ```python
28
+ from kfp import dsl
29
+ from kfp.dsl import Input, Output, Dataset, Model, Metrics
30
+
31
+ @dsl.component(base_image="python:3.10")
32
+ def load_data(
33
+ data_path: str,
34
+ output_data: Output[Dataset]
35
+ ):
36
+ import pandas as pd
37
+
38
+ df = pd.read_parquet(data_path)
39
+ df.to_parquet(output_data.path)
40
+
41
+ @dsl.component(base_image="python:3.10-slim")
42
+ def preprocess_data(
43
+ input_data: Input[Dataset],
44
+ output_data: Output[Dataset],
45
+ test_size: float = 0.2
46
+ ):
47
+ import pandas as pd
48
+ from sklearn.model_selection import train_test_split
49
+ from sklearn.preprocessing import StandardScaler
50
+
51
+ df = pd.read_parquet(input_data.path)
52
+
53
+ X = df.drop("target", axis=1)
54
+ y = df["target"]
55
+
56
+ X_train, X_test, y_train, y_test = train_test_split(
57
+ X, y, test_size=test_size, random_state=42
58
+ )
59
+
60
+ scaler = StandardScaler()
61
+ X_train_scaled = scaler.fit_transform(X_train)
62
+ X_test_scaled = scaler.transform(X_test)
63
+
64
+ # Save processed data
65
+ result = {
66
+ "X_train": X_train_scaled.tolist(),
67
+ "X_test": X_test_scaled.tolist(),
68
+ "y_train": y_train.tolist(),
69
+ "y_test": y_test.tolist()
70
+ }
71
+ pd.DataFrame(result).to_parquet(output_data.path)
72
+
73
+ @dsl.component(
74
+ base_image="python:3.10",
75
+ packages_to_install=["scikit-learn", "xgboost"]
76
+ )
77
+ def train_model(
78
+ input_data: Input[Dataset],
79
+ model_output: Output[Model],
80
+ metrics_output: Output[Metrics],
81
+ n_estimators: int = 100,
82
+ max_depth: int = 6
83
+ ):
84
+ import pandas as pd
85
+ import xgboost as xgb
86
+ from sklearn.metrics import accuracy_score, f1_score
87
+ import joblib
88
+
89
+ data = pd.read_parquet(input_data.path)
90
+
91
+ model = xgb.XGBClassifier(
92
+ n_estimators=n_estimators,
93
+ max_depth=max_depth,
94
+ random_state=42
95
+ )
96
+
97
+ model.fit(data["X_train"], data["y_train"])
98
+
99
+ predictions = model.predict(data["X_test"])
100
+ accuracy = accuracy_score(data["y_test"], predictions)
101
+ f1 = f1_score(data["y_test"], predictions, average="weighted")
102
+
103
+ # Log metrics
104
+ metrics_output.log_metric("accuracy", accuracy)
105
+ metrics_output.log_metric("f1_score", f1)
106
+
107
+ # Save model
108
+ joblib.dump(model, model_output.path)
109
+
110
+ @dsl.pipeline(
111
+ name="ML Training Pipeline",
112
+ description="End-to-end ML training pipeline"
113
+ )
114
+ def ml_pipeline(
115
+ data_path: str,
116
+ n_estimators: int = 100,
117
+ max_depth: int = 6
118
+ ):
119
+ load_task = load_data(data_path=data_path)
120
+
121
+ preprocess_task = preprocess_data(
122
+ input_data=load_task.outputs["output_data"]
123
+ )
124
+
125
+ train_task = train_model(
126
+ input_data=preprocess_task.outputs["output_data"],
127
+ n_estimators=n_estimators,
128
+ max_depth=max_depth
129
+ )
130
+ ```
131
+
132
+ ### Pipeline Compilation and Execution
133
+ ```python
134
+ from kfp import compiler
135
+ from kfp.client import Client
136
+
137
+ # Compile pipeline
138
+ compiler.Compiler().compile(
139
+ pipeline_func=ml_pipeline,
140
+ package_path="ml_pipeline.yaml"
141
+ )
142
+
143
+ # Submit to Kubeflow
144
+ client = Client(host="https://kubeflow.example.com")
145
+
146
+ run = client.create_run_from_pipeline_func(
147
+ ml_pipeline,
148
+ arguments={
149
+ "data_path": "s3://bucket/data.parquet",
150
+ "n_estimators": 200,
151
+ "max_depth": 8
152
+ },
153
+ experiment_name="ml-experiments"
154
+ )
155
+ ```
156
+
157
+ ## Apache Airflow ML
158
+
159
+ ### DAG Definition
160
+ ```python
161
+ from airflow import DAG
162
+ from airflow.operators.python import PythonOperator
163
+ from airflow.providers.amazon.aws.operators.s3 import S3Hook
164
+ from datetime import datetime, timedelta
165
+
166
+ default_args = {
167
+ "owner": "ml-team",
168
+ "depends_on_past": False,
169
+ "email_on_failure": True,
170
+ "email": ["ml-team@example.com"],
171
+ "retries": 3,
172
+ "retry_delay": timedelta(minutes=5)
173
+ }
174
+
175
+ def extract_data(**context):
176
+ # Extract data from source
177
+ s3 = S3Hook(aws_conn_id="aws_default")
178
+ data = s3.read_key("raw-data/latest.parquet", bucket_name="data-bucket")
179
+ context["ti"].xcom_push(key="raw_data_path", value=data)
180
+
181
+ def transform_data(**context):
182
+ import pandas as pd
183
+ from sklearn.preprocessing import StandardScaler
184
+
185
+ raw_path = context["ti"].xcom_pull(key="raw_data_path")
186
+ df = pd.read_parquet(raw_path)
187
+
188
+ # Feature engineering
189
+ scaler = StandardScaler()
190
+ df_scaled = scaler.fit_transform(df)
191
+
192
+ output_path = f"s3://data-bucket/processed/{context['ds']}/data.parquet"
193
+ pd.DataFrame(df_scaled).to_parquet(output_path)
194
+
195
+ context["ti"].xcom_push(key="processed_data_path", value=output_path)
196
+
197
+ def train_model(**context):
198
+ import mlflow
199
+ from sklearn.ensemble import RandomForestClassifier
200
+
201
+ data_path = context["ti"].xcom_pull(key="processed_data_path")
202
+
203
+ with mlflow.start_run():
204
+ model = RandomForestClassifier(n_estimators=100)
205
+ model.fit(X_train, y_train)
206
+
207
+ mlflow.sklearn.log_model(model, "model")
208
+ mlflow.log_metric("accuracy", accuracy)
209
+
210
+ def evaluate_model(**context):
211
+ # Evaluate and decide on deployment
212
+ metrics = get_model_metrics()
213
+ if metrics["accuracy"] > 0.85:
214
+ context["ti"].xcom_push(key="deploy", value=True)
215
+ else:
216
+ context["ti"].xcom_push(key="deploy", value=False)
217
+
218
+ def deploy_model(**context):
219
+ should_deploy = context["ti"].xcom_pull(key="deploy")
220
+ if should_deploy:
221
+ # Deploy to serving infrastructure
222
+ deploy_to_kubernetes()
223
+
224
+ with DAG(
225
+ "ml_training_pipeline",
226
+ default_args=default_args,
227
+ description="ML Training Pipeline",
228
+ schedule_interval="0 2 * * *", # Daily at 2 AM
229
+ start_date=datetime(2024, 1, 1),
230
+ catchup=False,
231
+ tags=["ml", "training"]
232
+ ) as dag:
233
+
234
+ extract = PythonOperator(
235
+ task_id="extract_data",
236
+ python_callable=extract_data
237
+ )
238
+
239
+ transform = PythonOperator(
240
+ task_id="transform_data",
241
+ python_callable=transform_data
242
+ )
243
+
244
+ train = PythonOperator(
245
+ task_id="train_model",
246
+ python_callable=train_model
247
+ )
248
+
249
+ evaluate = PythonOperator(
250
+ task_id="evaluate_model",
251
+ python_callable=evaluate_model
252
+ )
253
+
254
+ deploy = PythonOperator(
255
+ task_id="deploy_model",
256
+ python_callable=deploy_model
257
+ )
258
+
259
+ extract >> transform >> train >> evaluate >> deploy
260
+ ```
261
+
262
+ ## CI/CD for ML
263
+
264
+ ### GitHub Actions Pipeline
265
+ ```yaml
266
+ name: ML Pipeline CI/CD
267
+
268
+ on:
269
+ push:
270
+ paths:
271
+ - 'models/**'
272
+ - 'data/**'
273
+ - 'pipelines/**'
274
+
275
+ env:
276
+ MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_URI }}
277
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
278
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
279
+
280
+ jobs:
281
+ test:
282
+ runs-on: ubuntu-latest
283
+ steps:
284
+ - uses: actions/checkout@v3
285
+
286
+ - name: Set up Python
287
+ uses: actions/setup-python@v4
288
+ with:
289
+ python-version: '3.10'
290
+
291
+ - name: Install dependencies
292
+ run: |
293
+ pip install -r requirements.txt
294
+ pip install pytest pytest-cov
295
+
296
+ - name: Run tests
297
+ run: pytest tests/ --cov=models --cov-report=xml
298
+
299
+ - name: Data validation
300
+ run: python scripts/validate_data.py
301
+
302
+ train:
303
+ needs: test
304
+ runs-on: ubuntu-latest
305
+ steps:
306
+ - uses: actions/checkout@v3
307
+
308
+ - name: Train model
309
+ run: python pipelines/train.py --experiment-name "ci-${GITHUB_SHA}"
310
+
311
+ - name: Evaluate model
312
+ id: evaluate
313
+ run: |
314
+ METRICS=$(python pipelines/evaluate.py)
315
+ echo "accuracy=$(echo $METRICS | jq .accuracy)" >> $GITHUB_OUTPUT
316
+
317
+ - name: Check quality gate
318
+ run: |
319
+ if (( $(echo "${{ steps.evaluate.outputs.accuracy }} < 0.85" | bc -l) )); then
320
+ echo "Model accuracy below threshold"
321
+ exit 1
322
+ fi
323
+
324
+ deploy:
325
+ needs: train
326
+ if: github.ref == 'refs/heads/main'
327
+ runs-on: ubuntu-latest
328
+ steps:
329
+ - name: Deploy to staging
330
+ run: |
331
+ python pipelines/deploy.py --environment staging
332
+
333
+ - name: Run integration tests
334
+ run: pytest tests/integration/
335
+
336
+ - name: Deploy to production
337
+ run: |
338
+ python pipelines/deploy.py --environment production
339
+ ```
340
+
341
+ ## Best Practices
342
+
343
+ 1. **Idempotent Steps**: Re-runnable without side effects
344
+ 2. **Data Versioning**: DVC or similar tools
345
+ 3. **Artifact Tracking**: Store all intermediate outputs
346
+ 4. **Parameterization**: Make pipelines configurable
347
+ 5. **Testing**: Unit and integration tests
348
+
349
+ ## Pipeline Patterns
350
+
351
+ ### Feature/Training/Inference Split
352
+ ```
353
+ Feature Pipeline (scheduled):
354
+ Raw Data → Feature Engineering → Feature Store
355
+
356
+ Training Pipeline (on-demand/scheduled):
357
+ Feature Store → Training → Model Registry
358
+
359
+ Inference Pipeline (real-time):
360
+ Feature Store → Model → Predictions
361
+ ```
362
+
363
+ ### Continuous Training
364
+ ```
365
+ Data Change → Trigger Pipeline → Train → Evaluate → Deploy
366
+ Model Drift → Trigger Retrain → Update Model
367
+ ```
368
+
369
+ ## Anti-Patterns
370
+
371
+ - Monolithic pipelines
372
+ - Hardcoded paths/configs
373
+ - Missing data validation
374
+ - No artifact versioning
375
+ - Skipping testing steps
376
+
377
+ ## When to Use
378
+
379
+ - Production ML systems
380
+ - Need reproducibility
381
+ - Team collaboration
382
+ - Automated retraining
383
+ - Regulatory compliance
384
+
385
+ ## When NOT to Use
386
+
387
+ - Exploratory analysis
388
+ - One-off experiments
389
+ - Simple batch jobs
390
+ - No automation needed