omgkit 2.13.0 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -10
- package/package.json +2 -2
- package/plugin/agents/api-designer.md +5 -0
- package/plugin/agents/architect.md +8 -0
- package/plugin/agents/brainstormer.md +4 -0
- package/plugin/agents/cicd-manager.md +6 -0
- package/plugin/agents/code-reviewer.md +6 -0
- package/plugin/agents/copywriter.md +2 -0
- package/plugin/agents/data-engineer.md +255 -0
- package/plugin/agents/database-admin.md +10 -0
- package/plugin/agents/debugger.md +10 -0
- package/plugin/agents/devsecops.md +314 -0
- package/plugin/agents/docs-manager.md +4 -0
- package/plugin/agents/domain-decomposer.md +181 -0
- package/plugin/agents/embedded-systems.md +397 -0
- package/plugin/agents/fullstack-developer.md +12 -0
- package/plugin/agents/game-systems-designer.md +375 -0
- package/plugin/agents/git-manager.md +10 -0
- package/plugin/agents/journal-writer.md +2 -0
- package/plugin/agents/ml-engineer.md +284 -0
- package/plugin/agents/observability-engineer.md +353 -0
- package/plugin/agents/oracle.md +9 -0
- package/plugin/agents/performance-engineer.md +290 -0
- package/plugin/agents/pipeline-architect.md +6 -0
- package/plugin/agents/planner.md +12 -0
- package/plugin/agents/platform-engineer.md +325 -0
- package/plugin/agents/project-manager.md +3 -0
- package/plugin/agents/researcher.md +5 -0
- package/plugin/agents/scientific-computing.md +426 -0
- package/plugin/agents/scout.md +3 -0
- package/plugin/agents/security-auditor.md +7 -0
- package/plugin/agents/sprint-master.md +17 -0
- package/plugin/agents/tester.md +10 -0
- package/plugin/agents/ui-ux-designer.md +12 -0
- package/plugin/agents/vulnerability-scanner.md +6 -0
- package/plugin/commands/data/pipeline.md +47 -0
- package/plugin/commands/data/quality.md +49 -0
- package/plugin/commands/domain/analyze.md +34 -0
- package/plugin/commands/domain/map.md +41 -0
- package/plugin/commands/game/balance.md +56 -0
- package/plugin/commands/game/optimize.md +62 -0
- package/plugin/commands/iot/provision.md +58 -0
- package/plugin/commands/ml/evaluate.md +47 -0
- package/plugin/commands/ml/train.md +48 -0
- package/plugin/commands/perf/benchmark.md +54 -0
- package/plugin/commands/perf/profile.md +49 -0
- package/plugin/commands/platform/blueprint.md +56 -0
- package/plugin/commands/security/audit.md +54 -0
- package/plugin/commands/security/scan.md +55 -0
- package/plugin/commands/sre/dashboard.md +53 -0
- package/plugin/registry.yaml +711 -0
- package/plugin/skills/ai-ml/experiment-tracking/SKILL.md +338 -0
- package/plugin/skills/ai-ml/feature-stores/SKILL.md +340 -0
- package/plugin/skills/ai-ml/llm-ops/SKILL.md +454 -0
- package/plugin/skills/ai-ml/ml-pipelines/SKILL.md +390 -0
- package/plugin/skills/ai-ml/model-monitoring/SKILL.md +398 -0
- package/plugin/skills/ai-ml/model-serving/SKILL.md +386 -0
- package/plugin/skills/event-driven/cqrs-patterns/SKILL.md +348 -0
- package/plugin/skills/event-driven/event-sourcing/SKILL.md +334 -0
- package/plugin/skills/event-driven/kafka-deep/SKILL.md +252 -0
- package/plugin/skills/event-driven/saga-orchestration/SKILL.md +335 -0
- package/plugin/skills/event-driven/schema-registry/SKILL.md +328 -0
- package/plugin/skills/event-driven/stream-processing/SKILL.md +313 -0
- package/plugin/skills/game/game-audio/SKILL.md +446 -0
- package/plugin/skills/game/game-networking/SKILL.md +490 -0
- package/plugin/skills/game/godot-patterns/SKILL.md +413 -0
- package/plugin/skills/game/shader-programming/SKILL.md +492 -0
- package/plugin/skills/game/unity-patterns/SKILL.md +488 -0
- package/plugin/skills/iot/device-provisioning/SKILL.md +405 -0
- package/plugin/skills/iot/edge-computing/SKILL.md +369 -0
- package/plugin/skills/iot/industrial-protocols/SKILL.md +438 -0
- package/plugin/skills/iot/mqtt-deep/SKILL.md +418 -0
- package/plugin/skills/iot/ota-updates/SKILL.md +426 -0
- package/plugin/skills/microservices/api-gateway-patterns/SKILL.md +201 -0
- package/plugin/skills/microservices/circuit-breaker-patterns/SKILL.md +246 -0
- package/plugin/skills/microservices/contract-testing/SKILL.md +284 -0
- package/plugin/skills/microservices/distributed-tracing/SKILL.md +246 -0
- package/plugin/skills/microservices/service-discovery/SKILL.md +304 -0
- package/plugin/skills/microservices/service-mesh/SKILL.md +181 -0
- package/plugin/skills/mobile-advanced/mobile-ci-cd/SKILL.md +407 -0
- package/plugin/skills/mobile-advanced/mobile-security/SKILL.md +403 -0
- package/plugin/skills/mobile-advanced/offline-first/SKILL.md +473 -0
- package/plugin/skills/mobile-advanced/push-notifications/SKILL.md +494 -0
- package/plugin/skills/mobile-advanced/react-native-deep/SKILL.md +374 -0
- package/plugin/skills/simulation/numerical-methods/SKILL.md +434 -0
- package/plugin/skills/simulation/parallel-computing/SKILL.md +382 -0
- package/plugin/skills/simulation/physics-engines/SKILL.md +377 -0
- package/plugin/skills/simulation/validation-verification/SKILL.md +479 -0
- package/plugin/skills/simulation/visualization-scientific/SKILL.md +365 -0
- package/plugin/workflows/ai-engineering/agent-development.md +3 -3
- package/plugin/workflows/ai-engineering/fine-tuning.md +3 -3
- package/plugin/workflows/ai-engineering/model-evaluation.md +3 -3
- package/plugin/workflows/ai-engineering/prompt-engineering.md +2 -2
- package/plugin/workflows/ai-engineering/rag-development.md +4 -4
- package/plugin/workflows/ai-ml/data-pipeline.md +188 -0
- package/plugin/workflows/ai-ml/experiment-cycle.md +203 -0
- package/plugin/workflows/ai-ml/feature-engineering.md +208 -0
- package/plugin/workflows/ai-ml/model-deployment.md +199 -0
- package/plugin/workflows/ai-ml/monitoring-setup.md +227 -0
- package/plugin/workflows/api/api-design.md +1 -1
- package/plugin/workflows/api/api-testing.md +2 -2
- package/plugin/workflows/content/technical-docs.md +1 -1
- package/plugin/workflows/database/migration.md +1 -1
- package/plugin/workflows/database/optimization.md +1 -1
- package/plugin/workflows/database/schema-design.md +3 -3
- package/plugin/workflows/development/bug-fix.md +3 -3
- package/plugin/workflows/development/code-review.md +2 -1
- package/plugin/workflows/development/feature.md +3 -3
- package/plugin/workflows/development/refactor.md +2 -2
- package/plugin/workflows/event-driven/consumer-groups.md +190 -0
- package/plugin/workflows/event-driven/event-storming.md +172 -0
- package/plugin/workflows/event-driven/replay-testing.md +186 -0
- package/plugin/workflows/event-driven/saga-implementation.md +206 -0
- package/plugin/workflows/event-driven/schema-evolution.md +173 -0
- package/plugin/workflows/fullstack/authentication.md +4 -4
- package/plugin/workflows/fullstack/full-feature.md +4 -4
- package/plugin/workflows/game-dev/content-pipeline.md +218 -0
- package/plugin/workflows/game-dev/platform-submission.md +263 -0
- package/plugin/workflows/game-dev/playtesting.md +237 -0
- package/plugin/workflows/game-dev/prototype-to-production.md +205 -0
- package/plugin/workflows/microservices/contract-first.md +151 -0
- package/plugin/workflows/microservices/distributed-tracing.md +166 -0
- package/plugin/workflows/microservices/domain-decomposition.md +123 -0
- package/plugin/workflows/microservices/integration-testing.md +149 -0
- package/plugin/workflows/microservices/service-mesh-setup.md +153 -0
- package/plugin/workflows/microservices/service-scaffolding.md +151 -0
- package/plugin/workflows/omega/1000x-innovation.md +2 -2
- package/plugin/workflows/omega/100x-architecture.md +2 -2
- package/plugin/workflows/omega/10x-improvement.md +2 -2
- package/plugin/workflows/quality/performance-optimization.md +2 -2
- package/plugin/workflows/research/best-practices.md +1 -1
- package/plugin/workflows/research/technology-research.md +1 -1
- package/plugin/workflows/security/penetration-testing.md +3 -3
- package/plugin/workflows/security/security-audit.md +3 -3
- package/plugin/workflows/sprint/sprint-execution.md +2 -2
- package/plugin/workflows/sprint/sprint-retrospective.md +1 -1
- package/plugin/workflows/sprint/sprint-setup.md +1 -1
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# Model Serving
|
|
2
|
+
|
|
3
|
+
TensorFlow Serving, TorchServe, Triton Inference Server, model optimization, and production deployment patterns.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Model serving provides infrastructure for deploying ML models to production with low latency, high throughput, and operational reliability.
|
|
8
|
+
|
|
9
|
+
## Core Concepts
|
|
10
|
+
|
|
11
|
+
### Serving Patterns
|
|
12
|
+
- **Online Inference**: Real-time, synchronous
|
|
13
|
+
- **Batch Inference**: Offline, bulk processing
|
|
14
|
+
- **Streaming Inference**: Continuous data streams
|
|
15
|
+
- **Edge Inference**: On-device deployment
|
|
16
|
+
|
|
17
|
+
### Key Metrics
|
|
18
|
+
- **Latency**: p50, p95, p99 response times
|
|
19
|
+
- **Throughput**: Requests per second
|
|
20
|
+
- **Availability**: Uptime percentage
|
|
21
|
+
- **Resource Utilization**: CPU/GPU/Memory
|
|
22
|
+
|
|
23
|
+
## TensorFlow Serving
|
|
24
|
+
|
|
25
|
+
### Model Export
|
|
26
|
+
```python
|
|
27
|
+
import tensorflow as tf
|
|
28
|
+
|
|
29
|
+
# Save model in SavedModel format
|
|
30
|
+
model = tf.keras.models.load_model("model.h5")
|
|
31
|
+
|
|
32
|
+
# Define serving signature
|
|
33
|
+
@tf.function(input_signature=[tf.TensorSpec(shape=[None, 224, 224, 3], dtype=tf.float32)])
|
|
34
|
+
def serve(images):
|
|
35
|
+
return {"predictions": model(images)}
|
|
36
|
+
|
|
37
|
+
tf.saved_model.save(
|
|
38
|
+
model,
|
|
39
|
+
"models/image_classifier/1",
|
|
40
|
+
signatures={"serving_default": serve}
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Docker Deployment
|
|
45
|
+
```dockerfile
|
|
46
|
+
FROM tensorflow/serving:latest
|
|
47
|
+
|
|
48
|
+
COPY models /models
|
|
49
|
+
|
|
50
|
+
ENV MODEL_NAME=image_classifier
|
|
51
|
+
ENV MODEL_BASE_PATH=/models
|
|
52
|
+
|
|
53
|
+
EXPOSE 8500 8501
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Client Request
|
|
57
|
+
```python
|
|
58
|
+
import requests
|
|
59
|
+
import numpy as np
|
|
60
|
+
|
|
61
|
+
# REST API
|
|
62
|
+
data = {"instances": images.tolist()}
|
|
63
|
+
response = requests.post(
|
|
64
|
+
"http://localhost:8501/v1/models/image_classifier:predict",
|
|
65
|
+
json=data
|
|
66
|
+
)
|
|
67
|
+
predictions = response.json()["predictions"]
|
|
68
|
+
|
|
69
|
+
# gRPC (more efficient)
|
|
70
|
+
import grpc
|
|
71
|
+
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc
|
|
72
|
+
|
|
73
|
+
channel = grpc.insecure_channel("localhost:8500")
|
|
74
|
+
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
|
|
75
|
+
|
|
76
|
+
request = predict_pb2.PredictRequest()
|
|
77
|
+
request.model_spec.name = "image_classifier"
|
|
78
|
+
request.inputs["images"].CopyFrom(tf.make_tensor_proto(images))
|
|
79
|
+
|
|
80
|
+
response = stub.Predict(request, timeout=10.0)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## TorchServe
|
|
84
|
+
|
|
85
|
+
### Model Handler
|
|
86
|
+
```python
|
|
87
|
+
# custom_handler.py
|
|
88
|
+
from ts.torch_handler.base_handler import BaseHandler
|
|
89
|
+
import torch
|
|
90
|
+
import json
|
|
91
|
+
|
|
92
|
+
class ImageClassifierHandler(BaseHandler):
|
|
93
|
+
def __init__(self):
|
|
94
|
+
super().__init__()
|
|
95
|
+
self.initialized = False
|
|
96
|
+
|
|
97
|
+
def initialize(self, context):
|
|
98
|
+
self.manifest = context.manifest
|
|
99
|
+
model_dir = context.system_properties.get("model_dir")
|
|
100
|
+
|
|
101
|
+
self.model = torch.jit.load(f"{model_dir}/model.pt")
|
|
102
|
+
self.model.eval()
|
|
103
|
+
self.initialized = True
|
|
104
|
+
|
|
105
|
+
def preprocess(self, data):
|
|
106
|
+
images = []
|
|
107
|
+
for row in data:
|
|
108
|
+
image = row.get("data") or row.get("body")
|
|
109
|
+
# Transform image (NOTE: self.transform must be set up in initialize(),
# e.g. a torchvision transforms pipeline — it is not defined by BaseHandler)
|
|
110
|
+
tensor = self.transform(image)
|
|
111
|
+
images.append(tensor)
|
|
112
|
+
return torch.stack(images)
|
|
113
|
+
|
|
114
|
+
def inference(self, data):
|
|
115
|
+
with torch.no_grad():
|
|
116
|
+
return self.model(data)
|
|
117
|
+
|
|
118
|
+
def postprocess(self, inference_output):
|
|
119
|
+
return inference_output.tolist()
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Model Archive
|
|
123
|
+
```bash
|
|
124
|
+
# Create model archive
|
|
125
|
+
torch-model-archiver \
|
|
126
|
+
--model-name image_classifier \
|
|
127
|
+
--version 1.0 \
|
|
128
|
+
--model-file model.py \
|
|
129
|
+
--serialized-file model.pt \
|
|
130
|
+
--handler custom_handler.py \
|
|
131
|
+
--extra-files index_to_name.json
|
|
132
|
+
|
|
133
|
+
# Start TorchServe
|
|
134
|
+
torchserve --start \
|
|
135
|
+
--model-store model_store \
|
|
136
|
+
--models image_classifier=image_classifier.mar
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Triton Inference Server
|
|
140
|
+
|
|
141
|
+
### Model Repository
|
|
142
|
+
```
|
|
143
|
+
model_repository/
|
|
144
|
+
├── image_classifier/
|
|
145
|
+
│ ├── config.pbtxt
|
|
146
|
+
│ ├── 1/
|
|
147
|
+
│ │ └── model.onnx
|
|
148
|
+
│ └── 2/
|
|
149
|
+
│ └── model.onnx
|
|
150
|
+
└── text_classifier/
|
|
151
|
+
├── config.pbtxt
|
|
152
|
+
└── 1/
|
|
153
|
+
└── model.pt
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Configuration
|
|
157
|
+
```protobuf
|
|
158
|
+
# config.pbtxt
|
|
159
|
+
name: "image_classifier"
|
|
160
|
+
platform: "onnxruntime_onnx"
|
|
161
|
+
max_batch_size: 32
|
|
162
|
+
|
|
163
|
+
input [
|
|
164
|
+
{
|
|
165
|
+
name: "images"
|
|
166
|
+
data_type: TYPE_FP32
|
|
167
|
+
dims: [ 3, 224, 224 ]
|
|
168
|
+
}
|
|
169
|
+
]
|
|
170
|
+
|
|
171
|
+
output [
|
|
172
|
+
{
|
|
173
|
+
name: "predictions"
|
|
174
|
+
data_type: TYPE_FP32
|
|
175
|
+
dims: [ 1000 ]
|
|
176
|
+
}
|
|
177
|
+
]
|
|
178
|
+
|
|
179
|
+
instance_group [
|
|
180
|
+
{
|
|
181
|
+
count: 2
|
|
182
|
+
kind: KIND_GPU
|
|
183
|
+
gpus: [ 0 ]
|
|
184
|
+
}
|
|
185
|
+
]
|
|
186
|
+
|
|
187
|
+
dynamic_batching {
|
|
188
|
+
preferred_batch_size: [ 8, 16, 32 ]
|
|
189
|
+
max_queue_delay_microseconds: 100
|
|
190
|
+
}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Ensemble Models
|
|
194
|
+
```protobuf
|
|
195
|
+
# ensemble_config.pbtxt
|
|
196
|
+
name: "ensemble_pipeline"
|
|
197
|
+
platform: "ensemble"
|
|
198
|
+
max_batch_size: 32
|
|
199
|
+
|
|
200
|
+
input [
|
|
201
|
+
{ name: "raw_image" data_type: TYPE_UINT8 dims: [ -1, -1, 3 ] }
|
|
202
|
+
]
|
|
203
|
+
|
|
204
|
+
output [
|
|
205
|
+
{ name: "classification" data_type: TYPE_FP32 dims: [ 1000 ] }
|
|
206
|
+
]
|
|
207
|
+
|
|
208
|
+
ensemble_scheduling {
|
|
209
|
+
step [
|
|
210
|
+
{
|
|
211
|
+
model_name: "preprocessing"
|
|
212
|
+
model_version: -1
|
|
213
|
+
input_map { key: "raw_image" value: "raw_image" }
|
|
214
|
+
output_map { key: "processed_image" value: "images" }
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
model_name: "image_classifier"
|
|
218
|
+
model_version: -1
|
|
219
|
+
input_map { key: "images" value: "processed_image" }
|
|
220
|
+
output_map { key: "predictions" value: "classification" }
|
|
221
|
+
}
|
|
222
|
+
]
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## Model Optimization
|
|
227
|
+
|
|
228
|
+
### Quantization
|
|
229
|
+
```python
|
|
230
|
+
import torch
|
|
231
|
+
|
|
232
|
+
# Dynamic quantization (CPU)
|
|
233
|
+
quantized_model = torch.quantization.quantize_dynamic(
|
|
234
|
+
model,
|
|
235
|
+
{torch.nn.Linear, torch.nn.LSTM},
|
|
236
|
+
dtype=torch.qint8
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
# Static quantization
|
|
240
|
+
model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
|
|
241
|
+
prepared_model = torch.quantization.prepare(model)
|
|
242
|
+
# Calibrate with representative data
|
|
243
|
+
for data in calibration_loader:
|
|
244
|
+
prepared_model(data)
|
|
245
|
+
quantized_model = torch.quantization.convert(prepared_model)
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### ONNX Export
|
|
249
|
+
```python
|
|
250
|
+
import torch.onnx
|
|
251
|
+
|
|
252
|
+
torch.onnx.export(
|
|
253
|
+
model,
|
|
254
|
+
dummy_input,
|
|
255
|
+
"model.onnx",
|
|
256
|
+
export_params=True,
|
|
257
|
+
opset_version=13,
|
|
258
|
+
do_constant_folding=True,
|
|
259
|
+
input_names=["input"],
|
|
260
|
+
output_names=["output"],
|
|
261
|
+
dynamic_axes={
|
|
262
|
+
"input": {0: "batch_size"},
|
|
263
|
+
"output": {0: "batch_size"}
|
|
264
|
+
}
|
|
265
|
+
)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### TensorRT Optimization
|
|
269
|
+
```python
|
|
270
|
+
import tensorrt as trt
|
|
271
|
+
|
|
272
|
+
logger = trt.Logger(trt.Logger.WARNING)
|
|
273
|
+
builder = trt.Builder(logger)
|
|
274
|
+
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
|
|
275
|
+
parser = trt.OnnxParser(network, logger)
|
|
276
|
+
|
|
277
|
+
with open("model.onnx", "rb") as f:
|
|
278
|
+
parser.parse(f.read())
|
|
279
|
+
|
|
280
|
+
config = builder.create_builder_config()
|
|
281
|
+
config.max_workspace_size = 1 << 30  # 1GB (deprecated in TensorRT >= 8.4; use config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30))
|
|
282
|
+
config.set_flag(trt.BuilderFlag.FP16) # Enable FP16
|
|
283
|
+
|
|
284
|
+
engine = builder.build_engine(network, config)
|
|
285
|
+
|
|
286
|
+
with open("model.trt", "wb") as f:
|
|
287
|
+
f.write(engine.serialize())
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Kubernetes Deployment
|
|
291
|
+
|
|
292
|
+
### Serving Deployment
|
|
293
|
+
```yaml
|
|
294
|
+
apiVersion: apps/v1
|
|
295
|
+
kind: Deployment
|
|
296
|
+
metadata:
|
|
297
|
+
name: model-serving
|
|
298
|
+
spec:
|
|
299
|
+
replicas: 3
|
|
300
|
+
selector:
|
|
301
|
+
matchLabels:
|
|
302
|
+
app: model-serving
|
|
303
|
+
template:
|
|
304
|
+
metadata:
|
|
305
|
+
labels:
|
|
306
|
+
app: model-serving
|
|
307
|
+
spec:
|
|
308
|
+
containers:
|
|
309
|
+
- name: triton
|
|
310
|
+
image: nvcr.io/nvidia/tritonserver:23.10-py3
|
|
311
|
+
args:
|
|
312
|
+
- tritonserver
|
|
313
|
+
- --model-repository=s3://models/repository
|
|
314
|
+
- --strict-model-config=false
|
|
315
|
+
ports:
|
|
316
|
+
- containerPort: 8000
|
|
317
|
+
name: http
|
|
318
|
+
- containerPort: 8001
|
|
319
|
+
name: grpc
|
|
320
|
+
- containerPort: 8002
|
|
321
|
+
name: metrics
|
|
322
|
+
resources:
|
|
323
|
+
limits:
|
|
324
|
+
nvidia.com/gpu: 1
|
|
325
|
+
memory: 8Gi
|
|
326
|
+
requests:
|
|
327
|
+
memory: 4Gi
|
|
328
|
+
livenessProbe:
|
|
329
|
+
httpGet:
|
|
330
|
+
path: /v2/health/live
|
|
331
|
+
port: 8000
|
|
332
|
+
readinessProbe:
|
|
333
|
+
httpGet:
|
|
334
|
+
path: /v2/health/ready
|
|
335
|
+
port: 8000
|
|
336
|
+
---
|
|
337
|
+
apiVersion: autoscaling/v2
|
|
338
|
+
kind: HorizontalPodAutoscaler
|
|
339
|
+
metadata:
|
|
340
|
+
name: model-serving-hpa
|
|
341
|
+
spec:
|
|
342
|
+
scaleTargetRef:
|
|
343
|
+
apiVersion: apps/v1
|
|
344
|
+
kind: Deployment
|
|
345
|
+
name: model-serving
|
|
346
|
+
minReplicas: 2
|
|
347
|
+
maxReplicas: 10
|
|
348
|
+
metrics:
|
|
349
|
+
- type: Resource
|
|
350
|
+
resource:
|
|
351
|
+
name: cpu
|
|
352
|
+
target:
|
|
353
|
+
type: Utilization
|
|
354
|
+
averageUtilization: 70
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
## Best Practices
|
|
358
|
+
|
|
359
|
+
1. **Model Versioning**: Support multiple versions
|
|
360
|
+
2. **A/B Testing**: Traffic splitting capabilities
|
|
361
|
+
3. **Canary Deployments**: Gradual rollouts
|
|
362
|
+
4. **Health Checks**: Readiness and liveness probes
|
|
363
|
+
5. **Monitoring**: Latency, throughput, errors
|
|
364
|
+
|
|
365
|
+
## Anti-Patterns
|
|
366
|
+
|
|
367
|
+
- No model versioning
|
|
368
|
+
- Missing health checks
|
|
369
|
+
- Ignoring batch optimization
|
|
370
|
+
- No request validation
|
|
371
|
+
- Skipping load testing
|
|
372
|
+
|
|
373
|
+
## When to Use
|
|
374
|
+
|
|
375
|
+
- Real-time predictions needed
|
|
376
|
+
- Multiple models to serve
|
|
377
|
+
- Need for GPU acceleration
|
|
378
|
+
- High availability requirements
|
|
379
|
+
- Model version management
|
|
380
|
+
|
|
381
|
+
## When NOT to Use
|
|
382
|
+
|
|
383
|
+
- Simple batch processing
|
|
384
|
+
- One-off predictions
|
|
385
|
+
- No latency requirements
|
|
386
|
+
- Single model, low traffic
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
# CQRS Patterns
|
|
2
|
+
|
|
3
|
+
Command Query Responsibility Segregation patterns for separating read and write models, eventual consistency, and optimized query performance.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
CQRS separates read and write operations into different models, allowing each to be optimized independently.
|
|
8
|
+
|
|
9
|
+
## Core Concepts
|
|
10
|
+
|
|
11
|
+
### Command Side (Write)
|
|
12
|
+
- Handles state changes
|
|
13
|
+
- Validates business rules
|
|
14
|
+
- Emits domain events
|
|
15
|
+
- Optimized for consistency
|
|
16
|
+
|
|
17
|
+
### Query Side (Read)
|
|
18
|
+
- Handles data retrieval
|
|
19
|
+
- Denormalized for queries
|
|
20
|
+
- Eventually consistent
|
|
21
|
+
- Optimized for performance
|
|
22
|
+
|
|
23
|
+
## Architecture Patterns
|
|
24
|
+
|
|
25
|
+
### Simple CQRS
|
|
26
|
+
```
|
|
27
|
+
┌─────────────┐ ┌─────────────┐
|
|
28
|
+
│ Client │ │ Client │
|
|
29
|
+
└──────┬──────┘ └──────┬──────┘
|
|
30
|
+
│ │
|
|
31
|
+
Commands Queries
|
|
32
|
+
│ │
|
|
33
|
+
▼ ▼
|
|
34
|
+
┌─────────────┐ ┌─────────────┐
|
|
35
|
+
│ Command │ │ Query │
|
|
36
|
+
│ Handler │ │ Handler │
|
|
37
|
+
└──────┬──────┘ └──────┬──────┘
|
|
38
|
+
│ │
|
|
39
|
+
▼ ▼
|
|
40
|
+
┌─────────────────────────────────┐
|
|
41
|
+
│ Single Database │
|
|
42
|
+
│ (Different tables/views) │
|
|
43
|
+
└─────────────────────────────────┘
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Full CQRS with Event Sourcing
|
|
47
|
+
```
|
|
48
|
+
┌─────────────┐ ┌─────────────┐
|
|
49
|
+
│ Client │ │ Client │
|
|
50
|
+
└──────┬──────┘ └──────┬──────┘
|
|
51
|
+
│ │
|
|
52
|
+
Commands Queries
|
|
53
|
+
│ │
|
|
54
|
+
▼ ▼
|
|
55
|
+
┌─────────────┐ ┌─────────────┐
|
|
56
|
+
│ Command │ │ Query │
|
|
57
|
+
│ Handler │ │ Service │
|
|
58
|
+
└──────┬──────┘ └──────┬──────┘
|
|
59
|
+
│ │
|
|
60
|
+
▼ ▼
|
|
61
|
+
┌─────────────┐ ┌─────────────┐
|
|
62
|
+
│ Event │────▶│ Read │
|
|
63
|
+
│ Store │ │ Database │
|
|
64
|
+
└─────────────┘ └─────────────┘
|
|
65
|
+
│
|
|
66
|
+
│ Events
|
|
67
|
+
▼
|
|
68
|
+
┌─────────────┐
|
|
69
|
+
│ Projections │
|
|
70
|
+
└─────────────┘
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Command Implementation
|
|
74
|
+
|
|
75
|
+
### Command Handler
|
|
76
|
+
```typescript
|
|
77
|
+
// Command definition
|
|
78
|
+
interface CreateOrderCommand {
|
|
79
|
+
type: 'CreateOrder';
|
|
80
|
+
orderId: string;
|
|
81
|
+
customerId: string;
|
|
82
|
+
items: OrderItem[];
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Command handler
|
|
86
|
+
class CreateOrderHandler {
|
|
87
|
+
constructor(
|
|
88
|
+
    private readonly orderRepository: OrderRepository,
    private readonly customerRepository: CustomerRepository,
|
|
89
|
+
private readonly eventBus: EventBus
|
|
90
|
+
) {}
|
|
91
|
+
|
|
92
|
+
async handle(command: CreateOrderCommand): Promise<void> {
|
|
93
|
+
// Validate
|
|
94
|
+
const customer = await this.customerRepository.find(command.customerId);
|
|
95
|
+
if (!customer) {
|
|
96
|
+
throw new CustomerNotFoundError(command.customerId);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// Create aggregate
|
|
100
|
+
const order = Order.create({
|
|
101
|
+
orderId: command.orderId,
|
|
102
|
+
customerId: command.customerId,
|
|
103
|
+
items: command.items
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
// Persist
|
|
107
|
+
await this.orderRepository.save(order);
|
|
108
|
+
|
|
109
|
+
// Publish events
|
|
110
|
+
for (const event of order.getUncommittedEvents()) {
|
|
111
|
+
await this.eventBus.publish(event);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Command Bus
|
|
118
|
+
```typescript
|
|
119
|
+
class CommandBus {
|
|
120
|
+
private handlers: Map<string, CommandHandler<any>> = new Map();
|
|
121
|
+
|
|
122
|
+
register<T extends Command>(
|
|
123
|
+
commandType: string,
|
|
124
|
+
handler: CommandHandler<T>
|
|
125
|
+
): void {
|
|
126
|
+
this.handlers.set(commandType, handler);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
async dispatch<T extends Command>(command: T): Promise<void> {
|
|
130
|
+
const handler = this.handlers.get(command.type);
|
|
131
|
+
if (!handler) {
|
|
132
|
+
throw new Error(`No handler for command: ${command.type}`);
|
|
133
|
+
}
|
|
134
|
+
await handler.handle(command);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Query Implementation
|
|
140
|
+
|
|
141
|
+
### Read Model
|
|
142
|
+
```typescript
|
|
143
|
+
// Denormalized read model
|
|
144
|
+
interface OrderSummaryReadModel {
|
|
145
|
+
orderId: string;
|
|
146
|
+
customerName: string;
|
|
147
|
+
customerEmail: string;
|
|
148
|
+
status: string;
|
|
149
|
+
itemCount: number;
|
|
150
|
+
totalAmount: number;
|
|
151
|
+
createdAt: Date;
|
|
152
|
+
updatedAt: Date;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Query
|
|
156
|
+
interface GetOrdersQuery {
|
|
157
|
+
customerId?: string;
|
|
158
|
+
status?: string;
|
|
159
|
+
fromDate?: Date;
|
|
160
|
+
toDate?: Date;
|
|
161
|
+
page: number;
|
|
162
|
+
pageSize: number;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Query handler
|
|
166
|
+
class GetOrdersQueryHandler {
|
|
167
|
+
constructor(private readonly readDb: ReadDatabase) {}
|
|
168
|
+
|
|
169
|
+
async handle(query: GetOrdersQuery): Promise<PaginatedResult<OrderSummaryReadModel>> {
|
|
170
|
+
const conditions: string[] = [];
|
|
171
|
+
const params: any[] = [];
|
|
172
|
+
|
|
173
|
+
if (query.customerId) {
|
|
174
|
+
conditions.push(`customer_id = $${params.length + 1}`);
|
|
175
|
+
params.push(query.customerId);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if (query.status) {
|
|
179
|
+
conditions.push(`status = $${params.length + 1}`);
|
|
180
|
+
params.push(query.status);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
const where = conditions.length > 0
|
|
184
|
+
? `WHERE ${conditions.join(' AND ')}`
|
|
185
|
+
: '';
|
|
186
|
+
|
|
187
|
+
const offset = (query.page - 1) * query.pageSize;
|
|
188
|
+
|
|
189
|
+
const result = await this.readDb.query(`
|
|
190
|
+
SELECT * FROM order_summaries
|
|
191
|
+
${where}
|
|
192
|
+
ORDER BY created_at DESC
|
|
193
|
+
LIMIT $${params.length + 1} OFFSET $${params.length + 2}
|
|
194
|
+
`, [...params, query.pageSize, offset]);
|
|
195
|
+
|
|
196
|
+
return {
|
|
197
|
+
items: result.rows,
|
|
198
|
+
page: query.page,
|
|
199
|
+
pageSize: query.pageSize,
|
|
200
|
+
total: await this.getCount(where, params)
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Projection Patterns
|
|
207
|
+
|
|
208
|
+
### Synchronous Projection (Same Transaction)
|
|
209
|
+
```typescript
|
|
210
|
+
class OrderProjector {
|
|
211
|
+
async project(event: DomainEvent, transaction: Transaction): Promise<void> {
|
|
212
|
+
switch (event.type) {
|
|
213
|
+
case 'OrderCreated':
|
|
214
|
+
await transaction.query(`
|
|
215
|
+
INSERT INTO order_summaries (...)
|
|
216
|
+
VALUES (...)
|
|
217
|
+
`);
|
|
218
|
+
break;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Used in command handler
|
|
224
|
+
await this.db.transaction(async (tx) => {
|
|
225
|
+
await this.eventStore.append(events, tx);
|
|
226
|
+
for (const event of events) {
|
|
227
|
+
await this.projector.project(event, tx);
|
|
228
|
+
}
|
|
229
|
+
});
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Asynchronous Projection
|
|
233
|
+
```typescript
|
|
234
|
+
class OrderProjectionHandler {
|
|
235
|
+
@Subscribe('order-events')
|
|
236
|
+
async handle(event: DomainEvent): Promise<void> {
|
|
237
|
+
await this.projector.project(event);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// Idempotent projection
|
|
242
|
+
class IdempotentProjector {
|
|
243
|
+
async project(event: DomainEvent): Promise<void> {
|
|
244
|
+
const processed = await this.checkProcessed(event.eventId);
|
|
245
|
+
if (processed) return;
|
|
246
|
+
|
|
247
|
+
await this.db.transaction(async (tx) => {
|
|
248
|
+
await this.applyProjection(event, tx);
|
|
249
|
+
await this.markProcessed(event.eventId, tx);
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## Eventual Consistency
|
|
256
|
+
|
|
257
|
+
### Handling in UI
|
|
258
|
+
```typescript
|
|
259
|
+
// Optimistic UI update
|
|
260
|
+
async function submitOrder(orderData: CreateOrderCommand): Promise<void> {
|
|
261
|
+
// Immediately update UI
|
|
262
|
+
uiState.orders.push({
|
|
263
|
+
...orderData,
|
|
264
|
+
status: 'pending',
|
|
265
|
+
createdAt: new Date()
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
// Send command
|
|
269
|
+
await commandBus.dispatch(orderData);
|
|
270
|
+
|
|
271
|
+
// Poll for confirmation
|
|
272
|
+
await waitForEventualConsistency(orderData.orderId);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
async function waitForEventualConsistency(orderId: string): Promise<void> {
|
|
276
|
+
const maxAttempts = 10;
|
|
277
|
+
const delay = 500;
|
|
278
|
+
|
|
279
|
+
for (let i = 0; i < maxAttempts; i++) {
|
|
280
|
+
const order = await queryService.getOrder(orderId);
|
|
281
|
+
if (order && order.status !== 'pending') {
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
await sleep(delay);
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
throw new Error('Read model not updated in time');
|
|
288
|
+
}
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## Best Practices
|
|
292
|
+
|
|
293
|
+
1. **Command Validation**: Validate before persisting
|
|
294
|
+
2. **Idempotent Projections**: Handle duplicate events
|
|
295
|
+
3. **Event Ordering**: Maintain order in projections
|
|
296
|
+
4. **Projection Rebuild**: Ability to rebuild from events
|
|
297
|
+
5. **Monitoring**: Track projection lag
|
|
298
|
+
|
|
299
|
+
## Read Model Optimization
|
|
300
|
+
|
|
301
|
+
### Materialized Views
|
|
302
|
+
```sql
|
|
303
|
+
-- Denormalized for specific query pattern
|
|
304
|
+
CREATE MATERIALIZED VIEW customer_order_stats AS
|
|
305
|
+
SELECT
|
|
306
|
+
c.customer_id,
|
|
307
|
+
c.name,
|
|
308
|
+
COUNT(o.order_id) as total_orders,
|
|
309
|
+
SUM(o.total_amount) as lifetime_value,
|
|
310
|
+
MAX(o.created_at) as last_order_date
|
|
311
|
+
FROM customers c
|
|
312
|
+
LEFT JOIN orders o ON c.customer_id = o.customer_id
|
|
313
|
+
GROUP BY c.customer_id, c.name;
|
|
314
|
+
|
|
315
|
+
-- Refresh strategy
|
|
316
|
+
REFRESH MATERIALIZED VIEW CONCURRENTLY customer_order_stats;
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Multiple Read Models
|
|
320
|
+
```
|
|
321
|
+
Same events -> Different projections:
|
|
322
|
+
- OrderListProjection -> For order listing
|
|
323
|
+
- OrderSearchProjection -> For full-text search (Elasticsearch)
|
|
324
|
+
- OrderAnalyticsProjection -> For dashboards (ClickHouse)
|
|
325
|
+
- OrderNotificationProjection -> For alerts
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
## Anti-Patterns
|
|
329
|
+
|
|
330
|
+
- Using queries in command handlers
|
|
331
|
+
- Sharing models between read and write
|
|
332
|
+
- Not planning for projection failures
|
|
333
|
+
- Over-complicated when simple CRUD suffices
|
|
334
|
+
- Ignoring eventual consistency in UX
|
|
335
|
+
|
|
336
|
+
## When to Use
|
|
337
|
+
|
|
338
|
+
- Complex domains with different read/write patterns
|
|
339
|
+
- High read-to-write ratio
|
|
340
|
+
- Need for specialized query databases
|
|
341
|
+
- Event sourcing architectures
|
|
342
|
+
|
|
343
|
+
## When NOT to Use
|
|
344
|
+
|
|
345
|
+
- Simple CRUD applications
|
|
346
|
+
- Low complexity domains
|
|
347
|
+
- When strong consistency is mandatory
|
|
348
|
+
- Small teams without CQRS experience
|