omgkit 2.13.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +93 -10
  2. package/package.json +2 -2
  3. package/plugin/agents/api-designer.md +5 -0
  4. package/plugin/agents/architect.md +8 -0
  5. package/plugin/agents/brainstormer.md +4 -0
  6. package/plugin/agents/cicd-manager.md +6 -0
  7. package/plugin/agents/code-reviewer.md +6 -0
  8. package/plugin/agents/copywriter.md +2 -0
  9. package/plugin/agents/data-engineer.md +255 -0
  10. package/plugin/agents/database-admin.md +10 -0
  11. package/plugin/agents/debugger.md +10 -0
  12. package/plugin/agents/devsecops.md +314 -0
  13. package/plugin/agents/docs-manager.md +4 -0
  14. package/plugin/agents/domain-decomposer.md +181 -0
  15. package/plugin/agents/embedded-systems.md +397 -0
  16. package/plugin/agents/fullstack-developer.md +12 -0
  17. package/plugin/agents/game-systems-designer.md +375 -0
  18. package/plugin/agents/git-manager.md +10 -0
  19. package/plugin/agents/journal-writer.md +2 -0
  20. package/plugin/agents/ml-engineer.md +284 -0
  21. package/plugin/agents/observability-engineer.md +353 -0
  22. package/plugin/agents/oracle.md +9 -0
  23. package/plugin/agents/performance-engineer.md +290 -0
  24. package/plugin/agents/pipeline-architect.md +6 -0
  25. package/plugin/agents/planner.md +12 -0
  26. package/plugin/agents/platform-engineer.md +325 -0
  27. package/plugin/agents/project-manager.md +3 -0
  28. package/plugin/agents/researcher.md +5 -0
  29. package/plugin/agents/scientific-computing.md +426 -0
  30. package/plugin/agents/scout.md +3 -0
  31. package/plugin/agents/security-auditor.md +7 -0
  32. package/plugin/agents/sprint-master.md +17 -0
  33. package/plugin/agents/tester.md +10 -0
  34. package/plugin/agents/ui-ux-designer.md +12 -0
  35. package/plugin/agents/vulnerability-scanner.md +6 -0
  36. package/plugin/commands/data/pipeline.md +47 -0
  37. package/plugin/commands/data/quality.md +49 -0
  38. package/plugin/commands/domain/analyze.md +34 -0
  39. package/plugin/commands/domain/map.md +41 -0
  40. package/plugin/commands/game/balance.md +56 -0
  41. package/plugin/commands/game/optimize.md +62 -0
  42. package/plugin/commands/iot/provision.md +58 -0
  43. package/plugin/commands/ml/evaluate.md +47 -0
  44. package/plugin/commands/ml/train.md +48 -0
  45. package/plugin/commands/perf/benchmark.md +54 -0
  46. package/plugin/commands/perf/profile.md +49 -0
  47. package/plugin/commands/platform/blueprint.md +56 -0
  48. package/plugin/commands/security/audit.md +54 -0
  49. package/plugin/commands/security/scan.md +55 -0
  50. package/plugin/commands/sre/dashboard.md +53 -0
  51. package/plugin/registry.yaml +711 -0
  52. package/plugin/skills/ai-ml/experiment-tracking/SKILL.md +338 -0
  53. package/plugin/skills/ai-ml/feature-stores/SKILL.md +340 -0
  54. package/plugin/skills/ai-ml/llm-ops/SKILL.md +454 -0
  55. package/plugin/skills/ai-ml/ml-pipelines/SKILL.md +390 -0
  56. package/plugin/skills/ai-ml/model-monitoring/SKILL.md +398 -0
  57. package/plugin/skills/ai-ml/model-serving/SKILL.md +386 -0
  58. package/plugin/skills/event-driven/cqrs-patterns/SKILL.md +348 -0
  59. package/plugin/skills/event-driven/event-sourcing/SKILL.md +334 -0
  60. package/plugin/skills/event-driven/kafka-deep/SKILL.md +252 -0
  61. package/plugin/skills/event-driven/saga-orchestration/SKILL.md +335 -0
  62. package/plugin/skills/event-driven/schema-registry/SKILL.md +328 -0
  63. package/plugin/skills/event-driven/stream-processing/SKILL.md +313 -0
  64. package/plugin/skills/game/game-audio/SKILL.md +446 -0
  65. package/plugin/skills/game/game-networking/SKILL.md +490 -0
  66. package/plugin/skills/game/godot-patterns/SKILL.md +413 -0
  67. package/plugin/skills/game/shader-programming/SKILL.md +492 -0
  68. package/plugin/skills/game/unity-patterns/SKILL.md +488 -0
  69. package/plugin/skills/iot/device-provisioning/SKILL.md +405 -0
  70. package/plugin/skills/iot/edge-computing/SKILL.md +369 -0
  71. package/plugin/skills/iot/industrial-protocols/SKILL.md +438 -0
  72. package/plugin/skills/iot/mqtt-deep/SKILL.md +418 -0
  73. package/plugin/skills/iot/ota-updates/SKILL.md +426 -0
  74. package/plugin/skills/microservices/api-gateway-patterns/SKILL.md +201 -0
  75. package/plugin/skills/microservices/circuit-breaker-patterns/SKILL.md +246 -0
  76. package/plugin/skills/microservices/contract-testing/SKILL.md +284 -0
  77. package/plugin/skills/microservices/distributed-tracing/SKILL.md +246 -0
  78. package/plugin/skills/microservices/service-discovery/SKILL.md +304 -0
  79. package/plugin/skills/microservices/service-mesh/SKILL.md +181 -0
  80. package/plugin/skills/mobile-advanced/mobile-ci-cd/SKILL.md +407 -0
  81. package/plugin/skills/mobile-advanced/mobile-security/SKILL.md +403 -0
  82. package/plugin/skills/mobile-advanced/offline-first/SKILL.md +473 -0
  83. package/plugin/skills/mobile-advanced/push-notifications/SKILL.md +494 -0
  84. package/plugin/skills/mobile-advanced/react-native-deep/SKILL.md +374 -0
  85. package/plugin/skills/simulation/numerical-methods/SKILL.md +434 -0
  86. package/plugin/skills/simulation/parallel-computing/SKILL.md +382 -0
  87. package/plugin/skills/simulation/physics-engines/SKILL.md +377 -0
  88. package/plugin/skills/simulation/validation-verification/SKILL.md +479 -0
  89. package/plugin/skills/simulation/visualization-scientific/SKILL.md +365 -0
  90. package/plugin/workflows/ai-engineering/agent-development.md +3 -3
  91. package/plugin/workflows/ai-engineering/fine-tuning.md +3 -3
  92. package/plugin/workflows/ai-engineering/model-evaluation.md +3 -3
  93. package/plugin/workflows/ai-engineering/prompt-engineering.md +2 -2
  94. package/plugin/workflows/ai-engineering/rag-development.md +4 -4
  95. package/plugin/workflows/ai-ml/data-pipeline.md +188 -0
  96. package/plugin/workflows/ai-ml/experiment-cycle.md +203 -0
  97. package/plugin/workflows/ai-ml/feature-engineering.md +208 -0
  98. package/plugin/workflows/ai-ml/model-deployment.md +199 -0
  99. package/plugin/workflows/ai-ml/monitoring-setup.md +227 -0
  100. package/plugin/workflows/api/api-design.md +1 -1
  101. package/plugin/workflows/api/api-testing.md +2 -2
  102. package/plugin/workflows/content/technical-docs.md +1 -1
  103. package/plugin/workflows/database/migration.md +1 -1
  104. package/plugin/workflows/database/optimization.md +1 -1
  105. package/plugin/workflows/database/schema-design.md +3 -3
  106. package/plugin/workflows/development/bug-fix.md +3 -3
  107. package/plugin/workflows/development/code-review.md +2 -1
  108. package/plugin/workflows/development/feature.md +3 -3
  109. package/plugin/workflows/development/refactor.md +2 -2
  110. package/plugin/workflows/event-driven/consumer-groups.md +190 -0
  111. package/plugin/workflows/event-driven/event-storming.md +172 -0
  112. package/plugin/workflows/event-driven/replay-testing.md +186 -0
  113. package/plugin/workflows/event-driven/saga-implementation.md +206 -0
  114. package/plugin/workflows/event-driven/schema-evolution.md +173 -0
  115. package/plugin/workflows/fullstack/authentication.md +4 -4
  116. package/plugin/workflows/fullstack/full-feature.md +4 -4
  117. package/plugin/workflows/game-dev/content-pipeline.md +218 -0
  118. package/plugin/workflows/game-dev/platform-submission.md +263 -0
  119. package/plugin/workflows/game-dev/playtesting.md +237 -0
  120. package/plugin/workflows/game-dev/prototype-to-production.md +205 -0
  121. package/plugin/workflows/microservices/contract-first.md +151 -0
  122. package/plugin/workflows/microservices/distributed-tracing.md +166 -0
  123. package/plugin/workflows/microservices/domain-decomposition.md +123 -0
  124. package/plugin/workflows/microservices/integration-testing.md +149 -0
  125. package/plugin/workflows/microservices/service-mesh-setup.md +153 -0
  126. package/plugin/workflows/microservices/service-scaffolding.md +151 -0
  127. package/plugin/workflows/omega/1000x-innovation.md +2 -2
  128. package/plugin/workflows/omega/100x-architecture.md +2 -2
  129. package/plugin/workflows/omega/10x-improvement.md +2 -2
  130. package/plugin/workflows/quality/performance-optimization.md +2 -2
  131. package/plugin/workflows/research/best-practices.md +1 -1
  132. package/plugin/workflows/research/technology-research.md +1 -1
  133. package/plugin/workflows/security/penetration-testing.md +3 -3
  134. package/plugin/workflows/security/security-audit.md +3 -3
  135. package/plugin/workflows/sprint/sprint-execution.md +2 -2
  136. package/plugin/workflows/sprint/sprint-retrospective.md +1 -1
  137. package/plugin/workflows/sprint/sprint-setup.md +1 -1
@@ -0,0 +1,454 @@
1
+ # LLMOps
2
+
3
+ LLM deployment, prompt management, RAG pipelines, fine-tuning workflows, and LLM evaluation frameworks.
4
+
5
+ ## Overview
6
+
7
+ LLMOps extends MLOps practices for Large Language Models, addressing unique challenges like prompt engineering, context management, and evaluation of generative outputs.
8
+
9
+ ## Core Concepts
10
+
11
+ ### LLM Lifecycle
12
+ - **Prompt Engineering**: Design and iterate prompts
13
+ - **Fine-tuning**: Adapt models to specific tasks
14
+ - **Deployment**: Serve models at scale
15
+ - **Evaluation**: Measure quality and safety
16
+ - **Monitoring**: Track performance and costs
17
+
18
+ ### Key Challenges
19
+ - Non-deterministic outputs
20
+ - Context window limitations
21
+ - Cost management
22
+ - Latency optimization
23
+ - Safety and alignment
24
+
25
+ ## Prompt Management
26
+
27
+ ### Prompt Registry
28
+ ```python
29
+ from dataclasses import dataclass, field
30
+ from typing import Dict, List, Optional
31
+ from datetime import datetime
32
+ import hashlib
33
+ import json
34
+
35
@dataclass
class PromptTemplate:
    """A versioned prompt definition bound to a model and its sampling settings."""

    name: str
    version: str
    template: str
    variables: List[str]
    model: str
    temperature: float = 0.7
    max_tokens: int = 1000
    metadata: Dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.now)

    @property
    def hash(self) -> str:
        """Short content fingerprint covering template, model, and temperature.

        Name, version, and metadata deliberately do not affect the hash, so
        re-registering identical content under the same version is a no-op.
        """
        fingerprint_source = f"{self.template}{self.model}{self.temperature}"
        digest = hashlib.sha256(fingerprint_source.encode())
        return digest.hexdigest()[:12]
51
+
52
class PromptRegistry:
    """Storage-backed registry of versioned prompt templates.

    The backend must expose ``save(prompt)``, ``load(name, version)`` (returning
    ``None`` when absent), and ``list_versions(name)``.
    """

    def __init__(self, storage_backend):
        self.storage = storage_backend

    def register(self, prompt: PromptTemplate) -> str:
        """Persist *prompt* and return its content hash.

        Published versions are immutable: raises ValueError when the same
        name/version already exists with different content.
        """
        existing = self.get(prompt.name, prompt.version)
        if existing and existing.hash != prompt.hash:
            raise ValueError(f"Version {prompt.version} exists with different content")

        self.storage.save(prompt)
        return prompt.hash

    def get(self, name: str, version: str = "latest") -> Optional[PromptTemplate]:
        """Return the stored template, or ``None`` when it does not exist."""
        return self.storage.load(name, version)

    def list_versions(self, name: str) -> List[str]:
        """All registered version strings for *name*."""
        return self.storage.list_versions(name)

    def render(self, name: str, version: str, variables: Dict) -> str:
        """Fill the template's placeholders with *variables*.

        Raises:
            KeyError: when the prompt name/version is unknown. (Previously this
                crashed with an opaque AttributeError on ``None``.)
        """
        prompt = self.get(name, version)
        if prompt is None:
            raise KeyError(f"Prompt {name!r} version {version!r} not found")
        return prompt.template.format(**variables)
74
+
75
+ # Usage
76
+ registry = PromptRegistry(storage)
77
+
78
+ prompt = PromptTemplate(
79
+ name="customer_support",
80
+ version="1.2.0",
81
+ template="""You are a helpful customer support agent for {company_name}.
82
+
83
+ Customer query: {query}
84
+
85
+ Respond helpfully and professionally. If you don't know the answer, say so.""",
86
+ variables=["company_name", "query"],
87
+ model="gpt-4",
88
+ temperature=0.3
89
+ )
90
+
91
+ registry.register(prompt)
92
+ ```
93
+
94
+ ### A/B Testing Prompts
95
+ ```python
96
+ import random
97
+ from typing import Callable
98
+
99
class PromptExperiment:
    """Weighted A/B experiment over named prompt variants."""

    def __init__(
        self,
        name: str,
        variants: Dict[str, PromptTemplate],
        weights: Optional[Dict[str, float]] = None,
    ):
        self.name = name
        self.variants = variants
        # Fall back to a uniform split when no (truthy) weights are supplied.
        if weights:
            self.weights = weights
        else:
            share = 1.0 / len(variants)
            self.weights = {variant: share for variant in variants}

    def select_variant(self, user_id: str) -> tuple[str, PromptTemplate]:
        """Deterministically bucket *user_id* into a variant (sticky assignment).

        Hashing (experiment, user) means the same user always sees the same
        variant for a given experiment.
        """
        bucket_key = f"{self.name}:{user_id}".encode()
        hashed = int(hashlib.md5(bucket_key).hexdigest(), 16)
        bucket = (hashed % 1000) / 1000

        cumulative = 0
        for variant_name, weight in self.weights.items():
            cumulative += weight
            if bucket < cumulative:
                return variant_name, self.variants[variant_name]

        # Float rounding can leave the cumulative weight just below 1.0;
        # assign any stragglers to the last variant.
        return list(self.variants.items())[-1]
122
+
123
class ExperimentTracker:
    """Persists per-call experiment observations and aggregates variant metrics."""

    def __init__(self, db):
        self.db = db

    def log_experiment(
        self,
        experiment_name: str,
        variant: str,
        user_id: str,
        input_data: dict,
        output: str,
        metrics: dict
    ):
        """Record a single observation (input, output, metrics) for a variant."""
        record = {
            "experiment": experiment_name,
            "variant": variant,
            "user_id": user_id,
            "input": input_data,
            "output": output,
            "metrics": metrics,
            "timestamp": datetime.now()
        }
        self.db.insert(record)

    def get_variant_metrics(self, experiment_name: str) -> Dict:
        """Aggregate count / avg latency / avg quality per variant.

        Uses a Mongo-style aggregation pipeline; the db backend must support
        ``aggregate``.
        """
        match_stage = {"$match": {"experiment": experiment_name}}
        group_stage = {
            "$group": {
                "_id": "$variant",
                "count": {"$sum": 1},
                "avg_latency": {"$avg": "$metrics.latency"},
                "avg_quality": {"$avg": "$metrics.quality_score"}
            }
        }
        return self.db.aggregate([match_stage, group_stage])
157
+ ```
158
+
159
+ ## RAG Pipeline
160
+
161
+ ### Document Processing
162
+ ```python
163
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
164
+ from langchain.embeddings import OpenAIEmbeddings
165
+ from langchain.vectorstores import Pinecone
166
+ import pinecone
167
+
168
class RAGPipeline:
    """Retrieval-augmented generation over a Pinecone index.

    Chunks and embeds documents on ingest, retrieves chunks by vector
    similarity, and answers queries with retrieved chunks inlined as context.

    NOTE(review): relies on `os`, `openai`, `Document`, and `datetime` being
    imported at module level, and on PINECONE_API_KEY being set — confirm in
    the surrounding file.
    """

    def __init__(self, index_name: str):
        # OpenAI embeddings are used both at ingest time and at query time;
        # they must match or similarity search is meaningless.
        self.embeddings = OpenAIEmbeddings()
        # Recursive splitter: prefers paragraph breaks, then lines, then words.
        # 200-char overlap preserves context across chunk boundaries.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""]
        )

        # Connects to an index that must already exist (no creation here).
        pinecone.init(api_key=os.environ["PINECONE_API_KEY"])
        self.vectorstore = Pinecone.from_existing_index(
            index_name,
            self.embeddings
        )

    def ingest_documents(self, documents: List[Document]):
        """Chunk, tag, embed, and store *documents* in the vector index.

        Assumes every document carries a 'source' metadata key — TODO confirm
        with callers; a missing key raises KeyError below.
        """
        # Split documents
        chunks = self.text_splitter.split_documents(documents)

        # Add metadata: a stable per-source chunk id plus an ingest timestamp.
        for i, chunk in enumerate(chunks):
            chunk.metadata["chunk_id"] = f"{chunk.metadata['source']}_{i}"
            chunk.metadata["ingested_at"] = datetime.now().isoformat()

        # Embed and store
        self.vectorstore.add_documents(chunks)

    def retrieve(
        self,
        query: str,
        k: int = 5,
        filter: Optional[dict] = None
    ) -> List[Document]:
        """Return the *k* chunks most similar to *query*.

        *filter* is an optional metadata filter passed through to Pinecone.
        """
        return self.vectorstore.similarity_search(
            query,
            k=k,
            filter=filter
        )

    def generate(
        self,
        query: str,
        context_docs: List[Document],
        model: str = "gpt-4"
    ) -> str:
        """Answer *query* using *context_docs* as the only grounding context.

        Low temperature (0.3) keeps answers close to the provided context.
        NOTE(review): no truncation — a large context_docs list can exceed the
        model's context window; verify callers bound k.
        """
        context = "\n\n".join([doc.page_content for doc in context_docs])

        prompt = f"""Answer the question based on the context below.

Context:
{context}

Question: {query}

Answer:"""

        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )

        return response.choices[0].message.content
231
+ ```
232
+
233
+ ### Hybrid Search
234
+ ```python
235
+ from rank_bm25 import BM25Okapi
236
+ import numpy as np
237
+
238
class HybridRetriever:
    """Hybrid dense + keyword retrieval fused with Reciprocal Rank Fusion.

    *documents* must be Document-like objects exposing ``page_content`` (text)
    and ``metadata["chunk_id"]`` (a unique id also present on vector-store
    results). The previous version typed them as ``List[str]`` yet indexed
    ``.metadata`` on them and called an undefined ``_get_doc`` — both fixed.
    """

    def __init__(self, vectorstore, documents: List[Document]):
        self.vectorstore = vectorstore
        self.documents = documents

        # BM25 for keyword search over whitespace-tokenized chunk text.
        tokenized = [doc.page_content.split() for doc in documents]
        self.bm25 = BM25Okapi(tokenized)

        # chunk_id -> document, so fused ids can be resolved back to docs.
        self._by_id = {doc.metadata["chunk_id"]: doc for doc in documents}

    def retrieve(
        self,
        query: str,
        k: int = 10,
        alpha: float = 0.5  # Weight for vector search (1 - alpha goes to BM25)
    ) -> List[Document]:
        """Return the top *k* documents by fused vector + BM25 rank."""
        # Vector search: over-fetch (2k) so fusion has candidates to re-rank.
        vector_results = self.vectorstore.similarity_search_with_score(query, k=k*2)

        # BM25 search over the same corpus.
        bm25_scores = self.bm25.get_scores(query.split())
        bm25_top_k = np.argsort(bm25_scores)[-k*2:][::-1]

        # Reciprocal Rank Fusion: score by 1/(rank + 60); the 60 dampens the
        # advantage of top ranks (standard RRF constant).
        scores: Dict[str, float] = {}
        for rank, (doc, _score) in enumerate(vector_results):
            doc_id = doc.metadata["chunk_id"]
            scores[doc_id] = scores.get(doc_id, 0.0) + alpha / (rank + 60)

        for rank, idx in enumerate(bm25_top_k):
            doc_id = self.documents[idx].metadata["chunk_id"]
            scores[doc_id] = scores.get(doc_id, 0.0) + (1 - alpha) / (rank + 60)

        # Sort by fused score and resolve ids back to documents.
        sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)[:k]
        return [self._get_doc(doc_id) for doc_id in sorted_ids]

    def _get_doc(self, doc_id: str) -> Document:
        """Resolve a chunk id to its document (was previously undefined)."""
        return self._by_id[doc_id]
274
+ ```
275
+
276
+ ## LLM Evaluation
277
+
278
+ ### Evaluation Framework
279
+ ```python
280
+ from typing import List, Dict, Callable
281
+ from dataclasses import dataclass
282
+ import openai
283
+
284
@dataclass
class EvalResult:
    """Outcome of one evaluator run on a single response."""
    # Normalized score in [0, 1] (the judge's 1-5 rating divided by 5).
    score: float
    # Judge's free-text explanation for the score.
    reasoning: str
    # Evaluator-specific extras (e.g. the query, or a hallucination list).
    metadata: dict
289
+
290
class LLMEvaluator:
    """LLM-as-judge evaluators for generated responses.

    Both metrics share one judging path (previously duplicated inline in each
    method): build a rubric prompt, query the judge model at temperature 0 for
    reproducibility, and parse its JSON verdict.
    """

    def __init__(self, judge_model: str = "gpt-4"):
        self.judge_model = judge_model

    def _judge(self, prompt: str) -> dict:
        """Send *prompt* to the judge model and parse its JSON reply.

        Raises json.JSONDecodeError when the judge returns invalid JSON —
        unchanged from the previous inline behavior.
        """
        result = openai.ChatCompletion.create(
            model=self.judge_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        return json.loads(result.choices[0].message.content)

    def evaluate_relevance(
        self,
        query: str,
        response: str,
        context: str
    ) -> EvalResult:
        """Score how relevant *response* is to *query*, normalized to [0, 1]."""
        prompt = f"""Rate the relevance of the response to the query on a scale of 1-5.

Query: {query}
Context: {context}
Response: {response}

Provide your rating and reasoning in JSON format:
{{"score": <1-5>, "reasoning": "<explanation>"}}"""

        parsed = self._judge(prompt)
        return EvalResult(
            score=parsed["score"] / 5,
            reasoning=parsed["reasoning"],
            metadata={"query": query}
        )

    def evaluate_faithfulness(
        self,
        response: str,
        context: str
    ) -> EvalResult:
        """Score whether *response* sticks to *context* (hallucination check)."""
        prompt = f"""Evaluate if the response is faithful to the context (no hallucinations).

Context: {context}
Response: {response}

Rate faithfulness 1-5 and explain any hallucinations:
{{"score": <1-5>, "reasoning": "<explanation>", "hallucinations": ["<list of hallucinated claims>"]}}"""

        parsed = self._judge(prompt)
        return EvalResult(
            score=parsed["score"] / 5,
            reasoning=parsed["reasoning"],
            metadata={"hallucinations": parsed.get("hallucinations", [])}
        )
347
+
348
class EvalPipeline:
    """Runs evaluator callables over test cases and aggregates their scores.

    Each evaluator is called with a test case's fields as keyword arguments
    and must return an object exposing a ``.score`` attribute (e.g. EvalResult).
    """

    def __init__(self, evaluators: List[Callable]):
        self.evaluators = evaluators

    def run(self, test_cases: List[dict]) -> Dict:
        """Evaluate every case with every evaluator.

        Returns {"individual": per-case results keyed by evaluator name,
        "aggregate": mean/std/min/max per metric}.
        """
        results = []
        for case in test_cases:
            case_results = {}
            for evaluator in self.evaluators:
                # Keyed by function name so aggregates are stable across runs.
                case_results[evaluator.__name__] = evaluator(**case)
            results.append(case_results)

        return {
            "individual": results,
            "aggregate": self._aggregate(results)
        }

    def _aggregate(self, results: List[dict]) -> dict:
        """Summary statistics per metric.

        An empty result list yields empty aggregates — previously this crashed
        with IndexError on ``results[0]``.
        """
        if not results:
            return {}
        metrics = {}
        for metric in results[0].keys():
            scores = [r[metric].score for r in results]
            metrics[metric] = {
                "mean": np.mean(scores),
                "std": np.std(scores),
                "min": min(scores),
                "max": max(scores)
            }
        return metrics
376
+ ```
377
+
378
+ ## Cost Management
379
+
380
+ ### Token Tracking
381
+ ```python
382
+ import tiktoken
383
+ from functools import wraps
384
+
385
class TokenTracker:
    """Accumulates prompt/completion token counts per model and prices them."""

    def __init__(self):
        self.usage = {}

    def track(self, model: str, prompt_tokens: int, completion_tokens: int):
        """Add one call's token counts to the running totals for *model*."""
        counts = self.usage.setdefault(model, {"prompt": 0, "completion": 0})
        counts["prompt"] += prompt_tokens
        counts["completion"] += completion_tokens

    def estimate_cost(self) -> float:
        """Total USD cost so far, using per-1K-token list prices.

        Models without a known price contribute nothing to the total.
        """
        pricing = {
            "gpt-4": {"prompt": 0.03, "completion": 0.06},
            "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
            "gpt-3.5-turbo": {"prompt": 0.0005, "completion": 0.0015}
        }

        total = 0
        for model, counts in self.usage.items():
            rates = pricing.get(model)
            if rates is None:
                continue
            total += (counts["prompt"] / 1000) * rates["prompt"]
            total += (counts["completion"] / 1000) * rates["completion"]
        return total
409
+
410
def track_tokens(tracker: TokenTracker):
    """Decorator factory: record token usage from each wrapped call's result.

    The wrapped callable must return an OpenAI-style response whose ``usage``
    exposes ``prompt_tokens`` and ``completion_tokens``. The model name is read
    from the ``model`` keyword argument, defaulting to "gpt-4".
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            response = func(*args, **kwargs)
            usage = response.usage
            model_name = kwargs.get("model", "gpt-4")
            tracker.track(
                model=model_name,
                prompt_tokens=usage.prompt_tokens,
                completion_tokens=usage.completion_tokens,
            )
            return response
        return wrapper
    return decorator
423
+ ```
424
+
425
+ ## Best Practices
426
+
427
+ 1. **Version Prompts**: Track all prompt changes
428
+ 2. **Cache Responses**: Reduce costs and latency
429
+ 3. **Structured Outputs**: Use JSON mode when possible
430
+ 4. **Fallback Models**: Have cheaper alternatives
431
+ 5. **Rate Limiting**: Protect against cost spikes
432
+
433
+ ## Anti-Patterns
434
+
435
+ - Hardcoded prompts in code
436
+ - No evaluation pipeline
437
+ - Ignoring token costs
438
+ - Missing safety filters
439
+ - No prompt versioning
440
+
441
+ ## When to Use
442
+
443
+ - Production LLM applications
444
+ - Multiple prompt iterations
445
+ - Team collaboration on prompts
446
+ - Cost-sensitive deployments
447
+ - RAG systems at scale
448
+
449
+ ## When NOT to Use
450
+
451
+ - Simple one-off queries
452
+ - Prototyping phase
453
+ - No iteration expected
454
+ - Single developer project