@sylix/coworker 2.0.11 → 2.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/dist/commands/slash/config.d.ts.map +1 -1
  2. package/dist/commands/slash/config.js +22 -4
  3. package/dist/commands/slash/config.js.map +1 -1
  4. package/dist/core/CoWorkerAgent.d.ts.map +1 -1
  5. package/dist/core/CoWorkerAgent.js +6 -3
  6. package/dist/core/CoWorkerAgent.js.map +1 -1
  7. package/dist/skills/defaults/accessibility/screen-reader-testing.md +545 -0
  8. package/dist/skills/defaults/accessibility/wcag-audit-patterns.md +555 -0
  9. package/dist/skills/defaults/ai-ml/rag.md +276 -0
  10. package/dist/skills/defaults/backend-development/api-design-principles.md +528 -0
  11. package/dist/skills/defaults/backend-development/api-design.md +285 -0
  12. package/dist/skills/defaults/backend-development/architecture-patterns.md +494 -0
  13. package/dist/skills/defaults/backend-development/async-python.md +237 -0
  14. package/dist/skills/defaults/backend-development/auth-implementation-patterns.md +638 -0
  15. package/dist/skills/defaults/backend-development/bazel-build-optimization.md +387 -0
  16. package/dist/skills/defaults/backend-development/billing-automation/SKILL.md +566 -0
  17. package/dist/skills/defaults/backend-development/code-review-excellence.md +538 -0
  18. package/dist/skills/defaults/backend-development/cqrs-implementation.md +554 -0
  19. package/dist/skills/defaults/backend-development/database-design.md +305 -0
  20. package/dist/skills/defaults/backend-development/debugging-strategies.md +536 -0
  21. package/dist/skills/defaults/backend-development/e2e-testing-patterns.md +544 -0
  22. package/dist/skills/defaults/backend-development/error-handling-patterns.md +641 -0
  23. package/dist/skills/defaults/backend-development/fastapi-templates.md +559 -0
  24. package/dist/skills/defaults/backend-development/fastapi.md +309 -0
  25. package/dist/skills/defaults/backend-development/git-advanced-workflows.md +405 -0
  26. package/dist/skills/defaults/backend-development/microservices-patterns.md +595 -0
  27. package/dist/skills/defaults/backend-development/microservices.md +284 -0
  28. package/dist/skills/defaults/backend-development/monorepo-management.md +623 -0
  29. package/dist/skills/defaults/backend-development/nodejs-backend-patterns.md +1048 -0
  30. package/dist/skills/defaults/backend-development/nx-workspace-patterns.md +457 -0
  31. package/dist/skills/defaults/backend-development/paypal-integration/SKILL.md +478 -0
  32. package/dist/skills/defaults/backend-development/pci-compliance/SKILL.md +480 -0
  33. package/dist/skills/defaults/backend-development/python-anti-patterns.md +349 -0
  34. package/dist/skills/defaults/backend-development/python-background-jobs.md +364 -0
  35. package/dist/skills/defaults/backend-development/python-code-style.md +360 -0
  36. package/dist/skills/defaults/backend-development/python-configuration.md +368 -0
  37. package/dist/skills/defaults/backend-development/python-design-patterns.md +296 -0
  38. package/dist/skills/defaults/backend-development/python-error-handling.md +323 -0
  39. package/dist/skills/defaults/backend-development/python-packaging.md +887 -0
  40. package/dist/skills/defaults/backend-development/python-performance-optimization.md +874 -0
  41. package/dist/skills/defaults/backend-development/python-project-structure.md +252 -0
  42. package/dist/skills/defaults/backend-development/python-resilience.md +376 -0
  43. package/dist/skills/defaults/backend-development/python-resource-management.md +421 -0
  44. package/dist/skills/defaults/backend-development/python-type-safety.md +428 -0
  45. package/dist/skills/defaults/backend-development/sql-optimization-patterns.md +509 -0
  46. package/dist/skills/defaults/backend-development/stripe-integration/SKILL.md +522 -0
  47. package/dist/skills/defaults/backend-development/turborepo-caching.md +376 -0
  48. package/dist/skills/defaults/blockchain/defi-protocol-templates.md +430 -0
  49. package/dist/skills/defaults/blockchain/nft-standards.md +364 -0
  50. package/dist/skills/defaults/blockchain/solidity-security.md +514 -0
  51. package/dist/skills/defaults/blockchain/web3-testing.md +360 -0
  52. package/dist/skills/defaults/business/competitive-landscape/SKILL.md +527 -0
  53. package/dist/skills/defaults/business/market-sizing-analysis/SKILL.md +451 -0
  54. package/dist/skills/defaults/business/startup-financial-modeling/SKILL.md +494 -0
  55. package/dist/skills/defaults/business/startup-metrics-framework/SKILL.md +564 -0
  56. package/dist/skills/defaults/business/team-composition-analysis.md +437 -0
  57. package/dist/skills/defaults/compliance/employment-contract-templates/SKILL.md +527 -0
  58. package/dist/skills/defaults/compliance/gdpr-data-handling/SKILL.md +630 -0
  59. package/dist/skills/defaults/data-engineering/airflow-dag-patterns.md +436 -0
  60. package/dist/skills/defaults/data-engineering/airflow.md +519 -0
  61. package/dist/skills/defaults/data-engineering/data-quality.md +583 -0
  62. package/dist/skills/defaults/data-engineering/dbt-transformation-patterns.md +482 -0
  63. package/dist/skills/defaults/data-engineering/dbt.md +556 -0
  64. package/dist/skills/defaults/data-engineering/ml-pipeline-workflow/SKILL.md +247 -0
  65. package/dist/skills/defaults/data-engineering/spark-optimization.md +348 -0
  66. package/dist/skills/defaults/data-engineering/spark.md +411 -0
  67. package/dist/skills/defaults/database/postgresql.md +202 -0
  68. package/dist/skills/defaults/debugging/systematic-debugging.md +249 -0
  69. package/dist/skills/defaults/devops/architecture-decision-records.md +448 -0
  70. package/dist/skills/defaults/devops/changelog-automation.md +580 -0
  71. package/dist/skills/defaults/devops/cicd.md +314 -0
  72. package/dist/skills/defaults/devops/cloud.md +263 -0
  73. package/dist/skills/defaults/devops/code-review-excellence.md +299 -0
  74. package/dist/skills/defaults/devops/cost-optimization.md +295 -0
  75. package/dist/skills/defaults/devops/deployment-pipeline-design.md +356 -0
  76. package/dist/skills/defaults/devops/docker.md +281 -0
  77. package/dist/skills/defaults/devops/git-workflows.md +205 -0
  78. package/dist/skills/defaults/devops/github-actions.md +311 -0
  79. package/dist/skills/defaults/devops/gitlab-ci-patterns.md +266 -0
  80. package/dist/skills/defaults/devops/hybrid-cloud-networking.md +241 -0
  81. package/dist/skills/defaults/devops/istio-traffic-management.md +327 -0
  82. package/dist/skills/defaults/devops/kubernetes.md +339 -0
  83. package/dist/skills/defaults/devops/linkerd-patterns.md +311 -0
  84. package/dist/skills/defaults/devops/multi-cloud-architecture.md +181 -0
  85. package/dist/skills/defaults/devops/observability.md +243 -0
  86. package/dist/skills/defaults/devops/openapi-spec-generation.md +1024 -0
  87. package/dist/skills/defaults/devops/postmortem-writing.md +396 -0
  88. package/dist/skills/defaults/devops/prometheus-configuration.md +265 -0
  89. package/dist/skills/defaults/devops/secrets-management.md +341 -0
  90. package/dist/skills/defaults/devops/service-mesh-observability.md +385 -0
  91. package/dist/skills/defaults/devops/terraform-module-library.md +244 -0
  92. package/dist/skills/defaults/finance/backtesting-frameworks/SKILL.md +663 -0
  93. package/dist/skills/defaults/finance/risk-metrics-calculation/SKILL.md +557 -0
  94. package/dist/skills/defaults/frontend/accessibility-compliance.md +420 -0
  95. package/dist/skills/defaults/frontend/design-system-patterns.md +337 -0
  96. package/dist/skills/defaults/frontend/interaction-design.md +327 -0
  97. package/dist/skills/defaults/frontend/javascript.md +311 -0
  98. package/dist/skills/defaults/frontend/modern-javascript-patterns.md +927 -0
  99. package/dist/skills/defaults/frontend/react-native-design.md +440 -0
  100. package/dist/skills/defaults/frontend/react.md +345 -0
  101. package/dist/skills/defaults/frontend/responsive-design.md +472 -0
  102. package/dist/skills/defaults/frontend/tailwind-design-system.md +337 -0
  103. package/dist/skills/defaults/frontend/typescript-advanced-types.md +724 -0
  104. package/dist/skills/defaults/frontend/typescript.md +334 -0
  105. package/dist/skills/defaults/frontend/visual-design-foundations.md +326 -0
  106. package/dist/skills/defaults/frontend/web-component-design.md +279 -0
  107. package/dist/skills/defaults/game-development/godot-gdscript-patterns.md +188 -0
  108. package/dist/skills/defaults/game-development/unity-ecs-patterns.md +594 -0
  109. package/dist/skills/defaults/kubernetes/gitops-workflow.md +285 -0
  110. package/dist/skills/defaults/kubernetes/gitops.md +280 -0
  111. package/dist/skills/defaults/kubernetes/helm-chart-scaffolding.md +553 -0
  112. package/dist/skills/defaults/kubernetes/helm.md +343 -0
  113. package/dist/skills/defaults/kubernetes/k8s-manifest-generator.md +501 -0
  114. package/dist/skills/defaults/kubernetes/k8s-security-policies.md +342 -0
  115. package/dist/skills/defaults/kubernetes/manifests.md +330 -0
  116. package/dist/skills/defaults/kubernetes/security.md +337 -0
  117. package/dist/skills/defaults/llm-application/embedding-strategies.md +608 -0
  118. package/dist/skills/defaults/llm-application/hybrid-search-implementation.md +570 -0
  119. package/dist/skills/defaults/llm-application/hybrid-search.md +570 -0
  120. package/dist/skills/defaults/llm-application/langchain-architecture.md +666 -0
  121. package/dist/skills/defaults/llm-application/langchain.md +259 -0
  122. package/dist/skills/defaults/llm-application/llm-evaluation.md +695 -0
  123. package/dist/skills/defaults/llm-application/prompt-engineering-patterns.md +449 -0
  124. package/dist/skills/defaults/llm-application/prompt-engineering.md +219 -0
  125. package/dist/skills/defaults/llm-application/rag-implementation.md +434 -0
  126. package/dist/skills/defaults/llm-application/similarity-search-patterns.md +560 -0
  127. package/dist/skills/defaults/llm-application/similarity-search.md +560 -0
  128. package/dist/skills/defaults/llm-application/vector-index-tuning.md +523 -0
  129. package/dist/skills/defaults/mobile/mobile-android-design.md +440 -0
  130. package/dist/skills/defaults/mobile/mobile-ios-design.md +266 -0
  131. package/dist/skills/defaults/monitoring/distributed-tracing.md +436 -0
  132. package/dist/skills/defaults/monitoring/grafana-dashboards.md +370 -0
  133. package/dist/skills/defaults/monitoring/prometheus-configuration.md +379 -0
  134. package/dist/skills/defaults/monitoring/slo-implementation.md +323 -0
  135. package/dist/skills/defaults/refactoring/code-refactoring.md +349 -0
  136. package/dist/skills/defaults/security/anti-reversing-techniques/SKILL.md +559 -0
  137. package/dist/skills/defaults/security/auditor.md +168 -0
  138. package/dist/skills/defaults/security/binary-analysis-patterns/SKILL.md +438 -0
  139. package/dist/skills/defaults/security/memory-forensics/SKILL.md +483 -0
  140. package/dist/skills/defaults/security/mtls-configuration.md +349 -0
  141. package/dist/skills/defaults/security/protocol-reverse-engineering/SKILL.md +520 -0
  142. package/dist/skills/defaults/security/sast-configuration.md +182 -0
  143. package/dist/skills/defaults/security/security.md +313 -0
  144. package/dist/skills/defaults/security/stride-analysis.md +273 -0
  145. package/dist/skills/defaults/security/threat-mitigation-mapping.md +290 -0
  146. package/dist/skills/defaults/systems/bash-defensive-patterns/SKILL.md +539 -0
  147. package/dist/skills/defaults/systems/bats-testing-patterns/SKILL.md +631 -0
  148. package/dist/skills/defaults/systems/go-concurrency-patterns.md +657 -0
  149. package/dist/skills/defaults/systems/memory-safety-patterns.md +605 -0
  150. package/dist/skills/defaults/systems/rust-async-patterns.md +519 -0
  151. package/dist/skills/defaults/systems/shellcheck-configuration/SKILL.md +456 -0
  152. package/dist/skills/defaults/team-collaboration/multi-reviewer-patterns.md +126 -0
  153. package/dist/skills/defaults/team-collaboration/parallel-feature-development.md +151 -0
  154. package/dist/skills/defaults/testing/javascript-testing-patterns.md +1021 -0
  155. package/dist/skills/defaults/testing/python-testing-patterns.md +351 -0
  156. package/dist/skills/defaults/testing/testing.md +332 -0
  157. package/dist/skills/defaults/workflows/context-driven-development.md +384 -0
  158. package/dist/skills/defaults/workflows/track-management.md +592 -0
  159. package/dist/skills/defaults/workflows/workflow-patterns.md +622 -0
  160. package/dist/skills/index.d.ts +11 -0
  161. package/dist/skills/index.d.ts.map +1 -0
  162. package/dist/skills/index.js +129 -0
  163. package/dist/skills/index.js.map +1 -0
  164. package/dist/utils/character.js +4 -4
  165. package/dist/utils/character.js.map +1 -1
  166. package/dist/utils/inputbar.d.ts.map +1 -1
  167. package/dist/utils/inputbar.js +7 -0
  168. package/dist/utils/inputbar.js.map +1 -1
  169. package/package.json +1 -1
@@ -0,0 +1,523 @@
1
+ ---
2
+ name: vector-index-tuning
3
+ description: Optimize vector index performance for latency, recall, and memory. Use when tuning HNSW parameters, selecting quantization strategies, or scaling vector search infrastructure.
4
+ ---
5
+
6
+ # Vector Index Tuning
7
+
8
+ Guide to optimizing vector indexes for production performance.
9
+
10
+ ## When to Use This Skill
11
+
12
+ - Tuning HNSW parameters
13
+ - Implementing quantization
14
+ - Optimizing memory usage
15
+ - Reducing search latency
16
+ - Balancing recall vs speed
17
+ - Scaling to billions of vectors
18
+
19
+ ## Core Concepts
20
+
21
+ ### 1. Index Type Selection
22
+
23
+ ```
24
+ Data Size Recommended Index
25
+ ──────────────────────────────────────
26
+ < 10K vectors → Flat (exact search)
27
+ 10K - 1M → HNSW
28
+ 1M - 100M → HNSW + Quantization
29
+ > 100M → IVF + PQ or DiskANN
30
+ ```
31
+
32
+ ### 2. HNSW Parameters
33
+
34
+ | Parameter | Default | Effect |
35
+ | ------------------ | ------- | ---------------------------------------------------- |
36
+ | **M** | 16 | Connections per node, ↑ = better recall, more memory |
37
+ | **efConstruction** | 100 | Build quality, ↑ = better index, slower build |
38
+ | **efSearch** | 50 | Search quality, ↑ = better recall, slower search |
39
+
40
+ ### 3. Quantization Types
41
+
42
+ ```
43
+ Full Precision (FP32): 4 bytes × dimensions
44
+ Half Precision (FP16): 2 bytes × dimensions
45
+ INT8 Scalar: 1 byte × dimensions
46
+ Product Quantization: ~32-64 bytes per vector (set by the number of sub-vectors, not by dimensions)
47
+ Binary: dimensions/8 bytes
48
+ ```
49
+
50
+ ## Templates
51
+
52
+ ### Template 1: HNSW Parameter Tuning
53
+
54
+ ```python
55
+ import numpy as np
56
+ from typing import List, Tuple
57
+ import time
58
+
59
def benchmark_hnsw_parameters(
    vectors: np.ndarray,
    queries: np.ndarray,
    ground_truth: np.ndarray,
    m_values: List[int] = [8, 16, 32, 64],
    ef_construction_values: List[int] = [64, 128, 256],
    ef_search_values: List[int] = [32, 64, 128, 256]
) -> List[dict]:
    """Benchmark different HNSW configurations.

    One hnswlib cosine index is built per (M, ef_construction) pair and then
    queried once per ef_search value, so the build cost is paid only once per
    pair.

    Args:
        vectors: 2-D array of vectors to index (rows are vectors).
        queries: Query vectors passed to ``knn_query`` in a single batch.
        ground_truth: Per-query true neighbour ids, compared against the
            returned labels by ``calculate_recall``.
        m_values: Values of ``M`` to try. The default lists are only read,
            never mutated.
        ef_construction_values: Values of ``ef_construction`` to try.
        ef_search_values: Values passed to ``set_ef`` before each search.

    Returns:
        One dict per (M, ef_construction, ef_search) combination with the
        build time in seconds, the mean per-query search time in
        milliseconds, recall@10 and an approximate memory footprint in MiB.
    """
    import hnswlib

    # Neighbours requested per query; the "recall@10" key below reflects it.
    k = 10

    results = []
    dim = vectors.shape[1]
    n = vectors.shape[0]

    for m in m_values:
        for ef_construction in ef_construction_values:
            # Build index
            index = hnswlib.Index(space='cosine', dim=dim)
            index.init_index(max_elements=n, M=m, ef_construction=ef_construction)

            # perf_counter is monotonic and high resolution, unlike the
            # wall clock, so short builds/searches are timed reliably.
            build_start = time.perf_counter()
            index.add_items(vectors)
            build_time = time.perf_counter() - build_start

            # Rough estimate only: raw float32 vectors plus ~2*M int32 edges
            # per element.
            memory_bytes = index.element_count * (
                dim * 4 +  # Vector storage
                m * 2 * 4  # Graph edges (approximate)
            )

            for ef_search in ef_search_values:
                index.set_ef(ef_search)

                # Measure search
                search_start = time.perf_counter()
                labels, distances = index.knn_query(queries, k=k)
                search_time = time.perf_counter() - search_start

                # Calculate recall
                recall = calculate_recall(labels, ground_truth, k=k)

                results.append({
                    "M": m,
                    "ef_construction": ef_construction,
                    "ef_search": ef_search,
                    "build_time_s": build_time,
                    "search_time_ms": search_time * 1000 / len(queries),
                    "recall@10": recall,
                    "memory_mb": memory_bytes / 1024 / 1024
                })

    return results
112
+
113
+
114
def calculate_recall(predictions: np.ndarray, ground_truth: np.ndarray, k: int) -> float:
    """Calculate recall@k.

    For every query, counts how many of the first ``k`` predicted ids also
    appear among the first ``k`` ground-truth ids, then divides the total
    number of hits by ``len(predictions) * k``.

    Args:
        predictions: Per-query sequences of predicted ids.
        ground_truth: Per-query sequences of true ids, aligned with
            ``predictions``.
        k: Cut-off applied to both sequences.

    Returns:
        Fraction of hits in [0, 1]; 0.0 when there are no predictions or
        ``k`` is not positive (previously a ZeroDivisionError).
    """
    num_queries = len(predictions)
    if num_queries == 0 or k <= 0:
        return 0.0

    hits = sum(
        len(set(pred[:k]) & set(truth[:k]))
        for pred, truth in zip(predictions, ground_truth)
    )
    return hits / (num_queries * k)
120
+
121
+
122
def recommend_hnsw_params(
    num_vectors: int,
    target_recall: float = 0.95,
    max_latency_ms: float = 10,
    available_memory_gb: float = 8
) -> dict:
    """Suggest HNSW settings from collection size and recall target.

    ``M`` and ``ef_construction`` are picked from the collection size;
    ``ef_search`` is picked from the recall target. ``max_latency_ms`` and
    ``available_memory_gb`` are accepted but do not influence the result.

    Returns:
        Dict with keys "M", "ef_construction", "ef_search" and a
        human-readable "notes" string.
    """
    # (exclusive upper bound on num_vectors, M, ef_construction)
    size_tiers = (
        (100_000, 16, 100),
        (1_000_000, 32, 200),
    )
    # (inclusive lower bound on target_recall, ef_search), strictest first
    recall_tiers = (
        (0.99, 256),
        (0.95, 128),
    )

    # Largest tier is the fallback when no size bound matches.
    graph_degree, build_ef = 48, 256
    for upper_bound, tier_m, tier_ef in size_tiers:
        if num_vectors < upper_bound:
            graph_degree, build_ef = tier_m, tier_ef
            break

    query_ef = next(
        (ef for floor, ef in recall_tiers if target_recall >= floor),
        64,
    )

    summary = f"Estimated for {num_vectors:,} vectors, {target_recall:.0%} recall"
    return {
        "M": graph_degree,
        "ef_construction": build_ef,
        "ef_search": query_ef,
        "notes": summary
    }
155
+ ```
156
+
157
+ ### Template 2: Quantization Strategies
158
+
159
+ ```python
160
+ import numpy as np
161
+ from typing import Optional, Tuple
162
+
163
class VectorQuantizer:
    """Quantization strategies for vector compression.

    All methods are static; the class only groups related helpers.
    """

    @staticmethod
    def scalar_quantize_int8(
        vectors: np.ndarray,
        min_val: Optional[float] = None,
        max_val: Optional[float] = None
    ) -> Tuple[np.ndarray, dict]:
        """Scalar quantization to 8-bit codes.

        Values are mapped linearly from [min_val, max_val] onto 0..255,
        rounded and clipped, and stored as ``uint8``.

        Args:
            vectors: Array to quantize.
            min_val: Lower end of the range; defaults to ``vectors.min()``.
            max_val: Upper end of the range; defaults to ``vectors.max()``.

        Returns:
            ``(quantized, params)`` where ``params`` holds "min_val",
            "max_val" and "scale", as needed by ``dequantize_int8``.
        """
        if min_val is None:
            min_val = vectors.min()
        if max_val is None:
            max_val = vectors.max()

        # A zero-width range (constant input) would divide by zero and give
        # an infinite scale; every value maps to code 0 in that case, so any
        # finite scale round-trips correctly.
        value_range = max_val - min_val
        scale = 255.0 / value_range if value_range != 0 else 1.0

        # Scale to 0-255 range
        quantized = np.clip(
            np.round((vectors - min_val) * scale),
            0, 255
        ).astype(np.uint8)

        params = {"min_val": min_val, "max_val": max_val, "scale": scale}
        return quantized, params

    @staticmethod
    def dequantize_int8(
        quantized: np.ndarray,
        params: dict
    ) -> np.ndarray:
        """Dequantize 8-bit codes produced by ``scalar_quantize_int8``.

        Args:
            quantized: Code array.
            params: Dict providing the "scale" and "min_val" used to encode.

        Returns:
            Approximate reconstruction: ``code / scale + min_val``.
        """
        return quantized.astype(np.float32) / params["scale"] + params["min_val"]

    @staticmethod
    def product_quantize(
        vectors: np.ndarray,
        n_subvectors: int = 8,
        n_centroids: int = 256
    ) -> Tuple[np.ndarray, dict]:
        """Product quantization for aggressive compression.

        Splits every vector into ``n_subvectors`` equal slices and runs
        k-means (``n_centroids`` clusters) independently on each slice.

        Args:
            vectors: 2-D array, one vector per row.
            n_subvectors: Number of slices; must divide the dimension.
            n_centroids: Codebook size per slice.

        Returns:
            ``(codes, params)``: ``codes[i, j]`` is the centroid index of
            slice ``j`` of vector ``i``; ``params`` holds the per-slice
            "codebooks", "n_subvectors" and "subvector_dim".

        Raises:
            AssertionError: If the dimension is not divisible by
                ``n_subvectors``.
        """
        n, dim = vectors.shape
        # Raised explicitly (same exception type as the former ``assert``) so
        # the check is not stripped under ``python -O``.
        if dim % n_subvectors != 0:
            raise AssertionError(
                f"dimension {dim} is not divisible by n_subvectors={n_subvectors}"
            )
        subvector_dim = dim // n_subvectors

        from sklearn.cluster import KMeans

        codebooks = []
        # Smallest unsigned type that can hold every centroid index: uint8
        # for up to 256 centroids (as before), wider beyond that, where
        # uint8 would silently wrap around.
        code_dtype = np.min_scalar_type(max(n_centroids - 1, 0))
        codes = np.zeros((n, n_subvectors), dtype=code_dtype)

        for i in range(n_subvectors):
            start = i * subvector_dim
            end = (i + 1) * subvector_dim
            subvectors = vectors[:, start:end]

            kmeans = KMeans(n_clusters=n_centroids, random_state=42)
            codes[:, i] = kmeans.fit_predict(subvectors)
            codebooks.append(kmeans.cluster_centers_)

        params = {
            "codebooks": codebooks,
            "n_subvectors": n_subvectors,
            "subvector_dim": subvector_dim
        }
        return codes, params

    @staticmethod
    def binary_quantize(vectors: np.ndarray) -> np.ndarray:
        """Binary quantization (sign of each dimension).

        Each dimension becomes one bit: 1 when the value is strictly
        positive, 0 otherwise. Dimension ``i`` is stored in bit ``i % 8`` of
        byte ``i // 8``; unused high bits of the last byte are zero.

        Args:
            vectors: 2-D array, one vector per row.

        Returns:
            ``uint8`` array of shape ``(n, ceil(dim / 8))``.
        """
        # Convert to binary: positive = 1, negative = 0
        bits = (vectors > 0).astype(np.uint8)

        # bitorder="little" puts the first dimension of each group of eight
        # in the least significant bit, matching the layout described above.
        return np.packbits(bits, axis=1, bitorder="little")
245
+
246
+
247
def estimate_memory_usage(
    num_vectors: int,
    dimensions: int,
    quantization: str = "fp32",
    index_type: str = "hnsw",
    hnsw_m: int = 16
) -> dict:
    """Back-of-the-envelope memory estimate for a vector index.

    Args:
        num_vectors: Number of stored vectors.
        dimensions: Dimensionality of each vector.
        quantization: One of "fp32", "fp16", "int8", "pq", "binary".
        index_type: "hnsw" or "ivf"; any other value adds no index overhead.
        hnsw_m: HNSW ``M``; only used when ``index_type`` is "hnsw".

    Returns:
        Dict with "vector_storage_mb", "index_overhead_mb", "total_mb" and
        "total_gb" (binary units, i.e. divisions by 1024).

    Raises:
        KeyError: If ``quantization`` is not one of the known schemes.
    """
    def to_mb(num_bytes):
        return num_bytes / 1024 / 1024

    # Storage cost of one dimension, in bytes, per quantization scheme.
    per_dim_cost = {
        "fp32": 4,
        "fp16": 2,
        "int8": 1,
        "pq": 0.05,  # Approximate
        "binary": 0.125
    }
    vector_bytes = num_vectors * dimensions * per_dim_cost[quantization]

    # Index structures on top of the raw vectors.
    index_bytes = 0
    if index_type == "hnsw":
        # ~2*M edges per node, 4 bytes (int32) per edge
        index_bytes = num_vectors * hnsw_m * 2 * 4
    elif index_type == "ivf":
        # Inverted lists + centroids
        index_bytes = num_vectors * 8 + 65536 * dimensions * 4

    total_bytes = vector_bytes + index_bytes
    total_mb = to_mb(total_bytes)

    return {
        "vector_storage_mb": to_mb(vector_bytes),
        "index_overhead_mb": to_mb(index_bytes),
        "total_mb": total_mb,
        "total_gb": total_mb / 1024
    }
285
+ ```
286
+
287
+ ### Template 3: Qdrant Index Configuration
288
+
289
+ ```python
290
+ from qdrant_client import QdrantClient
291
+ from qdrant_client.http import models
292
+
293
def create_optimized_collection(
    client: QdrantClient,
    collection_name: str,
    vector_size: int,
    num_vectors: int,
    optimize_for: str = "balanced"  # "recall", "speed", "memory"
) -> None:
    """Create a cosine-distance collection using a preset tuning profile.

    Args:
        client: Qdrant client used to issue ``create_collection``.
        collection_name: Name of the collection to create.
        vector_size: Dimensionality of the stored vectors.
        num_vectors: Accepted but not used when choosing the settings.
        optimize_for: Profile name: "recall", "speed", "balanced" or
            "memory".

    Raises:
        KeyError: If ``optimize_for`` is not one of the known profiles.
    """
    def int8_scalar(keep_in_ram):
        # INT8 scalar quantization shared by the "speed" and "balanced"
        # profiles; they differ only in the always_ram flag.
        return models.ScalarQuantization(
            scalar=models.ScalarQuantizationConfig(
                type=models.ScalarType.INT8,
                quantile=0.99,
                always_ram=keep_in_ram
            )
        )

    # profile name -> (HNSW config, quantization config, optimizer config)
    profiles = {
        "recall": (
            models.HnswConfigDiff(m=32, ef_construct=256),
            None,  # No quantization for max recall
            models.OptimizersConfigDiff(
                indexing_threshold=10000,
                memmap_threshold=50000
            ),
        ),
        "speed": (
            models.HnswConfigDiff(m=16, ef_construct=64),
            int8_scalar(True),
            models.OptimizersConfigDiff(
                indexing_threshold=5000,
                memmap_threshold=20000
            ),
        ),
        "balanced": (
            models.HnswConfigDiff(m=16, ef_construct=128),
            int8_scalar(False),
            models.OptimizersConfigDiff(
                indexing_threshold=20000,
                memmap_threshold=50000
            ),
        ),
        "memory": (
            models.HnswConfigDiff(m=8, ef_construct=64),
            models.ProductQuantization(
                product=models.ProductQuantizationConfig(
                    compression=models.CompressionRatio.X16,
                    always_ram=False
                )
            ),
            models.OptimizersConfigDiff(
                indexing_threshold=50000,
                memmap_threshold=10000  # Use disk sooner
            ),
        ),
    }

    hnsw_settings, quantization_settings, optimizer_settings = profiles[optimize_for]

    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=vector_size,
            distance=models.Distance.COSINE
        ),
        hnsw_config=hnsw_settings,
        quantization_config=quantization_settings,
        optimizers_config=optimizer_settings
    )
365
+
366
+
367
def tune_search_parameters(
    client: QdrantClient,
    collection_name: str,
    target_recall: float = 0.95
) -> models.SearchParams:
    """Pick search-time parameters for a target recall.

    The return annotation previously said ``dict``; the function has always
    returned a ``models.SearchParams`` instance.

    Args:
        client: Accepted but not used; no request is sent.
        collection_name: Accepted but not used.
        target_recall: Desired recall. Thresholds are 0.99 and 0.95.

    Returns:
        ``models.SearchParams`` with ``hnsw_ef`` and quantization behaviour
        chosen by tier:
          * >= 0.99: hnsw_ef=256, quantization ignored at search time
          * >= 0.95: hnsw_ef=128, quantized search with rescoring and 2x
            oversampling
          * otherwise: hnsw_ef=64, quantized search without rescoring
    """
    if target_recall >= 0.99:
        hnsw_ef = 256
        quantization = models.QuantizationSearchParams(
            ignore=True,  # Don't use quantization for search
            rescore=True
        )
    elif target_recall >= 0.95:
        hnsw_ef = 128
        quantization = models.QuantizationSearchParams(
            ignore=False,
            rescore=True,
            oversampling=2.0
        )
    else:
        hnsw_ef = 64
        quantization = models.QuantizationSearchParams(
            ignore=False,
            rescore=False
        )

    return models.SearchParams(
        hnsw_ef=hnsw_ef,
        exact=False,
        quantization=quantization
    )
405
+ ```
406
+
407
+ ### Template 4: Performance Monitoring
408
+
409
+ ```python
410
+ import time
411
+ from dataclasses import dataclass
412
+ from typing import List
413
+ import numpy as np
414
+
415
@dataclass
class SearchMetrics:
    """Summary of one search benchmark run."""

    latency_p50_ms: float  # median per-query latency, milliseconds
    latency_p95_ms: float  # 95th percentile latency, milliseconds
    latency_p99_ms: float  # 99th percentile latency, milliseconds
    recall: float  # fraction of ground-truth hits; 0 when no ground truth
    qps: float  # queries per second over the summed query time


class VectorSearchMonitor:
    """Monitor vector search performance.

    Attributes:
        latencies: Every per-query latency (ms) measured so far.
        recalls: One recall value per ``measure_search`` call that had a
            ground-truth function available.
        ground_truth_fn: Optional callable ``(query, k=...)`` returning the
            true neighbours for a query.
    """

    def __init__(self, ground_truth_fn=None):
        self.latencies = []
        self.recalls = []
        self.ground_truth_fn = ground_truth_fn

    def measure_search(
        self,
        search_fn,
        query_vectors: np.ndarray,
        k: int = 10,
        num_iterations: int = 100
    ) -> SearchMetrics:
        """Benchmark search performance.

        Calls ``search_fn(query, k=k)`` for every query, ``num_iterations``
        times over, timing each call individually.

        Args:
            search_fn: Callable ``(query, k=...)`` returning result ids.
            query_vectors: Queries to iterate over.
            k: Number of neighbours requested per query.
            num_iterations: How many passes over ``query_vectors`` to run.

        Returns:
            Latency percentiles, recall (0 without a ground-truth function)
            and throughput. All-zero metrics when nothing was measured, i.e.
            no queries or no iterations (previously a ZeroDivisionError).
        """
        latencies = []

        for _ in range(num_iterations):
            for query in query_vectors:
                start = time.perf_counter()
                search_fn(query, k=k)
                latencies.append((time.perf_counter() - start) * 1000)

        if not latencies:
            return SearchMetrics(
                latency_p50_ms=0.0,
                latency_p95_ms=0.0,
                latency_p99_ms=0.0,
                recall=0.0,
                qps=0.0
            )

        latency_array = np.array(latencies)
        total_time = latency_array.sum() / 1000  # seconds
        p50, p95, p99 = np.percentile(latency_array, [50, 95, 99])

        if self.ground_truth_fn:
            recall = self._calculate_recall(search_fn, query_vectors, k)
            self.recalls.append(recall)
        else:
            recall = 0

        # Keep the raw samples so the monitor's history reflects every run.
        self.latencies.extend(latencies)

        return SearchMetrics(
            latency_p50_ms=float(p50),
            latency_p95_ms=float(p95),
            latency_p99_ms=float(p99),
            recall=recall,
            # Guard against a zero total on timers too coarse to register.
            qps=float(len(latencies) / total_time) if total_time > 0 else 0.0
        )

    def _calculate_recall(self, search_fn, queries: np.ndarray, k: int) -> float:
        """Calculate recall against ground truth.

        Compares, per query, the set returned by ``search_fn`` with the set
        returned by ``ground_truth_fn`` and divides the hits by
        ``k * number of queries``.

        Returns:
            0 when no ground-truth function is configured, 0.0 when there is
            nothing to score (no queries or ``k == 0``), otherwise the
            fraction of hits.
        """
        if not self.ground_truth_fn:
            return 0

        correct = 0
        total = 0

        for query in queries:
            predicted = set(search_fn(query, k=k))
            actual = set(self.ground_truth_fn(query, k=k))
            correct += len(predicted & actual)
            total += k

        return correct / total if total else 0.0
476
+
477
+
478
def profile_index_build(
    build_fn,
    vectors: np.ndarray,
    batch_sizes: List[int] = [1000, 10000, 50000]
) -> dict:
    """Profile index build performance.

    For each batch size, feeds ``vectors`` to ``build_fn`` in consecutive
    slices and times every call.

    Args:
        build_fn: Callable taking one batch of vectors.
        vectors: Vectors to insert; sliced along the first axis.
        batch_sizes: Batch sizes to try. The default list is only read,
            never mutated.

    Returns:
        Dict mapping each batch size to "avg_batch_time_s" and
        "vectors_per_second". Throughput is the number of vectors actually
        inserted divided by the total time, so a short final batch no longer
        inflates it. Both values are 0.0 when ``vectors`` is empty
        (previously NaN).
    """
    results = {}
    total_vectors = len(vectors)

    for batch_size in batch_sizes:
        times = []
        for i in range(0, total_vectors, batch_size):
            batch = vectors[i:i + batch_size]
            start = time.perf_counter()
            build_fn(batch)
            times.append(time.perf_counter() - start)

        total_time = sum(times)
        if not times:
            avg_batch_time = 0.0
            throughput = 0.0
        elif total_time == 0:
            # Timer too coarse to register any elapsed time.
            avg_batch_time = 0.0
            throughput = float("inf")
        else:
            avg_batch_time = total_time / len(times)
            throughput = total_vectors / total_time

        results[batch_size] = {
            "avg_batch_time_s": avg_batch_time,
            "vectors_per_second": throughput
        }

    return results
500
+ ```
501
+
502
+ ## Best Practices
503
+
504
+ ### Do's
505
+
506
+ - **Benchmark with real queries** - Synthetic queries may not reflect the production query distribution
507
+ - **Monitor recall continuously** - Recall can degrade as the data distribution drifts
508
+ - **Start with defaults** - Tune only when needed
509
+ - **Use quantization** - Significant memory savings
510
+ - **Consider tiered storage** - Hot/cold data separation
511
+
512
+ ### Don'ts
513
+
514
+ - **Don't over-optimize early** - Profile first
515
+ - **Don't ignore build time** - Index updates have cost
516
+ - **Don't forget reindexing** - Plan for maintenance
517
+ - **Don't skip warm-up** - A cold index serves its first queries slowly until it has been loaded into memory
518
+
519
+ ## Resources
520
+
521
+ - [HNSW Paper](https://arxiv.org/abs/1603.09320)
522
+ - [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki)
523
+ - [ANN Benchmarks](https://ann-benchmarks.com/)