@sylix/coworker 2.0.11 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/dist/commands/slash/config.d.ts.map +1 -1
  2. package/dist/commands/slash/config.js +22 -4
  3. package/dist/commands/slash/config.js.map +1 -1
  4. package/dist/core/CoWorkerAgent.d.ts.map +1 -1
  5. package/dist/core/CoWorkerAgent.js +6 -3
  6. package/dist/core/CoWorkerAgent.js.map +1 -1
  7. package/dist/skills/defaults/accessibility/screen-reader-testing.md +545 -0
  8. package/dist/skills/defaults/accessibility/wcag-audit-patterns.md +555 -0
  9. package/dist/skills/defaults/ai-ml/rag.md +276 -0
  10. package/dist/skills/defaults/backend-development/api-design-principles.md +528 -0
  11. package/dist/skills/defaults/backend-development/api-design.md +285 -0
  12. package/dist/skills/defaults/backend-development/architecture-patterns.md +494 -0
  13. package/dist/skills/defaults/backend-development/async-python.md +237 -0
  14. package/dist/skills/defaults/backend-development/auth-implementation-patterns.md +638 -0
  15. package/dist/skills/defaults/backend-development/bazel-build-optimization.md +387 -0
  16. package/dist/skills/defaults/backend-development/billing-automation/SKILL.md +566 -0
  17. package/dist/skills/defaults/backend-development/code-review-excellence.md +538 -0
  18. package/dist/skills/defaults/backend-development/cqrs-implementation.md +554 -0
  19. package/dist/skills/defaults/backend-development/database-design.md +305 -0
  20. package/dist/skills/defaults/backend-development/debugging-strategies.md +536 -0
  21. package/dist/skills/defaults/backend-development/e2e-testing-patterns.md +544 -0
  22. package/dist/skills/defaults/backend-development/error-handling-patterns.md +641 -0
  23. package/dist/skills/defaults/backend-development/fastapi-templates.md +559 -0
  24. package/dist/skills/defaults/backend-development/fastapi.md +309 -0
  25. package/dist/skills/defaults/backend-development/git-advanced-workflows.md +405 -0
  26. package/dist/skills/defaults/backend-development/microservices-patterns.md +595 -0
  27. package/dist/skills/defaults/backend-development/microservices.md +284 -0
  28. package/dist/skills/defaults/backend-development/monorepo-management.md +623 -0
  29. package/dist/skills/defaults/backend-development/nodejs-backend-patterns.md +1048 -0
  30. package/dist/skills/defaults/backend-development/nx-workspace-patterns.md +457 -0
  31. package/dist/skills/defaults/backend-development/paypal-integration/SKILL.md +478 -0
  32. package/dist/skills/defaults/backend-development/pci-compliance/SKILL.md +480 -0
  33. package/dist/skills/defaults/backend-development/python-anti-patterns.md +349 -0
  34. package/dist/skills/defaults/backend-development/python-background-jobs.md +364 -0
  35. package/dist/skills/defaults/backend-development/python-code-style.md +360 -0
  36. package/dist/skills/defaults/backend-development/python-configuration.md +368 -0
  37. package/dist/skills/defaults/backend-development/python-design-patterns.md +296 -0
  38. package/dist/skills/defaults/backend-development/python-error-handling.md +323 -0
  39. package/dist/skills/defaults/backend-development/python-packaging.md +887 -0
  40. package/dist/skills/defaults/backend-development/python-performance-optimization.md +874 -0
  41. package/dist/skills/defaults/backend-development/python-project-structure.md +252 -0
  42. package/dist/skills/defaults/backend-development/python-resilience.md +376 -0
  43. package/dist/skills/defaults/backend-development/python-resource-management.md +421 -0
  44. package/dist/skills/defaults/backend-development/python-type-safety.md +428 -0
  45. package/dist/skills/defaults/backend-development/sql-optimization-patterns.md +509 -0
  46. package/dist/skills/defaults/backend-development/stripe-integration/SKILL.md +522 -0
  47. package/dist/skills/defaults/backend-development/turborepo-caching.md +376 -0
  48. package/dist/skills/defaults/blockchain/defi-protocol-templates.md +430 -0
  49. package/dist/skills/defaults/blockchain/nft-standards.md +364 -0
  50. package/dist/skills/defaults/blockchain/solidity-security.md +514 -0
  51. package/dist/skills/defaults/blockchain/web3-testing.md +360 -0
  52. package/dist/skills/defaults/business/competitive-landscape/SKILL.md +527 -0
  53. package/dist/skills/defaults/business/market-sizing-analysis/SKILL.md +451 -0
  54. package/dist/skills/defaults/business/startup-financial-modeling/SKILL.md +494 -0
  55. package/dist/skills/defaults/business/startup-metrics-framework/SKILL.md +564 -0
  56. package/dist/skills/defaults/business/team-composition-analysis.md +437 -0
  57. package/dist/skills/defaults/compliance/employment-contract-templates/SKILL.md +527 -0
  58. package/dist/skills/defaults/compliance/gdpr-data-handling/SKILL.md +630 -0
  59. package/dist/skills/defaults/data-engineering/airflow-dag-patterns.md +436 -0
  60. package/dist/skills/defaults/data-engineering/airflow.md +519 -0
  61. package/dist/skills/defaults/data-engineering/data-quality.md +583 -0
  62. package/dist/skills/defaults/data-engineering/dbt-transformation-patterns.md +482 -0
  63. package/dist/skills/defaults/data-engineering/dbt.md +556 -0
  64. package/dist/skills/defaults/data-engineering/ml-pipeline-workflow/SKILL.md +247 -0
  65. package/dist/skills/defaults/data-engineering/spark-optimization.md +348 -0
  66. package/dist/skills/defaults/data-engineering/spark.md +411 -0
  67. package/dist/skills/defaults/database/postgresql.md +202 -0
  68. package/dist/skills/defaults/debugging/systematic-debugging.md +249 -0
  69. package/dist/skills/defaults/devops/architecture-decision-records.md +448 -0
  70. package/dist/skills/defaults/devops/changelog-automation.md +580 -0
  71. package/dist/skills/defaults/devops/cicd.md +314 -0
  72. package/dist/skills/defaults/devops/cloud.md +263 -0
  73. package/dist/skills/defaults/devops/code-review-excellence.md +299 -0
  74. package/dist/skills/defaults/devops/cost-optimization.md +295 -0
  75. package/dist/skills/defaults/devops/deployment-pipeline-design.md +356 -0
  76. package/dist/skills/defaults/devops/docker.md +281 -0
  77. package/dist/skills/defaults/devops/git-workflows.md +205 -0
  78. package/dist/skills/defaults/devops/github-actions.md +311 -0
  79. package/dist/skills/defaults/devops/gitlab-ci-patterns.md +266 -0
  80. package/dist/skills/defaults/devops/hybrid-cloud-networking.md +241 -0
  81. package/dist/skills/defaults/devops/istio-traffic-management.md +327 -0
  82. package/dist/skills/defaults/devops/kubernetes.md +339 -0
  83. package/dist/skills/defaults/devops/linkerd-patterns.md +311 -0
  84. package/dist/skills/defaults/devops/multi-cloud-architecture.md +181 -0
  85. package/dist/skills/defaults/devops/observability.md +243 -0
  86. package/dist/skills/defaults/devops/openapi-spec-generation.md +1024 -0
  87. package/dist/skills/defaults/devops/postmortem-writing.md +396 -0
  88. package/dist/skills/defaults/devops/prometheus-configuration.md +265 -0
  89. package/dist/skills/defaults/devops/secrets-management.md +341 -0
  90. package/dist/skills/defaults/devops/service-mesh-observability.md +385 -0
  91. package/dist/skills/defaults/devops/terraform-module-library.md +244 -0
  92. package/dist/skills/defaults/finance/backtesting-frameworks/SKILL.md +663 -0
  93. package/dist/skills/defaults/finance/risk-metrics-calculation/SKILL.md +557 -0
  94. package/dist/skills/defaults/frontend/accessibility-compliance.md +420 -0
  95. package/dist/skills/defaults/frontend/design-system-patterns.md +337 -0
  96. package/dist/skills/defaults/frontend/interaction-design.md +327 -0
  97. package/dist/skills/defaults/frontend/javascript.md +311 -0
  98. package/dist/skills/defaults/frontend/modern-javascript-patterns.md +927 -0
  99. package/dist/skills/defaults/frontend/react-native-design.md +440 -0
  100. package/dist/skills/defaults/frontend/react.md +345 -0
  101. package/dist/skills/defaults/frontend/responsive-design.md +472 -0
  102. package/dist/skills/defaults/frontend/tailwind-design-system.md +337 -0
  103. package/dist/skills/defaults/frontend/typescript-advanced-types.md +724 -0
  104. package/dist/skills/defaults/frontend/typescript.md +334 -0
  105. package/dist/skills/defaults/frontend/visual-design-foundations.md +326 -0
  106. package/dist/skills/defaults/frontend/web-component-design.md +279 -0
  107. package/dist/skills/defaults/game-development/godot-gdscript-patterns.md +188 -0
  108. package/dist/skills/defaults/game-development/unity-ecs-patterns.md +594 -0
  109. package/dist/skills/defaults/kubernetes/gitops-workflow.md +285 -0
  110. package/dist/skills/defaults/kubernetes/gitops.md +280 -0
  111. package/dist/skills/defaults/kubernetes/helm-chart-scaffolding.md +553 -0
  112. package/dist/skills/defaults/kubernetes/helm.md +343 -0
  113. package/dist/skills/defaults/kubernetes/k8s-manifest-generator.md +501 -0
  114. package/dist/skills/defaults/kubernetes/k8s-security-policies.md +342 -0
  115. package/dist/skills/defaults/kubernetes/manifests.md +330 -0
  116. package/dist/skills/defaults/kubernetes/security.md +337 -0
  117. package/dist/skills/defaults/llm-application/embedding-strategies.md +608 -0
  118. package/dist/skills/defaults/llm-application/hybrid-search-implementation.md +570 -0
  119. package/dist/skills/defaults/llm-application/hybrid-search.md +570 -0
  120. package/dist/skills/defaults/llm-application/langchain-architecture.md +666 -0
  121. package/dist/skills/defaults/llm-application/langchain.md +259 -0
  122. package/dist/skills/defaults/llm-application/llm-evaluation.md +695 -0
  123. package/dist/skills/defaults/llm-application/prompt-engineering-patterns.md +449 -0
  124. package/dist/skills/defaults/llm-application/prompt-engineering.md +219 -0
  125. package/dist/skills/defaults/llm-application/rag-implementation.md +434 -0
  126. package/dist/skills/defaults/llm-application/similarity-search-patterns.md +560 -0
  127. package/dist/skills/defaults/llm-application/similarity-search.md +560 -0
  128. package/dist/skills/defaults/llm-application/vector-index-tuning.md +523 -0
  129. package/dist/skills/defaults/mobile/mobile-android-design.md +440 -0
  130. package/dist/skills/defaults/mobile/mobile-ios-design.md +266 -0
  131. package/dist/skills/defaults/monitoring/distributed-tracing.md +436 -0
  132. package/dist/skills/defaults/monitoring/grafana-dashboards.md +370 -0
  133. package/dist/skills/defaults/monitoring/prometheus-configuration.md +379 -0
  134. package/dist/skills/defaults/monitoring/slo-implementation.md +323 -0
  135. package/dist/skills/defaults/refactoring/code-refactoring.md +349 -0
  136. package/dist/skills/defaults/security/anti-reversing-techniques/SKILL.md +559 -0
  137. package/dist/skills/defaults/security/auditor.md +168 -0
  138. package/dist/skills/defaults/security/binary-analysis-patterns/SKILL.md +438 -0
  139. package/dist/skills/defaults/security/memory-forensics/SKILL.md +483 -0
  140. package/dist/skills/defaults/security/mtls-configuration.md +349 -0
  141. package/dist/skills/defaults/security/protocol-reverse-engineering/SKILL.md +520 -0
  142. package/dist/skills/defaults/security/sast-configuration.md +182 -0
  143. package/dist/skills/defaults/security/security.md +313 -0
  144. package/dist/skills/defaults/security/stride-analysis.md +273 -0
  145. package/dist/skills/defaults/security/threat-mitigation-mapping.md +290 -0
  146. package/dist/skills/defaults/systems/bash-defensive-patterns/SKILL.md +539 -0
  147. package/dist/skills/defaults/systems/bats-testing-patterns/SKILL.md +631 -0
  148. package/dist/skills/defaults/systems/go-concurrency-patterns.md +657 -0
  149. package/dist/skills/defaults/systems/memory-safety-patterns.md +605 -0
  150. package/dist/skills/defaults/systems/rust-async-patterns.md +519 -0
  151. package/dist/skills/defaults/systems/shellcheck-configuration/SKILL.md +456 -0
  152. package/dist/skills/defaults/team-collaboration/multi-reviewer-patterns.md +126 -0
  153. package/dist/skills/defaults/team-collaboration/parallel-feature-development.md +151 -0
  154. package/dist/skills/defaults/testing/javascript-testing-patterns.md +1021 -0
  155. package/dist/skills/defaults/testing/python-testing-patterns.md +351 -0
  156. package/dist/skills/defaults/testing/testing.md +332 -0
  157. package/dist/skills/defaults/workflows/context-driven-development.md +384 -0
  158. package/dist/skills/defaults/workflows/track-management.md +592 -0
  159. package/dist/skills/defaults/workflows/workflow-patterns.md +622 -0
  160. package/dist/skills/index.d.ts +11 -0
  161. package/dist/skills/index.d.ts.map +1 -0
  162. package/dist/skills/index.js +129 -0
  163. package/dist/skills/index.js.map +1 -0
  164. package/dist/utils/character.js +4 -4
  165. package/dist/utils/character.js.map +1 -1
  166. package/dist/utils/inputbar.d.ts.map +1 -1
  167. package/dist/utils/inputbar.js +7 -0
  168. package/dist/utils/inputbar.js.map +1 -1
  169. package/package.json +1 -1
@@ -0,0 +1,434 @@
1
+ ---
2
+ name: rag-implementation
3
+ description: Build Retrieval-Augmented Generation (RAG) systems for LLM applications with vector databases and semantic search
4
+ ---
5
+
6
+ # RAG Implementation
7
+
8
+ Master Retrieval-Augmented Generation (RAG) to build LLM applications that provide accurate, grounded responses using external knowledge sources.
9
+
10
+ ## When to Use This Skill
11
+
12
+ - Building Q&A systems over proprietary documents
13
+ - Creating chatbots with current, factual information
14
+ - Implementing semantic search with natural language queries
15
+ - Reducing hallucinations with grounded responses
16
+ - Enabling LLMs to access domain-specific knowledge
17
+ - Building documentation assistants
18
+ - Creating research tools with source citation
19
+
20
+ ## Core Components
21
+
22
+ ### 1. Vector Databases
23
+
24
+ **Options:**
25
+
26
+ - **Pinecone**: Managed, scalable, serverless
27
+ - **Weaviate**: Open-source, hybrid search, GraphQL
28
+ - **Milvus**: High performance, on-premise
29
+ - **Chroma**: Lightweight, easy to use, local development
30
+ - **Qdrant**: Fast, filtered search, Rust-based
31
+ - **pgvector**: PostgreSQL extension, SQL integration
32
+
33
+ ### 2. Embeddings
34
+
35
+ | Model | Dimensions | Best For |
36
+ |-------|------------|----------|
37
+ | **voyage-3-large** | 1024 | Claude apps (Anthropic recommended) |
38
+ | **voyage-code-3** | 1024 | Code search |
39
+ | **text-embedding-3-large** | 3072 | OpenAI apps, high accuracy |
40
+ | **text-embedding-3-small** | 1536 | OpenAI apps, cost-effective |
41
+ | **bge-large-en-v1.5** | 1024 | Open source, local deployment |
42
+ | **multilingual-e5-large** | 1024 | Multi-language support |
43
+
44
+ ### 3. Retrieval Strategies
45
+
46
+ - **Dense Retrieval**: Semantic similarity via embeddings
47
+ - **Sparse Retrieval**: Keyword matching (BM25, TF-IDF)
48
+ - **Hybrid Search**: Combine dense + sparse with weighted fusion
49
+ - **Multi-Query**: Generate multiple query variations
50
+ - **HyDE**: Generate hypothetical documents for better retrieval
51
+
52
+ ### 4. Reranking
53
+
54
+ - **Cross-Encoders**: BERT-based reranking (ms-marco-MiniLM)
55
+ - **Cohere Rerank**: API-based reranking
56
+ - **Maximal Marginal Relevance (MMR)**: Diversity + relevance
57
+ - **LLM-based**: Use LLM to score relevance
58
+
59
+ ## Quick Start with LangGraph
60
+
61
+ ```python
62
+ from langgraph.graph import StateGraph, START, END
63
+ from langchain_anthropic import ChatAnthropic
64
+ from langchain_voyageai import VoyageAIEmbeddings
65
+ from langchain_pinecone import PineconeVectorStore
66
+ from langchain_core.documents import Document
67
+ from langchain_core.prompts import ChatPromptTemplate
68
+ from typing import TypedDict, Annotated
69
+
70
+ class RAGState(TypedDict):
71
+ question: str
72
+ context: list[Document]
73
+ answer: str
74
+
75
+ llm = ChatAnthropic(model="claude-sonnet-4-6")
76
+ embeddings = VoyageAIEmbeddings(model="voyage-3-large")
77
+ vectorstore = PineconeVectorStore(index_name="docs", embedding=embeddings)
78
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
79
+
80
+ rag_prompt = ChatPromptTemplate.from_template(
81
+ """Answer based on the context below. If you cannot answer, say so.
82
+
83
+ Context:
84
+ {context}
85
+
86
+ Question: {question}
87
+
88
+ Answer:"""
89
+ )
90
+
91
async def retrieve(state: RAGState) -> RAGState:
    """Run the retriever on the question stored in ``state``.

    Returns a partial state update holding only the ``context`` key.
    """
    query = state["question"]
    matches = await retriever.ainvoke(query)
    return {"context": matches}
94
+
95
async def generate(state: RAGState) -> RAGState:
    """Produce an answer from the question and the documents in ``state["context"]``.

    The page contents are joined with blank lines, inserted into ``rag_prompt``
    together with the question, and sent to ``llm``. Returns a partial state
    update holding only the ``answer`` key.
    """
    passages = [doc.page_content for doc in state["context"]]
    prompt_messages = rag_prompt.format_messages(
        context="\n\n".join(passages),
        question=state["question"],
    )
    reply = await llm.ainvoke(prompt_messages)
    return {"answer": reply.content}
100
+
101
+ builder = StateGraph(RAGState)
102
+ builder.add_node("retrieve", retrieve)
103
+ builder.add_node("generate", generate)
104
+ builder.add_edge(START, "retrieve")
105
+ builder.add_edge("retrieve", "generate")
106
+ builder.add_edge("generate", END)
107
+
108
+ rag_chain = builder.compile()
109
+
110
+ result = await rag_chain.ainvoke({"question": "What are the main features?"})
111
+ print(result["answer"])
112
+ ```
113
+
114
+ ## Advanced RAG Patterns
115
+
116
+ ### Pattern 1: Hybrid Search with Reciprocal Rank Fusion (RRF)
117
+
118
+ ```python
119
+ from langchain_community.retrievers import BM25Retriever
120
+ from langchain.retrievers import EnsembleRetriever
121
+
122
+ bm25_retriever = BM25Retriever.from_documents(documents)
123
+ bm25_retriever.k = 10
124
+
125
+ dense_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
126
+
127
+ ensemble_retriever = EnsembleRetriever(
128
+ retrievers=[bm25_retriever, dense_retriever],
129
+ weights=[0.3, 0.7]
130
+ )
131
+ ```
132
+
133
+ ### Pattern 2: Multi-Query Retrieval
134
+
135
+ ```python
136
+ from langchain.retrievers.multi_query import MultiQueryRetriever
137
+
138
+ multi_query_retriever = MultiQueryRetriever.from_llm(
139
+ retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
140
+ llm=llm
141
+ )
142
+
143
+ results = await multi_query_retriever.ainvoke("What is the main topic?")
144
+ ```
145
+
146
+ ### Pattern 3: Contextual Compression
147
+
148
+ ```python
149
+ from langchain.retrievers import ContextualCompressionRetriever
150
+ from langchain.retrievers.document_compressors import LLMChainExtractor
151
+
152
+ compressor = LLMChainExtractor.from_llm(llm)
153
+
154
+ compression_retriever = ContextualCompressionRetriever(
155
+ base_compressor=compressor,
156
+ base_retriever=vectorstore.as_retriever(search_kwargs={"k": 10})
157
+ )
158
+
159
+ compressed_docs = await compression_retriever.ainvoke("specific query")
160
+ ```
161
+
162
+ ### Pattern 4: Parent Document Retriever
163
+
164
+ ```python
165
+ from langchain.retrievers import ParentDocumentRetriever
166
+ from langchain.storage import InMemoryStore
167
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
168
+
169
+ child_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
170
+ parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
171
+
172
+ docstore = InMemoryStore()
173
+
174
+ parent_retriever = ParentDocumentRetriever(
175
+ vectorstore=vectorstore,
176
+ docstore=docstore,
177
+ child_splitter=child_splitter,
178
+ parent_splitter=parent_splitter
179
+ )
180
+
181
+ await parent_retriever.aadd_documents(documents)
182
+
183
+ results = await parent_retriever.ainvoke("query")
184
+ ```
185
+
186
+ ### Pattern 5: HyDE (Hypothetical Document Embeddings)
187
+
188
+ ```python
189
from typing import TypedDict

from langchain_core.prompts import ChatPromptTemplate

# This example reuses names defined in the Quick Start example:
# StateGraph, START, END, Document, llm, retriever and the `generate` node.


class HyDEState(TypedDict):
    """Graph state for the HyDE pipeline.

    The original snippet referenced ``HyDEState`` without defining it; the keys
    below are exactly the ones the three nodes read or write.
    """

    question: str          # user question (graph input)
    hypothetical_doc: str  # passage written by the LLM, used as the search query
    context: list[Document]  # documents retrieved with the hypothetical passage
    answer: str            # final answer written by the `generate` node


hyde_prompt = ChatPromptTemplate.from_template(
    """Write a detailed passage that would answer this question:

Question: {question}

Passage:"""
)


async def generate_hypothetical(state: HyDEState) -> HyDEState:
    """Ask the LLM for a passage that would answer the question.

    Returns a partial state update holding only ``hypothetical_doc``.
    """
    messages = hyde_prompt.format_messages(question=state["question"])
    response = await llm.ainvoke(messages)
    return {"hypothetical_doc": response.content}


async def retrieve_with_hyde(state: HyDEState) -> HyDEState:
    """Retrieve documents using the hypothetical passage instead of the raw question.

    Returns a partial state update holding only ``context``.
    """
    docs = await retriever.ainvoke(state["hypothetical_doc"])
    return {"context": docs}


# Linear graph: hypothetical passage -> retrieval -> answer generation.
builder = StateGraph(HyDEState)
builder.add_node("hypothetical", generate_hypothetical)
builder.add_node("retrieve", retrieve_with_hyde)
builder.add_node("generate", generate)
builder.add_edge(START, "hypothetical")
builder.add_edge("hypothetical", "retrieve")
builder.add_edge("retrieve", "generate")
builder.add_edge("generate", END)

hyde_rag = builder.compile()
218
+ ```
219
+
220
+ ## Document Chunking Strategies
221
+
222
+ ### Recursive Character Text Splitter
223
+
224
+ ```python
225
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
226
+
227
+ splitter = RecursiveCharacterTextSplitter(
228
+ chunk_size=1000,
229
+ chunk_overlap=200,
230
+ length_function=len,
231
+ separators=["\n\n", "\n", ". ", " ", ""]
232
+ )
233
+
234
+ chunks = splitter.split_documents(documents)
235
+ ```
236
+
237
+ ### Semantic Chunking
238
+
239
+ ```python
240
+ from langchain_experimental.text_splitter import SemanticChunker
241
+
242
+ splitter = SemanticChunker(
243
+ embeddings=embeddings,
244
+ breakpoint_threshold_type="percentile",
245
+ breakpoint_threshold_amount=95
246
+ )
247
+ ```
248
+
249
+ ### Markdown Header Splitter
250
+
251
+ ```python
252
+ from langchain_text_splitters import MarkdownHeaderTextSplitter
253
+
254
+ headers_to_split_on = [
255
+ ("#", "Header 1"),
256
+ ("##", "Header 2"),
257
+ ("###", "Header 3"),
258
+ ]
259
+
260
+ splitter = MarkdownHeaderTextSplitter(
261
+ headers_to_split_on=headers_to_split_on,
262
+ strip_headers=False
263
+ )
264
+ ```
265
+
266
+ ## Vector Store Configurations
267
+
268
+ ### Pinecone (Serverless)
269
+
270
+ ```python
271
import os  # required for os.environ below; missing from the original snippet

from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore

# The API key is read from the environment; a missing variable raises KeyError.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create the index only if it does not exist yet. The dimension must match the
# embedding model in use (1024 for voyage-3-large in the embeddings table).
if "my-index" not in pc.list_indexes().names():
    pc.create_index(
        name="my-index",
        dimension=1024,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# `embeddings` is the embedding model instance created in the Quick Start example.
index = pc.Index("my-index")
vectorstore = PineconeVectorStore(index=index, embedding=embeddings)
286
+ ```
287
+
288
+ ### Chroma (Local Development)
289
+
290
+ ```python
291
+ from langchain_chroma import Chroma
292
+
293
+ vectorstore = Chroma(
294
+ collection_name="my_collection",
295
+ embedding_function=embeddings,
296
+ persist_directory="./chroma_db"
297
+ )
298
+ ```
299
+
300
+ ### pgvector (PostgreSQL)
301
+
302
+ ```python
303
+ from langchain_postgres.vectorstores import PGVector
304
+
305
+ connection_string = "postgresql+psycopg://user:pass@localhost:5432/vectordb"
306
+
307
+ vectorstore = PGVector(
308
+ embeddings=embeddings,
309
+ collection_name="documents",
310
+ connection=connection_string,
311
+ )
312
+ ```
313
+
314
+ ## Retrieval Optimization
315
+
316
+ ### Metadata Filtering
317
+
318
+ ```python
319
+ results = await vectorstore.asimilarity_search(
320
+ "query",
321
+ filter={"category": "technical"},
322
+ k=5
323
+ )
324
+ ```
325
+
326
+ ### Maximal Marginal Relevance (MMR)
327
+
328
+ ```python
329
+ results = await vectorstore.amax_marginal_relevance_search(
330
+ "query",
331
+ k=5,
332
+ fetch_k=20,
333
+ lambda_mult=0.5
334
+ )
335
+ ```
336
+
337
+ ### Reranking with Cross-Encoder
338
+
339
+ ```python
340
+ from sentence_transformers import CrossEncoder
341
+
342
+ reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
343
+
344
async def retrieve_and_rerank(query: str, k: int = 5, fetch_k: int = 20) -> list[Document]:
    """Retrieve candidates by vector similarity, then rerank them with the cross-encoder.

    Args:
        query: Search query text.
        k: Number of documents to return after reranking.
        fetch_k: Number of candidates pulled from the vector store before
            reranking. Defaults to 20, the value that was previously
            hard-coded. It is raised to ``k`` when smaller, so a request for
            more than ``fetch_k`` results is no longer silently capped.

    Returns:
        Up to ``k`` documents ordered by descending cross-encoder score; an
        empty list when the vector store returns no candidates.
    """
    import asyncio  # local import keeps the snippet self-contained

    candidates = await vectorstore.asimilarity_search(query, k=max(fetch_k, k))
    if not candidates:
        # Nothing to score; skip the model call entirely.
        return []

    pairs = [[query, doc.page_content] for doc in candidates]
    # CrossEncoder.predict is synchronous; run it in a worker thread so the
    # event loop is not blocked while the model scores the pairs.
    scores = await asyncio.to_thread(reranker.predict, pairs)
    ranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)
    return [doc for doc, _score in ranked[:k]]
350
+ ```
351
+
352
+ ## Prompt Engineering for RAG
353
+
354
+ ```python
355
+ rag_prompt = ChatPromptTemplate.from_template(
356
+ """Answer the question based on the context below. Include citations using [1], [2], etc.
357
+
358
+ If you cannot answer based on the context, say "I don't have enough information."
359
+
360
+ Context:
361
+ {context}
362
+
363
+ Question: {question}
364
+
365
+ Instructions:
366
+ 1. Use only information from the context
367
+ 2. Cite sources with [1], [2] format
368
+ 3. If uncertain, express uncertainty
369
+
370
+ Answer (with citations):"""
371
+ )
372
+ ```
373
+
374
+ ### Structured Output for RAG
375
+
376
+ ```python
377
+ from pydantic import BaseModel, Field
378
+
379
+ class RAGResponse(BaseModel):
380
+ answer: str = Field(description="The answer based on context")
381
+ confidence: float = Field(description="Confidence score 0-1")
382
+ sources: list[str] = Field(description="Source document IDs used")
383
+ reasoning: str = Field(description="Brief reasoning for the answer")
384
+
385
+ structured_llm = llm.with_structured_output(RAGResponse)
386
+ ```
387
+
388
+ ## Evaluation Metrics
389
+
390
+ ```python
391
class RAGEvalMetrics(TypedDict):
    """Averaged evaluation scores for a RAG pipeline."""

    retrieval_precision: float
    retrieval_recall: float
    answer_relevance: float
    faithfulness: float
    context_relevance: float


def _mean_or_nan(samples: list[float]) -> float:
    """Average ``samples``; NaN marks a metric for which nothing was measured."""
    return sum(samples) / len(samples) if samples else float("nan")


async def evaluate_rag_system(rag_chain, test_cases: list[dict]) -> RAGEvalMetrics:
    """Compute average retrieval precision and recall over ``test_cases``.

    Args:
        rag_chain: Object exposing ``await ainvoke({"question": ...})`` that
            returns a mapping with a ``"context"`` list of documents, each
            carrying ``metadata["id"]``.
        test_cases: Dicts with a ``"question"`` string and a
            ``"relevant_doc_ids"`` iterable of expected document IDs.

    Returns:
        A dict with every ``RAGEvalMetrics`` key. Only ``retrieval_precision``
        and ``retrieval_recall`` are measured here; the remaining metrics
        (and all metrics when ``test_cases`` is empty) are reported as NaN.
        The original divided by the length of those empty lists and always
        raised ZeroDivisionError.
    """
    metrics: dict[str, list[float]] = {k: [] for k in RAGEvalMetrics.__annotations__}

    for test in test_cases:
        result = await rag_chain.ainvoke({"question": test["question"]})

        retrieved_ids = {doc.metadata["id"] for doc in result["context"]}
        relevant_ids = set(test["relevant_doc_ids"])
        hits = len(retrieved_ids & relevant_ids)

        # Score an empty denominator as 0.0 instead of dividing by zero.
        precision = hits / len(retrieved_ids) if retrieved_ids else 0.0
        recall = hits / len(relevant_ids) if relevant_ids else 0.0

        metrics["retrieval_precision"].append(precision)
        metrics["retrieval_recall"].append(recall)

    return {k: _mean_or_nan(v) for k, v in metrics.items()}
414
+ ```
415
+
416
+ ## Best Practices
417
+
418
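+ 1. **Chunk Size**: Balance between context (larger) and specificity (smaller) - typically 500-1000 tokens. Note that `chunk_size` is measured by `length_function`, so with `length_function=len` it counts characters, not tokens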
419
+ 2. **Overlap**: Use 10-20% overlap to preserve context at boundaries
420
+ 3. **Metadata**: Include source, page, timestamp for filtering and debugging
421
+ 4. **Hybrid Search**: Combine semantic and keyword search for best recall
422
+ 5. **Reranking**: Use cross-encoder reranking for precision-critical applications
423
+ 6. **Citations**: Always return source documents for transparency
424
+ 7. **Evaluation**: Continuously test retrieval quality and answer accuracy
425
+ 8. **Monitoring**: Track retrieval metrics and latency in production
426
+
427
+ ## Common Issues
428
+
429
+ - **Poor Retrieval**: Check embedding quality, chunk size, query formulation
430
+ - **Irrelevant Results**: Add metadata filtering, use hybrid search, rerank
431
+ - **Missing Information**: Ensure documents are properly indexed, check chunking
432
+ - **Slow Queries**: Optimize vector store, use caching, reduce k
433
+ - **Hallucinations**: Improve grounding prompt, add verification step
434
+ - **Context Too Long**: Use compression or parent document retriever