aigroup-workflow 2.2.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/fix-build.md +10 -5
- package/.claude/commands/init-project.md +13 -8
- package/.claude/commands/plan.md +15 -8
- package/.claude/commands/review.md +12 -6
- package/.claude/commands/tdd.md +11 -5
- package/.claude/commands/workflow-start.md +20 -11
- package/.claude/settings.json +28 -0
- package/.codex/agents/architect.toml +207 -0
- package/.codex/agents/build-error-resolver.toml +110 -0
- package/.codex/agents/code-reviewer.toml +233 -0
- package/.codex/agents/doc-updater.toml +103 -0
- package/.codex/agents/e2e-runner.toml +103 -0
- package/.codex/agents/get-current-datetime.toml +23 -0
- package/.codex/agents/init-architect.toml +181 -0
- package/.codex/agents/planner.toml +208 -0
- package/.codex/agents/refactor-cleaner.toml +81 -0
- package/.codex/agents/rust-reviewer.toml +90 -0
- package/.codex/agents/security-reviewer.toml +104 -0
- package/.codex/agents/tdd-guide.toml +87 -0
- package/AGENTS.md +2 -2
- package/CLAUDE.md +23 -1
- package/LICENSE +20 -20
- package/README.md +333 -333
- package/agents/a11y-architect.md +141 -141
- package/agents/architect.md +211 -211
- package/agents/build-error-resolver.md +114 -114
- package/agents/chief-of-staff.md +151 -151
- package/agents/code-architect.md +71 -71
- package/agents/code-explorer.md +69 -69
- package/agents/code-reviewer.md +237 -237
- package/agents/code-simplifier.md +47 -47
- package/agents/comment-analyzer.md +45 -45
- package/agents/conversation-analyzer.md +52 -52
- package/agents/cpp-build-resolver.md +90 -90
- package/agents/cpp-reviewer.md +72 -72
- package/agents/csharp-reviewer.md +101 -101
- package/agents/dart-build-resolver.md +201 -201
- package/agents/database-reviewer.md +91 -91
- package/agents/doc-updater.md +107 -107
- package/agents/docs-lookup.md +68 -68
- package/agents/e2e-runner.md +107 -107
- package/agents/flutter-reviewer.md +243 -243
- package/agents/gan-evaluator.md +209 -209
- package/agents/gan-generator.md +131 -131
- package/agents/gan-planner.md +99 -99
- package/agents/get-current-datetime.md +26 -26
- package/agents/go-build-resolver.md +94 -94
- package/agents/go-reviewer.md +76 -76
- package/agents/harness-optimizer.md +35 -35
- package/agents/healthcare-reviewer.md +83 -83
- package/agents/java-build-resolver.md +153 -153
- package/agents/java-reviewer.md +92 -92
- package/agents/kotlin-build-resolver.md +118 -118
- package/agents/kotlin-reviewer.md +159 -159
- package/agents/loop-operator.md +36 -36
- package/agents/opensource-forker.md +198 -198
- package/agents/opensource-packager.md +249 -249
- package/agents/opensource-sanitizer.md +188 -188
- package/agents/performance-optimizer.md +446 -446
- package/agents/planner.md +212 -212
- package/agents/pr-test-analyzer.md +45 -45
- package/agents/python-reviewer.md +98 -98
- package/agents/pytorch-build-resolver.md +120 -120
- package/agents/refactor-cleaner.md +85 -85
- package/agents/rust-build-resolver.md +148 -148
- package/agents/rust-reviewer.md +94 -94
- package/agents/security-reviewer.md +108 -108
- package/agents/seo-specialist.md +59 -59
- package/agents/silent-failure-hunter.md +50 -50
- package/agents/tdd-guide.md +91 -91
- package/agents/type-design-analyzer.md +41 -41
- package/agents/typescript-reviewer.md +112 -112
- package/cli/commands/update.mjs +1 -1
- package/cli/utils/scaffold.mjs +53 -0
- package/docs/rules/agents.md +166 -50
- package/docs/rules/cpp/coding-style.md +44 -44
- package/docs/rules/cpp/hooks.md +39 -39
- package/docs/rules/cpp/patterns.md +51 -51
- package/docs/rules/cpp/security.md +51 -51
- package/docs/rules/cpp/testing.md +44 -44
- package/docs/rules/csharp/coding-style.md +72 -72
- package/docs/rules/csharp/hooks.md +25 -25
- package/docs/rules/csharp/patterns.md +50 -50
- package/docs/rules/csharp/security.md +58 -58
- package/docs/rules/csharp/testing.md +46 -46
- package/docs/rules/dart/coding-style.md +159 -159
- package/docs/rules/dart/hooks.md +66 -66
- package/docs/rules/dart/patterns.md +261 -261
- package/docs/rules/dart/security.md +135 -135
- package/docs/rules/dart/testing.md +215 -215
- package/docs/rules/golang/coding-style.md +32 -32
- package/docs/rules/golang/hooks.md +17 -17
- package/docs/rules/golang/patterns.md +45 -45
- package/docs/rules/golang/security.md +34 -34
- package/docs/rules/golang/testing.md +31 -31
- package/docs/rules/java/coding-style.md +114 -114
- package/docs/rules/java/hooks.md +18 -18
- package/docs/rules/java/patterns.md +146 -146
- package/docs/rules/java/security.md +100 -100
- package/docs/rules/java/testing.md +131 -131
- package/docs/rules/kotlin/coding-style.md +86 -86
- package/docs/rules/kotlin/hooks.md +17 -17
- package/docs/rules/kotlin/patterns.md +146 -146
- package/docs/rules/kotlin/security.md +82 -82
- package/docs/rules/kotlin/testing.md +128 -128
- package/docs/rules/perl/coding-style.md +46 -46
- package/docs/rules/perl/hooks.md +22 -22
- package/docs/rules/perl/patterns.md +76 -76
- package/docs/rules/perl/security.md +69 -69
- package/docs/rules/perl/testing.md +54 -54
- package/docs/rules/php/coding-style.md +40 -40
- package/docs/rules/php/hooks.md +24 -24
- package/docs/rules/php/patterns.md +33 -33
- package/docs/rules/php/security.md +37 -37
- package/docs/rules/php/testing.md +39 -39
- package/docs/rules/python/coding-style.md +42 -42
- package/docs/rules/python/hooks.md +19 -19
- package/docs/rules/python/patterns.md +39 -39
- package/docs/rules/python/security.md +30 -30
- package/docs/rules/python/testing.md +38 -38
- package/docs/rules/rust/coding-style.md +151 -151
- package/docs/rules/rust/hooks.md +16 -16
- package/docs/rules/rust/patterns.md +168 -168
- package/docs/rules/rust/security.md +141 -141
- package/docs/rules/rust/testing.md +154 -154
- package/docs/rules/swift/coding-style.md +47 -47
- package/docs/rules/swift/hooks.md +20 -20
- package/docs/rules/swift/patterns.md +66 -66
- package/docs/rules/swift/security.md +33 -33
- package/docs/rules/swift/testing.md +45 -45
- package/docs/rules/typescript/coding-style.md +199 -199
- package/docs/rules/typescript/hooks.md +22 -22
- package/docs/rules/typescript/patterns.md +52 -52
- package/docs/rules/typescript/security.md +28 -28
- package/docs/rules/typescript/testing.md +18 -18
- package/docs/rules/web/coding-style.md +96 -96
- package/docs/rules/web/design-quality.md +62 -62
- package/docs/rules/web/hooks.md +120 -120
- package/docs/rules/web/patterns.md +79 -79
- package/docs/rules/web/performance.md +64 -64
- package/docs/rules/web/security.md +57 -57
- package/docs/rules/web/testing.md +55 -55
- package/docs/templates/README.md +36 -36
- package/docs/templates/ai-project-final.md +124 -124
- package/docs/templates/ai-project.md +105 -105
- package/docs/templates/api.md +157 -157
- package/docs/templates/bug.md +62 -62
- package/docs/templates/code-review.md +87 -87
- package/docs/templates/generic.md +116 -116
- package/docs/templates/implementation-plan.md +1 -1
- package/docs/templates/meeting.md +68 -68
- package/docs/templates/prd.md +98 -98
- package/docs/templates/ui.md +134 -134
- package/docs/workflow-pipeline.md +11 -10
- package/package.json +40 -39
- package/scripts/hooks/checks/orchestration-artifacts.cjs +28 -23
- package/scripts/hooks/checks/workflow-state.cjs +4 -5
- package/scripts/orchestration/lib/orchestrator.cjs +344 -117
- package/scripts/orchestration/lib/validate.cjs +145 -0
- package/scripts/orchestration/session.cjs +88 -44
- package/skills/SUPERPOWERS-LICENSE +21 -21
- package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -162
- package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -540
- package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -673
- package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -597
- package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -565
- package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -347
- package/skills/ai-ml/ml-pipeline/SKILL.md +159 -159
- package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -833
- package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -631
- package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -978
- package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -907
- package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -782
- package/skills/ai-ml/rag-architect/SKILL.md +194 -194
- package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -878
- package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -561
- package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -833
- package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -795
- package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -589
- package/skills/ai-ml/spark-engineer/SKILL.md +148 -148
- package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -543
- package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -544
- package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -599
- package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -474
- package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -786
- package/skills/backend/api-designer/SKILL.md +217 -217
- package/skills/backend/api-designer/references/error-handling.md +541 -541
- package/skills/backend/api-designer/references/openapi.md +824 -824
- package/skills/backend/api-designer/references/pagination.md +494 -494
- package/skills/backend/api-designer/references/rest-patterns.md +335 -335
- package/skills/backend/api-designer/references/versioning.md +391 -391
- package/skills/backend/architecture-designer/SKILL.md +117 -117
- package/skills/backend/architecture-designer/references/adr-template.md +116 -116
- package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -111
- package/skills/backend/architecture-designer/references/database-selection.md +102 -102
- package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -112
- package/skills/backend/architecture-designer/references/system-design.md +100 -100
- package/skills/backend/code-documenter/SKILL.md +147 -147
- package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -166
- package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -220
- package/skills/backend/code-documenter/references/coverage-reports.md +125 -125
- package/skills/backend/code-documenter/references/documentation-systems.md +333 -333
- package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -531
- package/skills/backend/code-documenter/references/python-docstrings.md +121 -121
- package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -145
- package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -530
- package/skills/backend/debugging-wizard/SKILL.md +105 -105
- package/skills/backend/debugging-wizard/references/common-patterns.md +132 -132
- package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -140
- package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -177
- package/skills/backend/debugging-wizard/references/strategies.md +142 -142
- package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -367
- package/skills/backend/feature-forge/SKILL.md +98 -98
- package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -104
- package/skills/backend/feature-forge/references/ears-syntax.md +99 -99
- package/skills/backend/feature-forge/references/interview-questions.md +150 -150
- package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -54
- package/skills/backend/feature-forge/references/specification-template.md +103 -103
- package/skills/backend/fullstack-guardian/SKILL.md +105 -105
- package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -307
- package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -350
- package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -237
- package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -134
- package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -354
- package/skills/backend/fullstack-guardian/references/design-template.md +91 -91
- package/skills/backend/fullstack-guardian/references/error-handling.md +135 -135
- package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -340
- package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -333
- package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -106
- package/skills/backend/graphql-architect/SKILL.md +146 -146
- package/skills/backend/graphql-architect/references/federation.md +418 -418
- package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -1141
- package/skills/backend/graphql-architect/references/resolvers.md +425 -425
- package/skills/backend/graphql-architect/references/schema-design.md +393 -393
- package/skills/backend/graphql-architect/references/security.md +569 -569
- package/skills/backend/graphql-architect/references/subscriptions.md +510 -510
- package/skills/backend/legacy-modernizer/SKILL.md +137 -137
- package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -381
- package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -423
- package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -395
- package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -281
- package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -487
- package/skills/backend/microservices-architect/SKILL.md +164 -164
- package/skills/backend/microservices-architect/references/communication.md +499 -499
- package/skills/backend/microservices-architect/references/data.md +721 -721
- package/skills/backend/microservices-architect/references/decomposition.md +344 -344
- package/skills/backend/microservices-architect/references/observability.md +805 -805
- package/skills/backend/microservices-architect/references/patterns.md +603 -603
- package/skills/database/database-optimizer/SKILL.md +147 -147
- package/skills/database/database-optimizer/references/index-strategies.md +331 -331
- package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -501
- package/skills/database/database-optimizer/references/mysql-tuning.md +452 -452
- package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -413
- package/skills/database/database-optimizer/references/query-optimization.md +251 -251
- package/skills/database/postgres-pro/SKILL.md +152 -152
- package/skills/database/postgres-pro/references/extensions.md +404 -404
- package/skills/database/postgres-pro/references/jsonb.md +321 -321
- package/skills/database/postgres-pro/references/maintenance.md +481 -481
- package/skills/database/postgres-pro/references/performance.md +265 -265
- package/skills/database/postgres-pro/references/replication.md +446 -446
- package/skills/database/sql-pro/SKILL.md +129 -129
- package/skills/database/sql-pro/references/database-design.md +402 -402
- package/skills/database/sql-pro/references/dialect-differences.md +419 -419
- package/skills/database/sql-pro/references/optimization.md +384 -384
- package/skills/database/sql-pro/references/query-patterns.md +285 -285
- package/skills/database/sql-pro/references/window-functions.md +328 -328
- package/skills/dotnet/csharp-developer/SKILL.md +125 -125
- package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -394
- package/skills/dotnet/csharp-developer/references/blazor.md +553 -553
- package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -409
- package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -248
- package/skills/dotnet/csharp-developer/references/performance.md +498 -498
- package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -138
- package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -546
- package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -455
- package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -548
- package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -440
- package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -319
- package/skills/frontend/angular-architect/SKILL.md +152 -152
- package/skills/frontend/angular-architect/references/components.md +297 -297
- package/skills/frontend/angular-architect/references/ngrx.md +401 -401
- package/skills/frontend/angular-architect/references/routing.md +361 -361
- package/skills/frontend/angular-architect/references/rxjs.md +319 -319
- package/skills/frontend/angular-architect/references/testing.md +405 -405
- package/skills/frontend/design-commands/design.md +91 -91
- package/skills/frontend/design-commands/handoff.md +97 -97
- package/skills/frontend/design-commands/prototype.md +120 -120
- package/skills/frontend/design-commands/spec.md +160 -160
- package/skills/frontend/design-commands/style.md +78 -78
- package/skills/frontend/flutter-expert/SKILL.md +138 -138
- package/skills/frontend/flutter-expert/references/bloc-state.md +259 -259
- package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -119
- package/skills/frontend/flutter-expert/references/performance.md +99 -99
- package/skills/frontend/flutter-expert/references/project-structure.md +118 -118
- package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -130
- package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -123
- package/skills/frontend/nextjs-developer/SKILL.md +143 -143
- package/skills/frontend/nextjs-developer/references/app-router.md +311 -311
- package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -482
- package/skills/frontend/nextjs-developer/references/deployment.md +545 -545
- package/skills/frontend/nextjs-developer/references/server-actions.md +462 -462
- package/skills/frontend/nextjs-developer/references/server-components.md +384 -384
- package/skills/frontend/react-expert/SKILL.md +149 -149
- package/skills/frontend/react-expert/references/hooks-patterns.md +162 -162
- package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -1119
- package/skills/frontend/react-expert/references/performance.md +168 -168
- package/skills/frontend/react-expert/references/react-19-features.md +174 -174
- package/skills/frontend/react-expert/references/server-components.md +143 -143
- package/skills/frontend/react-expert/references/state-management.md +171 -171
- package/skills/frontend/react-expert/references/testing-react.md +174 -174
- package/skills/frontend/react-native-expert/SKILL.md +185 -185
- package/skills/frontend/react-native-expert/references/expo-router.md +187 -187
- package/skills/frontend/react-native-expert/references/list-optimization.md +204 -204
- package/skills/frontend/react-native-expert/references/platform-handling.md +188 -188
- package/skills/frontend/react-native-expert/references/project-structure.md +171 -171
- package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -173
- package/skills/frontend/senior-frontend/SKILL.md +477 -477
- package/skills/frontend/senior-frontend/references/frontend_best_practices.md +806 -806
- package/skills/frontend/senior-frontend/references/nextjs_optimization_guide.md +724 -724
- package/skills/frontend/senior-frontend/references/react_patterns.md +746 -746
- package/skills/frontend/senior-frontend/scripts/bundle_analyzer.py +407 -407
- package/skills/frontend/senior-frontend/scripts/component_generator.py +329 -329
- package/skills/frontend/senior-frontend/scripts/frontend_scaffolder.py +1005 -1005
- package/skills/frontend/ui-ux-pro-max/SKILL.md +386 -386
- package/skills/frontend/ui-ux-pro-max/data/charts.csv +26 -26
- package/skills/frontend/ui-ux-pro-max/data/colors.csv +97 -97
- package/skills/frontend/ui-ux-pro-max/data/icons.csv +101 -101
- package/skills/frontend/ui-ux-pro-max/data/landing.csv +31 -31
- package/skills/frontend/ui-ux-pro-max/data/products.csv +96 -96
- package/skills/frontend/ui-ux-pro-max/data/react-performance.csv +45 -45
- package/skills/frontend/ui-ux-pro-max/data/stacks/astro.csv +54 -54
- package/skills/frontend/ui-ux-pro-max/data/stacks/flutter.csv +53 -53
- package/skills/frontend/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -56
- package/skills/frontend/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -53
- package/skills/frontend/ui-ux-pro-max/data/stacks/nextjs.csv +53 -53
- package/skills/frontend/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -51
- package/skills/frontend/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -59
- package/skills/frontend/ui-ux-pro-max/data/stacks/react-native.csv +52 -52
- package/skills/frontend/ui-ux-pro-max/data/stacks/react.csv +54 -54
- package/skills/frontend/ui-ux-pro-max/data/stacks/shadcn.csv +61 -61
- package/skills/frontend/ui-ux-pro-max/data/stacks/svelte.csv +54 -54
- package/skills/frontend/ui-ux-pro-max/data/stacks/swiftui.csv +51 -51
- package/skills/frontend/ui-ux-pro-max/data/stacks/vue.csv +50 -50
- package/skills/frontend/ui-ux-pro-max/data/styles.csv +68 -68
- package/skills/frontend/ui-ux-pro-max/data/typography.csv +57 -57
- package/skills/frontend/ui-ux-pro-max/data/ui-reasoning.csv +101 -101
- package/skills/frontend/ui-ux-pro-max/data/ux-guidelines.csv +99 -99
- package/skills/frontend/ui-ux-pro-max/data/web-interface.csv +31 -31
- package/skills/frontend/ui-ux-pro-max/scripts/core.py +253 -253
- package/skills/frontend/ui-ux-pro-max/scripts/design_system.py +1067 -1067
- package/skills/frontend/ui-ux-pro-max/scripts/search.py +114 -114
- package/skills/frontend/vue-expert/SKILL.md +98 -98
- package/skills/frontend/vue-expert/references/build-tooling.md +480 -480
- package/skills/frontend/vue-expert/references/components.md +448 -448
- package/skills/frontend/vue-expert/references/composition-api.md +299 -299
- package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -636
- package/skills/frontend/vue-expert/references/nuxt.md +669 -669
- package/skills/frontend/vue-expert/references/state-management.md +449 -449
- package/skills/frontend/vue-expert/references/typescript.md +584 -584
- package/skills/frontend/vue-expert-js/SKILL.md +167 -167
- package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -219
- package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -183
- package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -535
- package/skills/frontend/vue-expert-js/references/state-management.md +249 -249
- package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -237
- package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -115
- package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -440
- package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -437
- package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -397
- package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -304
- package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -357
- package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -122
- package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -329
- package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -442
- package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -432
- package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -477
- package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -451
- package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -167
- package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -458
- package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -334
- package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -278
- package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -470
- package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -413
- package/skills/infra/cli-developer/SKILL.md +113 -113
- package/skills/infra/cli-developer/references/design-patterns.md +221 -221
- package/skills/infra/cli-developer/references/go-cli.md +540 -540
- package/skills/infra/cli-developer/references/node-cli.md +383 -383
- package/skills/infra/cli-developer/references/python-cli.md +422 -422
- package/skills/infra/cli-developer/references/ux-patterns.md +448 -448
- package/skills/infra/cloud-architect/SKILL.md +216 -216
- package/skills/infra/cloud-architect/references/aws.md +394 -394
- package/skills/infra/cloud-architect/references/azure.md +562 -562
- package/skills/infra/cloud-architect/references/cost.md +582 -582
- package/skills/infra/cloud-architect/references/gcp.md +633 -633
- package/skills/infra/cloud-architect/references/multi-cloud.md +483 -483
- package/skills/infra/devops-engineer/SKILL.md +144 -144
- package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -241
- package/skills/infra/devops-engineer/references/docker-patterns.md +113 -113
- package/skills/infra/devops-engineer/references/github-actions.md +139 -139
- package/skills/infra/devops-engineer/references/incident-response.md +331 -331
- package/skills/infra/devops-engineer/references/kubernetes.md +154 -154
- package/skills/infra/devops-engineer/references/platform-engineering.md +417 -417
- package/skills/infra/devops-engineer/references/release-automation.md +527 -527
- package/skills/infra/devops-engineer/references/terraform-iac.md +141 -141
- package/skills/infra/kubernetes-specialist/SKILL.md +241 -241
- package/skills/infra/kubernetes-specialist/references/configuration.md +452 -452
- package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -458
- package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -563
- package/skills/infra/kubernetes-specialist/references/gitops.md +530 -530
- package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -912
- package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -507
- package/skills/infra/kubernetes-specialist/references/networking.md +447 -447
- package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -459
- package/skills/infra/kubernetes-specialist/references/storage.md +535 -535
- package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -414
- package/skills/infra/kubernetes-specialist/references/workloads.md +377 -377
- package/skills/infra/mcp-developer/SKILL.md +143 -143
- package/skills/infra/mcp-developer/references/protocol.md +244 -244
- package/skills/infra/mcp-developer/references/python-sdk.md +367 -367
- package/skills/infra/mcp-developer/references/resources.md +554 -554
- package/skills/infra/mcp-developer/references/tools.md +480 -480
- package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -350
- package/skills/infra/monitoring-expert/SKILL.md +176 -176
- package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -141
- package/skills/infra/monitoring-expert/references/application-profiling.md +331 -331
- package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -344
- package/skills/infra/monitoring-expert/references/dashboards.md +126 -126
- package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -123
- package/skills/infra/monitoring-expert/references/performance-testing.md +269 -269
- package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -136
- package/skills/infra/monitoring-expert/references/structured-logging.md +142 -142
- package/skills/infra/sre-engineer/SKILL.md +181 -181
- package/skills/infra/sre-engineer/references/automation-toil.md +492 -492
- package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -334
- package/skills/infra/sre-engineer/references/incident-chaos.md +576 -576
- package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -424
- package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -238
- package/skills/infra/terraform-engineer/SKILL.md +143 -143
- package/skills/infra/terraform-engineer/references/best-practices.md +583 -583
- package/skills/infra/terraform-engineer/references/module-patterns.md +297 -297
- package/skills/infra/terraform-engineer/references/providers.md +452 -452
- package/skills/infra/terraform-engineer/references/state-management.md +371 -371
- package/skills/infra/terraform-engineer/references/testing.md +486 -486
- package/skills/infra/websocket-engineer/SKILL.md +168 -168
- package/skills/infra/websocket-engineer/references/alternatives.md +391 -391
- package/skills/infra/websocket-engineer/references/patterns.md +400 -400
- package/skills/infra/websocket-engineer/references/protocol.md +195 -195
- package/skills/infra/websocket-engineer/references/scaling.md +333 -333
- package/skills/infra/websocket-engineer/references/security.md +474 -474
- package/skills/java/java-architect/SKILL.md +132 -132
- package/skills/java/java-architect/references/jpa-optimization.md +393 -393
- package/skills/java/java-architect/references/reactive-webflux.md +356 -356
- package/skills/java/java-architect/references/spring-boot-setup.md +269 -269
- package/skills/java/java-architect/references/spring-security.md +445 -445
- package/skills/java/java-architect/references/testing-patterns.md +500 -500
- package/skills/java/kotlin-specialist/SKILL.md +147 -147
- package/skills/java/kotlin-specialist/references/android-compose.md +419 -419
- package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -276
- package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -421
- package/skills/java/kotlin-specialist/references/ktor-server.md +426 -426
- package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -380
- package/skills/java/spring-boot-engineer/SKILL.md +195 -195
- package/skills/java/spring-boot-engineer/references/cloud.md +498 -498
- package/skills/java/spring-boot-engineer/references/data.md +381 -381
- package/skills/java/spring-boot-engineer/references/security.md +459 -459
- package/skills/java/spring-boot-engineer/references/testing.md +545 -545
- package/skills/java/spring-boot-engineer/references/web.md +295 -295
- package/skills/javascript/javascript-pro/SKILL.md +132 -132
- package/skills/javascript/javascript-pro/references/async-patterns.md +334 -334
- package/skills/javascript/javascript-pro/references/browser-apis.md +398 -398
- package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -272
- package/skills/javascript/javascript-pro/references/modules.md +357 -357
- package/skills/javascript/javascript-pro/references/node-essentials.md +471 -471
- package/skills/javascript/nestjs-expert/SKILL.md +206 -206
- package/skills/javascript/nestjs-expert/references/authentication.md +166 -166
- package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -111
- package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -153
- package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -1237
- package/skills/javascript/nestjs-expert/references/services-di.md +140 -140
- package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -186
- package/skills/javascript/typescript-pro/SKILL.md +145 -145
- package/skills/javascript/typescript-pro/references/advanced-types.md +259 -259
- package/skills/javascript/typescript-pro/references/configuration.md +445 -445
- package/skills/javascript/typescript-pro/references/patterns.md +484 -484
- package/skills/javascript/typescript-pro/references/type-guards.md +352 -352
- package/skills/javascript/typescript-pro/references/utility-types.md +329 -329
- package/skills/php/laravel-specialist/SKILL.md +262 -262
- package/skills/php/laravel-specialist/references/eloquent.md +351 -351
- package/skills/php/laravel-specialist/references/livewire.md +512 -512
- package/skills/php/laravel-specialist/references/queues.md +423 -423
- package/skills/php/laravel-specialist/references/routing.md +362 -362
- package/skills/php/laravel-specialist/references/testing.md +522 -522
- package/skills/php/php-pro/SKILL.md +206 -206
- package/skills/php/php-pro/references/async-patterns.md +412 -412
- package/skills/php/php-pro/references/laravel-patterns.md +377 -377
- package/skills/php/php-pro/references/modern-php-features.md +323 -323
- package/skills/php/php-pro/references/symfony-patterns.md +466 -466
- package/skills/php/php-pro/references/testing-quality.md +466 -466
- package/skills/product/competitive-analysis/SKILL.md +257 -257
- package/skills/product/meeting-notes/SKILL.md +266 -266
- package/skills/product/prd-template/SKILL.md +150 -150
- package/skills/product/stakeholder-update/SKILL.md +225 -225
- package/skills/product/user-research-synthesis/SKILL.md +235 -235
- package/skills/python/django-expert/SKILL.md +162 -162
- package/skills/python/django-expert/references/authentication.md +145 -145
- package/skills/python/django-expert/references/drf-serializers.md +148 -148
- package/skills/python/django-expert/references/models-orm.md +151 -151
- package/skills/python/django-expert/references/testing-django.md +204 -204
- package/skills/python/django-expert/references/viewsets-views.md +153 -153
- package/skills/python/fastapi-expert/SKILL.md +185 -185
- package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -146
- package/skills/python/fastapi-expert/references/authentication.md +159 -159
- package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -142
- package/skills/python/fastapi-expert/references/migration-from-django.md +996 -996
- package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -135
- package/skills/python/fastapi-expert/references/testing-async.md +159 -159
- package/skills/python/pandas-pro/SKILL.md +178 -178
- package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -545
- package/skills/python/pandas-pro/references/data-cleaning.md +500 -500
- package/skills/python/pandas-pro/references/dataframe-operations.md +420 -420
- package/skills/python/pandas-pro/references/merging-joining.md +596 -596
- package/skills/python/pandas-pro/references/performance-optimization.md +597 -597
- package/skills/python/python-pro/SKILL.md +177 -177
- package/skills/python/python-pro/references/async-patterns.md +356 -356
- package/skills/python/python-pro/references/packaging.md +460 -460
- package/skills/python/python-pro/references/standard-library.md +378 -378
- package/skills/python/python-pro/references/testing.md +404 -404
- package/skills/python/python-pro/references/type-system.md +290 -290
- package/skills/quality/chaos-engineer/SKILL.md +182 -182
- package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -511
- package/skills/quality/chaos-engineer/references/experiment-design.md +229 -229
- package/skills/quality/chaos-engineer/references/game-days.md +434 -434
- package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -348
- package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -432
- package/skills/quality/code-reviewer/SKILL.md +119 -119
- package/skills/quality/code-reviewer/references/common-issues.md +142 -142
- package/skills/quality/code-reviewer/references/feedback-examples.md +144 -144
- package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -238
- package/skills/quality/code-reviewer/references/report-template.md +109 -109
- package/skills/quality/code-reviewer/references/review-checklist.md +88 -88
- package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -258
- package/skills/quality/playwright-expert/SKILL.md +169 -169
- package/skills/quality/playwright-expert/references/api-mocking.md +140 -140
- package/skills/quality/playwright-expert/references/configuration.md +155 -155
- package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -150
- package/skills/quality/playwright-expert/references/page-object-model.md +152 -152
- package/skills/quality/playwright-expert/references/selectors-locators.md +119 -119
- package/skills/quality/secure-code-guardian/SKILL.md +191 -191
- package/skills/quality/secure-code-guardian/references/authentication.md +136 -136
- package/skills/quality/secure-code-guardian/references/input-validation.md +146 -146
- package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -135
- package/skills/quality/secure-code-guardian/references/security-headers.md +133 -133
- package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -157
- package/skills/quality/security-reviewer/SKILL.md +103 -103
- package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -268
- package/skills/quality/security-reviewer/references/penetration-testing.md +268 -268
- package/skills/quality/security-reviewer/references/report-template.md +170 -170
- package/skills/quality/security-reviewer/references/sast-tools.md +117 -117
- package/skills/quality/security-reviewer/references/secret-scanning.md +125 -125
- package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -152
- package/skills/quality/senior-qa/README.md +196 -196
- package/skills/quality/senior-qa/SKILL.md +399 -399
- package/skills/quality/senior-qa/references/qa_best_practices.md +964 -964
- package/skills/quality/senior-qa/references/test_automation_patterns.md +1009 -1009
- package/skills/quality/senior-qa/references/testing_strategies.md +649 -649
- package/skills/quality/senior-qa/scripts/coverage_analyzer.py +836 -836
- package/skills/quality/senior-qa/scripts/e2e_test_scaffolder.py +820 -820
- package/skills/quality/senior-qa/scripts/test_suite_generator.py +605 -605
- package/skills/quality/tdd-guide/HOW_TO_USE.md +313 -313
- package/skills/quality/tdd-guide/README.md +680 -680
- package/skills/quality/tdd-guide/SKILL.md +122 -122
- package/skills/quality/tdd-guide/assets/expected_output.json +77 -77
- package/skills/quality/tdd-guide/assets/sample_input_python.json +39 -39
- package/skills/quality/tdd-guide/assets/sample_input_typescript.json +36 -36
- package/skills/quality/tdd-guide/references/ci-integration.md +195 -195
- package/skills/quality/tdd-guide/references/framework-guide.md +206 -206
- package/skills/quality/tdd-guide/references/tdd-best-practices.md +128 -128
- package/skills/quality/tdd-guide/scripts/coverage_analyzer.py +434 -434
- package/skills/quality/tdd-guide/scripts/fixture_generator.py +440 -440
- package/skills/quality/tdd-guide/scripts/format_detector.py +384 -384
- package/skills/quality/tdd-guide/scripts/framework_adapter.py +428 -428
- package/skills/quality/tdd-guide/scripts/metrics_calculator.py +456 -456
- package/skills/quality/tdd-guide/scripts/output_formatter.py +354 -354
- package/skills/quality/tdd-guide/scripts/tdd_workflow.py +474 -474
- package/skills/quality/tdd-guide/scripts/test_generator.py +438 -438
- package/skills/quality/test-master/SKILL.md +94 -94
- package/skills/quality/test-master/references/automation-frameworks.md +294 -294
- package/skills/quality/test-master/references/e2e-testing.md +128 -128
- package/skills/quality/test-master/references/integration-testing.md +120 -120
- package/skills/quality/test-master/references/performance-testing.md +118 -118
- package/skills/quality/test-master/references/qa-methodology.md +247 -247
- package/skills/quality/test-master/references/security-testing.md +127 -127
- package/skills/quality/test-master/references/tdd-iron-laws.md +174 -174
- package/skills/quality/test-master/references/test-reports.md +104 -104
- package/skills/quality/test-master/references/testing-anti-patterns.md +231 -231
- package/skills/quality/test-master/references/unit-testing.md +113 -113
- package/skills/ruby/rails-expert/SKILL.md +154 -154
- package/skills/ruby/rails-expert/references/active-record.md +244 -244
- package/skills/ruby/rails-expert/references/api-development.md +401 -401
- package/skills/ruby/rails-expert/references/background-jobs.md +272 -272
- package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -228
- package/skills/ruby/rails-expert/references/rspec-testing.md +367 -367
- package/skills/swift/swift-expert/SKILL.md +163 -163
- package/skills/swift/swift-expert/references/async-concurrency.md +360 -360
- package/skills/swift/swift-expert/references/memory-performance.md +377 -377
- package/skills/swift/swift-expert/references/protocol-oriented.md +354 -354
- package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -291
- package/skills/swift/swift-expert/references/testing-patterns.md +399 -399
- package/skills/workflow/brainstorming/SKILL.md +164 -164
- package/skills/workflow/brainstorming/scripts/frame-template.html +214 -214
- package/skills/workflow/brainstorming/scripts/helper.js +88 -88
- package/skills/workflow/brainstorming/scripts/server.cjs +354 -354
- package/skills/workflow/brainstorming/scripts/start-server.sh +148 -148
- package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -56
- package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -49
- package/skills/workflow/brainstorming/visual-companion.md +287 -287
- package/skills/workflow/documentation/SKILL.md +45 -45
- package/skills/workflow/entropy-management/SKILL.md +115 -115
- package/skills/workflow/executing-plans/SKILL.md +70 -70
- package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -200
- package/skills/workflow/receiving-code-review/SKILL.md +213 -213
- package/skills/workflow/requesting-code-review/SKILL.md +105 -105
- package/skills/workflow/requesting-code-review/code-reviewer.md +146 -146
- package/skills/workflow/requirement-engineering/SKILL.md +111 -111
- package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -119
- package/skills/workflow/systematic-debugging/SKILL.md +296 -296
- package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -158
- package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -115
- package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -122
- package/skills/workflow/systematic-debugging/find-polluter.sh +63 -63
- package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -169
- package/skills/workflow/systematic-debugging/test-academic.md +14 -14
- package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -58
- package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -68
- package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -69
- package/skills/workflow/using-git-worktrees/SKILL.md +218 -218
- package/skills/workflow/verification-before-completion/SKILL.md +139 -139
- package/skills/workflow/writing-plans/SKILL.md +151 -151
- package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -49
- package/skills/workflow/writing-skills/SKILL.md +655 -655
- package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -1150
- package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -189
- package/skills/workflow/writing-skills/persuasion-principles.md +187 -187
- package/skills/workflow/writing-skills/render-graphs.js +168 -168
- package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -384
|
@@ -1,795 +1,795 @@
|
|
|
1
|
-
# Retrieval Optimization
|
|
2
|
-
|
|
3
|
-
---
|
|
4
|
-
|
|
5
|
-
## Optimization Techniques Overview
|
|
6
|
-
|
|
7
|
-
| Technique | Impact | Complexity | When to Use |
|
|
8
|
-
|-----------|--------|------------|-------------|
|
|
9
|
-
| **Hybrid Search** | High | Medium | Always for production |
|
|
10
|
-
| **Reranking** | High | Low | Top-k refinement |
|
|
11
|
-
| **Query Expansion** | Medium | Medium | Ambiguous queries |
|
|
12
|
-
| **HyDE** | Medium-High | Medium | Concept-heavy retrieval |
|
|
13
|
-
| **Metadata Filtering** | High | Low | Multi-tenant, categorical |
|
|
14
|
-
| **Query Decomposition** | Medium | High | Complex questions |
|
|
15
|
-
| **Contextual Compression** | Medium | Medium | Long retrieved chunks |
|
|
16
|
-
|
|
17
|
-
---
|
|
18
|
-
|
|
19
|
-
## Hybrid Search (Vector + Keyword)
|
|
20
|
-
|
|
21
|
-
### Reciprocal Rank Fusion (RRF)
|
|
22
|
-
|
|
23
|
-
```python
|
|
24
|
-
from dataclasses import dataclass
|
|
25
|
-
from typing import Callable
|
|
26
|
-
|
|
27
|
-
@dataclass
class SearchResult:
    """A single hit returned by a retrieval backend."""

    id: str  # key used to match the same document across result lists
    text: str
    score: float
    source: str  # "vector" or "keyword"; fused results are tagged "hybrid"


def reciprocal_rank_fusion(
    vector_results: list[SearchResult],
    keyword_results: list[SearchResult],
    k: int = 60,
    vector_weight: float = 0.5
) -> list[SearchResult]:
    """
    Combine vector and keyword results using weighted Reciprocal Rank Fusion.

    Each list contributes ``weight / (k + rank)`` for a document, where
    ``rank`` is the 1-based position of the document in that list.
    k is a constant that reduces the impact of high rankings (typically 60).

    Args:
        vector_results: Hits from the vector search, best first.
        keyword_results: Hits from the keyword search, best first.
        k: Smoothing constant; must not be negative.
        vector_weight: Share given to the vector list, within [0, 1]. The
            keyword list receives ``1 - vector_weight``.

    Returns:
        New ``SearchResult`` objects tagged ``source="hybrid"``, ordered by
        fused score (highest first). When a document is in both lists the
        text of the vector hit is kept.

    Raises:
        ValueError: If ``k`` is negative or ``vector_weight`` is outside
            [0, 1].
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")
    # A weight outside [0, 1] makes one of the two weights negative, which
    # would silently push that list's best hits to the bottom.
    if not 0 <= vector_weight <= 1:
        raise ValueError(
            f"vector_weight must be within [0, 1], got {vector_weight}"
        )

    scores: dict[str, float] = {}
    docs: dict[str, SearchResult] = {}

    weighted_lists = (
        (vector_results, vector_weight),
        (keyword_results, 1 - vector_weight),
    )
    for ranked_list, weight in weighted_lists:
        counted: set[str] = set()
        for rank, result in enumerate(ranked_list, 1):
            # Each list may vote once per document: only the best-ranked
            # occurrence of a repeated id counts.
            if result.id in counted:
                continue
            counted.add(result.id)
            scores[result.id] = scores.get(result.id, 0) + weight * (1 / (k + rank))
            # The vector list is processed first, so its text wins on overlap.
            docs.setdefault(result.id, result)

    # sorted() is stable, so ties keep first-seen order.
    sorted_ids = sorted(scores, key=scores.get, reverse=True)

    return [
        SearchResult(
            id=doc_id,
            text=docs[doc_id].text,
            score=scores[doc_id],
            source="hybrid"
        )
        for doc_id in sorted_ids
    ]
|
|
73
|
-
|
|
74
|
-
# Usage
|
|
75
|
-
hybrid_results = reciprocal_rank_fusion(
|
|
76
|
-
vector_results=vector_search(query_embedding, top_k=20),
|
|
77
|
-
keyword_results=bm25_search(query_text, top_k=20),
|
|
78
|
-
vector_weight=0.6 # Favor semantic similarity
|
|
79
|
-
)
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
### BM25 + Vector with Weaviate
|
|
83
|
-
|
|
84
|
-
```python
|
|
85
|
-
from weaviate.classes.query import HybridFusion
|
|
86
|
-
|
|
87
|
-
collection = client.collections.get("Documents")
|
|
88
|
-
|
|
89
|
-
# Hybrid search with configurable fusion
|
|
90
|
-
results = collection.query.hybrid(
|
|
91
|
-
query="how to configure authentication",
|
|
92
|
-
alpha=0.5, # 0 = pure BM25, 1 = pure vector
|
|
93
|
-
fusion_type=HybridFusion.RELATIVE_SCORE, # or RANKED
|
|
94
|
-
limit=10,
|
|
95
|
-
return_metadata=["score", "explain_score"]
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
# Iterate results
|
|
99
|
-
for obj in results.objects:
|
|
100
|
-
print(f"Score: {obj.metadata.score}")
|
|
101
|
-
print(f"Explanation: {obj.metadata.explain_score}")
|
|
102
|
-
print(f"Text: {obj.properties['content'][:200]}")
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
### Pinecone Sparse-Dense
|
|
106
|
-
|
|
107
|
-
```python
|
|
108
|
-
from pinecone_text.sparse import BM25Encoder
|
|
109
|
-
|
|
110
|
-
# Train BM25 encoder on your corpus
|
|
111
|
-
bm25 = BM25Encoder()
|
|
112
|
-
bm25.fit(corpus_documents)
|
|
113
|
-
|
|
114
|
-
# Encode query for hybrid search
|
|
115
|
-
sparse_vector = bm25.encode_queries(query_text)
|
|
116
|
-
dense_vector = get_embedding(query_text)
|
|
117
|
-
|
|
118
|
-
# Search with both vectors
|
|
119
|
-
results = index.query(
|
|
120
|
-
vector=dense_vector,
|
|
121
|
-
sparse_vector=sparse_vector,
|
|
122
|
-
top_k=10,
|
|
123
|
-
include_metadata=True
|
|
124
|
-
)
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
---
|
|
128
|
-
|
|
129
|
-
## Reranking
|
|
130
|
-
|
|
131
|
-
### Cohere Rerank
|
|
132
|
-
|
|
133
|
-
```python
|
|
134
|
-
import cohere
|
|
135
|
-
|
|
136
|
-
co = cohere.Client(api_key="your-api-key")
|
|
137
|
-
|
|
138
|
-
def rerank_results(
    query: str,
    documents: list[str],
    top_n: int = 5,
    model: str = "rerank-english-v3.0"
) -> list[dict]:
    """Rerank documents using Cohere.

    Args:
        query: The search query the documents are scored against.
        documents: Candidate texts to rerank.
        top_n: Number of results requested from the reranker.
        model: Name of the rerank model passed to the API.

    Returns:
        One dict per returned result with the keys ``"text"``,
        ``"relevance_score"`` and ``"original_index"`` (the result's
        ``index`` field). An empty list when ``documents`` is empty.
    """
    # Nothing to rank: skip the remote call entirely.
    if not documents:
        return []

    response = co.rerank(
        query=query,
        documents=documents,
        top_n=top_n,
        model=model,
        return_documents=True  # needed so result.document.text is populated
    )

    return [
        {
            "text": result.document.text,
            "relevance_score": result.relevance_score,
            "original_index": result.index
        }
        for result in response.results
    ]
|
|
161
|
-
|
|
162
|
-
# Pipeline: retrieve more, rerank fewer
|
|
163
|
-
initial_results = vector_search(query_embedding, top_k=50)
|
|
164
|
-
documents = [r.text for r in initial_results]
|
|
165
|
-
|
|
166
|
-
reranked = rerank_results(
|
|
167
|
-
query="how to configure OAuth2 authentication",
|
|
168
|
-
documents=documents,
|
|
169
|
-
top_n=5
|
|
170
|
-
)
|
|
171
|
-
|
|
172
|
-
# Use top 5 reranked docs for LLM context
|
|
173
|
-
context = "\n\n".join([r["text"] for r in reranked])
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
### Cross-Encoder Reranking (Open Source)
|
|
177
|
-
|
|
178
|
-
```python
|
|
179
|
-
from sentence_transformers import CrossEncoder
|
|
180
|
-
|
|
181
|
-
class Reranker:
    """Rerank using cross-encoder model."""

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        self.model = CrossEncoder(model_name)

    def rerank(
        self,
        query: str,
        documents: list[str],
        top_k: int = 5
    ) -> list[tuple[str, float]]:
        """Rerank documents by relevance to query.

        Args:
            query: The search query.
            documents: Candidate texts to score against the query.
            top_k: Maximum number of documents to return.

        Returns:
            Up to ``top_k`` ``(document, score)`` tuples, highest score
            first, with scores converted to plain ``float``. An empty list
            when there are no documents or ``top_k`` is not positive.
        """
        # A negative top_k would otherwise slice off the tail of the list
        # instead of returning nothing.
        if not documents or top_k <= 0:
            return []

        # Create query-document pairs
        pairs = [[query, doc] for doc in documents]

        # Get relevance scores
        scores = self.model.predict(pairs)

        # Sort by score; float() makes the result match the annotation
        # whatever numeric type the model emits.
        ranked = sorted(
            zip(documents, (float(score) for score in scores)),
            key=lambda pair: pair[1],
            reverse=True,
        )

        return ranked[:top_k]
|
|
205
|
-
|
|
206
|
-
# Usage
|
|
207
|
-
reranker = Reranker()
|
|
208
|
-
top_docs = reranker.rerank(
|
|
209
|
-
query="OAuth2 setup guide",
|
|
210
|
-
documents=retrieved_documents,
|
|
211
|
-
top_k=5
|
|
212
|
-
)
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
### ColBERT-Style Late Interaction
|
|
216
|
-
|
|
217
|
-
```python
|
|
218
|
-
from colbert import Searcher
|
|
219
|
-
from colbert.infra import Run, RunConfig
|
|
220
|
-
|
|
221
|
-
# Setup ColBERT index (one-time)
|
|
222
|
-
with Run().context(RunConfig(nranks=1)):
|
|
223
|
-
searcher = Searcher(index="path/to/colbert_index")
|
|
224
|
-
|
|
225
|
-
# Search with late interaction scoring
|
|
226
|
-
results = searcher.search(
|
|
227
|
-
query="how to configure authentication",
|
|
228
|
-
k=10
|
|
229
|
-
)
|
|
230
|
-
|
|
231
|
-
# Results include token-level matching scores
|
|
232
|
-
for passage_id, rank, score in zip(*results):
|
|
233
|
-
print(f"Rank {rank}: Doc {passage_id}, Score: {score}")
|
|
234
|
-
```
|
|
235
|
-
|
|
236
|
-
---
|
|
237
|
-
|
|
238
|
-
## Query Expansion
|
|
239
|
-
|
|
240
|
-
### LLM-Based Query Expansion
|
|
241
|
-
|
|
242
|
-
```python
|
|
243
|
-
from openai import OpenAI
|
|
244
|
-
|
|
245
|
-
client = OpenAI()
|
|
246
|
-
|
|
247
|
-
def expand_query(query: str, num_expansions: int = 3) -> list[str]:
    """Generate query variations using LLM.

    Args:
        query: The original user query.
        num_expansions: How many alternative queries to ask for; also the
            maximum number kept from the model's answer.

    Returns:
        The original query followed by the distinct, non-empty alternatives
        the model returned (at most ``num_expansions`` of them).
    """
    import json

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                # The requested shape must match both the JSON-object
                # response format and the "queries" key read below.
                "content": f"""Generate {num_expansions} alternative search queries
                that would help find relevant documents for the user's question.
                Include:
                - Synonym variations
                - More specific versions
                - More general versions
                Return a JSON object with a "queries" array of strings."""
            },
            {
                "role": "user",
                "content": query
            }
        ],
        response_format={"type": "json_object"}
    )

    # content may be None; treat that as "no expansions".
    payload = json.loads(response.choices[0].message.content or "{}")
    candidates = payload.get("queries", []) if isinstance(payload, dict) else []
    if not isinstance(candidates, list):
        candidates = []

    expansions: list[str] = []
    for candidate in candidates:
        if not isinstance(candidate, str) or not candidate.strip():
            continue
        # Searching the same text twice adds cost, not recall.
        if candidate == query or candidate in expansions:
            continue
        expansions.append(candidate)

    return [query] + expansions[:max(num_expansions, 0)]
|
|
273
|
-
|
|
274
|
-
# Usage
|
|
275
|
-
original_query = "how to fix memory leak"
|
|
276
|
-
expanded_queries = expand_query(original_query)
|
|
277
|
-
# ["how to fix memory leak", "debug memory issues", "memory leak detection",
|
|
278
|
-
# "troubleshoot high memory usage"]
|
|
279
|
-
|
|
280
|
-
# Search with all queries and merge results
|
|
281
|
-
all_results = []
|
|
282
|
-
for q in expanded_queries:
|
|
283
|
-
results = vector_search(get_embedding(q), top_k=10)
|
|
284
|
-
all_results.extend(results)
|
|
285
|
-
|
|
286
|
-
# Deduplicate and rank by frequency
|
|
287
|
-
deduped = deduplicate_by_id(all_results)
|
|
288
|
-
```
|
|
289
|
-
|
|
290
|
-
### Query Rewriting
|
|
291
|
-
|
|
292
|
-
```python
|
|
293
|
-
def rewrite_query_for_retrieval(
    conversational_query: str,
    chat_history: list[dict]
) -> str:
    """Rewrite conversational query to standalone search query.

    Args:
        conversational_query: The user's latest message, which may rely on
            earlier turns for context.
        chat_history: Earlier turns, rendered into the prompt through
            ``format_chat_history``.

    Returns:
        The rewritten query with surrounding whitespace removed, or
        ``conversational_query`` unchanged when the model returns nothing
        usable.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": """Rewrite the user's question as a standalone search query.
                Include relevant context from chat history.
                Output only the rewritten query, nothing else."""
            },
            {
                "role": "user",
                "content": f"""Chat history:
                {format_chat_history(chat_history)}

                User's question: {conversational_query}

                Rewritten search query:"""
            }
        ],
        max_tokens=100
    )

    # content may be None or blank; searching with the original question is
    # better than searching with an empty string.
    rewritten = (response.choices[0].message.content or "").strip()
    return rewritten or conversational_query
|
|
321
|
-
|
|
322
|
-
# Example
|
|
323
|
-
history = [
|
|
324
|
-
{"role": "user", "content": "Tell me about Python web frameworks"},
|
|
325
|
-
{"role": "assistant", "content": "Popular Python web frameworks include Django, Flask, and FastAPI..."}
|
|
326
|
-
]
|
|
327
|
-
query = "Which one is best for APIs?"
|
|
328
|
-
|
|
329
|
-
rewritten = rewrite_query_for_retrieval(query, history)
|
|
330
|
-
# Output: "Best Python web framework for building REST APIs: Django vs Flask vs FastAPI"
|
|
331
|
-
```
|
|
332
|
-
|
|
333
|
-
---
|
|
334
|
-
|
|
335
|
-
## HyDE (Hypothetical Document Embeddings)
|
|
336
|
-
|
|
337
|
-
```python
|
|
338
|
-
def hyde_search(
    query: str,
    vector_store,
    embedding_model,
    top_k: int = 10
) -> list[SearchResult]:
    """
    Generate hypothetical answer, embed it, and search.
    Aligns query embedding space with document embedding space.

    Args:
        query: The user's question.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
        embedding_model: Object exposing ``encode(text)``.
        top_k: Number of results requested from the vector store.

    Returns:
        Whatever ``vector_store.search`` returns for the embedded passage.
    """
    # Generate hypothetical document
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": """Write a passage that would answer the user's question.
                Write as if you're an expert documentation author.
                Be specific and technical. About 100-200 words."""
            },
            {
                "role": "user",
                "content": query
            }
        ],
        max_tokens=300
    )

    # If the model returns no text, degrade to plain query embedding rather
    # than passing None or "" to the encoder.
    hypothetical_doc = response.choices[0].message.content or query

    # Embed hypothetical document
    hyde_embedding = embedding_model.encode(hypothetical_doc)

    # Search with hypothetical doc embedding
    return vector_store.search(
        vector=hyde_embedding,
        top_k=top_k
    )
|
|
378
|
-
|
|
379
|
-
# Usage
|
|
380
|
-
results = hyde_search(
|
|
381
|
-
query="How do I handle rate limiting in my API?",
|
|
382
|
-
vector_store=qdrant_client,
|
|
383
|
-
embedding_model=sentence_transformer
|
|
384
|
-
)
|
|
385
|
-
```
|
|
386
|
-
|
|
387
|
-
### Multi-HyDE (Multiple Perspectives)
|
|
388
|
-
|
|
389
|
-
```python
|
|
390
|
-
def multi_hyde_search(
    query: str,
    vector_store,
    embedding_model,
    num_hypotheticals: int = 3,
    top_k: int = 10
) -> list[SearchResult]:
    """Generate multiple hypothetical docs for diverse retrieval.

    Args:
        query: The user's question.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
        embedding_model: Object exposing ``encode(text)``.
        num_hypotheticals: Number of passages requested, and the maximum
            number actually embedded.
        top_k: Results requested from the vector store per passage.

    Returns:
        The output of ``deduplicate_and_merge`` over all per-passage hits.
    """
    import json

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": f"""Generate {num_hypotheticals} different passages
                that could answer the question from different angles:
                1. Technical deep-dive
                2. Beginner-friendly explanation
                3. Best practices summary

                Return as JSON with "passages" array."""
            },
            {
                "role": "user",
                "content": query
            }
        ],
        response_format={"type": "json_object"}
    )

    # Tolerate missing content or a missing/misshaped "passages" key.
    payload = json.loads(response.choices[0].message.content or "{}")
    raw_passages = payload.get("passages") if isinstance(payload, dict) else None
    if not isinstance(raw_passages, list):
        raw_passages = []
    passages = [
        passage
        for passage in raw_passages
        if isinstance(passage, str) and passage.strip()
    ][:max(num_hypotheticals, 0)]

    # With no usable passage, still search once using the query itself.
    if not passages:
        passages = [query]

    # Embed all hypotheticals
    all_results = []
    for passage in passages:
        embedding = embedding_model.encode(passage)
        all_results.extend(vector_store.search(vector=embedding, top_k=top_k))

    # Deduplicate and combine scores
    return deduplicate_and_merge(all_results)
|
|
431
|
-
```
|
|
432
|
-
|
|
433
|
-
---
|
|
434
|
-
|
|
435
|
-
## Metadata Filtering
|
|
436
|
-
|
|
437
|
-
### Multi-Tenant Filtering
|
|
438
|
-
|
|
439
|
-
```python
|
|
440
|
-
class MultiTenantRetriever:
    """Retriever with mandatory tenant isolation."""

    def __init__(self, vector_store):
        # vector_store must expose search(vector=..., filter=..., top_k=...)
        self.vector_store = vector_store

    def search(
        self,
        query_embedding: list[float],
        tenant_id: str,
        top_k: int = 10,
        additional_filters: dict | None = None
    ) -> list[SearchResult]:
        """Search with mandatory tenant filter.

        Args:
            query_embedding: Query vector passed to the store.
            tenant_id: Tenant whose documents may be returned.
            top_k: Number of results requested.
            additional_filters: Optional extra filter, AND-ed with the
                tenant filter.

        Returns:
            Whatever ``vector_store.search`` returns.

        Raises:
            ValueError: If ``tenant_id`` is ``None`` or a blank string.
        """
        # Isolation is only "mandatory" if a missing tenant is rejected
        # rather than turned into a filter on None / "".
        if tenant_id is None or (isinstance(tenant_id, str) and not tenant_id.strip()):
            raise ValueError("tenant_id must be a non-empty value")

        # Build filter - tenant is always required
        tenant_filter = {"tenant_id": {"$eq": tenant_id}}
        if additional_filters:
            filters = {"$and": [tenant_filter, additional_filters]}
        else:
            filters = tenant_filter

        return self.vector_store.search(
            vector=query_embedding,
            filter=filters,
            top_k=top_k
        )
|
|
465
|
-
|
|
466
|
-
# Usage
|
|
467
|
-
retriever = MultiTenantRetriever(pinecone_index)
|
|
468
|
-
results = retriever.search(
|
|
469
|
-
query_embedding=embedding,
|
|
470
|
-
tenant_id="acme-corp",
|
|
471
|
-
additional_filters={
|
|
472
|
-
"doc_type": {"$in": ["manual", "faq"]},
|
|
473
|
-
"published": {"$eq": True}
|
|
474
|
-
}
|
|
475
|
-
)
|
|
476
|
-
```
|
|
477
|
-
|
|
478
|
-
### Temporal Filtering
|
|
479
|
-
|
|
480
|
-
```python
|
|
481
|
-
from datetime import datetime, timedelta
|
|
482
|
-
|
|
483
|
-
def search_recent_documents(
    query_embedding: list[float],
    vector_store,
    days_back: int = 30,
    top_k: int = 10
) -> list[SearchResult]:
    """Search documents updated within time window.

    Args:
        query_embedding: Query vector passed to the store.
        vector_store: Object exposing ``search(vector=..., filter=..., top_k=...)``.
        days_back: Size of the window in days; must not be negative.
        top_k: Number of results requested.

    Returns:
        Whatever ``vector_store.search`` returns for documents whose
        ``updated_at`` is at or after the cutoff.

    Raises:
        ValueError: If ``days_back`` is negative.
    """
    from datetime import timezone

    # A negative window would put the cutoff in the future and silently
    # match nothing.
    if days_back < 0:
        raise ValueError(f"days_back must be non-negative, got {days_back}")

    # datetime.utcnow() is deprecated; take the time from an aware clock and
    # drop tzinfo so the ISO string keeps its offset-free format.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff_date = utc_now - timedelta(days=days_back)

    return vector_store.search(
        vector=query_embedding,
        filter={
            "updated_at": {"$gte": cutoff_date.isoformat()}
        },
        top_k=top_k
    )
|
|
499
|
-
|
|
500
|
-
def search_with_recency_boost(
    query_embedding: list[float],
    vector_store,
    recency_weight: float = 0.2,
    top_k: int = 10
) -> list[SearchResult]:
    """Boost recent documents in ranking.

    Args:
        query_embedding: Query vector passed to the store.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
            Each hit must have ``score`` and ``metadata["updated_at"]``
            (an ISO-8601 string).
        recency_weight: Share of the final score taken by recency; the
            similarity score keeps ``1 - recency_weight``.
        top_k: Number of results to return.

    Returns:
        The ``top_k`` hits with the highest boosted score. Each scored hit
        gets a ``boosted_score`` attribute.
    """
    from datetime import timezone

    # Over-fetch so that recent documents outside the plain top_k can still
    # be promoted by the boost.
    results = vector_store.search(
        vector=query_embedding,
        top_k=top_k * 3
    )

    # Aware "now": works against timestamps with or without a UTC offset.
    now = datetime.now(timezone.utc)

    def compute_boosted_score(result):
        raw_timestamp = result.metadata["updated_at"]
        # fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
        if raw_timestamp.endswith("Z"):
            raw_timestamp = raw_timestamp[:-1] + "+00:00"
        doc_date = datetime.fromisoformat(raw_timestamp)
        if doc_date.tzinfo is None:
            # Offset-free timestamps are read as UTC.
            doc_date = doc_date.replace(tzinfo=timezone.utc)
        days_old = (now - doc_date).days
        # Linear decay over 1 year, clamped so future-dated documents cannot
        # score above 1.
        recency_score = min(1.0, max(0.0, 1 - (days_old / 365)))
        return result.score * (1 - recency_weight) + recency_score * recency_weight

    # Rerank with recency boost
    for result in results:
        result.boosted_score = compute_boosted_score(result)

    ranked = sorted(results, key=lambda hit: hit.boosted_score, reverse=True)
    return ranked[:top_k]
|
|
527
|
-
```
|
|
528
|
-
|
|
529
|
-
---
|
|
530
|
-
|
|
531
|
-
## Query Decomposition
|
|
532
|
-
|
|
533
|
-
```python
|
|
534
|
-
def decompose_complex_query(query: str) -> list[str]:
    """Break complex query into sub-questions.

    Args:
        query: The complex user question.

    Returns:
        The non-empty sub-questions returned by the model, or ``[query]``
        when the answer contains no usable ``"questions"`` list.
    """
    import json

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": """Break this complex question into simpler sub-questions
                that can be answered independently. Each sub-question should be
                searchable. Return as JSON with "questions" array."""
            },
            {
                "role": "user",
                "content": query
            }
        ],
        response_format={"type": "json_object"}
    )

    # content may be None; treat that as an empty object.
    payload = json.loads(response.choices[0].message.content or "{}")
    raw_questions = payload.get("questions") if isinstance(payload, dict) else None
    if not isinstance(raw_questions, list):
        raw_questions = []

    questions = [
        question
        for question in raw_questions
        if isinstance(question, str) and question.strip()
    ]
    # An empty list would make the caller search for nothing at all, so the
    # original query is the fallback for an empty answer, not only for a
    # missing key.
    return questions or [query]
|
|
556
|
-
|
|
557
|
-
def search_with_decomposition(
    complex_query: str,
    vector_store,
    embedding_model,
    top_k_per_subquery: int = 5
) -> dict:
    """Run one vector search per sub-question and collect the hits.

    The query is split with ``decompose_complex_query``. Each sub-question
    is embedded and searched separately. A document is reported only the
    first time its ``id`` is seen, so later sub-questions list only hits
    not already returned for earlier ones.

    Returns:
        A dict with two keys:
        ``"sub_questions"`` - a list of ``{"question", "results"}`` dicts,
        in sub-question order;
        ``"all_documents"`` - every distinct hit, in first-seen order.
    """
    per_question: list[dict] = []
    unique_documents: list = []
    already_returned: set = set()

    for question in decompose_complex_query(complex_query):
        hits = vector_store.search(
            vector=embedding_model.encode(question),
            top_k=top_k_per_subquery,
        )

        # Keep only documents no earlier sub-question has produced.
        fresh_hits = []
        for hit in hits:
            if hit.id in already_returned:
                continue
            already_returned.add(hit.id)
            fresh_hits.append(hit)

        unique_documents.extend(fresh_hits)
        per_question.append({"question": question, "results": fresh_hits})

    return {
        "sub_questions": per_question,
        "all_documents": unique_documents,
    }
|
|
590
|
-
|
|
591
|
-
# Usage
|
|
592
|
-
complex_q = "Compare the security features of OAuth2 and API keys, and explain when to use each"
|
|
593
|
-
results = search_with_decomposition(complex_q, vector_store, embedding_model)
|
|
594
|
-
```
|
|
595
|
-
|
|
596
|
-
---
|
|
597
|
-
|
|
598
|
-
## Contextual Compression
|
|
599
|
-
|
|
600
|
-
```python
|
|
601
|
-
def compress_retrieved_context(
    query: str,
    documents: list[str],
    max_tokens: int = 2000
) -> str:
    """Extract only the query-relevant parts of the retrieved documents.

    Args:
        query: The user's question.
        documents: Retrieved document texts; they are numbered ``[Doc 1]``,
            ``[Doc 2]``, ... in the prompt so the model can attribute sources.
        max_tokens: Budget for the extracted text. It is stated in the prompt
            and also passed as the completion's ``max_tokens``.

    Returns:
        The model's extracted text, or ``""`` when ``documents`` is empty.
    """
    # With no documents there is nothing to extract from; skip the LLM call
    # rather than paying for a completion that can only invent content.
    if not documents:
        return ""

    doc_listing = "\n".join(
        f"[Doc {i + 1}]: {doc}" for i, doc in enumerate(documents)
    )
    system_prompt = (
        "Extract only the parts of these documents that are\n"
        "relevant to answering the user's question.\n"
        "Remove irrelevant information.\n"
        f"Keep extracted content under {max_tokens} tokens.\n"
        "Maintain source attribution."
    )
    user_prompt = (
        f"Question: {query}\n"
        "\n"
        "Documents:\n"
        f"{doc_listing}\n"
        "\n"
        "Extracted relevant content:"
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens
    )

    return response.choices[0].message.content
|
|
632
|
-
```
|
|
633
|
-
|
|
634
|
-
### Extractive Compression with Cross-Encoder
|
|
635
|
-
|
|
636
|
-
```python
|
|
637
|
-
from sentence_transformers import CrossEncoder
|
|
638
|
-
|
|
639
|
-
def extractive_compress(
    query: str,
    document: str,
    cross_encoder: CrossEncoder,
    top_k_sentences: int = 5
) -> str:
    """Keep the sentences of ``document`` that score highest against ``query``.

    The document is split after ``.``, ``!`` or ``?`` followed by whitespace.
    If it has no more than ``top_k_sentences`` sentences it is returned
    unchanged. Otherwise each sentence is scored with
    ``cross_encoder.predict`` and the best ones are joined with single
    spaces in their original document order.
    """
    import re

    parts = re.split(r'(?<=[.!?])\s+', document)
    if len(parts) <= top_k_sentences:
        return document

    # One (query, sentence) pair per sentence, scored in a single batch.
    relevance = cross_encoder.predict([[query, part] for part in parts])

    # Rank positions by score (stable, so ties keep document order), keep the
    # best k, then put the survivors back into reading order.
    by_relevance = sorted(
        range(len(parts)),
        key=lambda position: relevance[position],
        reverse=True,
    )
    kept_positions = sorted(by_relevance[:top_k_sentences])

    return " ".join(parts[position] for position in kept_positions)
|
|
662
|
-
```
|
|
663
|
-
|
|
664
|
-
---
|
|
665
|
-
|
|
666
|
-
## Complete Optimized Pipeline
|
|
667
|
-
|
|
668
|
-
```python
|
|
669
|
-
class OptimizedRetriever:
    """Production retrieval pipeline with all optimizations.

    Combines tenant-filtered vector search, BM25, reciprocal rank fusion,
    optional HyDE / query expansion, and a final rerank step.
    """

    def __init__(
        self,
        vector_store,
        embedding_model,
        reranker,
        bm25_index
    ):
        self.vector_store = vector_store
        self.embedding_model = embedding_model
        self.reranker = reranker
        self.bm25_index = bm25_index

    async def retrieve(
        self,
        query: str,
        tenant_id: str,
        top_k: int = 5,
        use_hyde: bool = False,
        use_query_expansion: bool = True
    ) -> list[dict]:
        """Run the full retrieval pipeline for one query.

        Returns:
            Up to ``top_k`` dicts with ``"text"``, ``"score"`` (the reranker
            score) and ``"metadata"`` (taken from the matching candidate, or
            ``None`` when the candidate carries none), best first.
        """
        # Step 1: Query preprocessing
        processed_query = self._preprocess_query(query)

        # Step 2: Optional HyDE
        if use_hyde:
            query_embedding = await self._hyde_embed(processed_query)
        else:
            query_embedding = self.embedding_model.encode(processed_query)

        # Step 3: Hybrid search (vector + BM25)
        vector_results = self.vector_store.search(
            vector=query_embedding,
            filter={"tenant_id": tenant_id},
            top_k=50
        )
        # NOTE(review): unlike the vector searches, this BM25 lookup passes no
        # tenant filter - confirm the index is already tenant-scoped.
        bm25_results = self.bm25_index.search(processed_query, top_k=50)

        # Step 4: Merge with RRF
        merged = reciprocal_rank_fusion(
            vector_results,
            bm25_results,
            vector_weight=0.6
        )[:30]

        # Step 5: Optional query expansion
        if use_query_expansion:
            # The placeholder _expand_query returns None; treat that as
            # "no expansions" instead of failing on the slice below.
            expanded_queries = await self._expand_query(processed_query) or []
            for exp_query in expanded_queries[1:]:  # Skip original
                exp_embedding = self.embedding_model.encode(exp_query)
                exp_results = self.vector_store.search(
                    vector=exp_embedding,
                    filter={"tenant_id": tenant_id},
                    top_k=10
                )
                merged.extend(exp_results)
            merged = deduplicate_by_id(merged)[:30]

        # Step 6: Rerank
        documents = [r.text for r in merged]
        reranked = self.reranker.rerank(
            query=processed_query,
            documents=documents,
            top_k=top_k
        )

        return self._attach_metadata(merged, reranked)

    @staticmethod
    def _attach_metadata(merged, reranked) -> list[dict]:
        """Pair each reranked ``(text, score)`` with its candidate's metadata.

        The reranker returns texts in score order, so a position in
        ``reranked`` says nothing about the position in ``merged``; the
        candidate is looked up by its text instead (first match wins).
        """
        by_text = {}
        for candidate in merged:
            by_text.setdefault(candidate.text, candidate)

        return [
            {
                "text": doc,
                "score": score,
                "metadata": getattr(by_text.get(doc), "metadata", None)
            }
            for doc, score in reranked
        ]

    def _preprocess_query(self, query: str) -> str:
        """Collapse runs of whitespace to single spaces and trim the ends."""
        import re
        query = re.sub(r'\s+', ' ', query).strip()
        return query

    async def _hyde_embed(self, query: str) -> list[float]:
        """Generate hypothetical document and embed (placeholder)."""
        # Implementation from HyDE section
        pass

    async def _expand_query(self, query: str) -> list[str]:
        """Expand query with variations (placeholder)."""
        # Implementation from Query Expansion section
        pass
|
|
762
|
-
```
|
|
763
|
-
|
|
764
|
-
---
|
|
765
|
-
|
|
766
|
-
## Performance Benchmarks
|
|
767
|
-
|
|
768
|
-
| Technique | Latency Impact | Quality Impact | Cost Impact |
|
|
769
|
-
|-----------|----------------|----------------|-------------|
|
|
770
|
-
| Vector only | Baseline | Baseline | Baseline |
|
|
771
|
-
| + BM25 hybrid | +10-20ms | +5-15% precision | Minimal |
|
|
772
|
-
| + Reranking | +50-100ms | +10-20% precision | +$0.001/query |
|
|
773
|
-
| + Query expansion | +100-200ms | +5-10% recall | +$0.002/query |
|
|
774
|
-
| + HyDE | +200-500ms | +10-25% precision | +$0.003/query |
|
|
775
|
-
|
|
776
|
-
---
|
|
777
|
-
|
|
778
|
-
## Quick Reference
|
|
779
|
-
|
|
780
|
-
| Goal | Technique | Implementation |
|
|
781
|
-
|------|-----------|----------------|
|
|
782
|
-
| Improve precision | Reranking | Cross-encoder or Cohere |
|
|
783
|
-
| Improve recall | Query expansion | LLM-generated variations |
|
|
784
|
-
| Handle synonyms | Hybrid search | BM25 + vector with RRF |
|
|
785
|
-
| Concept search | HyDE | Hypothetical doc embedding |
|
|
786
|
-
| Multi-tenant | Metadata filter | Mandatory tenant_id |
|
|
787
|
-
| Fresh content | Temporal filter | Date range queries |
|
|
788
|
-
| Complex questions | Decomposition | Sub-question retrieval |
|
|
789
|
-
|
|
790
|
-
## Related Skills
|
|
791
|
-
|
|
792
|
-
- **RAG Architect** - System design and architecture
|
|
793
|
-
- **NLP Engineer** - Query understanding
|
|
794
|
-
- **Python Pro** - Async implementation
|
|
795
|
-
- **ML Pipeline** - Model serving for rerankers
|
|
1
|
+
# Retrieval Optimization
|
|
2
|
+
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
## Optimization Techniques Overview
|
|
6
|
+
|
|
7
|
+
| Technique | Impact | Complexity | When to Use |
|
|
8
|
+
|-----------|--------|------------|-------------|
|
|
9
|
+
| **Hybrid Search** | High | Medium | Always for production |
|
|
10
|
+
| **Reranking** | High | Low | Top-k refinement |
|
|
11
|
+
| **Query Expansion** | Medium | Medium | Ambiguous queries |
|
|
12
|
+
| **HyDE** | Medium-High | Medium | Concept-heavy retrieval |
|
|
13
|
+
| **Metadata Filtering** | High | Low | Multi-tenant, categorical |
|
|
14
|
+
| **Query Decomposition** | Medium | High | Complex questions |
|
|
15
|
+
| **Contextual Compression** | Medium | Medium | Long retrieved chunks |
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Hybrid Search (Vector + Keyword)
|
|
20
|
+
|
|
21
|
+
### Reciprocal Rank Fusion (RRF)
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from typing import Callable
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class SearchResult:
|
|
29
|
+
id: str
|
|
30
|
+
text: str
|
|
31
|
+
score: float
|
|
32
|
+
source: str # "vector" or "keyword"
|
|
33
|
+
|
|
34
|
+
def reciprocal_rank_fusion(
|
|
35
|
+
vector_results: list[SearchResult],
|
|
36
|
+
keyword_results: list[SearchResult],
|
|
37
|
+
k: int = 60,
|
|
38
|
+
vector_weight: float = 0.5
|
|
39
|
+
) -> list[SearchResult]:
|
|
40
|
+
"""
|
|
41
|
+
Combine vector and keyword results using RRF.
|
|
42
|
+
k is a constant that reduces the impact of high rankings (typically 60).
|
|
43
|
+
"""
|
|
44
|
+
scores: dict[str, float] = {}
|
|
45
|
+
docs: dict[str, SearchResult] = {}
|
|
46
|
+
|
|
47
|
+
# Score vector results
|
|
48
|
+
for rank, result in enumerate(vector_results, 1):
|
|
49
|
+
rrf_score = vector_weight * (1 / (k + rank))
|
|
50
|
+
scores[result.id] = scores.get(result.id, 0) + rrf_score
|
|
51
|
+
docs[result.id] = result
|
|
52
|
+
|
|
53
|
+
# Score keyword results
|
|
54
|
+
keyword_weight = 1 - vector_weight
|
|
55
|
+
for rank, result in enumerate(keyword_results, 1):
|
|
56
|
+
rrf_score = keyword_weight * (1 / (k + rank))
|
|
57
|
+
scores[result.id] = scores.get(result.id, 0) + rrf_score
|
|
58
|
+
if result.id not in docs:
|
|
59
|
+
docs[result.id] = result
|
|
60
|
+
|
|
61
|
+
# Sort by combined score
|
|
62
|
+
sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)
|
|
63
|
+
|
|
64
|
+
return [
|
|
65
|
+
SearchResult(
|
|
66
|
+
id=doc_id,
|
|
67
|
+
text=docs[doc_id].text,
|
|
68
|
+
score=scores[doc_id],
|
|
69
|
+
source="hybrid"
|
|
70
|
+
)
|
|
71
|
+
for doc_id in sorted_ids
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
# Usage
|
|
75
|
+
hybrid_results = reciprocal_rank_fusion(
|
|
76
|
+
vector_results=vector_search(query_embedding, top_k=20),
|
|
77
|
+
keyword_results=bm25_search(query_text, top_k=20),
|
|
78
|
+
vector_weight=0.6 # Favor semantic similarity
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### BM25 + Vector with Weaviate
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from weaviate.classes.query import HybridFusion
|
|
86
|
+
|
|
87
|
+
collection = client.collections.get("Documents")
|
|
88
|
+
|
|
89
|
+
# Hybrid search with configurable fusion
|
|
90
|
+
results = collection.query.hybrid(
|
|
91
|
+
query="how to configure authentication",
|
|
92
|
+
alpha=0.5, # 0 = pure BM25, 1 = pure vector
|
|
93
|
+
fusion_type=HybridFusion.RELATIVE_SCORE, # or RANKED
|
|
94
|
+
limit=10,
|
|
95
|
+
return_metadata=["score", "explain_score"]
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Iterate results
|
|
99
|
+
for obj in results.objects:
|
|
100
|
+
print(f"Score: {obj.metadata.score}")
|
|
101
|
+
print(f"Explanation: {obj.metadata.explain_score}")
|
|
102
|
+
print(f"Text: {obj.properties['content'][:200]}")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Pinecone Sparse-Dense
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from pinecone_text.sparse import BM25Encoder
|
|
109
|
+
|
|
110
|
+
# Train BM25 encoder on your corpus
|
|
111
|
+
bm25 = BM25Encoder()
|
|
112
|
+
bm25.fit(corpus_documents)
|
|
113
|
+
|
|
114
|
+
# Encode query for hybrid search
|
|
115
|
+
sparse_vector = bm25.encode_queries(query_text)
|
|
116
|
+
dense_vector = get_embedding(query_text)
|
|
117
|
+
|
|
118
|
+
# Search with both vectors
|
|
119
|
+
results = index.query(
|
|
120
|
+
vector=dense_vector,
|
|
121
|
+
sparse_vector=sparse_vector,
|
|
122
|
+
top_k=10,
|
|
123
|
+
include_metadata=True
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Reranking
|
|
130
|
+
|
|
131
|
+
### Cohere Rerank
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
import cohere
|
|
135
|
+
|
|
136
|
+
co = cohere.Client(api_key="your-api-key")
|
|
137
|
+
|
|
138
|
+
def rerank_results(
    query: str,
    documents: list[str],
    top_n: int = 5,
    model: str = "rerank-english-v3.0"
) -> list[dict]:
    """Rerank documents using Cohere.

    Args:
        query: The search query to rank against.
        documents: Candidate document texts.
        top_n: Number of results requested from the rerank call.
        model: Cohere rerank model name.

    Returns:
        One dict per returned result with ``"text"``, ``"relevance_score"``
        and ``"original_index"`` (the position in ``documents``), in the
        order the API returns them. Empty when ``documents`` is empty.
    """
    # Nothing to rank, so there is no reason to spend an API call.
    if not documents:
        return []

    response = co.rerank(
        query=query,
        documents=documents,
        top_n=top_n,
        model=model,
        return_documents=True
    )

    return [
        {
            "text": result.document.text,
            "relevance_score": result.relevance_score,
            "original_index": result.index
        }
        for result in response.results
    ]
|
|
161
|
+
|
|
162
|
+
# Pipeline: retrieve more, rerank fewer
|
|
163
|
+
initial_results = vector_search(query_embedding, top_k=50)
|
|
164
|
+
documents = [r.text for r in initial_results]
|
|
165
|
+
|
|
166
|
+
reranked = rerank_results(
|
|
167
|
+
query="how to configure OAuth2 authentication",
|
|
168
|
+
documents=documents,
|
|
169
|
+
top_n=5
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# Use top 5 reranked docs for LLM context
|
|
173
|
+
context = "\n\n".join([r["text"] for r in reranked])
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Cross-Encoder Reranking (Open Source)
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from sentence_transformers import CrossEncoder
|
|
180
|
+
|
|
181
|
+
class Reranker:
    """Rerank using cross-encoder model."""

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        self.model = CrossEncoder(model_name)

    def rerank(
        self,
        query: str,
        documents: list[str],
        top_k: int = 5
    ) -> list[tuple[str, float]]:
        """Rerank documents by relevance to query.

        Args:
            query: The search query.
            documents: Candidate document texts.
            top_k: Maximum number of results to return.

        Returns:
            ``(document, score)`` tuples sorted by score, highest first,
            truncated to ``top_k``. Empty when there are no documents or
            ``top_k`` is not positive.
        """
        # Skip model inference when there is nothing to score or return; a
        # negative top_k would otherwise slice from the end of the ranking.
        if not documents or top_k <= 0:
            return []

        # Create query-document pairs and score them in one batch
        pairs = [[query, doc] for doc in documents]
        scores = self.model.predict(pairs)

        # Stable sort: equally scored documents keep their input order
        ranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)

        return ranked[:top_k]
|
|
205
|
+
|
|
206
|
+
# Usage
|
|
207
|
+
reranker = Reranker()
|
|
208
|
+
top_docs = reranker.rerank(
|
|
209
|
+
query="OAuth2 setup guide",
|
|
210
|
+
documents=retrieved_documents,
|
|
211
|
+
top_k=5
|
|
212
|
+
)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### ColBERT-Style Late Interaction
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
from colbert import Searcher
|
|
219
|
+
from colbert.infra import Run, RunConfig
|
|
220
|
+
|
|
221
|
+
# Setup ColBERT index (one-time)
|
|
222
|
+
with Run().context(RunConfig(nranks=1)):
|
|
223
|
+
searcher = Searcher(index="path/to/colbert_index")
|
|
224
|
+
|
|
225
|
+
# Search with late interaction scoring
|
|
226
|
+
results = searcher.search(
|
|
227
|
+
query="how to configure authentication",
|
|
228
|
+
k=10
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# Results include token-level matching scores
|
|
232
|
+
for passage_id, rank, score in zip(*results):
|
|
233
|
+
print(f"Rank {rank}: Doc {passage_id}, Score: {score}")
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Query Expansion
|
|
239
|
+
|
|
240
|
+
### LLM-Based Query Expansion
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
from openai import OpenAI
|
|
244
|
+
|
|
245
|
+
client = OpenAI()
|
|
246
|
+
|
|
247
|
+
def expand_query(query: str, num_expansions: int = 3) -> list[str]:
    """Generate query variations using an LLM.

    Args:
        query: The original search query.
        num_expansions: Number of alternative queries to ask the model for.

    Returns:
        The original query followed by the generated alternatives. Only the
        original query is returned when the response has no usable
        ``"queries"`` list.
    """
    import json

    # JSON mode returns an object, and the parsing below reads its "queries"
    # key, so the prompt has to name that key explicitly.
    system_prompt = (
        f"Generate {num_expansions} alternative search queries\n"
        "that would help find relevant documents for the user's question.\n"
        "Include:\n"
        "- Synonym variations\n"
        "- More specific versions\n"
        "- More general versions\n"
        'Return as JSON with "queries" array of strings.'
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        response_format={"type": "json_object"}
    )

    result = json.loads(response.choices[0].message.content)
    variations = result.get("queries", [])
    if not isinstance(variations, list):
        variations = []

    # Drop non-string or blank entries so every item can be embedded later.
    return [query] + [v for v in variations if isinstance(v, str) and v.strip()]
|
|
273
|
+
|
|
274
|
+
# Usage
|
|
275
|
+
original_query = "how to fix memory leak"
|
|
276
|
+
expanded_queries = expand_query(original_query)
|
|
277
|
+
# ["how to fix memory leak", "debug memory issues", "memory leak detection",
|
|
278
|
+
# "troubleshoot high memory usage"]
|
|
279
|
+
|
|
280
|
+
# Search with all queries and merge results
|
|
281
|
+
all_results = []
|
|
282
|
+
for q in expanded_queries:
|
|
283
|
+
results = vector_search(get_embedding(q), top_k=10)
|
|
284
|
+
all_results.extend(results)
|
|
285
|
+
|
|
286
|
+
# Deduplicate and rank by frequency
|
|
287
|
+
deduped = deduplicate_by_id(all_results)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
### Query Rewriting
|
|
291
|
+
|
|
292
|
+
```python
|
|
293
|
+
def rewrite_query_for_retrieval(
    conversational_query: str,
    chat_history: list[dict]
) -> str:
    """Turn a context-dependent chat question into a standalone search query.

    The chat history (rendered by ``format_chat_history``) and the latest
    question are sent to the model, which is instructed to output only the
    rewritten query.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    system_prompt = (
        "Rewrite the user's question as a standalone search query.\n"
        "Include relevant context from chat history.\n"
        "Output only the rewritten query, nothing else."
    )
    user_prompt = (
        "Chat history:\n"
        f"{format_chat_history(chat_history)}\n"
        "\n"
        f"User's question: {conversational_query}\n"
        "\n"
        "Rewritten search query:"
    )

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=100,
    )

    rewritten = completion.choices[0].message.content
    return rewritten.strip()
|
|
321
|
+
|
|
322
|
+
# Example
|
|
323
|
+
history = [
|
|
324
|
+
{"role": "user", "content": "Tell me about Python web frameworks"},
|
|
325
|
+
{"role": "assistant", "content": "Popular Python web frameworks include Django, Flask, and FastAPI..."}
|
|
326
|
+
]
|
|
327
|
+
query = "Which one is best for APIs?"
|
|
328
|
+
|
|
329
|
+
rewritten = rewrite_query_for_retrieval(query, history)
|
|
330
|
+
# Output: "Best Python web framework for building REST APIs: Django vs Flask vs FastAPI"
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
335
|
+
## HyDE (Hypothetical Document Embeddings)
|
|
336
|
+
|
|
337
|
+
```python
|
|
338
|
+
def hyde_search(
    query: str,
    vector_store,
    embedding_model,
    top_k: int = 10
) -> list[SearchResult]:
    """Search with the embedding of an LLM-written hypothetical answer (HyDE).

    The model drafts a short passage answering ``query``; that passage,
    rather than the query itself, is embedded and used for the vector search.

    Args:
        query: The user's question.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
        embedding_model: Object exposing ``encode(text)``.
        top_k: Number of results requested from the vector store.

    Returns:
        Whatever ``vector_store.search`` returns for the passage embedding.
    """
    system_prompt = (
        "Write a passage that would answer the user's question.\n"
        "Write as if you're an expert documentation author.\n"
        "Be specific and technical. About 100-200 words."
    )

    # Draft the hypothetical document
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        max_tokens=300,
    )
    passage = completion.choices[0].message.content

    # Embed the draft and search with that vector
    passage_vector = embedding_model.encode(passage)
    return vector_store.search(vector=passage_vector, top_k=top_k)
|
|
378
|
+
|
|
379
|
+
# Usage
|
|
380
|
+
results = hyde_search(
|
|
381
|
+
query="How do I handle rate limiting in my API?",
|
|
382
|
+
vector_store=qdrant_client,
|
|
383
|
+
embedding_model=sentence_transformer
|
|
384
|
+
)
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
### Multi-HyDE (Multiple Perspectives)
|
|
388
|
+
|
|
389
|
+
```python
|
|
390
|
+
def multi_hyde_search(
    query: str,
    vector_store,
    embedding_model,
    num_hypotheticals: int = 3,
    top_k: int = 10
) -> list[SearchResult]:
    """Generate multiple hypothetical docs for diverse retrieval.

    Args:
        query: The user's question.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
        embedding_model: Object exposing ``encode(text)``.
        num_hypotheticals: Number of passages to ask the model for.
        top_k: Number of results requested per passage.

    Returns:
        The result of ``deduplicate_and_merge`` over the hits of every
        passage search.
    """
    import json

    system_prompt = (
        f"Generate {num_hypotheticals} different passages\n"
        "that could answer the question from different angles:\n"
        "1. Technical deep-dive\n"
        "2. Beginner-friendly explanation\n"
        "3. Best practices summary\n"
        "\n"
        'Return as JSON with "passages" array.'
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        response_format={"type": "json_object"}
    )

    payload = json.loads(response.choices[0].message.content)
    passages = payload.get("passages")
    if isinstance(passages, list):
        passages = [p for p in passages if isinstance(p, str) and p.strip()]
    # The model may omit or mangle the "passages" key; fall back to searching
    # with the raw query instead of raising, as decompose_complex_query does.
    if not passages:
        passages = [query]

    # Embed all hypotheticals
    all_results = []
    for passage in passages:
        embedding = embedding_model.encode(passage)
        all_results.extend(vector_store.search(vector=embedding, top_k=top_k))

    # Deduplicate and combine scores
    return deduplicate_and_merge(all_results)
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
---
|
|
434
|
+
|
|
435
|
+
## Metadata Filtering
|
|
436
|
+
|
|
437
|
+
### Multi-Tenant Filtering
|
|
438
|
+
|
|
439
|
+
```python
|
|
440
|
+
class MultiTenantRetriever:
    """Retriever with mandatory tenant isolation."""

    def __init__(self, vector_store):
        self.vector_store = vector_store

    def search(
        self,
        query_embedding: list[float],
        tenant_id: str,
        top_k: int = 10,
        additional_filters: dict | None = None
    ) -> list[SearchResult]:
        """Search with mandatory tenant filter.

        Args:
            query_embedding: Query vector passed to the store.
            tenant_id: Tenant whose documents may be returned.
            top_k: Number of results requested.
            additional_filters: Extra filter clauses, AND-ed with the tenant
                filter.

        Returns:
            Whatever ``vector_store.search`` returns for the combined filter.

        Raises:
            ValueError: If ``tenant_id`` is empty or ``None``; a search
                without a real tenant would defeat the isolation guarantee.
        """
        if not tenant_id:
            raise ValueError("tenant_id is required for tenant-isolated search")

        # Build filter - tenant is always required
        filters = {"tenant_id": {"$eq": tenant_id}}

        if additional_filters:
            filters = {"$and": [filters, additional_filters]}

        return self.vector_store.search(
            vector=query_embedding,
            filter=filters,
            top_k=top_k
        )
|
|
465
|
+
|
|
466
|
+
# Usage
|
|
467
|
+
retriever = MultiTenantRetriever(pinecone_index)
|
|
468
|
+
results = retriever.search(
|
|
469
|
+
query_embedding=embedding,
|
|
470
|
+
tenant_id="acme-corp",
|
|
471
|
+
additional_filters={
|
|
472
|
+
"doc_type": {"$in": ["manual", "faq"]},
|
|
473
|
+
"published": {"$eq": True}
|
|
474
|
+
}
|
|
475
|
+
)
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
### Temporal Filtering
|
|
479
|
+
|
|
480
|
+
```python
|
|
481
|
+
from datetime import datetime, timedelta
|
|
482
|
+
|
|
483
|
+
def search_recent_documents(
    query_embedding: list[float],
    vector_store,
    days_back: int = 30,
    top_k: int = 10
) -> list[SearchResult]:
    """Search documents updated within time window.

    Args:
        query_embedding: Query vector passed to the store.
        vector_store: Object exposing ``search(vector=..., filter=..., top_k=...)``.
        days_back: Size of the window in days, counted back from now (UTC).
        top_k: Number of results requested.

    Returns:
        Whatever ``vector_store.search`` returns for documents whose
        ``updated_at`` is at or after the cutoff.
    """
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop the tzinfo so the ISO string keeps the same naive-UTC
    # shape as before (no "+00:00" suffix).
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff_date = utc_now - timedelta(days=days_back)

    return vector_store.search(
        vector=query_embedding,
        filter={
            "updated_at": {"$gte": cutoff_date.isoformat()}
        },
        top_k=top_k
    )
|
|
499
|
+
|
|
500
|
+
def search_with_recency_boost(
    query_embedding: list[float],
    vector_store,
    recency_weight: float = 0.2,
    top_k: int = 10
) -> list[SearchResult]:
    """Boost recent documents in ranking.

    Fetches ``top_k * 3`` candidates, blends each candidate's score with a
    recency score that decays linearly from 1 (updated today) to 0 (one year
    old or older), and returns the ``top_k`` best by the blended score.

    Args:
        query_embedding: Query vector passed to the store.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
        recency_weight: Share of the blended score given to recency.
        top_k: Number of results to return.

    Returns:
        The best ``top_k`` candidates, each with a ``boosted_score``
        attribute set, highest first.
    """
    from datetime import timezone

    # Get more results so the recency boost has candidates to promote
    results = vector_store.search(
        vector=query_embedding,
        top_k=top_k * 3
    )

    # Aware "now": datetime.utcnow() is deprecated since Python 3.12
    now = datetime.now(timezone.utc)

    def compute_boosted_score(result):
        doc_date = datetime.fromisoformat(result.metadata["updated_at"])
        if doc_date.tzinfo is None:
            # Naive stamps are read as UTC, as before; aware stamps are used
            # as-is instead of raising on naive/aware subtraction.
            doc_date = doc_date.replace(tzinfo=timezone.utc)
        days_old = (now - doc_date).days
        # Clamp to [0, 1]: future-dated documents must not score above 1
        recency_score = min(1.0, max(0.0, 1 - (days_old / 365)))  # Decay over 1 year
        return result.score * (1 - recency_weight) + recency_score * recency_weight

    # Rerank with recency boost
    for result in results:
        result.boosted_score = compute_boosted_score(result)

    ranked = sorted(results, key=lambda r: r.boosted_score, reverse=True)
    return ranked[:top_k]
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
---
|
|
530
|
+
|
|
531
|
+
## Query Decomposition
|
|
532
|
+
|
|
533
|
+
```python
|
|
534
|
+
def decompose_complex_query(query: str) -> list[str]:
    """Break complex query into sub-questions.

    Args:
        query: The multi-part question to split.

    Returns:
        The sub-questions proposed by the model, or ``[query]`` when the
        response contains no usable ``"questions"`` list.
    """
    import json

    system_prompt = (
        "Break this complex question into simpler sub-questions\n"
        "that can be answered independently. Each sub-question should be\n"
        'searchable. Return as JSON with "questions" array.'
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        response_format={"type": "json_object"}
    )

    result = json.loads(response.choices[0].message.content)
    questions = result.get("questions")
    if isinstance(questions, list):
        questions = [q for q in questions if isinstance(q, str) and q.strip()]
    else:
        questions = []

    # An empty or malformed list would make callers search for nothing;
    # searching the original query is the safe fallback.
    return questions or [query]
|
|
556
|
+
|
|
557
|
+
def search_with_decomposition(
    complex_query: str,
    vector_store,
    embedding_model,
    top_k_per_subquery: int = 5
) -> dict:
    """Retrieve documents for every sub-question of a complex query.

    The query is split by ``decompose_complex_query``; each sub-question is
    embedded and searched on its own. A document id is reported only once:
    under the first sub-question that retrieved it, and once in
    ``"all_documents"``.

    Args:
        complex_query: The multi-part question to split and search.
        vector_store: Object exposing ``search(vector=..., top_k=...)``.
        embedding_model: Object exposing ``encode(text)``.
        top_k_per_subquery: Number of hits requested per sub-question.

    Returns:
        A dict with ``"sub_questions"`` (one ``{"question", "results"}`` entry
        per sub-question, in order) and ``"all_documents"`` (the
        de-duplicated hits in retrieval order).
    """
    per_question = []
    unique_documents = []
    already_returned = set()

    for question in decompose_complex_query(complex_query):
        question_vector = embedding_model.encode(question)
        hits = vector_store.search(vector=question_vector, top_k=top_k_per_subquery)

        fresh_hits = []
        for hit in hits:
            if hit.id in already_returned:
                continue
            already_returned.add(hit.id)
            fresh_hits.append(hit)

        unique_documents.extend(fresh_hits)
        per_question.append({"question": question, "results": fresh_hits})

    return {"sub_questions": per_question, "all_documents": unique_documents}
|
|
590
|
+
|
|
591
|
+
# Usage
|
|
592
|
+
complex_q = "Compare the security features of OAuth2 and API keys, and explain when to use each"
|
|
593
|
+
results = search_with_decomposition(complex_q, vector_store, embedding_model)
|
|
594
|
+
```
|
|
595
|
+
|
|
596
|
+
---
|
|
597
|
+
|
|
598
|
+
## Contextual Compression
|
|
599
|
+
|
|
600
|
+
```python
|
|
601
|
+
def compress_retrieved_context(
    query: str,
    documents: list[str],
    max_tokens: int = 2000
) -> str:
    """Extract only the query-relevant parts of the retrieved documents.

    Args:
        query: The user's question.
        documents: Retrieved document texts; they are numbered ``[Doc 1]``,
            ``[Doc 2]``, ... in the prompt so the model can attribute sources.
        max_tokens: Budget for the extracted text. It is stated in the prompt
            and also passed as the completion's ``max_tokens``.

    Returns:
        The model's extracted text, or ``""`` when ``documents`` is empty.
    """
    # With no documents there is nothing to extract from; skip the LLM call
    # rather than paying for a completion that can only invent content.
    if not documents:
        return ""

    doc_listing = "\n".join(
        f"[Doc {i + 1}]: {doc}" for i, doc in enumerate(documents)
    )
    system_prompt = (
        "Extract only the parts of these documents that are\n"
        "relevant to answering the user's question.\n"
        "Remove irrelevant information.\n"
        f"Keep extracted content under {max_tokens} tokens.\n"
        "Maintain source attribution."
    )
    user_prompt = (
        f"Question: {query}\n"
        "\n"
        "Documents:\n"
        f"{doc_listing}\n"
        "\n"
        "Extracted relevant content:"
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens
    )

    return response.choices[0].message.content
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
### Extractive Compression with Cross-Encoder
|
|
635
|
+
|
|
636
|
+
```python
|
|
637
|
+
from sentence_transformers import CrossEncoder
|
|
638
|
+
|
|
639
|
+
def extractive_compress(
    query: str,
    document: str,
    cross_encoder: CrossEncoder,
    top_k_sentences: int = 5
) -> str:
    """Keep the sentences of ``document`` that score highest against ``query``.

    The document is split after ``.``, ``!`` or ``?`` followed by whitespace.
    If it has no more than ``top_k_sentences`` sentences it is returned
    unchanged. Otherwise each sentence is scored with
    ``cross_encoder.predict`` and the best ones are joined with single
    spaces in their original document order.
    """
    import re

    parts = re.split(r'(?<=[.!?])\s+', document)
    if len(parts) <= top_k_sentences:
        return document

    # One (query, sentence) pair per sentence, scored in a single batch.
    relevance = cross_encoder.predict([[query, part] for part in parts])

    # Rank positions by score (stable, so ties keep document order), keep the
    # best k, then put the survivors back into reading order.
    by_relevance = sorted(
        range(len(parts)),
        key=lambda position: relevance[position],
        reverse=True,
    )
    kept_positions = sorted(by_relevance[:top_k_sentences])

    return " ".join(parts[position] for position in kept_positions)
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
---
|
|
665
|
+
|
|
666
|
+
## Complete Optimized Pipeline
|
|
667
|
+
|
|
668
|
+
```python
|
|
669
|
+
class OptimizedRetriever:
    """Production retrieval pipeline with all optimizations.

    Combines tenant-filtered vector search, BM25, reciprocal rank fusion,
    optional HyDE / query expansion, and a final rerank step.
    """

    def __init__(
        self,
        vector_store,
        embedding_model,
        reranker,
        bm25_index
    ):
        self.vector_store = vector_store
        self.embedding_model = embedding_model
        self.reranker = reranker
        self.bm25_index = bm25_index

    async def retrieve(
        self,
        query: str,
        tenant_id: str,
        top_k: int = 5,
        use_hyde: bool = False,
        use_query_expansion: bool = True
    ) -> list[dict]:
        """Run the full retrieval pipeline for one query.

        Returns:
            Up to ``top_k`` dicts with ``"text"``, ``"score"`` (the reranker
            score) and ``"metadata"`` (taken from the matching candidate, or
            ``None`` when the candidate carries none), best first.
        """
        # Step 1: Query preprocessing
        processed_query = self._preprocess_query(query)

        # Step 2: Optional HyDE
        if use_hyde:
            query_embedding = await self._hyde_embed(processed_query)
        else:
            query_embedding = self.embedding_model.encode(processed_query)

        # Step 3: Hybrid search (vector + BM25)
        vector_results = self.vector_store.search(
            vector=query_embedding,
            filter={"tenant_id": tenant_id},
            top_k=50
        )
        # NOTE(review): unlike the vector searches, this BM25 lookup passes no
        # tenant filter - confirm the index is already tenant-scoped.
        bm25_results = self.bm25_index.search(processed_query, top_k=50)

        # Step 4: Merge with RRF
        merged = reciprocal_rank_fusion(
            vector_results,
            bm25_results,
            vector_weight=0.6
        )[:30]

        # Step 5: Optional query expansion
        if use_query_expansion:
            # The placeholder _expand_query returns None; treat that as
            # "no expansions" instead of failing on the slice below.
            expanded_queries = await self._expand_query(processed_query) or []
            for exp_query in expanded_queries[1:]:  # Skip original
                exp_embedding = self.embedding_model.encode(exp_query)
                exp_results = self.vector_store.search(
                    vector=exp_embedding,
                    filter={"tenant_id": tenant_id},
                    top_k=10
                )
                merged.extend(exp_results)
            merged = deduplicate_by_id(merged)[:30]

        # Step 6: Rerank
        documents = [r.text for r in merged]
        reranked = self.reranker.rerank(
            query=processed_query,
            documents=documents,
            top_k=top_k
        )

        return self._attach_metadata(merged, reranked)

    @staticmethod
    def _attach_metadata(merged, reranked) -> list[dict]:
        """Pair each reranked ``(text, score)`` with its candidate's metadata.

        The reranker returns texts in score order, so a position in
        ``reranked`` says nothing about the position in ``merged``; the
        candidate is looked up by its text instead (first match wins).
        """
        by_text = {}
        for candidate in merged:
            by_text.setdefault(candidate.text, candidate)

        return [
            {
                "text": doc,
                "score": score,
                "metadata": getattr(by_text.get(doc), "metadata", None)
            }
            for doc, score in reranked
        ]

    def _preprocess_query(self, query: str) -> str:
        """Collapse runs of whitespace to single spaces and trim the ends."""
        import re
        query = re.sub(r'\s+', ' ', query).strip()
        return query

    async def _hyde_embed(self, query: str) -> list[float]:
        """Generate hypothetical document and embed (placeholder)."""
        # Implementation from HyDE section
        pass

    async def _expand_query(self, query: str) -> list[str]:
        """Expand query with variations (placeholder)."""
        # Implementation from Query Expansion section
        pass
|
|
762
|
+
```
|
|
763
|
+
|
|
764
|
+
---
|
|
765
|
+
|
|
766
|
+
## Performance Benchmarks
|
|
767
|
+
|
|
768
|
+
| Technique | Latency Impact | Quality Impact | Cost Impact |
|
|
769
|
+
|-----------|----------------|----------------|-------------|
|
|
770
|
+
| Vector only | Baseline | Baseline | Baseline |
|
|
771
|
+
| + BM25 hybrid | +10-20ms | +5-15% precision | Minimal |
|
|
772
|
+
| + Reranking | +50-100ms | +10-20% precision | +$0.001/query |
|
|
773
|
+
| + Query expansion | +100-200ms | +5-10% recall | +$0.002/query |
|
|
774
|
+
| + HyDE | +200-500ms | +10-25% precision | +$0.003/query |
|
|
775
|
+
|
|
776
|
+
---
|
|
777
|
+
|
|
778
|
+
## Quick Reference
|
|
779
|
+
|
|
780
|
+
| Goal | Technique | Implementation |
|
|
781
|
+
|------|-----------|----------------|
|
|
782
|
+
| Improve precision | Reranking | Cross-encoder or Cohere |
|
|
783
|
+
| Improve recall | Query expansion | LLM-generated variations |
|
|
784
|
+
| Handle synonyms | Hybrid search | BM25 + vector with RRF |
|
|
785
|
+
| Concept search | HyDE | Hypothetical doc embedding |
|
|
786
|
+
| Multi-tenant | Metadata filter | Mandatory tenant_id |
|
|
787
|
+
| Fresh content | Temporal filter | Date range queries |
|
|
788
|
+
| Complex questions | Decomposition | Sub-question retrieval |
|
|
789
|
+
|
|
790
|
+
## Related Skills
|
|
791
|
+
|
|
792
|
+
- **RAG Architect** - System design and architecture
|
|
793
|
+
- **NLP Engineer** - Query understanding
|
|
794
|
+
- **Python Pro** - Async implementation
|
|
795
|
+
- **ML Pipeline** - Model serving for rerankers
|