javi-forge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitignore.template +105 -0
- package/.releaserc +44 -0
- package/README.md +45 -0
- package/ai-config/.skillignore +15 -0
- package/ai-config/AUTO_INVOKE.md +300 -0
- package/ai-config/agents/_TEMPLATE.md +93 -0
- package/ai-config/agents/business/api-designer.md +1657 -0
- package/ai-config/agents/business/business-analyst.md +1331 -0
- package/ai-config/agents/business/product-strategist.md +206 -0
- package/ai-config/agents/business/project-manager.md +178 -0
- package/ai-config/agents/business/requirements-analyst.md +1277 -0
- package/ai-config/agents/business/technical-writer.md +1679 -0
- package/ai-config/agents/creative/ux-designer.md +205 -0
- package/ai-config/agents/data-ai/ai-engineer.md +487 -0
- package/ai-config/agents/data-ai/analytics-engineer.md +953 -0
- package/ai-config/agents/data-ai/data-engineer.md +173 -0
- package/ai-config/agents/data-ai/data-scientist.md +672 -0
- package/ai-config/agents/data-ai/mlops-engineer.md +814 -0
- package/ai-config/agents/data-ai/prompt-engineer.md +772 -0
- package/ai-config/agents/development/angular-expert.md +620 -0
- package/ai-config/agents/development/backend-architect.md +795 -0
- package/ai-config/agents/development/database-specialist.md +212 -0
- package/ai-config/agents/development/frontend-specialist.md +686 -0
- package/ai-config/agents/development/fullstack-engineer.md +668 -0
- package/ai-config/agents/development/golang-pro.md +338 -0
- package/ai-config/agents/development/java-enterprise.md +400 -0
- package/ai-config/agents/development/javascript-pro.md +422 -0
- package/ai-config/agents/development/nextjs-pro.md +474 -0
- package/ai-config/agents/development/python-pro.md +570 -0
- package/ai-config/agents/development/react-pro.md +487 -0
- package/ai-config/agents/development/rust-pro.md +246 -0
- package/ai-config/agents/development/spring-boot-4-expert.md +326 -0
- package/ai-config/agents/development/typescript-pro.md +336 -0
- package/ai-config/agents/development/vue-specialist.md +605 -0
- package/ai-config/agents/infrastructure/cloud-architect.md +472 -0
- package/ai-config/agents/infrastructure/deployment-manager.md +358 -0
- package/ai-config/agents/infrastructure/devops-engineer.md +455 -0
- package/ai-config/agents/infrastructure/incident-responder.md +519 -0
- package/ai-config/agents/infrastructure/kubernetes-expert.md +705 -0
- package/ai-config/agents/infrastructure/monitoring-specialist.md +674 -0
- package/ai-config/agents/infrastructure/performance-engineer.md +658 -0
- package/ai-config/agents/orchestrator.md +241 -0
- package/ai-config/agents/quality/accessibility-auditor.md +1204 -0
- package/ai-config/agents/quality/code-reviewer-compact.md +123 -0
- package/ai-config/agents/quality/code-reviewer.md +363 -0
- package/ai-config/agents/quality/dependency-manager.md +743 -0
- package/ai-config/agents/quality/e2e-test-specialist.md +1005 -0
- package/ai-config/agents/quality/performance-tester.md +1086 -0
- package/ai-config/agents/quality/security-auditor.md +133 -0
- package/ai-config/agents/quality/test-engineer.md +453 -0
- package/ai-config/agents/specialists/api-designer.md +87 -0
- package/ai-config/agents/specialists/backend-architect.md +73 -0
- package/ai-config/agents/specialists/code-reviewer.md +77 -0
- package/ai-config/agents/specialists/db-optimizer.md +75 -0
- package/ai-config/agents/specialists/devops-engineer.md +83 -0
- package/ai-config/agents/specialists/documentation-writer.md +78 -0
- package/ai-config/agents/specialists/frontend-developer.md +75 -0
- package/ai-config/agents/specialists/performance-analyst.md +82 -0
- package/ai-config/agents/specialists/refactor-specialist.md +74 -0
- package/ai-config/agents/specialists/security-auditor.md +74 -0
- package/ai-config/agents/specialists/test-engineer.md +81 -0
- package/ai-config/agents/specialists/ux-consultant.md +76 -0
- package/ai-config/agents/specialized/agent-generator.md +1190 -0
- package/ai-config/agents/specialized/blockchain-developer.md +149 -0
- package/ai-config/agents/specialized/code-migrator.md +892 -0
- package/ai-config/agents/specialized/context-manager.md +978 -0
- package/ai-config/agents/specialized/documentation-writer.md +1078 -0
- package/ai-config/agents/specialized/ecommerce-expert.md +1756 -0
- package/ai-config/agents/specialized/embedded-engineer.md +1714 -0
- package/ai-config/agents/specialized/error-detective.md +1034 -0
- package/ai-config/agents/specialized/fintech-specialist.md +1659 -0
- package/ai-config/agents/specialized/freelance-project-planner-v2.md +1988 -0
- package/ai-config/agents/specialized/freelance-project-planner-v3.md +2136 -0
- package/ai-config/agents/specialized/freelance-project-planner-v4.md +4503 -0
- package/ai-config/agents/specialized/freelance-project-planner.md +722 -0
- package/ai-config/agents/specialized/game-developer.md +1963 -0
- package/ai-config/agents/specialized/healthcare-dev.md +1620 -0
- package/ai-config/agents/specialized/mobile-developer.md +188 -0
- package/ai-config/agents/specialized/parallel-plan-executor.md +506 -0
- package/ai-config/agents/specialized/plan-executor.md +485 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/00-INDEX.md +485 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/01-CORE.md +3493 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/02-SELF-CORRECTION.md +778 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/03-PROGRESSIVE-SETUP.md +918 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/04-DEPLOYMENT.md +1537 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/05-TESTING.md +2633 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/06-OPERATIONS.md +5610 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/INSTALL.md +335 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/QUICK-REFERENCE.txt +215 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/README.md +260 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/START-HERE.md +379 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/WORKFLOW-DIAGRAM.md +355 -0
- package/ai-config/agents/specialized/solo-dev-planner-modular/solo-dev-planner.md +279 -0
- package/ai-config/agents/specialized/template-writer.md +347 -0
- package/ai-config/agents/specialized/test-runner.md +99 -0
- package/ai-config/agents/specialized/vibekanban-smart-worker.md +244 -0
- package/ai-config/agents/specialized/wave-executor.md +138 -0
- package/ai-config/agents/specialized/workflow-optimizer.md +1114 -0
- package/ai-config/commands/git/changelog.md +32 -0
- package/ai-config/commands/git/ci-local.md +70 -0
- package/ai-config/commands/git/commit.md +35 -0
- package/ai-config/commands/git/fix-issue.md +23 -0
- package/ai-config/commands/git/pr-create.md +42 -0
- package/ai-config/commands/git/pr-review.md +50 -0
- package/ai-config/commands/git/worktree.md +39 -0
- package/ai-config/commands/refactoring/cleanup.md +24 -0
- package/ai-config/commands/refactoring/dead-code.md +40 -0
- package/ai-config/commands/refactoring/extract.md +31 -0
- package/ai-config/commands/testing/e2e.md +30 -0
- package/ai-config/commands/testing/tdd.md +36 -0
- package/ai-config/commands/testing/test-coverage.md +30 -0
- package/ai-config/commands/testing/test-fix.md +24 -0
- package/ai-config/commands/workflow/generate-agents-md.md +85 -0
- package/ai-config/commands/workflow/planning.md +47 -0
- package/ai-config/commands/workflows/compound.md +89 -0
- package/ai-config/commands/workflows/plan.md +77 -0
- package/ai-config/commands/workflows/review.md +78 -0
- package/ai-config/commands/workflows/work.md +75 -0
- package/ai-config/config.yaml +18 -0
- package/ai-config/hooks/_TEMPLATE.md +96 -0
- package/ai-config/hooks/block-dangerous-commands.md +75 -0
- package/ai-config/hooks/commit-guard.md +90 -0
- package/ai-config/hooks/context-loader.md +73 -0
- package/ai-config/hooks/improve-prompt.md +91 -0
- package/ai-config/hooks/learning-log.md +72 -0
- package/ai-config/hooks/model-router.md +86 -0
- package/ai-config/hooks/secret-scanner.md +64 -0
- package/ai-config/hooks/skill-validator.md +102 -0
- package/ai-config/hooks/task-artifact.md +114 -0
- package/ai-config/hooks/validate-workflow.md +100 -0
- package/ai-config/prompts/base.md +71 -0
- package/ai-config/prompts/modes/debug.md +34 -0
- package/ai-config/prompts/modes/deploy.md +40 -0
- package/ai-config/prompts/modes/research.md +32 -0
- package/ai-config/prompts/modes/review.md +33 -0
- package/ai-config/prompts/review-policy.md +79 -0
- package/ai-config/skills/_TEMPLATE.md +157 -0
- package/ai-config/skills/backend/api-gateway/SKILL.md +254 -0
- package/ai-config/skills/backend/bff-concepts/SKILL.md +239 -0
- package/ai-config/skills/backend/bff-spring/SKILL.md +364 -0
- package/ai-config/skills/backend/chi-router/SKILL.md +396 -0
- package/ai-config/skills/backend/error-handling/SKILL.md +255 -0
- package/ai-config/skills/backend/exceptions-spring/SKILL.md +323 -0
- package/ai-config/skills/backend/fastapi/SKILL.md +302 -0
- package/ai-config/skills/backend/gateway-spring/SKILL.md +390 -0
- package/ai-config/skills/backend/go-backend/SKILL.md +457 -0
- package/ai-config/skills/backend/gradle-multimodule/SKILL.md +274 -0
- package/ai-config/skills/backend/graphql-concepts/SKILL.md +352 -0
- package/ai-config/skills/backend/graphql-spring/SKILL.md +398 -0
- package/ai-config/skills/backend/grpc-concepts/SKILL.md +283 -0
- package/ai-config/skills/backend/grpc-spring/SKILL.md +445 -0
- package/ai-config/skills/backend/jwt-auth/SKILL.md +412 -0
- package/ai-config/skills/backend/notifications-concepts/SKILL.md +259 -0
- package/ai-config/skills/backend/recommendations-concepts/SKILL.md +261 -0
- package/ai-config/skills/backend/search-concepts/SKILL.md +263 -0
- package/ai-config/skills/backend/search-spring/SKILL.md +375 -0
- package/ai-config/skills/backend/spring-boot-4/SKILL.md +172 -0
- package/ai-config/skills/backend/websockets/SKILL.md +532 -0
- package/ai-config/skills/data-ai/ai-ml/SKILL.md +423 -0
- package/ai-config/skills/data-ai/analytics-concepts/SKILL.md +195 -0
- package/ai-config/skills/data-ai/analytics-spring/SKILL.md +340 -0
- package/ai-config/skills/data-ai/duckdb-analytics/SKILL.md +440 -0
- package/ai-config/skills/data-ai/langchain/SKILL.md +238 -0
- package/ai-config/skills/data-ai/mlflow/SKILL.md +302 -0
- package/ai-config/skills/data-ai/onnx-inference/SKILL.md +290 -0
- package/ai-config/skills/data-ai/powerbi/SKILL.md +352 -0
- package/ai-config/skills/data-ai/pytorch/SKILL.md +274 -0
- package/ai-config/skills/data-ai/scikit-learn/SKILL.md +321 -0
- package/ai-config/skills/data-ai/vector-db/SKILL.md +301 -0
- package/ai-config/skills/database/graph-databases/SKILL.md +218 -0
- package/ai-config/skills/database/graph-spring/SKILL.md +361 -0
- package/ai-config/skills/database/pgx-postgres/SKILL.md +512 -0
- package/ai-config/skills/database/redis-cache/SKILL.md +343 -0
- package/ai-config/skills/database/sqlite-embedded/SKILL.md +388 -0
- package/ai-config/skills/database/timescaledb/SKILL.md +320 -0
- package/ai-config/skills/docs/api-documentation/SKILL.md +293 -0
- package/ai-config/skills/docs/docs-spring/SKILL.md +377 -0
- package/ai-config/skills/docs/mustache-templates/SKILL.md +190 -0
- package/ai-config/skills/docs/technical-docs/SKILL.md +447 -0
- package/ai-config/skills/frontend/astro-ssr/SKILL.md +441 -0
- package/ai-config/skills/frontend/frontend-design/SKILL.md +54 -0
- package/ai-config/skills/frontend/frontend-web/SKILL.md +368 -0
- package/ai-config/skills/frontend/mantine-ui/SKILL.md +396 -0
- package/ai-config/skills/frontend/tanstack-query/SKILL.md +439 -0
- package/ai-config/skills/frontend/zod-validation/SKILL.md +417 -0
- package/ai-config/skills/frontend/zustand-state/SKILL.md +350 -0
- package/ai-config/skills/infrastructure/chaos-engineering/SKILL.md +244 -0
- package/ai-config/skills/infrastructure/chaos-spring/SKILL.md +378 -0
- package/ai-config/skills/infrastructure/devops-infra/SKILL.md +435 -0
- package/ai-config/skills/infrastructure/docker-containers/SKILL.md +420 -0
- package/ai-config/skills/infrastructure/kubernetes/SKILL.md +456 -0
- package/ai-config/skills/infrastructure/opentelemetry/SKILL.md +546 -0
- package/ai-config/skills/infrastructure/traefik-proxy/SKILL.md +474 -0
- package/ai-config/skills/infrastructure/woodpecker-ci/SKILL.md +315 -0
- package/ai-config/skills/mobile/ionic-capacitor/SKILL.md +504 -0
- package/ai-config/skills/mobile/mobile-ionic/SKILL.md +448 -0
- package/ai-config/skills/prompt-improver/SKILL.md +125 -0
- package/ai-config/skills/quality/ghagga-review/SKILL.md +216 -0
- package/ai-config/skills/references/hooks-patterns/SKILL.md +238 -0
- package/ai-config/skills/references/mcp-servers/SKILL.md +275 -0
- package/ai-config/skills/references/plugins-reference/SKILL.md +110 -0
- package/ai-config/skills/references/skills-reference/SKILL.md +420 -0
- package/ai-config/skills/references/subagent-templates/SKILL.md +193 -0
- package/ai-config/skills/systems-iot/modbus-protocol/SKILL.md +410 -0
- package/ai-config/skills/systems-iot/mqtt-rumqttc/SKILL.md +408 -0
- package/ai-config/skills/systems-iot/rust-systems/SKILL.md +386 -0
- package/ai-config/skills/systems-iot/tokio-async/SKILL.md +324 -0
- package/ai-config/skills/testing/playwright-e2e/SKILL.md +289 -0
- package/ai-config/skills/testing/testcontainers/SKILL.md +299 -0
- package/ai-config/skills/testing/vitest-testing/SKILL.md +381 -0
- package/ai-config/skills/workflow/ci-local-guide/SKILL.md +118 -0
- package/ai-config/skills/workflow/claude-automation-recommender/SKILL.md +299 -0
- package/ai-config/skills/workflow/claude-md-improver/SKILL.md +158 -0
- package/ai-config/skills/workflow/finishing-a-development-branch/SKILL.md +117 -0
- package/ai-config/skills/workflow/git-github/SKILL.md +334 -0
- package/ai-config/skills/workflow/git-github/references/examples.md +160 -0
- package/ai-config/skills/workflow/git-workflow/SKILL.md +214 -0
- package/ai-config/skills/workflow/ide-plugins/SKILL.md +277 -0
- package/ai-config/skills/workflow/ide-plugins-intellij/SKILL.md +401 -0
- package/ai-config/skills/workflow/obsidian-brain-workflow/SKILL.md +199 -0
- package/ai-config/skills/workflow/using-git-worktrees/SKILL.md +100 -0
- package/ai-config/skills/workflow/verification-before-completion/SKILL.md +73 -0
- package/ai-config/skills/workflow/wave-workflow/SKILL.md +178 -0
- package/ci-local/README.md +170 -0
- package/ci-local/ci-local.sh +297 -0
- package/ci-local/hooks/commit-msg +74 -0
- package/ci-local/hooks/pre-commit +162 -0
- package/ci-local/hooks/pre-push +41 -0
- package/ci-local/install.sh +49 -0
- package/ci-local/semgrep.yml +214 -0
- package/dist/commands/analyze.d.ts +9 -0
- package/dist/commands/analyze.d.ts.map +1 -0
- package/dist/commands/analyze.js +55 -0
- package/dist/commands/analyze.js.map +1 -0
- package/dist/commands/analyze.test.d.ts +2 -0
- package/dist/commands/analyze.test.d.ts.map +1 -0
- package/dist/commands/analyze.test.js +145 -0
- package/dist/commands/analyze.test.js.map +1 -0
- package/dist/commands/doctor.d.ts +7 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +158 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/doctor.test.d.ts +2 -0
- package/dist/commands/doctor.test.d.ts.map +1 -0
- package/dist/commands/doctor.test.js +200 -0
- package/dist/commands/doctor.test.js.map +1 -0
- package/dist/commands/init.d.ts +9 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +283 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/init.test.d.ts +2 -0
- package/dist/commands/init.test.d.ts.map +1 -0
- package/dist/commands/init.test.js +271 -0
- package/dist/commands/init.test.js.map +1 -0
- package/dist/commands/sync.d.ts +8 -0
- package/dist/commands/sync.d.ts.map +1 -0
- package/dist/commands/sync.js +201 -0
- package/dist/commands/sync.js.map +1 -0
- package/dist/constants.d.ts +21 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +57 -0
- package/dist/constants.js.map +1 -0
- package/dist/e2e/aggressive.e2e.test.d.ts +2 -0
- package/dist/e2e/aggressive.e2e.test.d.ts.map +1 -0
- package/dist/e2e/aggressive.e2e.test.js +350 -0
- package/dist/e2e/aggressive.e2e.test.js.map +1 -0
- package/dist/e2e/commands.e2e.test.d.ts +2 -0
- package/dist/e2e/commands.e2e.test.d.ts.map +1 -0
- package/dist/e2e/commands.e2e.test.js +213 -0
- package/dist/e2e/commands.e2e.test.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +82 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/common.d.ts +17 -0
- package/dist/lib/common.d.ts.map +1 -0
- package/dist/lib/common.js +111 -0
- package/dist/lib/common.js.map +1 -0
- package/dist/lib/common.test.d.ts +2 -0
- package/dist/lib/common.test.d.ts.map +1 -0
- package/dist/lib/common.test.js +316 -0
- package/dist/lib/common.test.js.map +1 -0
- package/dist/lib/frontmatter.d.ts +18 -0
- package/dist/lib/frontmatter.d.ts.map +1 -0
- package/dist/lib/frontmatter.js +61 -0
- package/dist/lib/frontmatter.js.map +1 -0
- package/dist/lib/frontmatter.test.d.ts +2 -0
- package/dist/lib/frontmatter.test.d.ts.map +1 -0
- package/dist/lib/frontmatter.test.js +257 -0
- package/dist/lib/frontmatter.test.js.map +1 -0
- package/dist/lib/template.d.ts +24 -0
- package/dist/lib/template.d.ts.map +1 -0
- package/dist/lib/template.js +78 -0
- package/dist/lib/template.js.map +1 -0
- package/dist/lib/template.test.d.ts +2 -0
- package/dist/lib/template.test.d.ts.map +1 -0
- package/dist/lib/template.test.js +201 -0
- package/dist/lib/template.test.js.map +1 -0
- package/dist/types/index.d.ts +48 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/dist/ui/AnalyzeUI.d.ts +7 -0
- package/dist/ui/AnalyzeUI.d.ts.map +1 -0
- package/dist/ui/AnalyzeUI.js +100 -0
- package/dist/ui/AnalyzeUI.js.map +1 -0
- package/dist/ui/App.d.ts +13 -0
- package/dist/ui/App.d.ts.map +1 -0
- package/dist/ui/App.js +100 -0
- package/dist/ui/App.js.map +1 -0
- package/dist/ui/CIContext.d.ts +9 -0
- package/dist/ui/CIContext.d.ts.map +1 -0
- package/dist/ui/CIContext.js +9 -0
- package/dist/ui/CIContext.js.map +1 -0
- package/dist/ui/CISelector.d.ts +8 -0
- package/dist/ui/CISelector.d.ts.map +1 -0
- package/dist/ui/CISelector.js +45 -0
- package/dist/ui/CISelector.js.map +1 -0
- package/dist/ui/Doctor.d.ts +3 -0
- package/dist/ui/Doctor.d.ts.map +1 -0
- package/dist/ui/Doctor.js +89 -0
- package/dist/ui/Doctor.js.map +1 -0
- package/dist/ui/Header.d.ts +8 -0
- package/dist/ui/Header.d.ts.map +1 -0
- package/dist/ui/Header.js +30 -0
- package/dist/ui/Header.js.map +1 -0
- package/dist/ui/MemorySelector.d.ts +8 -0
- package/dist/ui/MemorySelector.d.ts.map +1 -0
- package/dist/ui/MemorySelector.js +46 -0
- package/dist/ui/MemorySelector.js.map +1 -0
- package/dist/ui/NameInput.d.ts +8 -0
- package/dist/ui/NameInput.d.ts.map +1 -0
- package/dist/ui/NameInput.js +69 -0
- package/dist/ui/NameInput.js.map +1 -0
- package/dist/ui/OptionSelector.d.ts +12 -0
- package/dist/ui/OptionSelector.d.ts.map +1 -0
- package/dist/ui/OptionSelector.js +69 -0
- package/dist/ui/OptionSelector.js.map +1 -0
- package/dist/ui/Progress.d.ts +11 -0
- package/dist/ui/Progress.d.ts.map +1 -0
- package/dist/ui/Progress.js +58 -0
- package/dist/ui/Progress.js.map +1 -0
- package/dist/ui/StackSelector.d.ts +9 -0
- package/dist/ui/StackSelector.d.ts.map +1 -0
- package/dist/ui/StackSelector.js +65 -0
- package/dist/ui/StackSelector.js.map +1 -0
- package/dist/ui/Summary.d.ts +12 -0
- package/dist/ui/Summary.d.ts.map +1 -0
- package/dist/ui/Summary.js +114 -0
- package/dist/ui/Summary.js.map +1 -0
- package/dist/ui/SyncUI.d.ts +10 -0
- package/dist/ui/SyncUI.d.ts.map +1 -0
- package/dist/ui/SyncUI.js +64 -0
- package/dist/ui/SyncUI.js.map +1 -0
- package/dist/ui/Welcome.d.ts +7 -0
- package/dist/ui/Welcome.d.ts.map +1 -0
- package/dist/ui/Welcome.js +45 -0
- package/dist/ui/Welcome.js.map +1 -0
- package/dist/ui/theme.d.ts +10 -0
- package/dist/ui/theme.d.ts.map +1 -0
- package/dist/ui/theme.js +9 -0
- package/dist/ui/theme.js.map +1 -0
- package/modules/engram/.gitignore-snippet.txt +6 -0
- package/modules/engram/.mcp-config-snippet.json +11 -0
- package/modules/engram/README.md +146 -0
- package/modules/engram/install-engram.sh +216 -0
- package/modules/ghagga/.env.example +43 -0
- package/modules/ghagga/README.md +153 -0
- package/modules/ghagga/docker-compose.yml +80 -0
- package/modules/ghagga/setup-ghagga.sh +139 -0
- package/modules/memory-simple/.project/NOTES.md +22 -0
- package/modules/memory-simple/README.md +23 -0
- package/modules/obsidian-brain/.obsidian/app.json +23 -0
- package/modules/obsidian-brain/.obsidian/appearance.json +5 -0
- package/modules/obsidian-brain/.obsidian/bookmarks.json +34 -0
- package/modules/obsidian-brain/.obsidian/community-plugins.json +1 -0
- package/modules/obsidian-brain/.obsidian/core-plugins-migration.json +21 -0
- package/modules/obsidian-brain/.obsidian/core-plugins.json +18 -0
- package/modules/obsidian-brain/.obsidian/daily-notes.json +5 -0
- package/modules/obsidian-brain/.obsidian/graph.json +37 -0
- package/modules/obsidian-brain/.obsidian/hotkeys.json +14 -0
- package/modules/obsidian-brain/.obsidian/plugins/dataview/data.json +25 -0
- package/modules/obsidian-brain/.obsidian/plugins/obsidian-kanban/data.json +29 -0
- package/modules/obsidian-brain/.obsidian/plugins/templater-obsidian/data.json +18 -0
- package/modules/obsidian-brain/.obsidian/snippets/project-memory.css +71 -0
- package/modules/obsidian-brain/.obsidian-gitignore-snippet.txt +8 -0
- package/modules/obsidian-brain/.project/Attachments/.gitkeep +0 -0
- package/modules/obsidian-brain/.project/Memory/BLOCKERS.md +78 -0
- package/modules/obsidian-brain/.project/Memory/CONTEXT.md +102 -0
- package/modules/obsidian-brain/.project/Memory/DASHBOARD.md +73 -0
- package/modules/obsidian-brain/.project/Memory/DECISIONS.md +87 -0
- package/modules/obsidian-brain/.project/Memory/KANBAN.md +15 -0
- package/modules/obsidian-brain/.project/Memory/README.md +61 -0
- package/modules/obsidian-brain/.project/Memory/WAVES.md +78 -0
- package/modules/obsidian-brain/.project/Sessions/TEMPLATE.md +99 -0
- package/modules/obsidian-brain/.project/Templates/ADR.md +33 -0
- package/modules/obsidian-brain/.project/Templates/Blocker.md +21 -0
- package/modules/obsidian-brain/.project/Templates/Session.md +88 -0
- package/modules/obsidian-brain/README.md +268 -0
- package/modules/obsidian-brain/new-wave.sh +182 -0
- package/package.json +51 -0
- package/schemas/agent.schema.json +34 -0
- package/schemas/ai-config.schema.json +28 -0
- package/schemas/skill.schema.json +44 -0
- package/src/commands/analyze.test.ts +145 -0
- package/src/commands/analyze.ts +69 -0
- package/src/commands/doctor.test.ts +208 -0
- package/src/commands/doctor.ts +163 -0
- package/src/commands/init.test.ts +298 -0
- package/src/commands/init.ts +285 -0
- package/src/constants.ts +69 -0
- package/src/e2e/aggressive.e2e.test.ts +557 -0
- package/src/e2e/commands.e2e.test.ts +298 -0
- package/src/index.tsx +106 -0
- package/src/lib/common.test.ts +318 -0
- package/src/lib/common.ts +127 -0
- package/src/lib/frontmatter.test.ts +291 -0
- package/src/lib/frontmatter.ts +77 -0
- package/src/lib/template.test.ts +226 -0
- package/src/lib/template.ts +99 -0
- package/src/types/index.ts +53 -0
- package/src/ui/AnalyzeUI.tsx +133 -0
- package/src/ui/App.tsx +175 -0
- package/src/ui/CIContext.tsx +25 -0
- package/src/ui/CISelector.tsx +72 -0
- package/src/ui/Doctor.tsx +122 -0
- package/src/ui/Header.tsx +48 -0
- package/src/ui/MemorySelector.tsx +73 -0
- package/src/ui/NameInput.tsx +82 -0
- package/src/ui/OptionSelector.tsx +100 -0
- package/src/ui/Progress.tsx +88 -0
- package/src/ui/StackSelector.tsx +101 -0
- package/src/ui/Summary.tsx +134 -0
- package/src/ui/Welcome.tsx +54 -0
- package/src/ui/theme.ts +10 -0
- package/stryker.config.json +19 -0
- package/tasks/_TEMPLATE/files-edited.md +3 -0
- package/tasks/_TEMPLATE/plan.md +3 -0
- package/tasks/_TEMPLATE/research.md +3 -0
- package/tasks/_TEMPLATE/verification.md +5 -0
- package/templates/common/dependabot/cargo.yml +11 -0
- package/templates/common/dependabot/github-actions.yml +16 -0
- package/templates/common/dependabot/gomod.yml +15 -0
- package/templates/common/dependabot/gradle.yml +15 -0
- package/templates/common/dependabot/header.yml +3 -0
- package/templates/common/dependabot/maven.yml +15 -0
- package/templates/common/dependabot/npm.yml +20 -0
- package/templates/common/dependabot/pip.yml +11 -0
- package/templates/dependabot.yml +162 -0
- package/templates/github/ci-go.yml +41 -0
- package/templates/github/ci-java.yml +45 -0
- package/templates/github/ci-monorepo.yml +150 -0
- package/templates/github/ci-node.yml +42 -0
- package/templates/github/ci-python.yml +42 -0
- package/templates/github/ci-rust.yml +42 -0
- package/templates/github/dependabot-automerge.yml +40 -0
- package/templates/gitlab/gitlab-ci-go.yml +88 -0
- package/templates/gitlab/gitlab-ci-java.yml +79 -0
- package/templates/gitlab/gitlab-ci-monorepo.yml +126 -0
- package/templates/gitlab/gitlab-ci-node.yml +63 -0
- package/templates/gitlab/gitlab-ci-python.yml +147 -0
- package/templates/gitlab/gitlab-ci-rust.yml +67 -0
- package/templates/global/claude-settings.json +98 -0
- package/templates/global/codex-config.toml +8 -0
- package/templates/global/copilot-instructions/base-rules.instructions.md +13 -0
- package/templates/global/copilot-instructions/sdd-orchestrator.instructions.md +37 -0
- package/templates/global/gemini-commands/cleanup.toml +20 -0
- package/templates/global/gemini-commands/commit.toml +15 -0
- package/templates/global/gemini-commands/dead-code.toml +22 -0
- package/templates/global/gemini-commands/plan.toml +30 -0
- package/templates/global/gemini-commands/review.toml +17 -0
- package/templates/global/gemini-commands/sdd-apply.toml +22 -0
- package/templates/global/gemini-commands/sdd-ff.toml +14 -0
- package/templates/global/gemini-commands/sdd-new.toml +21 -0
- package/templates/global/gemini-commands/sdd-verify.toml +21 -0
- package/templates/global/gemini-commands/tdd.toml +26 -0
- package/templates/global/gemini-settings.json +8 -0
- package/templates/global/opencode-config.json +44 -0
- package/templates/global/sdd-instructions.md +47 -0
- package/templates/global/sdd-orchestrator-claude.md +46 -0
- package/templates/global/sdd-orchestrator-copilot.md +34 -0
- package/templates/renovate.json +69 -0
- package/templates/woodpecker/monorepo/backend.yml +34 -0
- package/templates/woodpecker/monorepo/frontend.yml +34 -0
- package/templates/woodpecker/monorepo/summary.yml +25 -0
- package/templates/woodpecker/woodpecker-go.yml +51 -0
- package/templates/woodpecker/woodpecker-java.yml +67 -0
- package/templates/woodpecker/woodpecker-node.yml +47 -0
- package/templates/woodpecker/woodpecker-python.yml +108 -0
- package/templates/woodpecker/woodpecker-rust.yml +57 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +16 -0
- package/workflows/reusable-build-go.yml +111 -0
- package/workflows/reusable-build-java.yml +120 -0
- package/workflows/reusable-build-node.yml +145 -0
- package/workflows/reusable-build-python.yml +159 -0
- package/workflows/reusable-build-rust.yml +135 -0
- package/workflows/reusable-docker.yml +120 -0
- package/workflows/reusable-ghagga-review.yml +165 -0
- package/workflows/reusable-release.yml +91 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scikit-learn
|
|
3
|
+
description: >
|
|
4
|
+
Classical ML with scikit-learn for anomaly detection, classification, and clustering.
|
|
5
|
+
Trigger: sklearn, scikit-learn, classical ml, anomaly detection, classification, clustering
|
|
6
|
+
tools:
|
|
7
|
+
- Read
|
|
8
|
+
- Write
|
|
9
|
+
- Bash
|
|
10
|
+
- Grep
|
|
11
|
+
metadata:
|
|
12
|
+
author: plataforma-industrial
|
|
13
|
+
version: "2.0"
|
|
14
|
+
tags: [sklearn, ml, classification, anomaly-detection, clustering]
|
|
15
|
+
updated: "2026-02"
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
# Scikit-learn Skill
|
|
19
|
+
|
|
20
|
+
Classical ML for anomaly detection, classification, regression, and clustering.
|
|
21
|
+
|
|
22
|
+
## Stack
|
|
23
|
+
|
|
24
|
+
```yaml
|
|
25
|
+
scikit-learn: 1.4+
|
|
26
|
+
pandas: 2.2+
|
|
27
|
+
numpy: 1.26+
|
|
28
|
+
joblib: 1.3+
|
|
29
|
+
imbalanced-learn: 0.12+
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Anomaly Detection
|
|
33
|
+
|
|
34
|
+
### Isolation Forest
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from sklearn.ensemble import IsolationForest
|
|
38
|
+
from sklearn.preprocessing import StandardScaler
|
|
39
|
+
from sklearn.pipeline import Pipeline
|
|
40
|
+
import numpy as np
|
|
41
|
+
import joblib
|
|
42
|
+
|
|
43
|
+
class AnomalyDetector:
    """Isolation-Forest anomaly detector wrapped in a scaling pipeline.

    Features are standardised (zero mean, unit variance) before being fed
    to the forest.  The sklearn convention is kept: ``predict`` returns
    -1 for anomalies and 1 for normal samples.
    """

    def __init__(self, contamination: float = 0.05, n_estimators: int = 100):
        # contamination is the expected anomaly fraction; random_state pins
        # the forest for reproducible results, n_jobs=-1 uses all cores.
        forest = IsolationForest(
            n_estimators=n_estimators,
            contamination=contamination,
            random_state=42,
            n_jobs=-1
        )
        self.pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('detector', forest),
        ])

    def fit(self, X: np.ndarray):
        """Fit scaler and forest on X; returns self for chaining."""
        self.pipeline.fit(X)
        return self

    def predict(self, X: np.ndarray):
        """Return ``(labels, scores)``.

        labels follow sklearn's convention (-1 anomaly, 1 normal); scores
        are the raw decision-function values (lower = more anomalous).
        """
        return self.pipeline.predict(X), self.pipeline.decision_function(X)

    def predict_proba(self, X: np.ndarray):
        """Squash decision scores through a logistic curve.

        Values near 1 indicate likely anomalies (negative decision scores),
        values near 0 indicate normal samples.
        """
        raw = self.pipeline.decision_function(X)
        return 1 / (1 + np.exp(raw))  # Convert to probability

    def save(self, path: str):
        """Persist the fitted pipeline to *path* with joblib."""
        joblib.dump(self.pipeline, path)

    @classmethod
    def load(cls, path: str):
        """Rebuild a detector from a pipeline previously stored by save()."""
        detector = cls()
        detector.pipeline = joblib.load(path)
        return detector
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### One-Class SVM & LOF
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from sklearn.svm import OneClassSVM
|
|
82
|
+
from sklearn.neighbors import LocalOutlierFactor
|
|
83
|
+
from sklearn.preprocessing import RobustScaler
|
|
84
|
+
|
|
85
|
+
# One-Class SVM (robust to outliers)
from sklearn.pipeline import Pipeline  # FIX: not in this example's imports above

class RobustDetector:
    """One-Class SVM behind a RobustScaler.

    RobustScaler (median / IQR) is used instead of StandardScaler because
    the training data is expected to contain outliers that would skew
    mean/std based scaling.
    """

    def __init__(self, nu: float = 0.05):
        # nu upper-bounds the fraction of training errors, i.e. the
        # expected outlier rate in the training set.
        self.pipeline = Pipeline([
            ('scaler', RobustScaler()),
            ('svm', OneClassSVM(nu=nu, kernel='rbf', gamma='scale'))
        ])

    def fit(self, X):
        """Fit on (assumed mostly-normal) data; returns self for chaining."""
        self.pipeline.fit(X)
        return self

    def predict(self, X):
        """Return -1 for outliers and 1 for inliers (sklearn convention)."""
        return self.pipeline.predict(X)
|
|
99
|
+
|
|
100
|
+
# Local Outlier Factor (streaming)
class StreamingLOF:
    """LOF detector configured for novelty mode.

    With ``novelty=True`` the model is fit once on reference data and can
    then score previously unseen samples via ``predict`` — suitable for a
    streaming setting.
    """

    def __init__(self, n_neighbors: int = 20, contamination: float = 0.05):
        self.lof = LocalOutlierFactor(
            n_neighbors=n_neighbors,
            contamination=contamination,
            novelty=True,  # Enable predict on new data
        )

    def fit(self, X):
        """Fit the neighbour model on reference data; returns self."""
        self.lof.fit(X)
        return self

    def predict(self, X):
        """Return -1 for outliers and 1 for inliers on new samples."""
        return self.lof.predict(X)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Classification
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
|
121
|
+
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
|
122
|
+
from sklearn.pipeline import Pipeline
|
|
123
|
+
from sklearn.metrics import classification_report
|
|
124
|
+
import pandas as pd
|
|
125
|
+
|
|
126
|
+
class EquipmentClassifier:
    """Multi-class classifier for equipment health states.

    Wraps a scaler + tree-ensemble pipeline together with a LabelEncoder so
    callers can work directly with the string labels in ``STATES``.
    """

    STATES = ['normal', 'degraded', 'maintenance_required', 'critical']

    def __init__(self, model_type: str = 'random_forest'):
        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(self.STATES)

        if model_type == 'random_forest':
            classifier = RandomForestClassifier(
                n_estimators=200, max_depth=10, min_samples_split=5,
                class_weight='balanced', n_jobs=-1, random_state=42
            )
        else:
            classifier = GradientBoostingClassifier(
                n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42
            )

        self.pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('classifier', classifier)
        ])

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit on features X and labels y (state strings or already-encoded ints)."""
        if isinstance(y[0], str):
            y = self.label_encoder.transform(y)
        self.pipeline.fit(X, y)
        return self

    def predict(self, X: np.ndarray):
        """Return predicted state names (strings)."""
        y_pred = self.pipeline.predict(X)
        return self.label_encoder.inverse_transform(y_pred)

    def predict_proba(self, X: np.ndarray) -> pd.DataFrame:
        """Return per-state probabilities, one correctly-labeled column per class.

        Bug fix: sklearn orders ``predict_proba`` columns by the fitted
        estimator's ``classes_`` (the encoded labels, i.e. the alphabetically
        sorted state names from LabelEncoder), NOT by ``STATES``. The previous
        code labeled the columns with ``STATES`` order, attaching each
        probability column to the wrong state name.
        """
        proba = self.pipeline.predict_proba(X)
        fitted_classes = self.pipeline.named_steps['classifier'].classes_
        column_names = self.label_encoder.inverse_transform(fitted_classes)
        return pd.DataFrame(proba, columns=column_names)

    def feature_importance(self, feature_names: list) -> pd.DataFrame:
        """Return features ranked by the fitted ensemble's importances."""
        importances = self.pipeline.named_steps['classifier'].feature_importances_
        return (pd.DataFrame({'feature': feature_names, 'importance': importances})
                .sort_values('importance', ascending=False))
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Feature Engineering
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
def extract_features(df: pd.DataFrame, window_size: int = 60, columns: list = None) -> pd.DataFrame:
    """Build rolling-window statistics for the given sensor columns.

    Parameters
    ----------
    df : frame with one row per timestamp and one column per sensor.
    window_size : rolling window length in rows.
    columns : sensor columns to featurize; defaults to the standard
        temperature/pressure/vibration/flow_rate set (backward compatible —
        previously this list was hard-coded).

    Returns only rows where every feature is defined (warm-up rows produced
    by rolling/diff are dropped).
    """
    if columns is None:
        columns = ['temperature', 'pressure', 'vibration', 'flow_rate']

    features = pd.DataFrame(index=df.index)

    for col in columns:
        rolling = df[col].rolling(window_size)
        features[f'{col}_mean'] = rolling.mean()
        features[f'{col}_std'] = rolling.std()
        features[f'{col}_min'] = rolling.min()
        features[f'{col}_max'] = rolling.max()
        features[f'{col}_range'] = features[f'{col}_max'] - features[f'{col}_min']
        features[f'{col}_diff'] = df[col].diff()
        features[f'{col}_diff_mean'] = features[f'{col}_diff'].rolling(window_size).mean()

    return features.dropna()
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Forecasting (Multi-step)
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
from sklearn.multioutput import MultiOutputRegressor
|
|
186
|
+
from sklearn.ensemble import GradientBoostingRegressor
|
|
187
|
+
|
|
188
|
+
class Forecaster:
    """Multi-step time-series forecaster over a fixed lookback window."""

    def __init__(self, horizon: int = 12, lookback: int = 24):
        self.horizon = horizon
        self.lookback = lookback
        regressor = MultiOutputRegressor(
            GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=42),
            n_jobs=-1
        )
        self.pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('regressor', regressor)
        ])

    def create_sequences(self, data: np.ndarray):
        """Slice data into (flattened lookback window, flattened horizon) pairs."""
        n_samples = len(data) - self.lookback - self.horizon + 1
        windows, targets = [], []
        for start in range(n_samples):
            split = start + self.lookback
            windows.append(data[start:split].flatten())
            targets.append(data[split:split + self.horizon].flatten())
        return np.array(windows), np.array(targets)

    def fit(self, data: np.ndarray):
        """Fit the pipeline on every (window, horizon) pair extracted from data."""
        X, y = self.create_sequences(data)
        self.pipeline.fit(X, y)
        return self

    def predict(self, recent_data: np.ndarray) -> np.ndarray:
        """Forecast the next `horizon` steps from the most recent lookback window."""
        flat_window = recent_data.flatten().reshape(1, -1)
        forecast = self.pipeline.predict(flat_window)
        return forecast.reshape(self.horizon, recent_data.shape[1])
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Clustering
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from sklearn.cluster import KMeans, DBSCAN
|
|
222
|
+
from sklearn.decomposition import PCA
|
|
223
|
+
from sklearn.metrics import silhouette_score
|
|
224
|
+
|
|
225
|
+
class ModeDetector:
    """Discover operating modes by clustering PCA-reduced, scaled features."""

    def __init__(self, n_modes: int = None, method: str = 'kmeans'):
        # n_modes=None means "choose k automatically at fit time".
        self.n_modes = n_modes
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)  # keep 95% of explained variance
        self.clusterer = None

    def fit(self, X: np.ndarray):
        """Scale, reduce, optionally auto-select k, then fit KMeans."""
        X_scaled = self.scaler.fit_transform(X)
        X_pca = self.pca.fit_transform(X_scaled)

        if self.n_modes is None:
            self.n_modes = self._find_optimal_k(X_pca)

        self.clusterer = KMeans(n_clusters=self.n_modes, n_init=10, random_state=42)
        self.clusterer.fit(X_pca)
        return self

    def predict(self, X: np.ndarray):
        """Assign each sample to a discovered mode."""
        X_pca = self.pca.transform(self.scaler.transform(X))
        return self.clusterer.predict(X_pca)

    def _find_optimal_k(self, X: np.ndarray, max_k: int = 10) -> int:
        """Pick k in [2, max_k] maximizing the silhouette score.

        Fix: seed each candidate KMeans (random_state=42) so the chosen k is
        deterministic, consistent with the seeded clusterer used in fit().
        """
        scores = [
            silhouette_score(X, KMeans(n_clusters=k, n_init=10, random_state=42).fit_predict(X))
            for k in range(2, max_k + 1)
        ]
        return int(np.argmax(scores)) + 2
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Hyperparameter Tuning
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
|
|
257
|
+
|
|
258
|
+
def tune_classifier(X: np.ndarray, y: np.ndarray):
    """Randomized hyperparameter search for a RandomForest with time-aware CV.

    Returns (best_estimator, best_params).
    """
    # TimeSeriesSplit keeps folds chronological — no leakage from the future.
    cv_strategy = TimeSeriesSplit(n_splits=5)

    search_space = {
        'n_estimators': [100, 200, 300],
        'max_depth': [5, 10, 15, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    }

    search = RandomizedSearchCV(
        RandomForestClassifier(random_state=42, n_jobs=-1),
        search_space,
        n_iter=50,
        cv=cv_strategy,
        scoring='f1_weighted',
        n_jobs=-1
    )
    search.fit(X, y)
    return search.best_estimator_, search.best_params_
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## Model Persistence
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
import json
from datetime import datetime, timezone
from pathlib import Path
|
|
286
|
+
|
|
287
|
+
def save_model(model, path: str, metadata: dict = None, feature_names: list = None):
    """Persist *model* with joblib plus a sidecar JSON metadata file.

    The metadata file sits next to the model (same stem, ``.json`` suffix)
    and records the save time, sklearn version, feature names, and any
    caller-supplied extras (which may override the defaults).
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, path)

    meta = {
        # Timezone-aware UTC; datetime.utcnow() is deprecated (Python 3.12+)
        # and produced a naive timestamp.
        'saved_at': datetime.now(timezone.utc).isoformat(),
        'sklearn_version': __import__('sklearn').__version__,
        'feature_names': feature_names,
        **(metadata or {})
    }
    path.with_suffix('.json').write_text(json.dumps(meta, indent=2))
|
|
299
|
+
|
|
300
|
+
def load_model(path: str):
    """Load a joblib model plus its optional JSON metadata sidecar.

    Returns (model, metadata); metadata is {} when no sidecar exists.
    """
    model_path = Path(path)
    model = joblib.load(model_path)
    sidecar = model_path.with_suffix('.json')
    metadata = json.loads(sidecar.read_text()) if sidecar.exists() else {}
    return model, metadata
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Best Practices
|
|
309
|
+
|
|
310
|
+
1. **Use pipelines** - Combine preprocessing and model for reproducibility
|
|
311
|
+
2. **Handle imbalance** - SMOTE or `class_weight='balanced'`
|
|
312
|
+
3. **Time series CV** - Use `TimeSeriesSplit` instead of random splits
|
|
313
|
+
4. **Feature importance** - Analyze with `.feature_importances_`
|
|
314
|
+
5. **Version models** - Save with metadata (version, features, metrics)
|
|
315
|
+
|
|
316
|
+
## Related Skills
|
|
317
|
+
|
|
318
|
+
- `mlflow`: Experiment tracking
|
|
319
|
+
- `pytorch`: Deep learning alternative
|
|
320
|
+
- `duckdb-analytics`: Data preprocessing
|
|
321
|
+
- `onnx-inference`: Model deployment
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: vector-db
|
|
3
|
+
description: >
|
|
4
|
+
Vector databases for RAG and semantic search with ChromaDB and pgvector.
|
|
5
|
+
Trigger: vector database, embeddings, rag, semantic search, chromadb, pgvector
|
|
6
|
+
tools:
|
|
7
|
+
- Read
|
|
8
|
+
- Write
|
|
9
|
+
- Bash
|
|
10
|
+
- Grep
|
|
11
|
+
metadata:
|
|
12
|
+
author: plataforma-industrial
|
|
13
|
+
version: "2.0"
|
|
14
|
+
tags: [vector-db, rag, embeddings, semantic-search, chromadb, pgvector]
|
|
15
|
+
updated: "2026-02"
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
# Vector Database Skill
|
|
19
|
+
|
|
20
|
+
Vector databases for RAG and semantic search.
|
|
21
|
+
|
|
22
|
+
## Stack
|
|
23
|
+
|
|
24
|
+
```yaml
|
|
25
|
+
# Primary
|
|
26
|
+
chromadb: 0.4+
|
|
27
|
+
pgvector: 0.6+ # PostgreSQL extension
|
|
28
|
+
|
|
29
|
+
# Alternatives
|
|
30
|
+
qdrant-client: 1.7+
|
|
31
|
+
pinecone-client: 3.0+
|
|
32
|
+
|
|
33
|
+
# Embeddings
|
|
34
|
+
openai: 1.12+
|
|
35
|
+
sentence-transformers: 2.5+
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## ChromaDB
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import chromadb
|
|
42
|
+
from chromadb.config import Settings
|
|
43
|
+
from chromadb.utils import embedding_functions
|
|
44
|
+
import os
|
|
45
|
+
|
|
46
|
+
class ChromaVectorStore:
    """Persistent ChromaDB collection with OpenAI embeddings (cosine space)."""

    def __init__(self, collection_name: str = "documents", persist_dir: str = "./chroma_db"):
        self.client = chromadb.PersistentClient(
            path=persist_dir,
            settings=Settings(anonymized_telemetry=False)
        )

        self.embedding_fn = embedding_functions.OpenAIEmbeddingFunction(
            api_key=os.getenv("OPENAI_API_KEY"),
            model_name="text-embedding-3-small"
        )

        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_fn,
            metadata={"hnsw:space": "cosine"}
        )

    def add_documents(self, documents: list[str], metadatas: list[dict] = None, ids: list[str] = None):
        """Add documents; auto-generated ids are unique across calls.

        Bug fix: the previous default ids ("doc_0", "doc_1", ...) restarted
        at 0 on every call, so a second add_documents() silently collided
        with / upserted over the first batch. Generate UUID-based ids when
        the caller supplies none.
        """
        if ids is None:
            import uuid  # local import: only needed on this fallback path
            ids = [f"doc_{uuid.uuid4().hex}" for _ in documents]
        self.collection.add(documents=documents, metadatas=metadatas, ids=ids)

    def query(self, query_text: str, n_results: int = 5, where: dict = None):
        """Semantic search; returns documents, metadatas and cosine distances."""
        return self.collection.query(
            query_texts=[query_text],
            n_results=n_results,
            where=where,
            include=["documents", "metadatas", "distances"]
        )

    def delete(self, ids: list[str] = None, where: dict = None):
        """Delete by explicit ids, or by metadata filter when ids is falsy."""
        if ids:
            self.collection.delete(ids=ids)
        elif where:
            self.collection.delete(where=where)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## pgvector (PostgreSQL)
|
|
85
|
+
|
|
86
|
+
### Schema
|
|
87
|
+
|
|
88
|
+
```sql
|
|
89
|
+
CREATE EXTENSION IF NOT EXISTS vector;
|
|
90
|
+
|
|
91
|
+
CREATE TABLE documents (
|
|
92
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
93
|
+
content TEXT NOT NULL,
|
|
94
|
+
embedding vector(1536), -- OpenAI dimension
|
|
95
|
+
metadata JSONB DEFAULT '{}',
|
|
96
|
+
tenant_id UUID NOT NULL,
|
|
97
|
+
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
98
|
+
);
|
|
99
|
+
|
|
100
|
+
-- HNSW index for fast search
|
|
101
|
+
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
|
|
102
|
+
CREATE INDEX idx_documents_tenant ON documents (tenant_id);
|
|
103
|
+
CREATE INDEX idx_documents_metadata ON documents USING gin (metadata);
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Python Client
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import asyncpg
|
|
110
|
+
from pgvector.asyncpg import register_vector
|
|
111
|
+
import numpy as np
|
|
112
|
+
from openai import AsyncOpenAI
|
|
113
|
+
|
|
114
|
+
class PgVectorStore:
    """asyncpg-backed vector store on pgvector, embedding text via OpenAI."""

    def __init__(self, pool: asyncpg.Pool, embedding_model: str = "text-embedding-3-small"):
        self.pool = pool
        self.openai = AsyncOpenAI()  # picks up OPENAI_API_KEY from the environment
        self.embedding_model = embedding_model

    async def setup(self):
        """Register the pgvector codec on a pool connection.

        NOTE(review): this registers the codec on ONE pooled connection only;
        other connections in the pool won't have it. Consider passing
        init=register_vector to asyncpg.create_pool instead — confirm.
        """
        async with self.pool.acquire() as conn:
            await register_vector(conn)

    async def embed(self, texts: list[str]) -> list[np.ndarray]:
        """Embed texts with the OpenAI API; returns one vector per input."""
        response = await self.openai.embeddings.create(model=self.embedding_model, input=texts)
        return [np.array(e.embedding) for e in response.data]

    async def add_documents(self, contents: list[str], metadatas: list[dict], tenant_id: str) -> list[str]:
        """Embed and insert documents; returns the new row ids as strings.

        NOTE(review): metadata is passed as a dict for a JSONB column —
        asyncpg expects a JSON string by default unless a codec is set up;
        verify against the pool configuration.
        """
        embeddings = await self.embed(contents)
        ids = []
        async with self.pool.acquire() as conn:
            # One INSERT per document; all rows are scoped to tenant_id.
            for content, embedding, metadata in zip(contents, embeddings, metadatas):
                row = await conn.fetchrow(
                    "INSERT INTO documents (content, embedding, metadata, tenant_id) VALUES ($1, $2, $3, $4) RETURNING id",
                    content, embedding, metadata, tenant_id
                )
                ids.append(str(row['id']))
        return ids

    async def similarity_search(self, query: str, tenant_id: str, k: int = 5, metadata_filter: dict = None):
        """Top-k cosine search (similarity = 1 - distance), optionally filtered by JSONB containment."""
        query_embedding = (await self.embed([query]))[0]

        # <=> is pgvector's cosine-distance operator.
        sql = """
        SELECT id, content, metadata, 1 - (embedding <=> $1) AS similarity
        FROM documents WHERE tenant_id = $2
        """
        params = [query_embedding, tenant_id]

        if metadata_filter:
            # JSONB containment: row metadata must contain the filter document.
            sql += " AND metadata @> $3"
            params.append(metadata_filter)

        # The LIMIT placeholder index depends on whether the filter was appended.
        sql += f" ORDER BY embedding <=> $1 LIMIT ${len(params) + 1}"
        params.append(k)

        async with self.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)

        return [{"id": str(r['id']), "content": r['content'], "metadata": dict(r['metadata']), "similarity": float(r['similarity'])} for r in rows]

    async def hybrid_search(self, query: str, tenant_id: str, k: int = 5, keyword_weight: float = 0.3):
        """Blend vector similarity with full-text rank, weighted by keyword_weight."""
        query_embedding = (await self.embed([query]))[0]

        # The vector CTE over-fetches (k * 2) so the keyword join can re-rank
        # before the final LIMIT $3 cut.
        sql = """
        WITH vector_search AS (
            SELECT id, content, metadata, 1 - (embedding <=> $1) AS vector_score
            FROM documents WHERE tenant_id = $2
            ORDER BY embedding <=> $1 LIMIT $3 * 2
        ),
        keyword_search AS (
            SELECT id, ts_rank(to_tsvector('english', content), plainto_tsquery('english', $4)) AS keyword_score
            FROM documents WHERE tenant_id = $2 AND to_tsvector('english', content) @@ plainto_tsquery('english', $4)
        )
        SELECT v.id, v.content, v.metadata,
               (1 - $5) * v.vector_score + $5 * COALESCE(k.keyword_score, 0) AS combined_score
        FROM vector_search v LEFT JOIN keyword_search k ON v.id = k.id
        ORDER BY combined_score DESC LIMIT $3
        """

        async with self.pool.acquire() as conn:
            rows = await conn.fetch(sql, query_embedding, tenant_id, k, query, keyword_weight)

        return [{"id": str(r['id']), "content": r['content'], "metadata": dict(r['metadata']), "score": float(r['combined_score'])} for r in rows]
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Document Chunking
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
|
|
190
|
+
|
|
191
|
+
def chunk_documents(documents: list[dict], chunk_size: int = 1000, chunk_overlap: int = 200) -> list[dict]:
    """Split each document's content into overlapping chunks with provenance metadata."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ". ", " ", ""]
    )

    result = []
    for doc in documents:
        base_meta = doc.get("metadata", {})
        source_id = doc.get("id")
        for index, piece in enumerate(splitter.split_text(doc["content"])):
            result.append({
                "content": piece,
                "metadata": {**base_meta, "source_id": source_id, "chunk_index": index}
            })
    return result
|
|
206
|
+
|
|
207
|
+
def chunk_markdown(content: str, source_metadata: dict = None) -> list[dict]:
    """Split markdown by headers (h1-h3), then by size for oversized sections."""
    header_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=[("#", "h1"), ("##", "h2"), ("###", "h3")]
    )
    size_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

    result = []
    for section in header_splitter.split_text(content):
        text = section.page_content
        meta = {**(source_metadata or {}), **section.metadata}
        if len(text) > 1000:
            # Section exceeds one chunk: sub-split by size and tag each piece.
            for sub_index, piece in enumerate(size_splitter.split_text(text)):
                result.append({"content": piece, "metadata": {**meta, "sub_chunk": sub_index}})
        else:
            result.append({"content": text, "metadata": meta})
    return result
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## RAG Chain
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
from openai import AsyncOpenAI
|
|
228
|
+
|
|
229
|
+
class RAGChain:
    """Retrieval-augmented QA: fetch top-k docs, then answer grounded in them."""

    def __init__(self, vector_store: PgVectorStore, model: str = "gpt-4-turbo-preview", k: int = 5):
        self.vector_store = vector_store
        self.openai = AsyncOpenAI()
        self.model = model
        self.k = k

    async def query(self, question: str, tenant_id: str, system_prompt: str = None, metadata_filter: dict = None):
        """Answer *question* from retrieved context; returns answer, sources, docs."""
        docs = await self.vector_store.similarity_search(
            query=question, tenant_id=tenant_id, k=self.k, metadata_filter=metadata_filter
        )

        # Label every retrieved chunk with its source so the model can cite it.
        sections = [
            f"[Source: {d['metadata'].get('source', 'Unknown')}]\n{d['content']}"
            for d in docs
        ]
        context = "\n\n---\n\n".join(sections)

        if system_prompt is None:
            system_prompt = "Answer based on the provided context. If not in context, say so. Cite sources."

        # temperature=0 keeps answers deterministic.
        response = await self.openai.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
            ],
            temperature=0
        )

        answer_text = response.choices[0].message.content
        return {
            "answer": answer_text,
            "sources": [d['metadata'].get('source') for d in docs],
            "docs": docs
        }
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## Indexing Pipeline
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
class IndexingPipeline:
    """Chunk documents and push them to the vector store in batches."""

    def __init__(self, vector_store: PgVectorStore, chunk_size: int = 1000, batch_size: int = 100):
        self.vector_store = vector_store
        self.chunk_size = chunk_size
        self.batch_size = batch_size

    async def index_documents(self, documents: list[dict], tenant_id: str) -> int:
        """Chunk, embed, and insert documents; returns the chunk count indexed."""
        chunks = chunk_documents(documents, chunk_size=self.chunk_size)
        total = 0

        for start in range(0, len(chunks), self.batch_size):
            batch = chunks[start:start + self.batch_size]
            await self.vector_store.add_documents(
                contents=[c['content'] for c in batch],
                metadatas=[c['metadata'] for c in batch],
                tenant_id=tenant_id
            )
            total += len(batch)
            print(f"Indexed {total}/{len(chunks)}")

        return total
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## Best Practices
|
|
289
|
+
|
|
290
|
+
1. **Chunk size** - Technical docs: 1000-1500, FAQs: 300-500
|
|
291
|
+
2. **Include overlap** - 10-20% overlap for context continuity
|
|
292
|
+
3. **Use metadata** - Filter by type, source, date
|
|
293
|
+
4. **Hybrid search** - Combine vector + keyword for better recall
|
|
294
|
+
5. **Re-rank results** - Cross-encoder for precision: `cross-encoder/ms-marco-MiniLM-L-6-v2`
|
|
295
|
+
|
|
296
|
+
## Related Skills
|
|
297
|
+
|
|
298
|
+
- `langchain`: RAG implementation
|
|
299
|
+
- `ai-ml`: Embedding generation
|
|
300
|
+
- `fastapi`: Vector search API
|
|
301
|
+
- `redis-cache`: Hybrid search caching
|