mega-brain-ai 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.antigravity/README.md +46 -0
- package/.antigravity/rules/.gitkeep +0 -0
- package/.antigravity/rules/mega-brain.md +39 -0
- package/.claude/CLAUDE.md +172 -0
- package/.claude/agents.yaml +44 -0
- package/.claude/commands/agents.md +161 -0
- package/.claude/commands/ask.md +117 -0
- package/.claude/commands/benchmark.md +224 -0
- package/.claude/commands/chat.md +343 -0
- package/.claude/commands/compare.md +116 -0
- package/.claude/commands/conclave.md +196 -0
- package/.claude/commands/config.md +133 -0
- package/.claude/commands/create-agent.md +467 -0
- package/.claude/commands/debate.md +159 -0
- package/.claude/commands/documentation/create-architecture-documentation.md +175 -0
- package/.claude/commands/dossiers.md +180 -0
- package/.claude/commands/evolve.md +223 -0
- package/.claude/commands/extract-dna.md +172 -0
- package/.claude/commands/extract-knowledge.md +507 -0
- package/.claude/commands/gsd/add-phase.md +43 -0
- package/.claude/commands/gsd/add-tests.md +41 -0
- package/.claude/commands/gsd/add-todo.md +47 -0
- package/.claude/commands/gsd/audit-milestone.md +36 -0
- package/.claude/commands/gsd/check-todos.md +45 -0
- package/.claude/commands/gsd/cleanup.md +18 -0
- package/.claude/commands/gsd/complete-milestone.md +136 -0
- package/.claude/commands/gsd/debug.md +167 -0
- package/.claude/commands/gsd/discuss-phase.md +83 -0
- package/.claude/commands/gsd/execute-phase.md +41 -0
- package/.claude/commands/gsd/health.md +22 -0
- package/.claude/commands/gsd/help.md +22 -0
- package/.claude/commands/gsd/insert-phase.md +32 -0
- package/.claude/commands/gsd/join-discord.md +18 -0
- package/.claude/commands/gsd/list-phase-assumptions.md +46 -0
- package/.claude/commands/gsd/map-codebase.md +71 -0
- package/.claude/commands/gsd/new-milestone.md +44 -0
- package/.claude/commands/gsd/new-project.md +42 -0
- package/.claude/commands/gsd/pause-work.md +38 -0
- package/.claude/commands/gsd/plan-milestone-gaps.md +34 -0
- package/.claude/commands/gsd/plan-phase.md +45 -0
- package/.claude/commands/gsd/progress.md +24 -0
- package/.claude/commands/gsd/quick.md +41 -0
- package/.claude/commands/gsd/reapply-patches.md +110 -0
- package/.claude/commands/gsd/remove-phase.md +31 -0
- package/.claude/commands/gsd/research-phase.md +189 -0
- package/.claude/commands/gsd/resume-work.md +40 -0
- package/.claude/commands/gsd/set-profile.md +34 -0
- package/.claude/commands/gsd/settings.md +36 -0
- package/.claude/commands/gsd/update.md +37 -0
- package/.claude/commands/gsd/verify-work.md +38 -0
- package/.claude/commands/inbox.md +296 -0
- package/.claude/commands/ingest-empresa.md +191 -0
- package/.claude/commands/ingest.md +183 -0
- package/.claude/commands/jarvis-briefing.md +67 -0
- package/.claude/commands/jarvis-control.md +169 -0
- package/.claude/commands/jarvis-full.md +182 -0
- package/.claude/commands/jarvis.md +212 -0
- package/.claude/commands/ler-drive.md +212 -0
- package/.claude/commands/log.md +158 -0
- package/.claude/commands/loop.md +133 -0
- package/.claude/commands/loops.md +73 -0
- package/.claude/commands/mission-autopilot.md +538 -0
- package/.claude/commands/mission.md +353 -0
- package/.claude/commands/process-inbox.md +148 -0
- package/.claude/commands/process-jarvis.md +3036 -0
- package/.claude/commands/process-video.md +131 -0
- package/.claude/commands/rag-search.md +78 -0
- package/.claude/commands/resume.md +33 -0
- package/.claude/commands/save.md +38 -0
- package/.claude/commands/scan-inbox.md +125 -0
- package/.claude/commands/setup.md +99 -0
- package/.claude/commands/system-digest.md +243 -0
- package/.claude/commands/verify.md +182 -0
- package/.claude/commands/view-dna.md +169 -0
- package/.claude/get-shit-done/VERSION +1 -0
- package/.claude/get-shit-done/bin/gsd-tools.cjs +588 -0
- package/.claude/get-shit-done/bin/lib/commands.cjs +553 -0
- package/.claude/get-shit-done/bin/lib/config.cjs +162 -0
- package/.claude/get-shit-done/bin/lib/core.cjs +411 -0
- package/.claude/get-shit-done/bin/lib/frontmatter.cjs +299 -0
- package/.claude/get-shit-done/bin/lib/init.cjs +710 -0
- package/.claude/get-shit-done/bin/lib/milestone.cjs +216 -0
- package/.claude/get-shit-done/bin/lib/phase.cjs +871 -0
- package/.claude/get-shit-done/bin/lib/roadmap.cjs +298 -0
- package/.claude/get-shit-done/bin/lib/state.cjs +679 -0
- package/.claude/get-shit-done/bin/lib/template.cjs +222 -0
- package/.claude/get-shit-done/bin/lib/verify.cjs +773 -0
- package/.claude/get-shit-done/references/checkpoints.md +776 -0
- package/.claude/get-shit-done/references/continuation-format.md +249 -0
- package/.claude/get-shit-done/references/decimal-phase-calculation.md +65 -0
- package/.claude/get-shit-done/references/git-integration.md +248 -0
- package/.claude/get-shit-done/references/git-planning-commit.md +38 -0
- package/.claude/get-shit-done/references/model-profile-resolution.md +34 -0
- package/.claude/get-shit-done/references/model-profiles.md +92 -0
- package/.claude/get-shit-done/references/phase-argument-parsing.md +61 -0
- package/.claude/get-shit-done/references/planning-config.md +196 -0
- package/.claude/get-shit-done/references/questioning.md +145 -0
- package/.claude/get-shit-done/references/tdd.md +263 -0
- package/.claude/get-shit-done/references/ui-brand.md +160 -0
- package/.claude/get-shit-done/references/verification-patterns.md +612 -0
- package/.claude/get-shit-done/templates/DEBUG.md +164 -0
- package/.claude/get-shit-done/templates/UAT.md +247 -0
- package/.claude/get-shit-done/templates/VALIDATION.md +76 -0
- package/.claude/get-shit-done/templates/codebase/architecture.md +255 -0
- package/.claude/get-shit-done/templates/codebase/concerns.md +310 -0
- package/.claude/get-shit-done/templates/codebase/conventions.md +307 -0
- package/.claude/get-shit-done/templates/codebase/integrations.md +280 -0
- package/.claude/get-shit-done/templates/codebase/stack.md +186 -0
- package/.claude/get-shit-done/templates/codebase/structure.md +285 -0
- package/.claude/get-shit-done/templates/codebase/testing.md +480 -0
- package/.claude/get-shit-done/templates/config.json +37 -0
- package/.claude/get-shit-done/templates/context.md +283 -0
- package/.claude/get-shit-done/templates/continue-here.md +78 -0
- package/.claude/get-shit-done/templates/debug-subagent-prompt.md +91 -0
- package/.claude/get-shit-done/templates/discovery.md +146 -0
- package/.claude/get-shit-done/templates/milestone-archive.md +123 -0
- package/.claude/get-shit-done/templates/milestone.md +115 -0
- package/.claude/get-shit-done/templates/phase-prompt.md +569 -0
- package/.claude/get-shit-done/templates/planner-subagent-prompt.md +117 -0
- package/.claude/get-shit-done/templates/project.md +184 -0
- package/.claude/get-shit-done/templates/requirements.md +231 -0
- package/.claude/get-shit-done/templates/research-project/ARCHITECTURE.md +204 -0
- package/.claude/get-shit-done/templates/research-project/FEATURES.md +147 -0
- package/.claude/get-shit-done/templates/research-project/PITFALLS.md +200 -0
- package/.claude/get-shit-done/templates/research-project/STACK.md +120 -0
- package/.claude/get-shit-done/templates/research-project/SUMMARY.md +170 -0
- package/.claude/get-shit-done/templates/research.md +552 -0
- package/.claude/get-shit-done/templates/retrospective.md +54 -0
- package/.claude/get-shit-done/templates/roadmap.md +202 -0
- package/.claude/get-shit-done/templates/state.md +176 -0
- package/.claude/get-shit-done/templates/summary-complex.md +59 -0
- package/.claude/get-shit-done/templates/summary-minimal.md +41 -0
- package/.claude/get-shit-done/templates/summary-standard.md +48 -0
- package/.claude/get-shit-done/templates/summary.md +248 -0
- package/.claude/get-shit-done/templates/user-setup.md +311 -0
- package/.claude/get-shit-done/templates/verification-report.md +322 -0
- package/.claude/get-shit-done/workflows/add-phase.md +111 -0
- package/.claude/get-shit-done/workflows/add-tests.md +350 -0
- package/.claude/get-shit-done/workflows/add-todo.md +157 -0
- package/.claude/get-shit-done/workflows/audit-milestone.md +297 -0
- package/.claude/get-shit-done/workflows/check-todos.md +176 -0
- package/.claude/get-shit-done/workflows/cleanup.md +152 -0
- package/.claude/get-shit-done/workflows/complete-milestone.md +763 -0
- package/.claude/get-shit-done/workflows/diagnose-issues.md +219 -0
- package/.claude/get-shit-done/workflows/discovery-phase.md +289 -0
- package/.claude/get-shit-done/workflows/discuss-phase.md +542 -0
- package/.claude/get-shit-done/workflows/execute-phase.md +449 -0
- package/.claude/get-shit-done/workflows/execute-plan.md +448 -0
- package/.claude/get-shit-done/workflows/health.md +156 -0
- package/.claude/get-shit-done/workflows/help.md +489 -0
- package/.claude/get-shit-done/workflows/insert-phase.md +129 -0
- package/.claude/get-shit-done/workflows/list-phase-assumptions.md +178 -0
- package/.claude/get-shit-done/workflows/map-codebase.md +315 -0
- package/.claude/get-shit-done/workflows/new-milestone.md +382 -0
- package/.claude/get-shit-done/workflows/new-project.md +1116 -0
- package/.claude/get-shit-done/workflows/pause-work.md +122 -0
- package/.claude/get-shit-done/workflows/plan-milestone-gaps.md +274 -0
- package/.claude/get-shit-done/workflows/plan-phase.md +569 -0
- package/.claude/get-shit-done/workflows/progress.md +381 -0
- package/.claude/get-shit-done/workflows/quick.md +453 -0
- package/.claude/get-shit-done/workflows/remove-phase.md +154 -0
- package/.claude/get-shit-done/workflows/research-phase.md +73 -0
- package/.claude/get-shit-done/workflows/resume-project.md +306 -0
- package/.claude/get-shit-done/workflows/set-profile.md +80 -0
- package/.claude/get-shit-done/workflows/settings.md +213 -0
- package/.claude/get-shit-done/workflows/transition.md +544 -0
- package/.claude/get-shit-done/workflows/update.md +219 -0
- package/.claude/get-shit-done/workflows/verify-phase.md +242 -0
- package/.claude/get-shit-done/workflows/verify-work.md +569 -0
- package/.claude/gsd-file-manifest.json +144 -0
- package/.claude/hooks/agent_creation_trigger.py +168 -0
- package/.claude/hooks/agent_index_updater.py +255 -0
- package/.claude/hooks/agent_memory_persister.py +203 -0
- package/.claude/hooks/claude_md_agent_sync.py +162 -0
- package/.claude/hooks/claude_md_guard.py +154 -0
- package/.claude/hooks/continuous_save.py +414 -0
- package/.claude/hooks/creation_validator.py +360 -0
- package/.claude/hooks/enforce_dual_location.py +501 -0
- package/.claude/hooks/enforce_plan_mode.py +220 -0
- package/.claude/hooks/gsd-check-update.js +62 -0
- package/.claude/hooks/gsd-context-monitor.js +122 -0
- package/.claude/hooks/gsd-statusline.js +108 -0
- package/.claude/hooks/inbox_age_alert.py +367 -0
- package/.claude/hooks/ledger_updater.py +303 -0
- package/.claude/hooks/memory_hints_injector.py +251 -0
- package/.claude/hooks/memory_updater.py +202 -0
- package/.claude/hooks/notification_system.py +115 -0
- package/.claude/hooks/pending_tracker.py +188 -0
- package/.claude/hooks/pipeline_checkpoint.py +583 -0
- package/.claude/hooks/post_batch_cascading.py +1740 -0
- package/.claude/hooks/post_tool_use.py +120 -0
- package/.claude/hooks/quality_watchdog.py +394 -0
- package/.claude/hooks/ralph_wiggum.py +286 -0
- package/.claude/hooks/session-source-sync.py +223 -0
- package/.claude/hooks/session_autosave_v2.py +1135 -0
- package/.claude/hooks/session_end.py +203 -0
- package/.claude/hooks/session_start.py +939 -0
- package/.claude/hooks/skill_indexer.py +48 -0
- package/.claude/hooks/skill_router.py +358 -0
- package/.claude/hooks/stop_hook_completeness.py +187 -0
- package/.claude/hooks/user_prompt_submit.py +125 -0
- package/.claude/package.json +1 -0
- package/.claude/rules/ANTHROPIC-STANDARDS.md +384 -0
- package/.claude/rules/CLAUDE-LITE.md +201 -0
- package/.claude/rules/RULE-GROUP-1.md +320 -0
- package/.claude/rules/RULE-GROUP-2.md +307 -0
- package/.claude/rules/RULE-GROUP-3.md +248 -0
- package/.claude/rules/RULE-GROUP-4.md +427 -0
- package/.claude/rules/RULE-GROUP-5.md +388 -0
- package/.claude/rules/RULE-GROUP-6.md +387 -0
- package/.claude/rules/RULE-GSD-MANDATORY.md +106 -0
- package/.claude/rules/agent-cognition.md +779 -0
- package/.claude/rules/agent-integrity.md +692 -0
- package/.claude/rules/epistemic-standards.md +333 -0
- package/.claude/rules/logging.md +53 -0
- package/.claude/rules/mcp-governance.md +128 -0
- package/.claude/rules/pipeline.md +60 -0
- package/.claude/rules/state-management.md +93 -0
- package/.claude/scripts/apply-tags.py +77 -0
- package/.claude/scripts/batch-extract-transcriptions.py +132 -0
- package/.claude/scripts/build-complete-index.py +250 -0
- package/.claude/scripts/build-planilha-index.py +170 -0
- package/.claude/scripts/complete-tag-matching.py +250 -0
- package/.claude/scripts/deduplicate-inbox.py +139 -0
- package/.claude/scripts/docx-xml-extractor.py +141 -0
- package/.claude/scripts/extract-docx-text.py +58 -0
- package/.claude/scripts/extract-single-transcription.py +74 -0
- package/.claude/scripts/extract_docx_from_gdrive.py +77 -0
- package/.claude/scripts/jarvis_orchestrator.py +5 -0
- package/.claude/scripts/organized-downloader.py +246 -0
- package/.claude/scripts/planilha-tagger.py +187 -0
- package/.claude/scripts/revert-tags.py +70 -0
- package/.claude/scripts/source-sync.py +265 -0
- package/.claude/scripts/tag-inbox-files.py +276 -0
- package/.claude/scripts/tag-inbox-v2.py +253 -0
- package/.claude/scripts/test-extraction.py +35 -0
- package/.claude/scripts/test-full-extraction.py +74 -0
- package/.claude/scripts/validate_cascading_integrity.py +409 -0
- package/.claude/settings.json +215 -0
- package/.claude/skills/DETECTION-PROTOCOL.md +217 -0
- package/.claude/skills/README.md +240 -0
- package/.claude/skills/SKILL-REGISTRY.md +283 -0
- package/.claude/skills/SKILL-SUGGESTIONS.md +114 -0
- package/.claude/skills/_TEMPLATES/SKILL-WRITER-GUIDE.md +385 -0
- package/.claude/skills/agent-creation/SKILL.md +374 -0
- package/.claude/skills/ask-company/SKILL.md +198 -0
- package/.claude/skills/brainstorming/SKILL.md +72 -0
- package/.claude/skills/chronicler/SKILL.md +146 -0
- package/.claude/skills/chronicler/chronicler_core.py +468 -0
- package/.claude/skills/code-review/SKILL.md +160 -0
- package/.claude/skills/convert-to-company-docs/SKILL.md +68 -0
- package/.claude/skills/convert-to-company-docs/convert.py +532 -0
- package/.claude/skills/dispatching-parallel-agents/SKILL.md +193 -0
- package/.claude/skills/docs-megabrain/SKILL.md +251 -0
- package/.claude/skills/executing-plans/SKILL.md +114 -0
- package/.claude/skills/executor/SKILL.md +161 -0
- package/.claude/skills/fase-2-5-tagging/SKILL.md +182 -0
- package/.claude/skills/feature-dev/SKILL.md +154 -0
- package/.claude/skills/frontend-design/SKILL.md +165 -0
- package/.claude/skills/gdrive-transcription-downloader/SKILL.md +249 -0
- package/.claude/skills/gemini-fallback/SKILL.md +67 -0
- package/.claude/skills/gemini-fallback/gemini_fetch.py +0 -0
- package/.claude/skills/gha/SKILL.md +96 -0
- package/.claude/skills/gha/gha_diagnostic.py +227 -0
- package/.claude/skills/github-workflow/SKILL.md +190 -0
- package/.claude/skills/hookify/SKILL.md +134 -0
- package/.claude/skills/hybrid-source-reading/SKILL.md +265 -0
- package/.claude/skills/jarvis/SKILL.md +546 -0
- package/.claude/skills/jarvis-briefing/SKILL.md +340 -0
- package/.claude/skills/knowledge-extraction/SKILL.md +318 -0
- package/.claude/skills/ler-planilha/SKILL.md +281 -0
- package/.claude/skills/pipeline-jarvis/SKILL.md +430 -0
- package/.claude/skills/plugin-dev/SKILL.md +176 -0
- package/.claude/skills/pr-review-toolkit/SKILL.md +178 -0
- package/.claude/skills/process-company-inbox/SKILL.md +183 -0
- package/.claude/skills/python-megabrain/SKILL.md +323 -0
- package/.claude/skills/resume/SKILL.md +61 -0
- package/.claude/skills/save/SKILL.md +87 -0
- package/.claude/skills/skill-creator-internal/SKILL.md +186 -0
- package/.claude/skills/skill-writer/SKILL.md +153 -0
- package/.claude/skills/skill-writer/examples.md +191 -0
- package/.claude/skills/skill-writer/troubleshooting.md +205 -0
- package/.claude/skills/smart-download-tagger/SKILL.md +148 -0
- package/.claude/skills/source-sync/SKILL.md +240 -0
- package/.claude/skills/sync-docs/SKILL.md +193 -0
- package/.claude/skills/sync-docs/config.json +37 -0
- package/.claude/skills/sync-docs/gdrive_sync.py +358 -0
- package/.claude/skills/sync-docs/reauth.py +71 -0
- package/.claude/skills/using-superpowers/SKILL.md +105 -0
- package/.claude/skills/verification-before-completion/SKILL.md +130 -0
- package/.claude/skills/verify/SKILL.md +154 -0
- package/.claude/skills/verify/verify_runner.py +0 -0
- package/.claude/skills/verify-6-levels/SKILL.md +234 -0
- package/.claude/skills/writing-plans/SKILL.md +184 -0
- package/.claude/templates/BATCH-LOG-TEMPLATE.md +221 -0
- package/.claudeignore +9 -0
- package/.cursor/agents.yaml +44 -0
- package/.cursor/rules/mega-brain.md +39 -0
- package/.gitattributes +19 -0
- package/.github/CODEOWNERS +8 -0
- package/.github/ISSUE_TEMPLATE/agent.md +96 -0
- package/.github/ISSUE_TEMPLATE/bug.md +67 -0
- package/.github/ISSUE_TEMPLATE/feature.md +56 -0
- package/.github/ISSUE_TEMPLATE/pipeline.md +70 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +30 -0
- package/.github/assets/banner.svg +152 -0
- package/.github/assets/logo-dark.svg +79 -0
- package/.github/assets/social-preview.png +0 -0
- package/.github/layer1-allowlist.txt +196 -0
- package/.github/layer2-manifest.txt +42 -0
- package/.github/layer3-manifest.txt +94 -0
- package/.github/workflows/claude-code-pr.yml +198 -0
- package/.github/workflows/claude-code-review.yml +57 -0
- package/.github/workflows/claude.yml +76 -0
- package/.github/workflows/publish-pro.yml +72 -0
- package/.github/workflows/publish.yml +86 -0
- package/.github/workflows/verification.yml +251 -0
- package/.gitignore +244 -0
- package/.gitleaks.toml +118 -0
- package/.windsurf/agents.yaml +44 -0
- package/.windsurf/rules/mega-brain.md +39 -0
- package/CONTRIBUTING.md +62 -0
- package/QUICK-START.md +231 -0
- package/README.md +168 -0
- package/agents/AGENT-INDEX.yaml +107 -0
- package/agents/MASTER-AGENT.md +612 -0
- package/agents/README.md +48 -0
- package/agents/_templates/INDEX.md +741 -0
- package/agents/_templates/TEMPLATE-AGENT-MD-ULTRA-ROBUSTO-V3.md +2399 -0
- package/agents/boardroom/CHECKLIST-MASTER.md +281 -0
- package/agents/boardroom/INTEGRATION-GUIDE.md +406 -0
- package/agents/boardroom/README.md +238 -0
- package/agents/boardroom/config/BOARDROOM-CONFIG.md +186 -0
- package/agents/boardroom/config/TTS-INTEGRATION.md +258 -0
- package/agents/boardroom/config/VOICE-PROFILES.md +624 -0
- package/agents/boardroom/scripts/audio_generator.py +375 -0
- package/agents/boardroom/scripts/audio_generator_edge.py +353 -0
- package/agents/boardroom/scripts/jarvis_boardroom_hook.py +415 -0
- package/agents/boardroom/scripts/notebooklm_generator.py +578 -0
- package/agents/boardroom/templates/EPISODE-TEMPLATE.md +367 -0
- package/agents/boardroom/templates/scene-templates/SCENE-AGENT-DEBATE.md +252 -0
- package/agents/boardroom/templates/scene-templates/SCENE-COUNCIL.md +270 -0
- package/agents/boardroom/templates/scene-templates/SCENE-DNA-CONSULTATION.md +126 -0
- package/agents/boardroom/templates/scene-templates/SCENE-QUESTION.md +174 -0
- package/agents/boardroom/workflows/WORKFLOW-AUDIO-GENERATION.md +421 -0
- package/agents/conclave/CRITIC.md +197 -0
- package/agents/conclave/DEVILS-ADVOCATE.md +274 -0
- package/agents/conclave/README.md +35 -0
- package/agents/conclave/SYNTHESIZER.md +293 -0
- package/agents/conclave/advogado-do-diabo/AGENT.md +502 -0
- package/agents/conclave/advogado-do-diabo/SOUL.md +100 -0
- package/agents/conclave/critico-metodologico/AGENT.md +683 -0
- package/agents/conclave/critico-metodologico/SOUL.md +107 -0
- package/agents/conclave/sintetizador/AGENT.md +571 -0
- package/agents/conclave/sintetizador/SOUL.md +94 -0
- package/agents/constitution/BASE-CONSTITUTION.md +254 -0
- package/agents/persona-registry.yaml +300 -0
- package/agents/sua-empresa/.gitkeep +0 -0
- package/agents/sua-empresa/README.md +44 -0
- package/agents/sua-empresa/_example/jds/EXAMPLE-JD.md +42 -0
- package/agents/sua-empresa/_example/org/EXAMPLE-ORG.md +32 -0
- package/agents/sua-empresa/_example/roles/EXAMPLE-ROLE.md +38 -0
- package/artifacts/README.md +11 -0
- package/artifacts/canonical/.gitkeep +0 -0
- package/artifacts/chunks/.gitkeep +0 -0
- package/artifacts/insights/.gitkeep +0 -0
- package/artifacts/narratives/.gitkeep +0 -0
- package/bin/cli.js +2 -0
- package/bin/lib/ascii-art.js +202 -0
- package/bin/lib/feature-gate.js +46 -0
- package/bin/lib/installer.js +593 -0
- package/bin/lib/license.js +59 -0
- package/bin/lib/pro-commands.js +75 -0
- package/bin/lib/setup-wizard.js +547 -0
- package/bin/lib/validate-email.js +113 -0
- package/bin/mega-brain.js +136 -0
- package/bin/pre-publish-gate.js +229 -0
- package/bin/push.js +1056 -0
- package/bin/templates/env.example +27 -0
- package/bin/utils/pro-detector.js +50 -0
- package/bin/validate-package.js +190 -0
- package/core/__init__.py +2 -0
- package/core/glossary/INDEX.md +63 -0
- package/core/glossary/digital.md +243 -0
- package/core/glossary/finance.md +49 -0
- package/core/glossary/marketing.md +69 -0
- package/core/glossary/operations.md +50 -0
- package/core/glossary/sales.md +690 -0
- package/core/intelligence/__init__.py +41 -0
- package/core/intelligence/agent_trigger.py +468 -0
- package/core/intelligence/audit_layers.py +491 -0
- package/core/intelligence/autonomous_processor.py +796 -0
- package/core/intelligence/bootstrap_registry.py +550 -0
- package/core/intelligence/business_model_detector.py +476 -0
- package/core/intelligence/dossier_trigger.py +336 -0
- package/core/intelligence/entity_normalizer.py +565 -0
- package/core/intelligence/org_chain_detector.py +411 -0
- package/core/intelligence/review_dashboard.py +338 -0
- package/core/intelligence/role_detector.py +855 -0
- package/core/intelligence/session_autosave.py +46 -0
- package/core/intelligence/skill_generator.py +601 -0
- package/core/intelligence/sow_generator.py +711 -0
- package/core/intelligence/sync_package_files.py +504 -0
- package/core/intelligence/task_orchestrator.py +780 -0
- package/core/intelligence/theme_analyzer.py +562 -0
- package/core/intelligence/tool_discovery.py +432 -0
- package/core/intelligence/validate_json_integrity.py +106 -0
- package/core/intelligence/validate_layers.py +310 -0
- package/core/intelligence/verify_classifications.py +94 -0
- package/core/intelligence/viability_scorer.py +592 -0
- package/core/jarvis/02-JARVIS-SOUL.md +390 -0
- package/core/jarvis/03-JARVIS-DNA.yaml +312 -0
- package/core/jarvis/AGENT.md +191 -0
- package/core/jarvis/agent-creator/AGENT.md +199 -0
- package/core/jarvis/agent-creator/SOUL.md +82 -0
- package/core/jarvis/agent-creator/tasks/create-agent.md +133 -0
- package/core/jarvis/agent-creator/tasks/sync-agents.md +100 -0
- package/core/jarvis/agent-creator/workflows/wf-create-agent.yaml +110 -0
- package/core/jarvis/agent-creator/workflows/wf-pipeline-trigger.yaml +111 -0
- package/core/jarvis/autonomous/benchmark/AGENT.md +347 -0
- package/core/jarvis/autonomous/benchmark/SOUL.md +78 -0
- package/core/jarvis/autonomous/critic/AGENT.md +324 -0
- package/core/jarvis/autonomous/critic/SOUL.md +78 -0
- package/core/jarvis/autonomous/evolver/AGENT.md +294 -0
- package/core/jarvis/autonomous/evolver/SOUL.md +85 -0
- package/core/jarvis/autonomous/playbook-generator/AGENT.md +399 -0
- package/core/jarvis/autonomous/playbook-generator/SOUL.md +80 -0
- package/core/patterns/_ROLE_PATTERNS.yaml +547 -0
- package/core/patterns/quality_gates.yaml +259 -0
- package/core/patterns/trigger_config.yaml +193 -0
- package/core/schemas/SCHEMA-INDEX.md +94 -0
- package/core/schemas/canonical-map.schema.json +98 -0
- package/core/schemas/chunks-state.schema.json +131 -0
- package/core/schemas/decisions-registry.schema.json +120 -0
- package/core/schemas/file-registry.schema.json +69 -0
- package/core/schemas/insights-state.schema.json +111 -0
- package/core/schemas/narratives-state.schema.json +150 -0
- package/core/tasks/CHANGELOG.md +55 -0
- package/core/tasks/TASK-REGISTRY.md +113 -0
- package/core/tasks/_templates/task-tmpl.md +105 -0
- package/core/tasks/analyze-themes.md +84 -0
- package/core/tasks/detect-role.md +82 -0
- package/core/tasks/extract-dna.md +114 -0
- package/core/tasks/normalize-entities.md +82 -0
- package/core/tasks/process-batch.md +111 -0
- package/core/tasks/validate-cascade.md +105 -0
- package/core/templates/README.md +27 -0
- package/core/templates/agents/dna-config-template.yaml +181 -0
- package/core/templates/agents/enrichment-protocol.md +408 -0
- package/core/templates/agents/memory-template.md +567 -0
- package/core/templates/agents/reasoning-model.md +331 -0
- package/core/templates/agents/soul-template.md +416 -0
- package/core/templates/agents/template-evolution.md +544 -0
- package/core/templates/debates/CONCLAVE-LOG-TEMPLATE-v2.md +309 -0
- package/core/templates/debates/conclave-log-template.md +309 -0
- package/core/templates/debates/conclave-protocol.md +518 -0
- package/core/templates/debates/debate-dynamics-config.yaml +322 -0
- package/core/templates/debates/debate-dynamics.md +613 -0
- package/core/templates/debates/debate-protocol.md +323 -0
- package/core/templates/logs/LOG-TEMPLATES.md +1068 -0
- package/core/templates/logs/batch-visual-template.md +841 -0
- package/core/templates/logs/log-structure.md +65 -0
- package/core/templates/logs/visual-diff.md +159 -0
- package/core/templates/phases/dossier-compilation.md +790 -0
- package/core/templates/phases/narrative-metabolism.md +292 -0
- package/core/templates/phases/narrative-synthesis.md +278 -0
- package/core/templates/phases/phase4-checkpoint.md +146 -0
- package/core/templates/phases/prompt-1.1-chunking.md +154 -0
- package/core/templates/phases/prompt-1.2-entity-resolution.md +186 -0
- package/core/templates/phases/prompt-2.1-dna-tags.md +208 -0
- package/core/templates/phases/prompt-2.1-insight-extraction.md +191 -0
- package/core/templates/phases/prompt-3.1-narrative.md +331 -0
- package/core/templates/phases/sources-compilation.md +340 -0
- package/core/workflows/PIPELINE-JARVIS-DOCS.md +606 -0
- package/core/workflows/wf-conclave.yaml +139 -0
- package/core/workflows/wf-extract-dna.yaml +158 -0
- package/core/workflows/wf-ingest.yaml +88 -0
- package/core/workflows/wf-pipeline-full.yaml +138 -0
- package/docs/API-KEYS-GUIDE.md +372 -0
- package/docs/INTEGRATION-POINTS.md +501 -0
- package/docs/LAYERS.md +403 -0
- package/docs/PLAN-MODE-PROTOCOL.md +388 -0
- package/docs/RESTORE-AND-INDEX.md +203 -0
- package/docs/TAG-RESOLVER-IMPLEMENTATION.md +597 -0
- package/docs/conselho.md +337 -0
- package/docs/context7-readme.md +28 -0
- package/docs/jarvis-logging-protocol.md +380 -0
- package/docs/pipeline-completa-v4.md +1315 -0
- package/docs/prompts/meta_agente_mapeamento_processos.md +297 -0
- package/docs/quick-start.md +197 -0
- package/docs/readme-ralph-cascateamento.md +207 -0
- package/docs/template-master.md +727 -0
- package/docs/templates/phase5/IMPLEMENTATION-GUIDE.md +355 -0
- package/docs/templates/phase5/MOGA-BRAIN-PHASE5-TEMPLATES.md +1284 -0
- package/docs/templates/phase5/README.md +165 -0
- package/docs/workflow-continuous-claude.md +2232 -0
- package/inbox/.gitkeep +0 -0
- package/inbox/README.md +15 -0
- package/knowledge/NAVIGATION-MAP.json +292 -0
- package/knowledge/README.md +11 -0
- package/knowledge/dna/.gitkeep +0 -0
- package/knowledge/dossiers/persons/.gitkeep +0 -0
- package/knowledge/dossiers/system/.gitkeep +0 -0
- package/knowledge/dossiers/themes/.gitkeep +0 -0
- package/knowledge/playbooks/.gitkeep +0 -0
- package/knowledge/sources/.gitkeep +0 -0
- package/logs/.gitkeep +0 -0
- package/logs/README.md +11 -0
- package/package.json +180 -0
- package/requirements.txt +4 -0
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ENTITY NORMALIZER - Intelligence Layer v1.0
|
|
4
|
+
============================================
|
|
5
|
+
Canonicalizacao continua de entidades (pessoas, temas, roles, conceitos).
|
|
6
|
+
|
|
7
|
+
Resolve:
|
|
8
|
+
- Exact match nos aliases do ENTITY-REGISTRY
|
|
9
|
+
- Fuzzy match via difflib (threshold 0.85)
|
|
10
|
+
- Domain-aware boost para roles/temas
|
|
11
|
+
- Auto-merge acima de 0.95 similaridade
|
|
12
|
+
- Review queue entre 0.85-0.95
|
|
13
|
+
|
|
14
|
+
Usado por: theme_analyzer.py, role_detector.py, bootstrap_registry.py,
|
|
15
|
+
post_batch_cascading.py v3.0
|
|
16
|
+
|
|
17
|
+
Versao: 1.0.0
|
|
18
|
+
Data: 2026-02-24
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
24
|
+
import sys
|
|
25
|
+
import yaml
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from difflib import SequenceMatcher
|
|
28
|
+
from datetime import datetime, timezone
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# PATHS
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Root for every path below: two directory levels above this module.
# NOTE(review): the package listing ships this file under core/intelligence/,
# so this resolves to core/ — confirm that processing/, knowledge/ and scripts/
# are really expected beneath that directory rather than at the package root.
BASE_DIR = Path(__file__).parent.parent
|
|
34
|
+
REGISTRY_PATH = BASE_DIR / "processing" / "canonical" / "ENTITY-REGISTRY.json"
|
|
35
|
+
TAXONOMY_PATH = BASE_DIR / "knowledge" / "dna" / "DOMAINS-TAXONOMY.yaml"
|
|
36
|
+
TRIGGER_CONFIG_PATH = BASE_DIR / "scripts" / "trigger_config.yaml"
|
|
37
|
+
REVIEW_QUEUE_PATH = BASE_DIR / "processing" / "canonical" / "review_queue.jsonl"
|
|
38
|
+
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
# CONFIG
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Fallback values: get_thresholds() uses each DEFAULT_* constant when the
# trigger config does not provide the corresponding key.
DEFAULT_FUZZY_THRESHOLD = 0.85
|
|
43
|
+
DEFAULT_AUTO_MERGE_THRESHOLD = 0.95
|
|
44
|
+
DEFAULT_MIN_OCCURRENCES_TO_CONFIRM = 3
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def load_trigger_config():
    """Load the trigger configuration from ``TRIGGER_CONFIG_PATH``.

    Returns:
        dict: The parsed YAML mapping, or an empty dict when the file does
        not exist, is empty, or its top-level value is not a mapping.
    """
    if not TRIGGER_CONFIG_PATH.exists():
        return {}
    with open(TRIGGER_CONFIG_PATH, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # yaml.safe_load returns None for an empty document; callers chain
    # .get() on the result, so always hand back a dict.
    return data if isinstance(data, dict) else {}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_thresholds():
    """Return the entity canonicalization thresholds.

    Values are read from the ``thresholds.entity_canonicalization`` section
    of the trigger config; any key missing there falls back to the matching
    module-level ``DEFAULT_*`` constant.

    Returns:
        dict: Keys ``fuzzy_threshold``, ``auto_merge_threshold`` and
        ``min_occurrences_to_confirm``.
    """
    # A section that is present but left empty in YAML parses as None, which
    # would break the chained lookups, so normalise every level to a dict.
    config = load_trigger_config() or {}
    ec = (config.get("thresholds") or {}).get("entity_canonicalization") or {}
    return {
        "fuzzy_threshold": ec.get("fuzzy_threshold", DEFAULT_FUZZY_THRESHOLD),
        "auto_merge_threshold": ec.get("auto_merge_threshold", DEFAULT_AUTO_MERGE_THRESHOLD),
        "min_occurrences_to_confirm": ec.get("min_occurrences_to_confirm", DEFAULT_MIN_OCCURRENCES_TO_CONFIRM),
    }
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
# REGISTRY I/O
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
def load_registry():
    """Read the entity registry from disk.

    Returns:
        The JSON content of ``REGISTRY_PATH`` when that file exists,
        otherwise the structure built by ``create_empty_registry()``.
    """
    if not REGISTRY_PATH.exists():
        return create_empty_registry()
    with open(REGISTRY_PATH, "r", encoding="utf-8") as registry_file:
        registry = json.load(registry_file)
    return registry
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def save_registry(registry):
    """Persist the registry to ``REGISTRY_PATH``, bumping its version.

    ``registry["metadata"]`` is updated in place before writing:
    ``updated_at`` is set to the current UTC time and ``version`` is
    incremented by one (starting from 0 when absent).

    Args:
        registry: Registry dict to save. A missing ``metadata`` section is
            created instead of raising ``KeyError``.
    """
    metadata = registry.setdefault("metadata", {})
    metadata["updated_at"] = datetime.now(timezone.utc).isoformat()
    metadata["version"] = metadata.get("version", 0) + 1

    REGISTRY_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling temp file and rename it over the target so that a
    # failed serialisation or interrupted write never leaves a truncated
    # registry behind.
    tmp_path = REGISTRY_PATH.with_name(REGISTRY_PATH.name + ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(registry, f, indent=2, ensure_ascii=False)
        tmp_path.replace(REGISTRY_PATH)
    finally:
        # Only still present when the write or rename failed.
        if tmp_path.exists():
            tmp_path.unlink()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def create_empty_registry():
    """Build a new, empty registry structure.

    Returns:
        dict: A ``metadata`` block (version 0, creation/update timestamps,
        description) plus empty ``persons``, ``themes``, ``roles`` and
        ``concepts`` sections.
    """
    # Take the timestamp once so created_at and updated_at are identical
    # for a brand-new registry.
    now = datetime.now(timezone.utc).isoformat()
    return {
        "metadata": {
            "version": 0,
            "created_at": now,
            "updated_at": now,
            "description": "ENTITY-REGISTRY - Single Source of Truth for Mega Brain entities",
        },
        "persons": {},
        "themes": {},
        "roles": {},
        "concepts": {},
    }
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
# TAXONOMY LOADER
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# Parsed taxonomy, populated on the first load_taxonomy() call.
_taxonomy_cache = None


def load_taxonomy():
    """Load the taxonomy YAML at ``TAXONOMY_PATH``, caching the result.

    The file is read at most once per process; later calls return the
    cached object.

    Returns:
        dict: The parsed taxonomy, or an empty dict when the file is
        missing, empty, or its top level is not a mapping.
    """
    global _taxonomy_cache
    if _taxonomy_cache is not None:
        return _taxonomy_cache

    loaded = None
    if TAXONOMY_PATH.exists():
        with open(TAXONOMY_PATH, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)

    # safe_load() returns None for an empty file. Storing {} instead keeps
    # the cache effective and lets callers use .get() safely.
    _taxonomy_cache = loaded if isinstance(loaded, dict) else {}
    return _taxonomy_cache
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def get_domain_aliases():
    """Build a flat ``alias -> domain id`` map from the taxonomy.

    For every entry under ``dominios`` the lower-cased id, each lower-cased
    alias and each lower-cased sub-domain is mapped to the domain id. When
    the same lower-cased key occurs more than once, the last one wins.

    Returns:
        dict: Lower-cased lookup key to domain id.
    """
    tax = load_taxonomy() or {}
    alias_map = {}
    # "or []" covers YAML keys that are present but empty (parsed as None).
    for dom in tax.get("dominios") or []:
        did = dom["id"]
        alias_map[did.lower()] = did
        for a in dom.get("aliases") or []:
            alias_map[a.lower()] = did
        for s in dom.get("subdominios") or []:
            alias_map[s.lower()] = did
    return alias_map
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def get_role_aliases():
    """Build a flat ``alias -> canonical role`` map from the taxonomy.

    Each key under ``cargos`` is upper-cased to form the canonical role.
    Two lower-case lookup keys point at it: the key as written and the key
    with hyphens replaced by spaces.

    Returns:
        dict: Lower-cased lookup key to upper-cased canonical role.
    """
    lookup = {}
    for key in load_taxonomy().get("cargos", {}):
        upper_key = key.upper()
        # hyphenated form first, then the space-separated variation
        for variant in (upper_key, upper_key.replace("-", " ")):
            lookup[variant.lower()] = upper_key
    return lookup
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_person_aliases():
    """Build a flat ``alias -> canonical person`` map from the taxonomy.

    Each key under ``pessoas`` is upper-cased to form the canonical id; its
    lower-cased form, with and without hyphens turned into spaces, is used
    as the lookup key.

    Returns:
        dict: Lower-cased lookup key to upper-cased canonical person id.
    """
    aliases = {}
    people = load_taxonomy().get("pessoas", {})
    for raw_key in people:
        canonical_id = raw_key.upper()
        hyphenated = canonical_id.lower()
        aliases[hyphenated] = canonical_id
        aliases[hyphenated.replace("-", " ")] = canonical_id
    return aliases
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# NORMALIZATION (text utils)
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
def normalize_text(text):
    """Return ``text`` prepared for comparison.

    Leading/trailing whitespace is removed, the text is lower-cased and
    every internal run of whitespace is collapsed to a single space.
    Falsy input (``None``, ``""``) yields ``""``.
    """
    if not text:
        return ""
    return re.sub(r"\s+", " ", text.strip().lower())
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def normalize_entity_key(name):
    """Return the registry lookup key for ``name``.

    The key is simply the ``normalize_text`` form of the name.
    """
    key = normalize_text(name)
    return key
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
# FUZZY MATCHING
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
def similarity(a, b):
    """Return the ``SequenceMatcher`` ratio of two strings.

    Both strings are passed through ``normalize_text`` before comparison.
    """
    left = normalize_text(a)
    right = normalize_text(b)
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def find_best_match(name, candidates, threshold=None):
    """
    Pick the candidate most similar to `name`.

    Args:
        name: string to match
        candidates: dict of {canonical: entity_data}, or any iterable of
            canonical names. With a dict, each entity's "aliases" are
            scored too and credited to their canonical name.
        threshold: minimum similarity to accept; when None the configured
            "fuzzy_threshold" is used

    Returns:
        (canonical_name, score) for the best candidate scoring at least
        `threshold`, otherwise (None, 0.0). An empty `name` always gives
        (None, 0.0).
    """
    if threshold is None:
        threshold = get_thresholds()["fuzzy_threshold"]

    needle = normalize_text(name)
    if not needle:
        return None, 0.0

    has_entity_data = isinstance(candidates, dict)
    top_name, top_score = None, 0.0

    for canonical in list(candidates):
        # The canonical name is scored first, then any aliases.
        labels = [canonical]
        if has_entity_data:
            labels.extend(candidates[canonical].get("aliases", []))
        for label in labels:
            current = similarity(needle, label)
            # strict ">" keeps the earliest candidate on ties
            if current > top_score:
                top_name, top_score = canonical, current

    if top_score >= threshold:
        return top_name, top_score
    return None, 0.0
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
# CORE: NORMALIZE ENTITY
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
def normalize_entity(name, entity_type, registry=None, source_id=None,
                     auto_save=False, domain_hint=None):
    """
    Resolve a raw entity name to its canonical form, updating the registry.

    Resolution order:
        1. Exact match on canonical names
        2. Exact match on aliases
        3. Fuzzy match, optionally replaced by a higher domain-boosted
           score for roles/themes when `domain_hint` is given
           - score >= auto_merge_threshold: merged, raw name added as alias
           - score >= fuzzy_threshold: counted on the candidate and logged
             to the review queue
        4. Taxonomy lookup
        5. Canonical name derived from the raw name (new or existing entry)

    Args:
        name: raw entity name (e.g. "closer", "Alex hormozi", "processo de vendas")
        entity_type: "person" | "theme" | "role" | "concept"; any other
            value is stored in the "concepts" section
        registry: ENTITY-REGISTRY dict (loaded if None); modified in place
        source_id: source that mentioned this entity
        auto_save: save registry after modification
        domain_hint: optional domain_id for context-aware matching

    Returns:
        {
            "canonical": str,
            "match_type": "exact" | "alias" | "fuzzy" | "fuzzy_candidate"
                          | "taxonomy" | "new" | "existing" | "empty",
            "score": float,
            "entity_type": str,
            "created": bool   # True only when a registry entry was added
        }
        A domain-boosted match is reported as "fuzzy" / "fuzzy_candidate".
    """
    if registry is None:
        registry = load_registry()

    thresholds = get_thresholds()
    norm_name = normalize_text(name)
    if not norm_name:
        return {"canonical": name, "match_type": "empty", "score": 0.0,
                "entity_type": entity_type, "created": False}

    # Map entity_type to registry section
    section_map = {
        "person": "persons",
        "theme": "themes",
        "role": "roles",
        "concept": "concepts",
    }
    section = section_map.get(entity_type, "concepts")
    # setdefault (not get): when the section is missing, entries created
    # below must be attached to the registry rather than to a throwaway dict.
    entities = registry.setdefault(section, {})

    def _finish(canonical, match_type, score, created=False):
        # Every path that reaches here has modified the registry.
        if auto_save:
            save_registry(registry)
        return {"canonical": canonical, "match_type": match_type, "score": score,
                "entity_type": entity_type, "created": created}

    # ----- STEP 1: Exact match on canonical names -----
    for canonical, data in entities.items():
        if normalize_text(canonical) == norm_name:
            _increment_entity(data, source_id)
            return _finish(canonical, "exact", 1.0)

    # ----- STEP 2: Exact match on aliases -----
    for canonical, data in entities.items():
        for alias in data.get("aliases", []):
            if normalize_text(alias) == norm_name:
                _increment_entity(data, source_id)
                return _finish(canonical, "alias", 1.0)

    # ----- STEP 3: Fuzzy match -----
    best_canonical, best_score = find_best_match(
        name, entities, threshold=thresholds["fuzzy_threshold"])

    # ----- STEP 4: Domain-aware boost -----
    if domain_hint and entity_type in ("role", "theme"):
        boosted = _domain_boost(name, entities, domain_hint)
        if boosted and boosted[1] > best_score:
            best_canonical, best_score = boosted

    if best_canonical and best_score >= thresholds["auto_merge_threshold"]:
        # Auto-merge: add as alias
        entity_data = entities[best_canonical]
        if norm_name not in [normalize_text(a) for a in entity_data.get("aliases", [])]:
            entity_data.setdefault("aliases", []).append(name)
        _increment_entity(entity_data, source_id)
        return _finish(best_canonical, "fuzzy", best_score)

    if best_canonical and best_score >= thresholds["fuzzy_threshold"]:
        # Candidate merge: add to review queue
        _add_to_review_queue(name, best_canonical, best_score, entity_type, source_id)
        _increment_entity(entities[best_canonical], source_id)
        return _finish(best_canonical, "fuzzy_candidate", best_score)

    # ----- STEP 5: Check taxonomy before creating new -----
    taxonomy_match = _check_taxonomy(name, entity_type)
    if taxonomy_match:
        canonical = taxonomy_match["canonical"]
        created = canonical not in entities
        if created:
            # Entity known in taxonomy but not in registry - create with taxonomy info
            entities[canonical] = _create_entity(
                canonical, entity_type, source_id,
                aliases=[name] if normalize_text(name) != normalize_text(canonical) else [],
                domain_ids=taxonomy_match.get("domain_ids", [])
            )
        else:
            # Already registered: count the mention instead of claiming a creation.
            _increment_entity(entities[canonical], source_id)
        return _finish(canonical, "taxonomy", 1.0, created)

    # ----- STEP 6: New entity -----
    canonical = _make_canonical_name(name, entity_type)
    if canonical not in entities:
        entities[canonical] = _create_entity(canonical, entity_type, source_id)
        return _finish(canonical, "new", 0.0, True)

    # The derived canonical name is already registered: count this mention too.
    _increment_entity(entities[canonical], source_id)
    return _finish(canonical, "existing", 1.0)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# ---------------------------------------------------------------------------
|
|
364
|
+
# BATCH NORMALIZATION
|
|
365
|
+
# ---------------------------------------------------------------------------
|
|
366
|
+
def normalize_entities_batch(entities_list, entity_type, registry=None,
                             source_id=None, auto_save=True, domain_hint=None):
    """
    Normalize a list of entity names against one shared registry.

    Each name is resolved with `normalize_entity`. Individual saves are
    suppressed; the registry is written once after the loop when
    `auto_save` is true.

    Args:
        entities_list: list of raw entity names
        entity_type: "person" | "theme" | "role" | "concept"
        registry: shared registry dict (loaded if None)
        source_id: source that mentioned these entities
        auto_save: save registry after all normalizations
        domain_hint: optional domain_id forwarded to every
            `normalize_entity` call

    Returns:
        list of normalization results, in input order
    """
    if registry is None:
        registry = load_registry()

    results = [
        normalize_entity(name, entity_type, registry=registry,
                         source_id=source_id, auto_save=False,
                         domain_hint=domain_hint)
        for name in entities_list
    ]

    if auto_save:
        save_registry(registry)

    return results
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
# ---------------------------------------------------------------------------
|
|
397
|
+
# INTERNAL HELPERS
|
|
398
|
+
# ---------------------------------------------------------------------------
|
|
399
|
+
def _increment_entity(entity_data, source_id=None):
|
|
400
|
+
"""Increment occurrence count and add source."""
|
|
401
|
+
count_key = "mention_count" if "mention_count" in entity_data else "occurrence_count"
|
|
402
|
+
entity_data[count_key] = entity_data.get(count_key, 0) + 1
|
|
403
|
+
if source_id:
|
|
404
|
+
sources = entity_data.setdefault("sources", [])
|
|
405
|
+
if source_id not in sources:
|
|
406
|
+
sources.append(source_id)
|
|
407
|
+
entity_data["last_seen"] = datetime.now(timezone.utc).isoformat()
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _create_entity(canonical, entity_type, source_id=None, aliases=None,
|
|
411
|
+
domain_ids=None):
|
|
412
|
+
"""Create a new entity entry."""
|
|
413
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
414
|
+
count_key = "mention_count" if entity_type in ("person", "role") else "occurrence_count"
|
|
415
|
+
entity = {
|
|
416
|
+
"canonical": canonical,
|
|
417
|
+
"aliases": aliases or [],
|
|
418
|
+
count_key: 1,
|
|
419
|
+
"sources": [source_id] if source_id else [],
|
|
420
|
+
"status": "tracking",
|
|
421
|
+
"created_at": now,
|
|
422
|
+
"last_seen": now,
|
|
423
|
+
}
|
|
424
|
+
if entity_type == "theme":
|
|
425
|
+
entity["has_dossier"] = False
|
|
426
|
+
entity["domain_ids"] = domain_ids or []
|
|
427
|
+
entity["related_roles"] = []
|
|
428
|
+
elif entity_type == "role":
|
|
429
|
+
entity["has_agent"] = False
|
|
430
|
+
entity["domain_ids"] = domain_ids or []
|
|
431
|
+
entity["responsibilities"] = []
|
|
432
|
+
entity["mention_breakdown"] = {"direct": 0, "inferred": 0, "emergent": 0}
|
|
433
|
+
entity["weighted_score"] = 0.0
|
|
434
|
+
entity["detection_history"] = []
|
|
435
|
+
elif entity_type == "person":
|
|
436
|
+
entity["has_agent"] = False
|
|
437
|
+
entity["has_dna"] = False
|
|
438
|
+
entity["domains"] = domain_ids or []
|
|
439
|
+
elif entity_type == "concept":
|
|
440
|
+
entity["layer"] = None # L1-L5
|
|
441
|
+
return entity
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _make_canonical_name(name, entity_type):
|
|
445
|
+
"""Create canonical name from raw name."""
|
|
446
|
+
name = name.strip()
|
|
447
|
+
if entity_type == "person":
|
|
448
|
+
# Title Case for persons
|
|
449
|
+
return " ".join(w.capitalize() for w in name.split())
|
|
450
|
+
elif entity_type == "role":
|
|
451
|
+
# UPPER-CASE-WITH-HYPHENS for roles
|
|
452
|
+
return re.sub(r"\s+", "-", name.strip().upper())
|
|
453
|
+
elif entity_type == "theme":
|
|
454
|
+
# lowercase-with-hyphens for themes
|
|
455
|
+
clean = re.sub(r"[^\w\s-]", "", name.lower())
|
|
456
|
+
return re.sub(r"\s+", "-", clean.strip())
|
|
457
|
+
else:
|
|
458
|
+
# Title Case for concepts
|
|
459
|
+
return " ".join(w.capitalize() for w in name.split())
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _domain_boost(name, entities, domain_hint):
    """
    Find the best fuzzy match among entities tagged with `domain_hint`.

    Only entities whose "domain_ids" contain `domain_hint` are considered.
    Their canonical name and aliases are scored with `similarity` plus a
    flat 0.10 bonus.

    Returns:
        (canonical, score) with the score capped at 1.0 when the boosted
        score reaches the configured "fuzzy_threshold", otherwise None.
    """
    needle = normalize_text(name)
    winner, winner_score = None, 0.0

    for canonical, data in entities.items():
        if domain_hint not in data.get("domain_ids", []):
            continue
        for label in [canonical, *data.get("aliases", [])]:
            boosted = similarity(needle, label) + 0.10  # domain boost
            if boosted > winner_score:
                winner, winner_score = canonical, boosted

    if not winner or winner_score < get_thresholds()["fuzzy_threshold"]:
        return None
    return winner, min(winner_score, 1.0)
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _check_taxonomy(name, entity_type):
    """Look `name` up in the taxonomy for the given entity type.

    Roles are resolved through ``get_role_aliases`` / ``cargos``, persons
    through ``get_person_aliases`` / ``pessoas`` and themes through
    ``get_domain_aliases``. Other entity types are never matched.

    Returns:
        dict | None: ``{"canonical": ..., "domain_ids": [...]}`` on a match,
        otherwise None. For roles and persons the domain list is the
        primary followed by the secondary domains of the taxonomy entry.
    """
    norm = normalize_text(name)

    def _entry_for(section, canonical):
        # The alias maps upper-case the taxonomy keys, so an exact lookup
        # misses keys written in another case; fall back to a
        # case-insensitive scan.
        entries = load_taxonomy().get(section) or {}
        entry = entries.get(canonical)
        if entry is None:
            entry = next(
                (value for key, value in entries.items() if key.upper() == canonical),
                None,
            )
        return entry or {}

    def _domains(entry, primary_key, secondary_key):
        # "or []" guards YAML keys that are present but empty (None).
        return list(entry.get(primary_key) or []) + list(entry.get(secondary_key) or [])

    if entity_type == "role":
        role_map = get_role_aliases()
        if norm in role_map:
            canonical = role_map[norm]
            cargo_data = _entry_for("cargos", canonical)
            domains = _domains(cargo_data, "dominios_primarios", "dominios_secundarios")
            return {"canonical": canonical, "domain_ids": domains}

    elif entity_type == "person":
        person_map = get_person_aliases()
        if norm in person_map:
            canonical = person_map[norm]
            person_data = _entry_for("pessoas", canonical)
            domains = _domains(person_data, "expertise_primaria", "expertise_secundaria")
            return {"canonical": canonical, "domain_ids": domains}

    elif entity_type == "theme":
        domain_map = get_domain_aliases()
        if norm in domain_map:
            domain_id = domain_map[norm]
            return {"canonical": domain_id, "domain_ids": [domain_id]}

    return None
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def _add_to_review_queue(name, candidate_canonical, score, entity_type, source_id):
    """Append a pending merge candidate to the review queue file.

    One JSON object per line is appended to ``REVIEW_QUEUE_PATH``; the
    parent directory is created when missing. The score is rounded to four
    decimal places.
    """
    record = dict(
        timestamp=datetime.now(timezone.utc).isoformat(),
        raw_name=name,
        candidate_canonical=candidate_canonical,
        score=round(score, 4),
        entity_type=entity_type,
        source_id=source_id,
        status="pending",
    )
    REVIEW_QUEUE_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(REVIEW_QUEUE_PATH, "a", encoding="utf-8") as queue_file:
        queue_file.write(json.dumps(record, ensure_ascii=False) + "\n")
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
# ---------------------------------------------------------------------------
|
|
537
|
+
# CLI
|
|
538
|
+
# ---------------------------------------------------------------------------
|
|
539
|
+
def main():
    """CLI entry point: normalize one entity and print the outcome.

    Expects ``<entity_type> <name...>`` on the command line; all arguments
    after the type are joined with spaces to form the name. Prints usage
    and exits with status 1 when fewer than two arguments are given. The
    registry is not saved.
    """
    if len(sys.argv) < 3:
        usage_lines = (
            "Uso: python3 entity_normalizer.py <entity_type> <name>",
            "  entity_type: person | theme | role | concept",
            "  name: nome da entidade a normalizar",
        )
        for usage_line in usage_lines:
            print(usage_line)
        print()
        print("Exemplo: python3 entity_normalizer.py person 'alex hormozi'")
        sys.exit(1)

    entity_type, name_parts = sys.argv[1], sys.argv[2:]
    name = " ".join(name_parts)

    result = normalize_entity(name, entity_type, auto_save=False)

    print("\n=== ENTITY NORMALIZER ===")
    print(f"Input: '{name}'")
    print(f"Type: {entity_type}")
    print(f"Canonical: '{result['canonical']}'")
    print(f"Match: {result['match_type']}")
    print(f"Score: {result['score']:.4f}")
    print(f"Created: {result['created']}")
    print()


if __name__ == "__main__":
    main()
|