mega-brain-ai 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mega-brain-ai might be problematic. Click here for more details.

Files changed (308) hide show
  1. package/.claude/CLAUDE.md +155 -0
  2. package/.claude/commands/agents.md +161 -0
  3. package/.claude/commands/ask.md +117 -0
  4. package/.claude/commands/benchmark.md +224 -0
  5. package/.claude/commands/chat.md +343 -0
  6. package/.claude/commands/compare.md +116 -0
  7. package/.claude/commands/conclave.md +194 -0
  8. package/.claude/commands/config.md +133 -0
  9. package/.claude/commands/council.md +194 -0
  10. package/.claude/commands/create-agent.md +452 -0
  11. package/.claude/commands/debate.md +157 -0
  12. package/.claude/commands/documentation/create-architecture-documentation.md +175 -0
  13. package/.claude/commands/dossiers.md +180 -0
  14. package/.claude/commands/evolve.md +223 -0
  15. package/.claude/commands/extract-dna.md +170 -0
  16. package/.claude/commands/extract-knowledge.md +507 -0
  17. package/.claude/commands/inbox.md +296 -0
  18. package/.claude/commands/ingest-empresa.md +191 -0
  19. package/.claude/commands/ingest.md +182 -0
  20. package/.claude/commands/jarvis-briefing.md +67 -0
  21. package/.claude/commands/jarvis-control.md +169 -0
  22. package/.claude/commands/jarvis-full.md +181 -0
  23. package/.claude/commands/jarvis.md +212 -0
  24. package/.claude/commands/ler-drive.md +212 -0
  25. package/.claude/commands/log.md +158 -0
  26. package/.claude/commands/loop.md +133 -0
  27. package/.claude/commands/loops.md +73 -0
  28. package/.claude/commands/mission-autopilot.md +538 -0
  29. package/.claude/commands/mission.md +353 -0
  30. package/.claude/commands/process-inbox.md +148 -0
  31. package/.claude/commands/process-jarvis.md +3036 -0
  32. package/.claude/commands/process-video.md +131 -0
  33. package/.claude/commands/rag-search.md +78 -0
  34. package/.claude/commands/resume.md +33 -0
  35. package/.claude/commands/save.md +38 -0
  36. package/.claude/commands/scan-inbox.md +125 -0
  37. package/.claude/commands/setup.md +99 -0
  38. package/.claude/commands/system-digest.md +243 -0
  39. package/.claude/commands/verify.md +182 -0
  40. package/.claude/commands/view-dna.md +169 -0
  41. package/.claude/hooks/agent_doctor.py +433 -0
  42. package/.claude/hooks/agent_memory_persister.py +203 -0
  43. package/.claude/hooks/auto_formatter.py +158 -0
  44. package/.claude/hooks/checkpoint_writer.py +244 -0
  45. package/.claude/hooks/claude_md_guard.py +146 -0
  46. package/.claude/hooks/creation_validator.py +357 -0
  47. package/.claude/hooks/enforce_dual_location.py +501 -0
  48. package/.claude/hooks/enforce_plan_mode.py +220 -0
  49. package/.claude/hooks/inbox_age_alert.py +367 -0
  50. package/.claude/hooks/jarvis_briefing.py +506 -0
  51. package/.claude/hooks/ledger_updater.py +301 -0
  52. package/.claude/hooks/memory_hints_injector.py +251 -0
  53. package/.claude/hooks/memory_updater.py +202 -0
  54. package/.claude/hooks/multi_agent_hook.py +464 -0
  55. package/.claude/hooks/notification_system.py +120 -0
  56. package/.claude/hooks/pattern_analyzer.py +526 -0
  57. package/.claude/hooks/pending_tracker.py +188 -0
  58. package/.claude/hooks/post_batch_cascading.py +1740 -0
  59. package/.claude/hooks/post_output_validator.py +358 -0
  60. package/.claude/hooks/post_tool_use.py +120 -0
  61. package/.claude/hooks/post_write_validator.py +200 -0
  62. package/.claude/hooks/quality_watchdog.py +394 -0
  63. package/.claude/hooks/ralph_wiggum.py +277 -0
  64. package/.claude/hooks/session-source-sync.py +218 -0
  65. package/.claude/hooks/session_autosave_v2.py +1135 -0
  66. package/.claude/hooks/session_end.py +203 -0
  67. package/.claude/hooks/session_start.py +939 -0
  68. package/.claude/hooks/skill_indexer.py +48 -0
  69. package/.claude/hooks/skill_router.py +358 -0
  70. package/.claude/hooks/stop_hook_completeness.py +178 -0
  71. package/.claude/hooks/subagent_tracker.py +163 -0
  72. package/.claude/hooks/token_checkpoint.py +584 -0
  73. package/.claude/hooks/user_prompt_submit.py +125 -0
  74. package/.claude/rules/ANTHROPIC-STANDARDS.md +384 -0
  75. package/.claude/rules/CLAUDE-LITE.md +201 -0
  76. package/.claude/rules/RULE-GROUP-1.md +320 -0
  77. package/.claude/rules/RULE-GROUP-2.md +307 -0
  78. package/.claude/rules/RULE-GROUP-3.md +248 -0
  79. package/.claude/rules/RULE-GROUP-4.md +427 -0
  80. package/.claude/rules/RULE-GROUP-5.md +388 -0
  81. package/.claude/rules/RULE-GROUP-6.md +387 -0
  82. package/.claude/rules/logging.md +53 -0
  83. package/.claude/rules/mcp-governance.md +128 -0
  84. package/.claude/rules/pipeline.md +60 -0
  85. package/.claude/rules/state-management.md +93 -0
  86. package/.claude/scripts/apply-tags.py +77 -0
  87. package/.claude/scripts/batch-extract-transcriptions.py +132 -0
  88. package/.claude/scripts/build-complete-index.py +250 -0
  89. package/.claude/scripts/build-planilha-index.py +170 -0
  90. package/.claude/scripts/complete-tag-matching.py +250 -0
  91. package/.claude/scripts/deduplicate-inbox.py +139 -0
  92. package/.claude/scripts/docx-xml-extractor.py +141 -0
  93. package/.claude/scripts/extract-docx-text.py +58 -0
  94. package/.claude/scripts/extract-single-transcription.py +74 -0
  95. package/.claude/scripts/extract_docx_from_gdrive.py +77 -0
  96. package/.claude/scripts/organized-downloader.py +246 -0
  97. package/.claude/scripts/planilha-tagger.py +187 -0
  98. package/.claude/scripts/revert-tags.py +70 -0
  99. package/.claude/scripts/source-sync.py +265 -0
  100. package/.claude/scripts/tag-inbox-files.py +276 -0
  101. package/.claude/scripts/tag-inbox-v2.py +253 -0
  102. package/.claude/scripts/test-extraction.py +35 -0
  103. package/.claude/scripts/test-full-extraction.py +74 -0
  104. package/.claude/skills/00-SKILL-CREATOR/SKILL.md +186 -0
  105. package/.claude/skills/01-SKILL-DOCS-MEGABRAIN/SKILL.md +251 -0
  106. package/.claude/skills/02-SKILL-PYTHON-MEGABRAIN/SKILL.md +323 -0
  107. package/.claude/skills/03-SKILL-AGENT-CREATION/SKILL.md +374 -0
  108. package/.claude/skills/04-SKILL-KNOWLEDGE-EXTRACTION/SKILL.md +318 -0
  109. package/.claude/skills/05-SKILL-PIPELINE-JARVIS/SKILL.md +430 -0
  110. package/.claude/skills/06-SKILL-BRAINSTORMING/SKILL.md +72 -0
  111. package/.claude/skills/07-SKILL-DISPATCHING-PARALLEL-AGENTS/SKILL.md +193 -0
  112. package/.claude/skills/08-SKILL-EXECUTING-PLANS/SKILL.md +114 -0
  113. package/.claude/skills/09-SKILL-WRITING-PLANS/SKILL.md +184 -0
  114. package/.claude/skills/10-SKILL-VERIFICATION-BEFORE-COMPLETION/SKILL.md +130 -0
  115. package/.claude/skills/11-SKILL-USING-SUPERPOWERS/SKILL.md +105 -0
  116. package/.claude/skills/DETECTION-PROTOCOL.md +217 -0
  117. package/.claude/skills/README.md +240 -0
  118. package/.claude/skills/SKILL-REGISTRY.md +284 -0
  119. package/.claude/skills/SKILL-SUGGESTIONS.md +114 -0
  120. package/.claude/skills/_TEMPLATES/SKILL-WRITER-GUIDE.md +385 -0
  121. package/.claude/skills/chronicler/SKILL.md +146 -0
  122. package/.claude/skills/chronicler/chronicler_core.py +468 -0
  123. package/.claude/skills/code-review/SKILL.md +160 -0
  124. package/.claude/skills/council/SKILL.md +210 -0
  125. package/.claude/skills/executor/SKILL.md +161 -0
  126. package/.claude/skills/fase-2-5-tagging/SKILL.md +182 -0
  127. package/.claude/skills/feature-dev/SKILL.md +154 -0
  128. package/.claude/skills/finance-agent/SKILL.md +137 -0
  129. package/.claude/skills/frontend-design/SKILL.md +165 -0
  130. package/.claude/skills/gdrive-transcription-downloader/SKILL.md +249 -0
  131. package/.claude/skills/gemini-fallback/SKILL.md +67 -0
  132. package/.claude/skills/gemini-fallback/gemini_fetch.py +0 -0
  133. package/.claude/skills/gha/SKILL.md +96 -0
  134. package/.claude/skills/gha/gha_diagnostic.py +227 -0
  135. package/.claude/skills/github-workflow/SKILL.md +190 -0
  136. package/.claude/skills/hookify/SKILL.md +134 -0
  137. package/.claude/skills/hybrid-source-reading/SKILL.md +265 -0
  138. package/.claude/skills/jarvis/SKILL.md +546 -0
  139. package/.claude/skills/jarvis-briefing/SKILL.md +340 -0
  140. package/.claude/skills/ler-planilha/SKILL.md +281 -0
  141. package/.claude/skills/plugin-dev/SKILL.md +176 -0
  142. package/.claude/skills/pr-review-toolkit/SKILL.md +178 -0
  143. package/.claude/skills/resume/SKILL.md +61 -0
  144. package/.claude/skills/save/SKILL.md +87 -0
  145. package/.claude/skills/skill-writer/SKILL.md +153 -0
  146. package/.claude/skills/skill-writer/examples.md +191 -0
  147. package/.claude/skills/skill-writer/troubleshooting.md +205 -0
  148. package/.claude/skills/smart-download-tagger/SKILL.md +148 -0
  149. package/.claude/skills/source-sync/SKILL.md +240 -0
  150. package/.claude/skills/sync-docs/SKILL.md +193 -0
  151. package/.claude/skills/sync-docs/config.json +37 -0
  152. package/.claude/skills/sync-docs/gdrive_sync.py +358 -0
  153. package/.claude/skills/sync-docs/reauth.py +71 -0
  154. package/.claude/skills/talent-agent/SKILL.md +183 -0
  155. package/.claude/skills/verify/SKILL.md +154 -0
  156. package/.claude/skills/verify/verify_runner.py +0 -0
  157. package/.claude/skills/verify-6-levels/SKILL.md +234 -0
  158. package/.claude/templates/BATCH-LOG-TEMPLATE.md +221 -0
  159. package/.claudeignore +9 -0
  160. package/.gitattributes +4 -0
  161. package/.github/layer1-allowlist.txt +80 -0
  162. package/.github/layer2-manifest.txt +40 -0
  163. package/.gitignore +219 -0
  164. package/README.md +1210 -0
  165. package/agents/_templates/INDEX.md +741 -0
  166. package/agents/_templates/TEMPLATE-AGENT-MD-ULTRA-ROBUSTO-V3.md +2399 -0
  167. package/agents/boardroom/CHECKLIST-MASTER.md +281 -0
  168. package/agents/boardroom/INTEGRATION-GUIDE.md +406 -0
  169. package/agents/boardroom/README.md +238 -0
  170. package/agents/boardroom/config/BOARDROOM-CONFIG.md +186 -0
  171. package/agents/boardroom/config/TTS-INTEGRATION.md +258 -0
  172. package/agents/boardroom/config/VOICE-PROFILES.md +624 -0
  173. package/agents/boardroom/config/voice_mapping.json +128 -0
  174. package/agents/boardroom/scripts/audio_generator.py +375 -0
  175. package/agents/boardroom/scripts/audio_generator_edge.py +353 -0
  176. package/agents/boardroom/scripts/jarvis_boardroom_hook.py +415 -0
  177. package/agents/boardroom/scripts/notebooklm_generator.py +578 -0
  178. package/agents/boardroom/templates/EPISODE-TEMPLATE.md +367 -0
  179. package/agents/boardroom/templates/scene-templates/SCENE-AGENT-DEBATE.md +252 -0
  180. package/agents/boardroom/templates/scene-templates/SCENE-COUNCIL.md +270 -0
  181. package/agents/boardroom/templates/scene-templates/SCENE-DNA-CONSULTATION.md +126 -0
  182. package/agents/boardroom/templates/scene-templates/SCENE-QUESTION.md +174 -0
  183. package/agents/boardroom/workflows/WORKFLOW-AUDIO-GENERATION.md +421 -0
  184. package/agents/constitution/BASE-CONSTITUTION.md +254 -0
  185. package/agents/council/CRITIC.md +197 -0
  186. package/agents/council/DEVILS-ADVOCATE.md +274 -0
  187. package/agents/council/SYNTHESIZER.md +293 -0
  188. package/agents/council/advogado-do-diabo/AGENT.md +489 -0
  189. package/agents/council/advogado-do-diabo/SOUL.md +100 -0
  190. package/agents/council/critico-metodologico/AGENT.md +670 -0
  191. package/agents/council/critico-metodologico/SOUL.md +107 -0
  192. package/agents/council/sintetizador/AGENT.md +558 -0
  193. package/agents/council/sintetizador/SOUL.md +94 -0
  194. package/agents/persons/_example/AGENT-EXAMPLE.md +42 -0
  195. package/agents/persons/_example/DNA-EXAMPLE.yaml +61 -0
  196. package/agents/protocols/AGENT-COGNITION-PROTOCOL.md +779 -0
  197. package/agents/protocols/AGENT-INTEGRITY-PROTOCOL.md +692 -0
  198. package/agents/protocols/BATCH-VISUAL-PROTOCOL.md +841 -0
  199. package/agents/protocols/DNA-CONFIG-TEMPLATE.yaml +181 -0
  200. package/agents/protocols/DNA-EXTRACTION-PROTOCOL.md +370 -0
  201. package/agents/protocols/EPISTEMIC-PROTOCOL.md +333 -0
  202. package/agents/protocols/LOG-STRUCTURE-PROTOCOL.md +65 -0
  203. package/agents/protocols/MEMORY-PROTOCOL.md +567 -0
  204. package/agents/protocols/NARRATIVE-SYNTHESIS-PROTOCOL.md +278 -0
  205. package/agents/protocols/PHASE-4-VERIFICATION-CHECKPOINT.md +146 -0
  206. package/agents/protocols/SOUL-TEMPLATE.md +416 -0
  207. package/agents/protocols/TEMPLATE-EVOLUTION-PROTOCOL.md +544 -0
  208. package/agents/protocols/VISUAL-DIFF-PROTOCOL.md +159 -0
  209. package/agents/sua-empresa/README.md +44 -0
  210. package/agents/sua-empresa/_example/jds/EXAMPLE-JD.md +42 -0
  211. package/agents/sua-empresa/_example/org/EXAMPLE-ORG.md +32 -0
  212. package/agents/sua-empresa/_example/roles/EXAMPLE-ROLE.md +38 -0
  213. package/bin/cli.js +2 -0
  214. package/bin/lib/ascii-art.js +234 -0
  215. package/bin/lib/installer.js +402 -0
  216. package/bin/lib/setup-wizard.js +95 -0
  217. package/bin/lib/validate-email.js +109 -0
  218. package/bin/mega-brain.js +97 -0
  219. package/bin/push.js +342 -0
  220. package/bin/templates/env.example +38 -0
  221. package/inbox/.gitkeep +0 -0
  222. package/integrations/README.md +46 -0
  223. package/integrations/mcps/MCP-REGISTRY.md +56 -0
  224. package/integrations/mcps/excalidraw/CONFIG.md +56 -0
  225. package/integrations/mcps/gdrive/CONFIG.md +38 -0
  226. package/knowledge/dna/.gitkeep +0 -0
  227. package/knowledge/dossiers/persons/.gitkeep +0 -0
  228. package/knowledge/dossiers/persons/DOSSIER-EXAMPLE.md +49 -0
  229. package/knowledge/dossiers/system/.gitkeep +0 -0
  230. package/knowledge/dossiers/themes/.gitkeep +0 -0
  231. package/knowledge/playbooks/.gitkeep +0 -0
  232. package/knowledge/playbooks/PLAYBOOK-EXAMPLE.md +50 -0
  233. package/knowledge/sources/.gitkeep +0 -0
  234. package/logs/.gitkeep +0 -0
  235. package/package.json +128 -0
  236. package/processing/canonical/.gitkeep +0 -0
  237. package/processing/chunks/.gitkeep +0 -0
  238. package/processing/insights/.gitkeep +0 -0
  239. package/processing/narratives/.gitkeep +0 -0
  240. package/reference/CONSELHO.md +337 -0
  241. package/reference/CONTEXT7_README.md +28 -0
  242. package/reference/JARVIS-LOGGING-PROTOCOL.md +380 -0
  243. package/reference/QUICK-START.md +197 -0
  244. package/reference/README-RALPH-CASCATEAMENTO.md +207 -0
  245. package/reference/TEMPLATE-MASTER.md +727 -0
  246. package/reference/prds/prd-jarvis-mega-brain-v3.md +1305 -0
  247. package/reference/templates/phase5/IMPLEMENTATION-GUIDE.md +355 -0
  248. package/reference/templates/phase5/MOGA-BRAIN-PHASE5-TEMPLATES.md +1284 -0
  249. package/reference/templates/phase5/README.md +165 -0
  250. package/reference/workflow-claude-code-boris-cherny-continuous-claude.md +2232 -0
  251. package/system/database/001_moneyclub_buyers.sql +160 -0
  252. package/system/database/002_premium_token.sql +97 -0
  253. package/system/database/apply-migration.mjs +129 -0
  254. package/system/docs/MEGA-BRAIN-DEMO-COMPLETA.md +1226 -0
  255. package/system/docs/MEGA-BRAIN-MANIFESTO-COMPLETO.md +1054 -0
  256. package/system/docs/MOGA-BRAIN-EXPLICACAO-COMPLETA.md +791 -0
  257. package/system/docs/STRATEGIC-INTEGRATION-GUIDE.md +725 -0
  258. package/system/docs/architecture/01-system-context.md +136 -0
  259. package/system/docs/architecture/02-components.md +225 -0
  260. package/system/docs/architecture/03-data-flow.md +235 -0
  261. package/system/docs/architecture/04-integrations.md +283 -0
  262. package/system/docs/architecture/README.md +71 -0
  263. package/system/docs/architecture/diagrams/component-diagram.mmd +50 -0
  264. package/system/docs/architecture/diagrams/data-flow.mmd +39 -0
  265. package/system/docs/architecture/diagrams/system-overview.mmd +68 -0
  266. package/system/protocols/AGENT-AUTHORITY.md +217 -0
  267. package/system/protocols/CONSTITUICAO-BASE.md +115 -0
  268. package/system/protocols/CONSTITUTION.md +231 -0
  269. package/system/protocols/GOVERNANCE-MAP.md +123 -0
  270. package/system/protocols/HOOK-SECURITY-THREAT-MODEL.md +152 -0
  271. package/system/protocols/ORQUESTRACAO-PROTOCOL.md +215 -0
  272. package/system/protocols/_archive/CHUNKING-PROTOCOL.md +207 -0
  273. package/system/protocols/_archive/ENTITY-RESOLUTION-PROTOCOL.md +269 -0
  274. package/system/protocols/_archive/INSIGHT-EXTRACTION-PROTOCOL.md +257 -0
  275. package/system/protocols/_archive/NARRATIVE-SYNTHESIS-PROTOCOL.md +290 -0
  276. package/system/protocols/agents/AGENT-INTERACTION.md +315 -0
  277. package/system/protocols/agents/CORTEX-PROTOCOL.md +520 -0
  278. package/system/protocols/agents/EPISTEMIC-PROTOCOL.md +465 -0
  279. package/system/protocols/agents/MEMORY-PROTOCOL.md +366 -0
  280. package/system/protocols/agents/WAR-ROOM.md +355 -0
  281. package/system/protocols/company/COMPANY-DOCUMENT-PROTOCOL.md +793 -0
  282. package/system/protocols/company/COMPANY-ENRICHMENT-PROTOCOL.md +679 -0
  283. package/system/protocols/conclave/CONCLAVE-LOG-TEMPLATE-v2.md +309 -0
  284. package/system/protocols/conclave/CONCLAVE-PROTOCOL.md +518 -0
  285. package/system/protocols/conclave/DEBATE-DYNAMICS-CONFIG.yaml +322 -0
  286. package/system/protocols/conclave/DEBATE-DYNAMICS-PROTOCOL.md +613 -0
  287. package/system/protocols/conclave/DEBATE-PROTOCOL.md +323 -0
  288. package/system/protocols/council/COUNCIL-LOG-TEMPLATE-v2.md +309 -0
  289. package/system/protocols/council/COUNCIL-PROTOCOL.md +518 -0
  290. package/system/protocols/council/DEBATE-DYNAMICS-CONFIG.yaml +322 -0
  291. package/system/protocols/council/DEBATE-DYNAMICS-PROTOCOL.md +613 -0
  292. package/system/protocols/council/DEBATE-PROTOCOL.md +323 -0
  293. package/system/protocols/dna/DNA-EXTRACTION-PROTOCOL.md +1214 -0
  294. package/system/protocols/dna/ENRICHMENT-PROTOCOL.md +408 -0
  295. package/system/protocols/dna/REASONING-MODEL-PROTOCOL.md +331 -0
  296. package/system/protocols/pipeline/DOSSIER-COMPILATION-PROTOCOL.md +790 -0
  297. package/system/protocols/pipeline/NARRATIVE-METABOLISM-PROTOCOL.md +292 -0
  298. package/system/protocols/pipeline/PIPELINE-JARVIS-v2.1.md +606 -0
  299. package/system/protocols/pipeline/PROMPT-1.1-CHUNKING.md +154 -0
  300. package/system/protocols/pipeline/PROMPT-1.2-ENTITY-RESOLUTION.md +186 -0
  301. package/system/protocols/pipeline/PROMPT-2.1-DNA-TAGS-INCREMENT.md +208 -0
  302. package/system/protocols/pipeline/PROMPT-2.1-INSIGHT-EXTRACTION.md +191 -0
  303. package/system/protocols/pipeline/PROMPT-3.1-NARRATIVE-SYNTHESIS.md +331 -0
  304. package/system/protocols/pipeline/SOURCES-COMPILATION-PROTOCOL.md +340 -0
  305. package/system/protocols/system/AUTO-LOG-PROTOCOL.md +369 -0
  306. package/system/protocols/system/CHECKPOINT-ENFORCEMENT.md +176 -0
  307. package/system/protocols/system/ENFORCEMENT.md +435 -0
  308. package/system/protocols/system/LOG-TEMPLATES.md +1068 -0
@@ -0,0 +1,276 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ FASE 2.5 - Script de Tagueamento de Arquivos INBOX
4
+ Mega Brain - Sistema de Inteligencia de Negocios
5
+
6
+ Este script:
7
+ 1. Le a planilha de controle e extrai mapeamento nome -> TAG
8
+ 2. Varre todos os arquivos do INBOX recursivamente
9
+ 3. Faz matching entre arquivos e TAGs
10
+ 4. Renomeia arquivos com prefixo [TAG]
11
+ 5. Gera relatorio de arquivos processados vs orfaos
12
+ """
13
+
14
+ import os
15
+ import re
16
+ import json
17
+ from pathlib import Path
18
+ from datetime import datetime
19
+
20
# Configuration. All paths are relative, so they resolve against the
# directory the script is started from.
INBOX_PATH = "inbox"
SCHEMA_PATH = ".claude/mission-control/SPREADSHEET-SCHEMA.json"
OUTPUT_PATH = ".claude/mission-control/TAG-MAPPING-REPORT.json"

# Maps an INBOX folder name (upper-case) to its TAG prefix.
# A list value means several prefixes exist for that folder; a None value
# means the folder has no prefix yet, so its files end up as orphans.
#
# Order matters: the lookup returns the first key found as a substring of the
# upper-cased path, so a key that contains another key must be listed first.
# "JEREMY HAYNES PROGRAM" therefore precedes "JEREMY HAYNES"; in the opposite
# order the "CA" entry could never be selected.
FOLDER_TO_PREFIX = {
    "JEREMY MINER": "JM",
    "JEREMY HAYNES PROGRAM": "CA",
    "JEREMY HAYNES": ["JH-ST", "JH-IC", "JH-WK", "AOBA", "PCVP", "LYFC", "MMM", "30DC", "STA", "UHTC"],
    "THE SCALABLE COMPANY": "TSC",
    "ALEX HORMOZI": "AH",
    "SAM OVEN (SETTERLUN UNIVERSITY)": None,  # Not in the spreadsheet yet
    "SETTERLUN (SETTERLUN UNIVERSITY)": None,
}
35
+
36
def extract_number_from_filename(filename):
    """Return the number that starts *filename*, or None if there is none.

    Recognised shapes (the number must be followed by a separator):
        "6 - 42 Minutes of Sales Training.txt"  -> 6
        "112. How To Get Prospects.txt"         -> 112
        "44 LIVE CALL A Masterclass.txt"        -> 44

    A digit run glued directly to other text ("123abc.txt") is not treated as
    a sequence number and yields None.

    Note that 0 is a valid result, so callers must compare against None
    rather than rely on truthiness.
    """
    # One pattern covers every accepted form: digits followed by either
    # optional whitespace plus "-" or ".", or by at least one whitespace
    # character. The former separate "123. " pattern could never fire because
    # the "-"/"." form already matched it.
    match = re.match(r'^(\d+)(?:\s*[-.]|\s+)', filename)
    if match is None:
        return None
    return int(match.group(1))
54
+
55
def clean_filename_for_matching(filename):
    """Reduce *filename* to a lower-case, punctuation-free name for matching.

    The extension, a trailing "_<14 digits>" timestamp and a leading
    "<number> -" / "<number>." prefix are dropped; every character that is
    neither a word character nor whitespace becomes a space; runs of
    whitespace collapse to a single space.
    """
    stem, _extension = os.path.splitext(filename)

    # Structural noise is removed before lower-casing and punctuation cleanup:
    # first the trailing timestamp, then the leading sequence number.
    for noise in (r'_\d{14}$', r'^\d+\s*[-\.]\s*'):
        stem = re.sub(noise, '', stem)

    words_only = re.sub(r'[^\w\s]', ' ', stem.lower())
    return re.sub(r'\s+', ' ', words_only).strip()
68
+
69
def determine_prefix_from_path(filepath, folder_map=None):
    """Return the TAG prefix registered for the folder that *filepath* is in.

    The path is upper-cased and searched for each known folder name as a
    substring. Longer folder names are tried first so that a specific entry
    such as "JEREMY HAYNES PROGRAM" is not shadowed by a shorter name it
    contains ("JEREMY HAYNES").

    Args:
        filepath: Path (str or Path) of the file being classified.
        folder_map: Optional mapping of upper-case folder name to prefix.
            Defaults to the module-level FOLDER_TO_PREFIX.

    Returns:
        The mapped value (a str, a list of str, or None) for the most
        specific folder name found in the path; None when nothing matches.
    """
    if folder_map is None:
        folder_map = FOLDER_TO_PREFIX

    path_str = str(filepath).upper()

    # sorted() is stable, so equally long names keep their mapping order.
    for folder in sorted(folder_map, key=len, reverse=True):
        if folder in path_str:
            return folder_map[folder]
    return None
77
+
78
def scan_inbox_files():
    """Walk INBOX_PATH recursively and describe every .txt/.docx/.pdf file.

    Directories whose path contains "_BACKUP" or "_TEMPLATE" contribute no
    files.

    Returns:
        A list of dicts, one per file, with keys 'path', 'filename', 'folder',
        'parent_folder', 'number', 'clean_name', 'has_tag' and
        'suggested_prefix'.
    """
    wanted_suffixes = ('.txt', '.docx', '.pdf')
    tag_pattern = re.compile(r'^\[[\w-]+\]')
    found = []

    for root, _dirs, filenames in os.walk(INBOX_PATH):
        # The walk still descends into these folders, but every descendant
        # path contains the same marker and is skipped here as well.
        if any(marker in root for marker in ('_BACKUP', '_TEMPLATE')):
            continue

        folder = os.path.basename(root)
        parent_folder = os.path.basename(os.path.dirname(root))

        for filename in filenames:
            if os.path.splitext(filename)[1].lower() not in wanted_suffixes:
                continue

            filepath = Path(root) / filename
            found.append({
                'path': str(filepath),
                'filename': filename,
                'folder': folder,
                'parent_folder': parent_folder,
                'number': extract_number_from_filename(filename),
                'clean_name': clean_filename_for_matching(filename),
                # True when the name already starts with a "[TAG]" marker.
                'has_tag': tag_pattern.match(filename) is not None,
                'suggested_prefix': determine_prefix_from_path(filepath)
            })

    return found
108
+
109
def generate_tag_from_number(prefix, number):
    """Build a TAG of the form "<PREFIX>-<NNNN>" (number zero-padded to 4).

    When *prefix* is a list, its first element is used as the default prefix.
    """
    chosen = prefix[0] if isinstance(prefix, list) else prefix
    return "{}-{:04d}".format(chosen, number)
114
+
115
def rename_file_with_tag(filepath, tag):
    """Rename *filepath* in place to "[<tag>] <original name>".

    Returns:
        (new_path, None) on success, or (None, error_message) when the target
        name already exists or the rename itself fails.
    """
    source = Path(filepath)
    target = source.parent / f"[{tag}] {source.name}"

    # Refuse to touch an existing target rather than risk overwriting it.
    if target.exists():
        return None, "Destino ja existe"

    try:
        source.rename(target)
    except Exception as exc:
        # Failures are reported to the caller instead of aborting the batch.
        return None, str(exc)
    else:
        return str(target), None
130
+
131
def execute_rename(report, dry_run=False):
    """Rename, or with *dry_run* merely preview, every entry in report['to_tag'].

    Args:
        report: Dict whose 'to_tag' list holds items with 'current_path',
            'suggested_tag' and 'filename'.
        dry_run: When true, only print the planned renames; no file is touched.

    Returns:
        (success, errors): the number of handled files and a list of
        {'path', 'error'} dicts for the failed renames.
    """
    banner = "=" * 60
    print()
    print(banner)
    print("EXECUTANDO RENOMEACAO" + (" (DRY RUN)" if dry_run else ""))
    print(banner)
    print()

    pending = report['to_tag']
    success = 0
    errors = []

    for item in pending:
        filepath = item['current_path']
        tag = item['suggested_tag']

        if dry_run:
            current_name = Path(filepath).name
            print(f" [DRY] {current_name}")
            print(f" -> [{tag}] {current_name}")
            success += 1
            continue

        _new_path, error = rename_file_with_tag(filepath, tag)
        if error:
            errors.append({'path': filepath, 'error': error})
            print(f" [ERRO] {item['filename']}: {error}")
            continue

        success += 1
        # Progress line every 50 successful renames.
        if success % 50 == 0:
            print(f" Renomeados: {success}/{len(pending)}")

    print()
    print(banner)
    print("RESULTADO")
    print(banner)
    print(f" Sucesso: {success}")
    print(f" Erros: {len(errors)}")

    if errors:
        print()
        print("ERROS:")
        shown = errors[:10]  # Only the first 10 are listed in full
        for failure in shown:
            print(f" - {failure['path']}: {failure['error']}")
        hidden = len(errors) - 10
        if hidden > 0:
            print(f" ... e mais {hidden} erros")

    return success, errors
178
+
179
+
180
def main(execute=False, dry_run=False):
    """Classify the INBOX files, write the tagging report and optionally rename.

    Steps:
        1. Load the spreadsheet schema file.
        2. Scan the INBOX.
        3. Split files into already tagged / to tag / orphans.
        4. Write the report to OUTPUT_PATH.
        5. When *execute* or *dry_run* is set, run (or simulate) the renames
           and rewrite the report with the outcome.

    Args:
        execute: Rename the files listed under 'to_tag'.
        dry_run: Only print what would be renamed. Takes effect on its own;
            combined with *execute* it still only simulates.

    Returns:
        The report dict that was written to OUTPUT_PATH.
    """
    print("=" * 60)
    print("FASE 2.5 - TAGUEAMENTO DE ARQUIVOS INBOX")
    print("=" * 60)
    print()

    # 1. Load schema
    print("[1/4] Carregando schema...")
    with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
        # The parsed schema is not consumed below; loading it only checks
        # that the file exists and is valid JSON before any work is done.
        json.load(f)

    # 2. Scan files
    print("[2/4] Escaneando INBOX...")
    files = scan_inbox_files()
    print(f" Encontrados: {len(files)} arquivos")

    # 3. Classify files
    print("[3/4] Classificando arquivos...")

    already_tagged = []
    to_tag = []
    orphans = []
    for f in files:
        if f['has_tag']:
            already_tagged.append(f)
        # `is not None` rather than truthiness: a file numbered 0 is taggable.
        elif f['suggested_prefix'] and f['number'] is not None:
            to_tag.append(f)
        else:
            orphans.append(f)

    print(f" Ja tagueados: {len(already_tagged)}")
    print(f" Para taguear: {len(to_tag)}")
    print(f" Orfaos: {len(orphans)}")

    # 4. Build report
    print("[4/4] Gerando relatorio...")

    report = {
        'timestamp': datetime.now().isoformat(),
        'summary': {
            'total_files': len(files),
            'already_tagged': len(already_tagged),
            'to_tag': len(to_tag),
            'orphans': len(orphans)
        },
        # generate_tag_from_number already picks the first prefix of a list.
        'to_tag': [
            {
                'current_path': f['path'],
                'suggested_tag': generate_tag_from_number(f['suggested_prefix'], f['number']),
                'filename': f['filename'],
                'folder': f['folder']
            }
            for f in to_tag
        ],
        'orphans': [
            {
                'path': f['path'],
                'filename': f['filename'],
                'folder': f['folder'],
                'reason': 'Sem prefixo conhecido' if not f['suggested_prefix'] else 'Sem numero no nome'
            }
            for f in orphans
        ]
    }

    # Save report
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print()
    print("=" * 60)
    print("RELATORIO GERADO")
    print("=" * 60)
    print(f"Arquivo: {OUTPUT_PATH}")

    # 5. Rename (or simulate) if requested. --dry-run on its own must work,
    #    because the hint printed below advertises it as a standalone option.
    if execute or dry_run:
        success, errors = execute_rename(report, dry_run)
        report['execution'] = {
            # Recorded so a simulated run is not mistaken for a real one.
            'dry_run': dry_run,
            'success': success,
            'errors': len(errors),
            'error_details': errors
        }
        # Rewrite the report with the outcome
        with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
            json.dump(report, f, ensure_ascii=False, indent=2)
    else:
        print()
        print("PROXIMO PASSO: Executar com --execute para renomear")
        print(" Ou --dry-run para simular")

    print()
    return report
270
+
271
+
272
if __name__ == '__main__':
    import sys

    # Flags are detected by plain presence anywhere on the command line.
    execute = '--execute' in sys.argv
    dry_run = '--dry-run' in sys.argv
    main(dry_run=dry_run, execute=execute)
@@ -0,0 +1,253 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ FASE 2.5 v2 - Tagueamento por MATCHING DE NOMES
4
+ Mega Brain - Sistema de Inteligência de Negócios
5
+
6
+ ABORDAGEM CORRETA:
7
+ 1. Extrair de TODAS as abas: nome_video → TAG
8
+ 2. Para cada arquivo INBOX, fazer matching por nome similar
9
+ 3. Se match encontrado, usar TAG da planilha
10
+ 4. Se não, manter órfão
11
+ """
12
+
13
+ import os
14
+ import re
15
+ import json
16
+ from pathlib import Path
17
+ from datetime import datetime
18
+ from difflib import SequenceMatcher
19
+
20
# Configuration. Everything except the inbox lives in the mission-control
# folder; all paths are relative to the working directory.
_MISSION_CONTROL_DIR = ".claude/mission-control"

INBOX_PATH = "inbox"
SCHEMA_PATH = _MISSION_CONTROL_DIR + "/SPREADSHEET-SCHEMA.json"
INDEX_PATH = _MISSION_CONTROL_DIR + "/PLANILHA-INDEX.json"
OUTPUT_PATH = _MISSION_CONTROL_DIR + "/TAG-MAPPING-V2.json"
25
+
26
def normalize_name(name):
    """Normalize a file or video name so that near-identical titles compare equal.

    Steps, in order: drop the extension, lower-case, drop a trailing
    "_<14 digits>" timestamp, drop dates such as 12-25-24, drop a leading
    sequence number, drop bracketed fragments such as "[youtube.com...]",
    drop copy counters such as "(1)", turn remaining punctuation into spaces
    and collapse whitespace.

    Returns:
        The normalized name; may be an empty string.
    """
    # Remove extension
    name = os.path.splitext(name)[0]
    # Lowercase
    name = name.lower()
    # Remove timestamps
    name = re.sub(r'_\d{14}$', '', name)
    name = re.sub(r'\d{1,2}-\d{1,2}-\d{2,4}', '', name)  # Dates like 12-25-24
    # Remove leading number
    name = re.sub(r'^\d+[\s\.\-]+', '', name)
    # Remove [youtube.com...] and similar bracketed fragments
    name = re.sub(r'\[youtube\.com[^\]]*\]', '', name)
    name = re.sub(r'\[[^\]]*\]', '', name)
    # Remove (1), (2), etc. The counter is replaced by a space, not by
    # nothing: the pattern also consumes the surrounding whitespace, so an
    # empty replacement would fuse the neighbouring words
    # ("call (1) part" -> "callpart"). Extra spaces are collapsed below.
    name = re.sub(r'\s*\(\d+\)\s*', ' ', name)
    # Remove special characters
    name = re.sub(r'[^\w\s]', ' ', name)
    name = re.sub(r'\s+', ' ', name).strip()
    return name
46
+
47
def similar(a, b):
    """Return the difflib similarity ratio of *a* and *b* (0.0 to 1.0)."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
50
+
51
def load_planilha_index():
    """Load the spreadsheet index (name -> TAG) from INDEX_PATH.

    Returns:
        The parsed JSON content, or None when the index file does not exist.
    """
    if not os.path.exists(INDEX_PATH):
        return None

    with open(INDEX_PATH, 'r', encoding='utf-8') as index_file:
        return json.load(index_file)
57
+
58
def scan_inbox_files():
    """Walk INBOX_PATH recursively and describe every .txt/.docx/.pdf file.

    Directories whose path contains "_BACKUP" or "_TEMPLATE" contribute no
    files.

    Returns:
        A list of dicts, one per file, with keys 'path', 'filename', 'folder',
        'parent_folder', 'normalized' and 'has_tag'.
    """
    wanted_suffixes = ('.txt', '.docx', '.pdf')
    tag_pattern = re.compile(r'^\[[\w-]+\]')
    found = []

    for root, _dirs, filenames in os.walk(INBOX_PATH):
        if any(marker in root for marker in ('_BACKUP', '_TEMPLATE')):
            continue

        folder = os.path.basename(root)
        parent_folder = os.path.basename(os.path.dirname(root))

        for filename in filenames:
            if os.path.splitext(filename)[1].lower() not in wanted_suffixes:
                continue

            found.append({
                'path': str(Path(root) / filename),
                'filename': filename,
                'folder': folder,
                'parent_folder': parent_folder,
                'normalized': normalize_name(filename),
                # True when the name already starts with a "[TAG]" marker.
                'has_tag': tag_pattern.match(filename) is not None
            })

    return found
85
+
86
def match_file_to_index(file_info, index, threshold=0.7):
    """Find the index entry whose normalized name best matches the file.

    Args:
        file_info: Dict with the file's 'normalized' name.
        index: Dict with an 'entries' list; each entry has a 'normalized' name.
        threshold: Minimum similarity ratio for a match to be accepted.

    Returns:
        (entry, score) for the best-scoring entry (the earliest one on ties)
        when its score is positive and at least *threshold*; otherwise
        (None, 0).
    """
    target = file_info['normalized']
    if not target:
        return None, 0

    # Score every entry with the same difflib ratio the `similar` helper uses.
    scored = [
        (SequenceMatcher(None, target, entry['normalized']).ratio(), entry)
        for entry in index['entries']
    ]
    if not scored:
        return None, 0

    # max() returns the first maximal element, so ties keep the earliest entry.
    top_score, top_entry = max(scored, key=lambda pair: pair[0])
    if top_score > 0 and top_score >= threshold:
        return top_entry, top_score
    return None, 0
104
+
105
def _rename_matched_files(matched):
    """Rename each matched file to "[<tag>] <name>"; return (success, errors)."""
    success = 0
    errors = []

    for m in matched:
        filepath = Path(m['file']['path'])
        tag = m['match']['tag']
        new_path = filepath.parent / f"[{tag}] {filepath.name}"

        # Never overwrite a file that already carries the target name.
        if new_path.exists():
            errors.append({'path': str(filepath), 'error': 'Destino já existe'})
            continue

        try:
            filepath.rename(new_path)
        except Exception as e:
            # One failed rename is recorded and must not abort the batch.
            errors.append({'path': str(filepath), 'error': str(e)})
            continue

        success += 1
        if success % 50 == 0:
            print(f" Renomeados: {success}/{len(matched)}")

    return success, errors


def main(execute=False, threshold=0.7):
    """Match INBOX files to spreadsheet entries by name and optionally rename.

    Steps:
        1. Load the spreadsheet index (stops early if it is missing).
        2. Scan the INBOX.
        3. Match every untagged file against the index.
        4. Write the report to OUTPUT_PATH.
        5. When *execute* is set and there are matches, rename the matched
           files and rewrite the report with the outcome.

    Args:
        execute: Rename matched files instead of only reporting.
        threshold: Minimum similarity ratio for a match.

    Returns:
        The report dict, or None when the index could not be loaded.
    """
    print("=" * 60)
    print("FASE 2.5 v2 - TAGUEAMENTO POR MATCHING")
    print("=" * 60)
    print()

    # 1. Load spreadsheet index
    print("[1/4] Carregando índice da planilha...")
    index = load_planilha_index()
    if not index:
        print(" ERRO: Índice não encontrado!")
        print(" Execute primeiro: /criar-indice-planilha")
        return None
    print(f" {len(index['entries'])} entradas no índice")

    # 2. Scan INBOX
    print("[2/4] Escaneando INBOX...")
    files = scan_inbox_files()
    print(f" {len(files)} arquivos encontrados")

    # 3. Matching
    print(f"[3/4] Matching (threshold={threshold})...")

    matched = []
    orphans = []
    already_tagged = []

    for f in files:
        if f['has_tag']:
            already_tagged.append(f)
            continue

        match, score = match_file_to_index(f, index, threshold)
        if match:
            matched.append({'file': f, 'match': match, 'score': score})
        else:
            orphans.append(f)

    print(f" Já tagueados: {len(already_tagged)}")
    print(f" Match encontrado: {len(matched)}")
    print(f" Órfãos: {len(orphans)}")

    # 4. Build report
    print("[4/4] Gerando relatório...")

    report = {
        'timestamp': datetime.now().isoformat(),
        'threshold': threshold,
        'summary': {
            'total_files': len(files),
            'already_tagged': len(already_tagged),
            'matched': len(matched),
            'orphans': len(orphans)
        },
        'matches': [
            {
                'current_path': m['file']['path'],
                'filename': m['file']['filename'],
                'folder': m['file']['folder'],
                'matched_to': m['match']['original_name'],
                'tag': m['match']['tag'],
                'sheet': m['match']['sheet'],
                'score': round(m['score'], 3)
            }
            for m in matched
        ],
        'orphans': [
            {
                'path': o['path'],
                'filename': o['filename'],
                'folder': o['folder'],
                'normalized': o['normalized']
            }
            for o in orphans
        ]
    }

    # Save report
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print()
    print("=" * 60)
    print("RELATÓRIO GERADO")
    print("=" * 60)
    print(f"Arquivo: {OUTPUT_PATH}")

    # 5. Rename if requested
    if execute and matched:
        print()
        print("=" * 60)
        print("EXECUTANDO RENOMEAÇÃO")
        print("=" * 60)

        success, errors = _rename_matched_files(matched)

        print()
        print(f"Sucesso: {success}")
        print(f"Erros: {len(errors)}")

        report['execution'] = {
            'success': success,
            'errors': len(errors),
            'error_details': errors
        }

        with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
            json.dump(report, f, ensure_ascii=False, indent=2)

    elif execute:
        # --execute was given but nothing matched: say so, instead of
        # telling the user to run with --execute again.
        print()
        print("Nenhum match encontrado - nada para renomear")
        print(f" Threshold atual: {threshold}")
        print(" Use --threshold=0.6 para matching mais flexível")

    else:
        print()
        print("PRÓXIMO PASSO: Execute com --execute para renomear")
        print(f" Threshold atual: {threshold}")
        print(" Use --threshold=0.6 para matching mais flexível")

    return report
244
+
245
+
246
if __name__ == '__main__':
    import sys

    execute = '--execute' in sys.argv

    # Collect every --threshold=<value>; the last one given wins and 0.7 is
    # used when the option is absent.
    given = [
        float(arg.split('=')[1])
        for arg in sys.argv
        if arg.startswith('--threshold=')
    ]
    threshold = given[-1] if given else 0.7

    main(execute=execute, threshold=threshold)
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test extraction of text from base64-encoded .docx
4
+ """
5
+
6
+ import base64
7
+ import subprocess
8
+ import tempfile
9
+ import os
10
+
11
+ # Base64 content from Google Drive (truncated for test - will use full content)
12
+ base64_content = """UEsDBBQACAgIACc2JVwAAAAAAAAAAAAAAAASAAAAd29yZC9udW1iZXJpbmcueG1s7VrLjtowFP2C/gOK1OWQJ4FBA7PoaKpWo6pqpx9gEkMs/IhsB2a+oYvu2m2/rV9SJyHhTRMaBEheBe71PfYxh3t0pdzdvxDcmkEuEKMDw25bRgvSgIWITgbGt+fHm57REhLQEGBG4cB4hcK4H765m/dpQkaQq3UtBUFFnwQDI5Iy7pumCCJIgGizGFKVHDNOgFRf+cQkgE+T+CZgJAYSjRBG8tV0LMs3FjBsYCSc9hcQNwQFnAk2lmlJn43HKICLR1HBq+yblzywICGQymxHk0OszsCoiFAsCjRyLJpKRgXI7BCJGcHFunlcZbeQg7m6Z4LzjeaMhzFnARRCRR/yZIloWxUuMIUoK6ocYX3P4iQEIFrCpOrYACr3bqu9F5eWQS2JLO9C4CoHyVNPaMQBf90+BTjiPlfrY1RJxRsIqkomvBTkMRBBBLgsAPAxCJgFUxi+A3QGSjGHk0py3kAKEZhwQJYiFbV+WdvakMvXCMRwiTb5P7T3nCXxUu7eMWgr/0C7Uw/A2QLw6wH0tgEC+HIchqkqV3FQWA/HL3FQ0Q+qAUggpiJFuDXDRRfMQuVfYLsp78RRH9PaFMqxVEMGQdaQjaEyGDASkqvAp4S01r59UCztbAmeYZVC6jEwrCyivIpLFZsBnC4yh7lTPZIyGMIAEYDzlKp8hi9l7q3dLuMfgyKK4Vjm4fgzTx+IhiqXhgdG11GWOe9HgE4yz3R9K11rlot5/kgKNKp8dLEiS5nZbpts7IpsMJtD/gSlhHw3I6c2I9vzTkLJqUPpCyOA7mbk1mbk2P5JGLlNSM6rT6fXOwkdrynNdWpTUgxOQqnTkOb82ow89zSNwW9Cc93adDrWaZpCtynN9epT6p6mK/Qa0txtbUa+11RjMNcM959u7BztxqMEYyh3XsCfXz+uzIwPkvl+bT58iM3P39fmwc0K7dwW3KzSzu2+zSrt3M7brNLO7bzNKu3cxtus0s7tuc0q7dy+23rq1dPvtTivnn4v3X/19HvhJqyn30t2Y09Pv3r6vWwP1tPv5bqvnn4v13j19HuBftvRfqv9Vvut9lvtt9pvtd824bc081m6+m7Vmumu0TSzlVtlzv4y50CZu7/MPVDm7S/zDpR19pd1VsvMlRecg39BLRwiffbMDUQIVcjZ93vxiwcNT9w4T8y1SycPygam3DNJkExw1DDBjTPBadmcct6QoAtFA4PkhWcNL9w4L7RJRGPGNfXvCG7s5I8OevMgU4ZHM3NCUteDPUTGeLrRMMbNb0LX2wi6iGjKZQJGg+WuxI9DbZX5csgyRUGfIPli44ab54spAPfBVUi9IPlg65b44II38s4u+SIIg3SBQ28uID6DXPfgY/V3wmX/9AaD09fxm6+Wy/7o9Oxk/2zyT193909JMDntnh4dvdgP0m3/tAl/V0LmR0u9jjZvxCwJLjpyCyL3mlqggSRdCX90ntkFJvic7Nghx3MOW+6eT6VgToNdHWsX8sEy/VeWowiRkx9/Rk5eEWvrFvj7pCgz8JuyFkFA/H2CC5LmguoxNh4wOflRA9cbwlIfp3C8nhbBzH4uYxokg+82DP4ZGPx59/iQPHjOB4nT4dK2yLEakX1raZSgO+qLgDj+BUU17pS59KlwVFgKvI263NCMDRRwvYpC7bbydDmwRsPjv4zHiz7J5ExJFhA3vzFUu7pXaIdopiI8K9oIgRLPJx8k9S5A2tO+lYAHooTI18shUBq+/mV8/VTFYnIMP3pOBwyrlSKcNyAG78bw/nCV2zQxXKIFgqXdHg6Z7uEX4HvfmdhxfYh8vRyapuHrX+jH8W0KLrY7dY1OA2LvQzA1pLEauHu+sNylNnEhMvZyIKCGsX8pYyuHGhPkVcZkWMr6GX3P5qwM7zGBR+XvqQauRleJNO7jwlbSoWJD5P
DlIE0Nh/8yDp9DQnYl7CRLJOTUgRIQx59dBegESwU0uekXljjtGSVyS0Pk8OXwWj/P4asZar01Zn5GuRZjeNtHoOoKbj5jUSI5ME1A7PyCS56itvabxkhz+Eh7TPs+tDjG5TxzhsjTwVeduRNhsYMbrHByHdlbvTDym7M3J/shBo4Pgq+acjckZAdLfNw9CdkmJ0XW3OtZaf8QV3F7KRNp+26G/z89uD+NKr6eC46TB9j0lqakj3XeiNKkL/BTiLC0g/ZnZIAVMZw/vc40ejB87ckj7NhByYOywn6LsHcRtyxuYekXDn8kXKPyILGqBzu3xByruYFaIkMeNshH0zYtpXNtWk4YFeQBzZgE5YHgGiroUNMw2aDp3VIJme/ocev09CzqV1oN4IspyvseoOlQaJBG1XJAnXZjVPmyrXLyIeKqQ2bWlS6BXGEisx6WQ63czILf1ZfnC4Y8UACaEIQV4sIvB+toFv6je+fVdgH9KdL5inZVDEzlCN8AWC4gRGZYDgvRMMPHmOEZwxoBmPTbIcdsipIJHhhzsBy+4eeZoSZbp2dKCDV6mGcdciRjptnkTxRxraAPLHsPJgFPWcypDTa8fhB8T5C7sWm6wWYXd2/TdLh/+OqYPOu+3H9xcNS91E0pkCVcriDHTrNdmjmZGGKSCt8SvB25ZuFqxsPlKmTczHrf3fdlhJkdBC5RujPtLOg7RUcqzTQi04rO0aoPL9SI+5OD7S54uFzFjIZPPsYnB7nxLa5UprQt6iWU/PLaqp5gmCyUMZ1S3G4ByxhLbY7VPpGRpOUyp+HyzHLVNX6eZ2pidnflEGF+xYbrYq0N+dG2aiGygFN0oxIyB98R4jZs6zcn+5N/nPxDSA2F/+4xSUE/g0JapbbMz16dvNgPqjHzt5MPMQNbKs4j6nNl8ZWJRYxBG2J94/mGLa0L5QoL87sAmU7+mDLt9CYxWOrQ+a5Mjm/fII3zhZB/5WpvkRSGRdafnw1OseNDAkz2aHe7jfOP4PPmk40n+FlpDuSHJQSqaqqtZ8RscIPiE/eUtSqF09te81iFz7H4V3L5TaNNZ/Y1YfCi09M3dl8pW34t7vAyT8+wsSA8LHbXmHkJymdfy/vHY/chVlGOr9av/h+AUEsHCElAgyk3IAAAeckBAFBLAwQUAAgICAAnNiVcAAAAAAAAAAAAAAAAEQAAAHdvcmQvc2V0dGluZ3MueG1spZZLktowEIZPkDtQ2oMfA2RCjZlFpiZZZFaQA8iSbKvQqyQZD7ePZFs2j1TKkBXS391fy0275ZfXT85mR6INlSIDySIGMyKQxFSUGfi9f58/g5mxUGDIpCAZOBEDXrdfXpqNIdY6LzNzBGE2HGWgslZtosiginBoFlIR4YyF1Bxat9VlxKE+1GqOJFfQ0pwyak9RGsdr0GNkBmotNj1izinS0sjC+pCNLAqKSP8TIvSUvF3Im0Q1J8K2GSNNmDuDFKaiygQaf5TmjFWAHP/1EEfOgl+jpmTDGjau0Jx1iRqpsdISEWOc+tYZB2ISTyigRwwRU45wmTOchEMqBoxvjivQkHvhcvdFa1Hjg4y1MGzKQTrTL5prqE+3p4AP1PM8XtFJXXxFcFG21kNDPoJAFdQ2ANgjBCbRgeDvUBzh0My4nNTOVyRMYakhH5vU3PXPJvFVu+wqqMhIK/+P9kPLWo3tvnyEdvYGJqv7AOkNYH0f4PkWgMjnY4zIRZ5zKL6Psx44NMyDaQALzcF4wrcI91OwlYZX4HYo/5Xjlj7Wo9LYDWSI2oEMtu5+ITwneK9rsj8p8i6d16zZHKEbEwmIvAMmBayZ3cN8Z6UKxq9p3Jm7O2Zc7br7aoCsgFsKyN3curiOPiQm3lRrOr2aPmV0kZPpnQ8iH1CpLm1eJhlgtKxs4vnW7bC7DttNXqa9LW1taWdrNxAhVyPn3S9GLQ3amd9T0J5GbRm05aitgrYatXXQ1l6rXN01o+LgyhCWXi8kY7Ih+Odov5H6eo
RvhO0fUEsHCLVCXjdPAgAAaAgAAFBLAwQUAAgICAAnNiVcAAAAAAAAAAAAAAAAEgAAAHdvcmQvZm9udFRhYmxlLnhtbKWVS27bMBCGT9A7CNzblI3USAXLQdEg3XTX9gATkpII84UhZcW3L2Xr4dpBICsrSRz93wzJn8Pt05tWyUGgl9bkZLVMSSIMs1yaMid//7wsHkniAxgOyhqRk6Pw5Gn3ZdtkhTXBJ1FufKZZTqoQXEapZ5XQ4JfWCRODhUUNIX5iSTXgvnYLZrWDIF+lkuFI12m6IR3G5qRGk3WIhZYMrbdFaCWZLQrJRPfoFTgl71nybFmthQmnjBSFijVY4yvpfE/Tc2kxWPWQw0eTOGjV/9e4Kdk4QhP3QqtzosYid2iZ8D6OPp+DA3GVTljAFjEoppTwf86+Eg3SDJjWGVegIfcy5u4W7YQaJzKuhVdTCjmHfslXBDzeVgEz1vNS7+QkF18RoirUOBhyDoJVgKEHqDkEZdle8B9gDjCYmZeT7HxF4hJKBD2a1N+1s6v0yi6/K3BipJWfo/1EW7vR7g9zaBcncPX1PsD6BrC5D/B4C2DibR6DRuUlR/L7OJuBI/t+MA0QwO99S/hGedcFT0PDEbhtyu9y4murbVHrNDZkYKeGTHbd5ZI0mQEdO8t3lKAI3W1pd+vs/gFQSwcIoGOcdbcBAAC3BgAAUEsDBBQACAgIACc2JVwAAAAAAAAAAAAAAAAPAAAAd29yZC9zdHlsZXMueG1s"""
13
+
14
+ # Decode and process
15
+ try:
16
+ docx_bytes = base64.b64decode(base64_content)
17
+ print(f"Decoded {len(docx_bytes)} bytes")
18
+
19
+ # Save to temp file
20
+ with open('/tmp/test_doc.docx', 'wb') as f:
21
+ f.write(docx_bytes)
22
+
23
+ print("Saved to /tmp/test_doc.docx")
24
+
25
+ # Check if it's a valid zip/docx
26
+ import zipfile
27
+ if zipfile.is_zipfile('/tmp/test_doc.docx'):
28
+ print("Valid ZIP/DOCX structure detected")
29
+ with zipfile.ZipFile('/tmp/test_doc.docx', 'r') as z:
30
+ print("Contents:", z.namelist()[:5])
31
+ else:
32
+ print("Not a valid ZIP file - content may be truncated")
33
+
34
+ except Exception as e:
35
+ print(f"Error: {e}")
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify extraction works with known base64 content
4
+ """
5
+
6
+ import base64
7
+ import tempfile
8
+ import zipfile
9
+ import re
10
+ from xml.etree import ElementTree as ET
11
+
12
WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'


def extract_text_from_xml(xml_content: str) -> str:
    """Return the plain text contained in a WordprocessingML XML string.

    Elements are visited in document order. Text runs (``w:t``) contribute
    their text, each paragraph (``w:p``) starts on a new line, explicit
    breaks (``w:br``, ``w:cr``) become newlines and tab characters
    (``w:tab`` outside a ``w:tabs`` tab-stop list) become whitespace so the
    words on either side are not fused together.

    Args:
        xml_content: The XML source, e.g. the contents of
            ``word/document.xml``.

    Returns:
        The extracted text with runs of spaces/tabs collapsed to a single
        space, blank-line runs collapsed to one blank line, and leading and
        trailing whitespace removed. If the XML cannot be parsed, the raw
        contents of all ``<w:t>`` elements joined by single spaces.
    """
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        # Malformed XML: fall back to scraping the <w:t> runs with a regex.
        texts = re.findall(r'<w:t[^>]*>([^<]*)</w:t>', xml_content)
        return ' '.join(texts)

    text_tag = f'{WORD_NAMESPACE}t'
    paragraph_tag = f'{WORD_NAMESPACE}p'
    tab_tag = f'{WORD_NAMESPACE}tab'
    newline_tags = {f'{WORD_NAMESPACE}br', f'{WORD_NAMESPACE}cr'}

    # <w:tab> is also the element name of tab-stop *definitions* listed under
    # <w:tabs> in paragraph properties; those are formatting, not content.
    tab_stops = {
        stop
        for tabs in root.iter(f'{WORD_NAMESPACE}tabs')
        for stop in tabs
    }

    texts = []
    for elem in root.iter():
        tag = elem.tag
        if tag == text_tag:
            if elem.text:
                texts.append(elem.text)
        elif tag == paragraph_tag:
            # A paragraph element is reached before its runs, so this newline
            # separates it from the text collected so far.
            if texts and not texts[-1].endswith('\n'):
                texts.append('\n')
        elif tag in newline_tags:
            texts.append('\n')
        elif tag == tab_tag and elem not in tab_stops:
            texts.append('\t')

    text = ''.join(texts)
    text = re.sub(r'[ \t]+', ' ', text)
    text = re.sub(r'\n\s*\n', '\n\n', text)
    return text.strip()
36
+
37
# Read the base64-encoded .docx from TEST_FILE, decode it, and print the text
# extracted from word/document.xml.
TEST_FILE = ".claude/temp/full_base64.txt"

try:
    with open(TEST_FILE, 'r') as f:
        base64_content = f.read().strip()

    print(f"Read {len(base64_content)} chars of base64")

    docx_bytes = base64.b64decode(base64_content)
    print(f"Decoded to {len(docx_bytes)} bytes")

    # Save temporarily and check structure. The temporary file deletes itself
    # when the block exits, so repeated runs do not accumulate stray .docx
    # files in the temp directory.
    with tempfile.NamedTemporaryFile(suffix='.docx') as tmp:
        tmp.write(docx_bytes)
        tmp.flush()

        # Pass the open handle rather than the name: a temporary file that is
        # still open cannot be reopened by name on every platform.
        if zipfile.is_zipfile(tmp):
            print("Valid ZIP/DOCX structure")
            with zipfile.ZipFile(tmp, 'r') as z:
                files = z.namelist()
                print(f"Contains {len(files)} files")
                if 'word/document.xml' in files:
                    xml_content = z.read('word/document.xml').decode('utf-8')
                    print(f"document.xml: {len(xml_content)} chars")
                    text = extract_text_from_xml(xml_content)
                    print(f"\nExtracted text ({len(text)} chars, {len(text.split())} words):")
                    print("="*60)
                    # Only the first 2000 characters are shown.
                    print(text[:2000])
                    print("="*60)
        else:
            print("Not a valid ZIP file")

except FileNotFoundError:
    print(f"Test file not found: {TEST_FILE}")
    print("Please save base64 content to this file first")
except Exception as e:
    # Diagnostic script: report any other failure (invalid base64, corrupt
    # archive, undecodable XML) as a message rather than a traceback.
    print(f"Error: {e}")