mega-brain-ai 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mega-brain-ai might be problematic. Click here for more details.

Files changed (308) hide show
  1. package/.claude/CLAUDE.md +155 -0
  2. package/.claude/commands/agents.md +161 -0
  3. package/.claude/commands/ask.md +117 -0
  4. package/.claude/commands/benchmark.md +224 -0
  5. package/.claude/commands/chat.md +343 -0
  6. package/.claude/commands/compare.md +116 -0
  7. package/.claude/commands/conclave.md +194 -0
  8. package/.claude/commands/config.md +133 -0
  9. package/.claude/commands/council.md +194 -0
  10. package/.claude/commands/create-agent.md +452 -0
  11. package/.claude/commands/debate.md +157 -0
  12. package/.claude/commands/documentation/create-architecture-documentation.md +175 -0
  13. package/.claude/commands/dossiers.md +180 -0
  14. package/.claude/commands/evolve.md +223 -0
  15. package/.claude/commands/extract-dna.md +170 -0
  16. package/.claude/commands/extract-knowledge.md +507 -0
  17. package/.claude/commands/inbox.md +296 -0
  18. package/.claude/commands/ingest-empresa.md +191 -0
  19. package/.claude/commands/ingest.md +182 -0
  20. package/.claude/commands/jarvis-briefing.md +67 -0
  21. package/.claude/commands/jarvis-control.md +169 -0
  22. package/.claude/commands/jarvis-full.md +181 -0
  23. package/.claude/commands/jarvis.md +212 -0
  24. package/.claude/commands/ler-drive.md +212 -0
  25. package/.claude/commands/log.md +158 -0
  26. package/.claude/commands/loop.md +133 -0
  27. package/.claude/commands/loops.md +73 -0
  28. package/.claude/commands/mission-autopilot.md +538 -0
  29. package/.claude/commands/mission.md +353 -0
  30. package/.claude/commands/process-inbox.md +148 -0
  31. package/.claude/commands/process-jarvis.md +3036 -0
  32. package/.claude/commands/process-video.md +131 -0
  33. package/.claude/commands/rag-search.md +78 -0
  34. package/.claude/commands/resume.md +33 -0
  35. package/.claude/commands/save.md +38 -0
  36. package/.claude/commands/scan-inbox.md +125 -0
  37. package/.claude/commands/setup.md +99 -0
  38. package/.claude/commands/system-digest.md +243 -0
  39. package/.claude/commands/verify.md +182 -0
  40. package/.claude/commands/view-dna.md +169 -0
  41. package/.claude/hooks/agent_doctor.py +433 -0
  42. package/.claude/hooks/agent_memory_persister.py +203 -0
  43. package/.claude/hooks/auto_formatter.py +158 -0
  44. package/.claude/hooks/checkpoint_writer.py +244 -0
  45. package/.claude/hooks/claude_md_guard.py +146 -0
  46. package/.claude/hooks/creation_validator.py +357 -0
  47. package/.claude/hooks/enforce_dual_location.py +501 -0
  48. package/.claude/hooks/enforce_plan_mode.py +220 -0
  49. package/.claude/hooks/inbox_age_alert.py +367 -0
  50. package/.claude/hooks/jarvis_briefing.py +506 -0
  51. package/.claude/hooks/ledger_updater.py +301 -0
  52. package/.claude/hooks/memory_hints_injector.py +251 -0
  53. package/.claude/hooks/memory_updater.py +202 -0
  54. package/.claude/hooks/multi_agent_hook.py +464 -0
  55. package/.claude/hooks/notification_system.py +120 -0
  56. package/.claude/hooks/pattern_analyzer.py +526 -0
  57. package/.claude/hooks/pending_tracker.py +188 -0
  58. package/.claude/hooks/post_batch_cascading.py +1740 -0
  59. package/.claude/hooks/post_output_validator.py +358 -0
  60. package/.claude/hooks/post_tool_use.py +120 -0
  61. package/.claude/hooks/post_write_validator.py +200 -0
  62. package/.claude/hooks/quality_watchdog.py +394 -0
  63. package/.claude/hooks/ralph_wiggum.py +277 -0
  64. package/.claude/hooks/session-source-sync.py +218 -0
  65. package/.claude/hooks/session_autosave_v2.py +1135 -0
  66. package/.claude/hooks/session_end.py +203 -0
  67. package/.claude/hooks/session_start.py +939 -0
  68. package/.claude/hooks/skill_indexer.py +48 -0
  69. package/.claude/hooks/skill_router.py +358 -0
  70. package/.claude/hooks/stop_hook_completeness.py +178 -0
  71. package/.claude/hooks/subagent_tracker.py +163 -0
  72. package/.claude/hooks/token_checkpoint.py +584 -0
  73. package/.claude/hooks/user_prompt_submit.py +125 -0
  74. package/.claude/rules/ANTHROPIC-STANDARDS.md +384 -0
  75. package/.claude/rules/CLAUDE-LITE.md +201 -0
  76. package/.claude/rules/RULE-GROUP-1.md +320 -0
  77. package/.claude/rules/RULE-GROUP-2.md +307 -0
  78. package/.claude/rules/RULE-GROUP-3.md +248 -0
  79. package/.claude/rules/RULE-GROUP-4.md +427 -0
  80. package/.claude/rules/RULE-GROUP-5.md +388 -0
  81. package/.claude/rules/RULE-GROUP-6.md +387 -0
  82. package/.claude/rules/logging.md +53 -0
  83. package/.claude/rules/mcp-governance.md +128 -0
  84. package/.claude/rules/pipeline.md +60 -0
  85. package/.claude/rules/state-management.md +93 -0
  86. package/.claude/scripts/apply-tags.py +77 -0
  87. package/.claude/scripts/batch-extract-transcriptions.py +132 -0
  88. package/.claude/scripts/build-complete-index.py +250 -0
  89. package/.claude/scripts/build-planilha-index.py +170 -0
  90. package/.claude/scripts/complete-tag-matching.py +250 -0
  91. package/.claude/scripts/deduplicate-inbox.py +139 -0
  92. package/.claude/scripts/docx-xml-extractor.py +141 -0
  93. package/.claude/scripts/extract-docx-text.py +58 -0
  94. package/.claude/scripts/extract-single-transcription.py +74 -0
  95. package/.claude/scripts/extract_docx_from_gdrive.py +77 -0
  96. package/.claude/scripts/organized-downloader.py +246 -0
  97. package/.claude/scripts/planilha-tagger.py +187 -0
  98. package/.claude/scripts/revert-tags.py +70 -0
  99. package/.claude/scripts/source-sync.py +265 -0
  100. package/.claude/scripts/tag-inbox-files.py +276 -0
  101. package/.claude/scripts/tag-inbox-v2.py +253 -0
  102. package/.claude/scripts/test-extraction.py +35 -0
  103. package/.claude/scripts/test-full-extraction.py +74 -0
  104. package/.claude/skills/00-SKILL-CREATOR/SKILL.md +186 -0
  105. package/.claude/skills/01-SKILL-DOCS-MEGABRAIN/SKILL.md +251 -0
  106. package/.claude/skills/02-SKILL-PYTHON-MEGABRAIN/SKILL.md +323 -0
  107. package/.claude/skills/03-SKILL-AGENT-CREATION/SKILL.md +374 -0
  108. package/.claude/skills/04-SKILL-KNOWLEDGE-EXTRACTION/SKILL.md +318 -0
  109. package/.claude/skills/05-SKILL-PIPELINE-JARVIS/SKILL.md +430 -0
  110. package/.claude/skills/06-SKILL-BRAINSTORMING/SKILL.md +72 -0
  111. package/.claude/skills/07-SKILL-DISPATCHING-PARALLEL-AGENTS/SKILL.md +193 -0
  112. package/.claude/skills/08-SKILL-EXECUTING-PLANS/SKILL.md +114 -0
  113. package/.claude/skills/09-SKILL-WRITING-PLANS/SKILL.md +184 -0
  114. package/.claude/skills/10-SKILL-VERIFICATION-BEFORE-COMPLETION/SKILL.md +130 -0
  115. package/.claude/skills/11-SKILL-USING-SUPERPOWERS/SKILL.md +105 -0
  116. package/.claude/skills/DETECTION-PROTOCOL.md +217 -0
  117. package/.claude/skills/README.md +240 -0
  118. package/.claude/skills/SKILL-REGISTRY.md +284 -0
  119. package/.claude/skills/SKILL-SUGGESTIONS.md +114 -0
  120. package/.claude/skills/_TEMPLATES/SKILL-WRITER-GUIDE.md +385 -0
  121. package/.claude/skills/chronicler/SKILL.md +146 -0
  122. package/.claude/skills/chronicler/chronicler_core.py +468 -0
  123. package/.claude/skills/code-review/SKILL.md +160 -0
  124. package/.claude/skills/council/SKILL.md +210 -0
  125. package/.claude/skills/executor/SKILL.md +161 -0
  126. package/.claude/skills/fase-2-5-tagging/SKILL.md +182 -0
  127. package/.claude/skills/feature-dev/SKILL.md +154 -0
  128. package/.claude/skills/finance-agent/SKILL.md +137 -0
  129. package/.claude/skills/frontend-design/SKILL.md +165 -0
  130. package/.claude/skills/gdrive-transcription-downloader/SKILL.md +249 -0
  131. package/.claude/skills/gemini-fallback/SKILL.md +67 -0
  132. package/.claude/skills/gemini-fallback/gemini_fetch.py +0 -0
  133. package/.claude/skills/gha/SKILL.md +96 -0
  134. package/.claude/skills/gha/gha_diagnostic.py +227 -0
  135. package/.claude/skills/github-workflow/SKILL.md +190 -0
  136. package/.claude/skills/hookify/SKILL.md +134 -0
  137. package/.claude/skills/hybrid-source-reading/SKILL.md +265 -0
  138. package/.claude/skills/jarvis/SKILL.md +546 -0
  139. package/.claude/skills/jarvis-briefing/SKILL.md +340 -0
  140. package/.claude/skills/ler-planilha/SKILL.md +281 -0
  141. package/.claude/skills/plugin-dev/SKILL.md +176 -0
  142. package/.claude/skills/pr-review-toolkit/SKILL.md +178 -0
  143. package/.claude/skills/resume/SKILL.md +61 -0
  144. package/.claude/skills/save/SKILL.md +87 -0
  145. package/.claude/skills/skill-writer/SKILL.md +153 -0
  146. package/.claude/skills/skill-writer/examples.md +191 -0
  147. package/.claude/skills/skill-writer/troubleshooting.md +205 -0
  148. package/.claude/skills/smart-download-tagger/SKILL.md +148 -0
  149. package/.claude/skills/source-sync/SKILL.md +240 -0
  150. package/.claude/skills/sync-docs/SKILL.md +193 -0
  151. package/.claude/skills/sync-docs/config.json +37 -0
  152. package/.claude/skills/sync-docs/gdrive_sync.py +358 -0
  153. package/.claude/skills/sync-docs/reauth.py +71 -0
  154. package/.claude/skills/talent-agent/SKILL.md +183 -0
  155. package/.claude/skills/verify/SKILL.md +154 -0
  156. package/.claude/skills/verify/verify_runner.py +0 -0
  157. package/.claude/skills/verify-6-levels/SKILL.md +234 -0
  158. package/.claude/templates/BATCH-LOG-TEMPLATE.md +221 -0
  159. package/.claudeignore +9 -0
  160. package/.gitattributes +4 -0
  161. package/.github/layer1-allowlist.txt +80 -0
  162. package/.github/layer2-manifest.txt +40 -0
  163. package/.gitignore +219 -0
  164. package/README.md +1210 -0
  165. package/agents/_templates/INDEX.md +741 -0
  166. package/agents/_templates/TEMPLATE-AGENT-MD-ULTRA-ROBUSTO-V3.md +2399 -0
  167. package/agents/boardroom/CHECKLIST-MASTER.md +281 -0
  168. package/agents/boardroom/INTEGRATION-GUIDE.md +406 -0
  169. package/agents/boardroom/README.md +238 -0
  170. package/agents/boardroom/config/BOARDROOM-CONFIG.md +186 -0
  171. package/agents/boardroom/config/TTS-INTEGRATION.md +258 -0
  172. package/agents/boardroom/config/VOICE-PROFILES.md +624 -0
  173. package/agents/boardroom/config/voice_mapping.json +128 -0
  174. package/agents/boardroom/scripts/audio_generator.py +375 -0
  175. package/agents/boardroom/scripts/audio_generator_edge.py +353 -0
  176. package/agents/boardroom/scripts/jarvis_boardroom_hook.py +415 -0
  177. package/agents/boardroom/scripts/notebooklm_generator.py +578 -0
  178. package/agents/boardroom/templates/EPISODE-TEMPLATE.md +367 -0
  179. package/agents/boardroom/templates/scene-templates/SCENE-AGENT-DEBATE.md +252 -0
  180. package/agents/boardroom/templates/scene-templates/SCENE-COUNCIL.md +270 -0
  181. package/agents/boardroom/templates/scene-templates/SCENE-DNA-CONSULTATION.md +126 -0
  182. package/agents/boardroom/templates/scene-templates/SCENE-QUESTION.md +174 -0
  183. package/agents/boardroom/workflows/WORKFLOW-AUDIO-GENERATION.md +421 -0
  184. package/agents/constitution/BASE-CONSTITUTION.md +254 -0
  185. package/agents/council/CRITIC.md +197 -0
  186. package/agents/council/DEVILS-ADVOCATE.md +274 -0
  187. package/agents/council/SYNTHESIZER.md +293 -0
  188. package/agents/council/advogado-do-diabo/AGENT.md +489 -0
  189. package/agents/council/advogado-do-diabo/SOUL.md +100 -0
  190. package/agents/council/critico-metodologico/AGENT.md +670 -0
  191. package/agents/council/critico-metodologico/SOUL.md +107 -0
  192. package/agents/council/sintetizador/AGENT.md +558 -0
  193. package/agents/council/sintetizador/SOUL.md +94 -0
  194. package/agents/persons/_example/AGENT-EXAMPLE.md +42 -0
  195. package/agents/persons/_example/DNA-EXAMPLE.yaml +61 -0
  196. package/agents/protocols/AGENT-COGNITION-PROTOCOL.md +779 -0
  197. package/agents/protocols/AGENT-INTEGRITY-PROTOCOL.md +692 -0
  198. package/agents/protocols/BATCH-VISUAL-PROTOCOL.md +841 -0
  199. package/agents/protocols/DNA-CONFIG-TEMPLATE.yaml +181 -0
  200. package/agents/protocols/DNA-EXTRACTION-PROTOCOL.md +370 -0
  201. package/agents/protocols/EPISTEMIC-PROTOCOL.md +333 -0
  202. package/agents/protocols/LOG-STRUCTURE-PROTOCOL.md +65 -0
  203. package/agents/protocols/MEMORY-PROTOCOL.md +567 -0
  204. package/agents/protocols/NARRATIVE-SYNTHESIS-PROTOCOL.md +278 -0
  205. package/agents/protocols/PHASE-4-VERIFICATION-CHECKPOINT.md +146 -0
  206. package/agents/protocols/SOUL-TEMPLATE.md +416 -0
  207. package/agents/protocols/TEMPLATE-EVOLUTION-PROTOCOL.md +544 -0
  208. package/agents/protocols/VISUAL-DIFF-PROTOCOL.md +159 -0
  209. package/agents/sua-empresa/README.md +44 -0
  210. package/agents/sua-empresa/_example/jds/EXAMPLE-JD.md +42 -0
  211. package/agents/sua-empresa/_example/org/EXAMPLE-ORG.md +32 -0
  212. package/agents/sua-empresa/_example/roles/EXAMPLE-ROLE.md +38 -0
  213. package/bin/cli.js +2 -0
  214. package/bin/lib/ascii-art.js +234 -0
  215. package/bin/lib/installer.js +402 -0
  216. package/bin/lib/setup-wizard.js +95 -0
  217. package/bin/lib/validate-email.js +109 -0
  218. package/bin/mega-brain.js +97 -0
  219. package/bin/push.js +342 -0
  220. package/bin/templates/env.example +38 -0
  221. package/inbox/.gitkeep +0 -0
  222. package/integrations/README.md +46 -0
  223. package/integrations/mcps/MCP-REGISTRY.md +56 -0
  224. package/integrations/mcps/excalidraw/CONFIG.md +56 -0
  225. package/integrations/mcps/gdrive/CONFIG.md +38 -0
  226. package/knowledge/dna/.gitkeep +0 -0
  227. package/knowledge/dossiers/persons/.gitkeep +0 -0
  228. package/knowledge/dossiers/persons/DOSSIER-EXAMPLE.md +49 -0
  229. package/knowledge/dossiers/system/.gitkeep +0 -0
  230. package/knowledge/dossiers/themes/.gitkeep +0 -0
  231. package/knowledge/playbooks/.gitkeep +0 -0
  232. package/knowledge/playbooks/PLAYBOOK-EXAMPLE.md +50 -0
  233. package/knowledge/sources/.gitkeep +0 -0
  234. package/logs/.gitkeep +0 -0
  235. package/package.json +128 -0
  236. package/processing/canonical/.gitkeep +0 -0
  237. package/processing/chunks/.gitkeep +0 -0
  238. package/processing/insights/.gitkeep +0 -0
  239. package/processing/narratives/.gitkeep +0 -0
  240. package/reference/CONSELHO.md +337 -0
  241. package/reference/CONTEXT7_README.md +28 -0
  242. package/reference/JARVIS-LOGGING-PROTOCOL.md +380 -0
  243. package/reference/QUICK-START.md +197 -0
  244. package/reference/README-RALPH-CASCATEAMENTO.md +207 -0
  245. package/reference/TEMPLATE-MASTER.md +727 -0
  246. package/reference/prds/prd-jarvis-mega-brain-v3.md +1305 -0
  247. package/reference/templates/phase5/IMPLEMENTATION-GUIDE.md +355 -0
  248. package/reference/templates/phase5/MOGA-BRAIN-PHASE5-TEMPLATES.md +1284 -0
  249. package/reference/templates/phase5/README.md +165 -0
  250. package/reference/workflow-claude-code-boris-cherny-continuous-claude.md +2232 -0
  251. package/system/database/001_moneyclub_buyers.sql +160 -0
  252. package/system/database/002_premium_token.sql +97 -0
  253. package/system/database/apply-migration.mjs +129 -0
  254. package/system/docs/MEGA-BRAIN-DEMO-COMPLETA.md +1226 -0
  255. package/system/docs/MEGA-BRAIN-MANIFESTO-COMPLETO.md +1054 -0
  256. package/system/docs/MOGA-BRAIN-EXPLICACAO-COMPLETA.md +791 -0
  257. package/system/docs/STRATEGIC-INTEGRATION-GUIDE.md +725 -0
  258. package/system/docs/architecture/01-system-context.md +136 -0
  259. package/system/docs/architecture/02-components.md +225 -0
  260. package/system/docs/architecture/03-data-flow.md +235 -0
  261. package/system/docs/architecture/04-integrations.md +283 -0
  262. package/system/docs/architecture/README.md +71 -0
  263. package/system/docs/architecture/diagrams/component-diagram.mmd +50 -0
  264. package/system/docs/architecture/diagrams/data-flow.mmd +39 -0
  265. package/system/docs/architecture/diagrams/system-overview.mmd +68 -0
  266. package/system/protocols/AGENT-AUTHORITY.md +217 -0
  267. package/system/protocols/CONSTITUICAO-BASE.md +115 -0
  268. package/system/protocols/CONSTITUTION.md +231 -0
  269. package/system/protocols/GOVERNANCE-MAP.md +123 -0
  270. package/system/protocols/HOOK-SECURITY-THREAT-MODEL.md +152 -0
  271. package/system/protocols/ORQUESTRACAO-PROTOCOL.md +215 -0
  272. package/system/protocols/_archive/CHUNKING-PROTOCOL.md +207 -0
  273. package/system/protocols/_archive/ENTITY-RESOLUTION-PROTOCOL.md +269 -0
  274. package/system/protocols/_archive/INSIGHT-EXTRACTION-PROTOCOL.md +257 -0
  275. package/system/protocols/_archive/NARRATIVE-SYNTHESIS-PROTOCOL.md +290 -0
  276. package/system/protocols/agents/AGENT-INTERACTION.md +315 -0
  277. package/system/protocols/agents/CORTEX-PROTOCOL.md +520 -0
  278. package/system/protocols/agents/EPISTEMIC-PROTOCOL.md +465 -0
  279. package/system/protocols/agents/MEMORY-PROTOCOL.md +366 -0
  280. package/system/protocols/agents/WAR-ROOM.md +355 -0
  281. package/system/protocols/company/COMPANY-DOCUMENT-PROTOCOL.md +793 -0
  282. package/system/protocols/company/COMPANY-ENRICHMENT-PROTOCOL.md +679 -0
  283. package/system/protocols/conclave/CONCLAVE-LOG-TEMPLATE-v2.md +309 -0
  284. package/system/protocols/conclave/CONCLAVE-PROTOCOL.md +518 -0
  285. package/system/protocols/conclave/DEBATE-DYNAMICS-CONFIG.yaml +322 -0
  286. package/system/protocols/conclave/DEBATE-DYNAMICS-PROTOCOL.md +613 -0
  287. package/system/protocols/conclave/DEBATE-PROTOCOL.md +323 -0
  288. package/system/protocols/council/COUNCIL-LOG-TEMPLATE-v2.md +309 -0
  289. package/system/protocols/council/COUNCIL-PROTOCOL.md +518 -0
  290. package/system/protocols/council/DEBATE-DYNAMICS-CONFIG.yaml +322 -0
  291. package/system/protocols/council/DEBATE-DYNAMICS-PROTOCOL.md +613 -0
  292. package/system/protocols/council/DEBATE-PROTOCOL.md +323 -0
  293. package/system/protocols/dna/DNA-EXTRACTION-PROTOCOL.md +1214 -0
  294. package/system/protocols/dna/ENRICHMENT-PROTOCOL.md +408 -0
  295. package/system/protocols/dna/REASONING-MODEL-PROTOCOL.md +331 -0
  296. package/system/protocols/pipeline/DOSSIER-COMPILATION-PROTOCOL.md +790 -0
  297. package/system/protocols/pipeline/NARRATIVE-METABOLISM-PROTOCOL.md +292 -0
  298. package/system/protocols/pipeline/PIPELINE-JARVIS-v2.1.md +606 -0
  299. package/system/protocols/pipeline/PROMPT-1.1-CHUNKING.md +154 -0
  300. package/system/protocols/pipeline/PROMPT-1.2-ENTITY-RESOLUTION.md +186 -0
  301. package/system/protocols/pipeline/PROMPT-2.1-DNA-TAGS-INCREMENT.md +208 -0
  302. package/system/protocols/pipeline/PROMPT-2.1-INSIGHT-EXTRACTION.md +191 -0
  303. package/system/protocols/pipeline/PROMPT-3.1-NARRATIVE-SYNTHESIS.md +331 -0
  304. package/system/protocols/pipeline/SOURCES-COMPILATION-PROTOCOL.md +340 -0
  305. package/system/protocols/system/AUTO-LOG-PROTOCOL.md +369 -0
  306. package/system/protocols/system/CHECKPOINT-ENFORCEMENT.md +176 -0
  307. package/system/protocols/system/ENFORCEMENT.md +435 -0
  308. package/system/protocols/system/LOG-TEMPLATES.md +1068 -0
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ DOCX XML Text Extractor for Mega Brain
4
+ ---------------------------------------
5
+ Extrai texto de arquivos .docx usando parsing XML direto.
6
+ Não requer pandoc ou outras dependências externas.
7
+ """
8
+
9
+ import base64
10
+ import tempfile
11
+ import os
12
+ import sys
13
+ import re
14
+ import zipfile
15
+ from xml.etree import ElementTree as ET
16
+
17
# OOXML WordprocessingML namespace, in ElementTree's "{uri}" tag-prefix form.
WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'


def extract_text_from_xml(xml_content: str) -> str:
    """
    Extract plain text from the contents of a Word ``word/document.xml``.

    Text runs (``<w:t>``) are concatenated in document order. A newline is
    emitted where a new paragraph (``<w:p>``) starts and for every explicit
    break (``<w:br>`` / ``<w:cr>``). Tab characters inside runs (``<w:tab>``)
    become whitespace so the words on either side are not fused together;
    ``<w:tab>`` children of ``<w:tabs>`` are tab-stop *definitions* and
    produce no output.

    If the XML cannot be parsed, falls back to a regex over ``<w:t>`` tags
    and returns the matches joined by single spaces.

    Args:
        xml_content: The document XML as text.

    Returns:
        The extracted text with runs of spaces/tabs collapsed to one space,
        blank-line runs reduced to a single blank line, and leading/trailing
        whitespace removed.
    """
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        # Fallback to regex if XML parsing fails
        texts = re.findall(r'<w:t[^>]*>([^<]*)</w:t>', xml_content)
        return ' '.join(texts)

    text_tag = f'{WORD_NAMESPACE}t'
    paragraph_tag = f'{WORD_NAMESPACE}p'
    tab_tag = f'{WORD_NAMESPACE}tab'
    line_break_tags = {f'{WORD_NAMESPACE}br', f'{WORD_NAMESPACE}cr'}

    # <w:tab> is used both for a tab character in a run and for a tab-stop
    # definition inside <w:tabs>; only the former represents document text.
    tab_stop_definitions = {
        stop
        for holder in root.iter(f'{WORD_NAMESPACE}tabs')
        for stop in holder
    }

    texts = []
    for elem in root.iter():
        tag = elem.tag
        if tag == text_tag:
            if elem.text:
                texts.append(elem.text)
        elif tag == paragraph_tag:
            # The paragraph element is visited before its runs, so this
            # newline separates it from the text of the previous paragraph.
            if texts and not texts[-1].endswith('\n'):
                texts.append('\n')
        elif tag in line_break_tags:
            texts.append('\n')
        elif tag == tab_tag and elem not in tab_stop_definitions:
            texts.append('\t')

    # Join and clean up
    text = ''.join(texts)
    # Normalize whitespace but preserve paragraph breaks
    text = re.sub(r'[ \t]+', ' ', text)
    text = re.sub(r'\n\s*\n', '\n\n', text)
    return text.strip()
54
+
55
def extract_from_docx_bytes(docx_bytes: bytes) -> str:
    """
    Extract text from the raw bytes of a .docx file.

    The archive is read directly from memory, so no temporary file is
    created and nothing has to be cleaned up afterwards.

    Args:
        docx_bytes: Complete contents of the .docx (ZIP) file.

    Returns:
        The text produced by ``extract_text_from_xml`` for
        ``word/document.xml``, or a string starting with ``[ERROR]`` when the
        bytes are not a ZIP archive or the archive has no
        ``word/document.xml`` member.
    """
    import io  # local import: not part of this script's top-level imports

    buffer = io.BytesIO(docx_bytes)
    if not zipfile.is_zipfile(buffer):
        return "[ERROR] Invalid docx file (not a valid ZIP)"

    buffer.seek(0)
    with zipfile.ZipFile(buffer, 'r') as z:
        if 'word/document.xml' not in z.namelist():
            return "[ERROR] Invalid docx file (no document.xml)"
        xml_content = z.read('word/document.xml').decode('utf-8')

    return extract_text_from_xml(xml_content)
79
+
80
def process_base64_file(input_file: str) -> str:
    """
    Read a file holding a base64-encoded .docx and return its extracted text.

    Args:
        input_file: Path of the text file containing the base64 payload.

    Returns:
        Whatever ``extract_from_docx_bytes`` returns for the decoded bytes,
        or ``"[ERROR] Base64 decode failed: ..."`` if decoding raises.
    """
    with open(input_file, 'r') as handle:
        encoded = handle.read().strip()

    # Drop line breaks and spaces that may be embedded in the payload.
    removal_table = {ord(ch): None for ch in '\n\r '}
    compact = encoded.translate(removal_table)

    try:
        decoded = base64.b64decode(compact)
    except Exception as e:
        return f"[ERROR] Base64 decode failed: {e}"

    return extract_from_docx_bytes(decoded)
96
+
97
def save_transcription(text: str, tag: str, name: str, output_dir: str) -> str:
    """
    Save a transcription as ``[TAG] name.txt`` inside ``output_dir``.

    Args:
        text: Transcription text to write (UTF-8).
        tag: TAG placed in square brackets at the start of the file name.
        name: Original file name; every occurrence of ``.docx``, ``.mp4`` and
            ``.txt`` is removed, then surrounding spaces, dashes and dots
            are stripped.
        output_dir: Destination directory, created if missing. An empty
            string means the current working directory.

    Returns:
        Path of the file that was written.
    """
    # Clean the name
    clean_name = name
    for ext in ['.docx', '.mp4', '.txt']:
        clean_name = clean_name.replace(ext, '')
    clean_name = clean_name.strip(' -.')

    filename = f"[{tag}] {clean_name}.txt"
    # A path separator inside the tag or name would redirect the write into a
    # (usually non-existent) sub-directory, so neutralise it.
    for separator in ('/', os.sep, os.altsep):
        if separator:
            filename = filename.replace(separator, '_')
    filepath = os.path.join(output_dir, filename)

    # os.makedirs('') raises, so only create a directory when one was given.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(text)

    return filepath
116
+
117
if __name__ == "__main__":
    import argparse

    # Command line: one positional input file plus three optional values
    # that, when all present, switch the script from printing to saving.
    cli = argparse.ArgumentParser(description='Extract text from base64-encoded docx')
    cli.add_argument('base64_file', help='File containing base64 content')
    cli.add_argument('--tag', help='TAG for output file (e.g., JM-0003)')
    cli.add_argument('--name', help='Original filename for output')
    cli.add_argument('--output-dir', help='Output directory')
    options = cli.parse_args()

    extracted = process_base64_file(options.base64_file)

    # Extraction failures are reported in-band as "[ERROR] ..." strings.
    if extracted.startswith('[ERROR]'):
        print(extracted, file=sys.stderr)
        sys.exit(1)

    save_requested = bool(options.tag and options.name and options.output_dir)
    if not save_requested:
        # Just print the text
        print(extracted)
    else:
        saved_path = save_transcription(
            extracted, options.tag, options.name, options.output_dir
        )
        print(f"Saved: {saved_path}")
        print(f"Characters: {len(extracted)}")
        print(f"Words: {len(extracted.split())}")
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract text from base64-encoded .docx files from Google Drive
4
+ """
5
+
6
+ import base64
7
+ import subprocess
8
+ import tempfile
9
+ import os
10
+ import sys
11
+
12
def extract_text_from_base64_docx(base64_content: str) -> str:
    """
    Decode a base64-encoded .docx and convert it to plain text with pandoc.

    Args:
        base64_content: The .docx file encoded as base64.

    Returns:
        pandoc's standard output (``-t plain --wrap=none``).

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero status.
    """
    payload = base64.b64decode(base64_content)

    # pandoc needs a real path, so spill the bytes to a temporary .docx.
    with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as scratch:
        scratch.write(payload)
    docx_path = scratch.name

    pandoc_command = ['pandoc', docx_path, '-t', 'plain', '--wrap=none']
    try:
        completed = subprocess.run(
            pandoc_command,
            capture_output=True,
            text=True,
            check=True,
        )
    finally:
        # Remove the temporary file whether or not pandoc succeeded.
        os.unlink(docx_path)

    return completed.stdout
36
+
37
def save_transcription(text: str, tag: str, name: str, output_dir: str) -> str:
    """
    Save extracted text as ``[TAG] name.txt`` inside ``output_dir``.

    Args:
        text: Text to write (UTF-8).
        tag: TAG placed in square brackets at the start of the file name.
        name: Original file name; ``.docx`` and ``.mp4`` are removed and
            surrounding whitespace is stripped.
        output_dir: Destination directory, created if missing. An empty
            string means the current working directory.

    Returns:
        Path of the file that was written.
    """
    # Clean filename
    clean_name = name.replace('.docx', '').replace('.mp4', '').strip()
    filename = f"[{tag}] {clean_name}.txt"
    # A path separator inside the tag or name would redirect the write into a
    # (usually non-existent) sub-directory, so neutralise it.
    for separator in ('/', os.sep, os.altsep):
        if separator:
            filename = filename.replace(separator, '_')
    filepath = os.path.join(output_dir, filename)

    # Writing into a directory that does not exist yet would otherwise fail.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(text)

    return filepath
50
+
51
if __name__ == "__main__":
    # The base64 payload is expected as the first command-line argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extract-docx-text.py <base64_content>")
        sys.exit(1)

    print(extract_text_from_base64_docx(cli_args[0]))
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract a single transcription directly from Google Drive
4
+ Uses the MCP gdrive API output format
5
+ """
6
+
7
+ import base64
8
+ import subprocess
9
+ import tempfile
10
+ import os
11
+ import sys
12
+ import re
13
+ import zipfile
14
+
15
def extract_text_from_docx_bytes(docx_bytes: bytes) -> str:
    """
    Extract text from .docx bytes using pandoc, falling back to XML parsing.

    pandoc is tried first (30 second timeout). If pandoc is not installed,
    cannot be started, times out, or exits with a non-zero status, the text
    of every ``<w:t>`` tag in ``word/document.xml`` is returned instead,
    joined by single spaces.

    Args:
        docx_bytes: Complete contents of the .docx file.

    Returns:
        The extracted text, or a string starting with ``[ERROR]`` when
        neither method could extract anything.
    """
    # pandoc needs a real path, so spill the bytes to a temporary .docx.
    with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as tmp:
        tmp.write(docx_bytes)
        tmp_path = tmp.name

    try:
        # First try pandoc. A missing binary (OSError) or a hang must not
        # abort the function: the XML fallback below still applies.
        try:
            result = subprocess.run(
                ['pandoc', tmp_path, '-t', 'plain', '--wrap=none'],
                capture_output=True,
                text=True,
                timeout=30
            )
        except (OSError, subprocess.TimeoutExpired):
            result = None

        if result is not None and result.returncode == 0:
            return result.stdout

        # Fallback: extract from XML directly
        with zipfile.ZipFile(tmp_path, 'r') as z:
            if 'word/document.xml' in z.namelist():
                xml_content = z.read('word/document.xml').decode('utf-8')
                # Extract text from <w:t> tags
                texts = re.findall(r'<w:t[^>]*>([^<]*)</w:t>', xml_content)
                return ' '.join(texts)

        return "[ERROR] Could not extract text"

    except Exception as e:
        return f"[ERROR] {e}"
    finally:
        # Best-effort cleanup of the temporary file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
53
+
54
def main():
    """
    Read base64 .docx content from stdin and print the extracted text.

    Exits with status 1 when stdin is empty, when decoding or extraction
    raises, or when the extractor reports a failure in-band (a result that
    starts with ``[ERROR]``), so callers can rely on the exit status.
    Messages are printed to stdout, except the decoded-size ``[INFO]`` line
    which goes to stderr.
    """
    # Read base64 from stdin
    base64_content = sys.stdin.read().strip()

    if not base64_content:
        print("[ERROR] No base64 content provided via stdin")
        sys.exit(1)

    try:
        docx_bytes = base64.b64decode(base64_content)
        print(f"[INFO] Decoded {len(docx_bytes)} bytes", file=sys.stderr)

        text = extract_text_from_docx_bytes(docx_bytes)
    except Exception as e:
        print(f"[ERROR] {e}")
        sys.exit(1)

    print(text)
    # The extractor signals failure by returning an "[ERROR] ..." string;
    # do not report that as a successful run.
    if text.startswith("[ERROR]"):
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script para extrair texto de arquivos .docx baixados do Google Drive (base64)
4
+ """
5
+
6
+ import base64
7
+ import zipfile
8
+ import io
9
+ import re
10
+ import sys
11
+ import os
12
+
13
def decode_base64_docx(base64_content):
    """Decode base64 content (optionally prefixed by a data URI) to bytes."""
    payload = base64_content

    # With a data-URI style prefix, keep only the piece after the first comma.
    if ',' in payload:
        payload = payload.split(',', 2)[1]

    # Trim the ends, then drop embedded line breaks.
    line_breaks = str.maketrans('', '', '\n\r')
    payload = payload.strip().translate(line_breaks)

    return base64.b64decode(payload)
23
+
24
def extract_text_from_docx_bytes(docx_bytes):
    """
    Extract text from the bytes of a .docx file (which is a ZIP archive).

    The text inside every ``<w:t>`` tag of ``word/document.xml`` is
    concatenated, XML character/entity references (``&amp;``, ``&lt;``,
    ``&#8217;`` ...) are decoded, runs of spaces are collapsed, and a line
    break is inserted before anything that looks like an ``MM:SS`` timestamp.

    Args:
        docx_bytes: Complete contents of the .docx file.

    Returns:
        The extracted text, or a string starting with
        ``"ERRO ao extrair texto: "`` if anything goes wrong.
    """
    from html import unescape  # local import: not in this script's imports

    try:
        # Open as ZIP and read word/document.xml
        with zipfile.ZipFile(io.BytesIO(docx_bytes)) as zf:
            with zf.open('word/document.xml') as doc:
                xml_content = doc.read().decode('utf-8')

        # Keep only the text found between <w:t> tags
        text_parts = re.findall(r'<w:t[^>]*>([^<]*)</w:t>', xml_content)

        # The regex returns raw XML text, so references such as "&amp;" must
        # be decoded to get the characters the author actually typed.
        raw_text = unescape(''.join(text_parts))

        # Collapse multiple spaces into one
        text = re.sub(r' +', ' ', raw_text)

        # Approximate paragraph breaks: start a new line before numbers that
        # look like timestamps or markers
        text = re.sub(r'(\d{1,2}:\d{2})', r'\n\1', text)

        return text.strip()

    except Exception as e:
        return f"ERRO ao extrair texto: {str(e)}"
52
+
53
def process_file(base64_content, output_path):
    """
    Process one file: decode the base64 content, extract text, save as .txt.

    Args:
        base64_content: Base64 text of the .docx, as accepted by
            ``decode_base64_docx``.
        output_path: Path of the text file to write. A bare file name (no
            directory part) is written to the current working directory.

    Returns:
        ``(True, number_of_characters_written)`` on success, or
        ``(False, error_message)`` on failure.
    """
    try:
        # Decode base64
        docx_bytes = decode_base64_docx(base64_content)

        # Extract text
        text = extract_text_from_docx_bytes(docx_bytes)

        # The extractor reports failures in-band; do not save an error
        # message as if it were the transcription.
        if text.startswith("ERRO ao extrair texto:"):
            return False, text

        # Create the directory if needed; os.makedirs('') raises, so skip it
        # when the output path has no directory component.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save as .txt
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

        return True, len(text)

    except Exception as e:
        return False, str(e)
73
+
74
if __name__ == "__main__":
    # Basic smoke test: only announces that the helper functions exist.
    invoked_with_args = len(sys.argv) > 1
    if invoked_with_args:
        print("Script pronto. Use as funções: decode_base64_docx, extract_text_from_docx_bytes, process_file")
@@ -0,0 +1,246 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ORGANIZED DOWNLOADER - Download com TAG
4
+ Mega Brain - Sistema de Inteligência de Negócios
5
+
6
+ Baixa arquivos do Google Drive JÁ com [TAG] no nome.
7
+ Organiza automaticamente em inbox/[SOURCE]/
8
+
9
+ USO: Este script é chamado pelo JARVIS via MCP, não diretamente.
10
+ """
11
+
12
+ import os
13
+ import json
14
+ import re
15
+ from datetime import datetime
16
+ from pathlib import Path
17
+
18
# Settings: every path is derived from the project root.
MEGA_BRAIN = "."
INBOX = MEGA_BRAIN + "/inbox"
MISSION_CONTROL = MEGA_BRAIN + "/.claude/mission-control"
PLANILHA_INDEX = MISSION_CONTROL + "/PLANILHA-INDEX.json"
DOWNLOAD_LOG = MISSION_CONTROL + "/DOWNLOAD-LOG.json"

# Maps a TAG prefix to its destination folder inside INBOX.
PREFIX_TO_FOLDER = {
    "JM": "JEREMY MINER",
    "JH-ST": "JEREMY HAYNES/SALES TRAINING",
    "JH-IC": "JEREMY HAYNES/INNER CIRCLE",
    "JH-WK": "JEREMY HAYNES/WEEKLY CALLS",
    "AOBA": "JEREMY HAYNES/AOBA",
    "PCVP": "JEREMY HAYNES/PCVP",
    "LYFC": "JEREMY HAYNES/LYFC",
    "MMM": "JEREMY HAYNES/MMM",
    "30DC": "JEREMY HAYNES/30DC",
    "STA": "JEREMY HAYNES/STA",
    "UHTC": "JEREMY HAYNES/UHTC",
    "CG": "COLE GORDON",
    "TSC": "COLE GORDON/TSC",
    "EDC": "COLE GORDON/EAD",
    "AH": "ALEX HORMOZI",
    "CA": "JEREMY HAYNES PROGRAM",
}
44
+
45
+
46
def sanitize_filename(name):
    """Return *name* with each of ``<>:"/\\|?*`` replaced by ``_``.

    Leading and trailing whitespace is stripped from the result.
    """
    replacements = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})
    return name.translate(replacements).strip()
52
+
53
+
54
def get_folder_for_tag(tag):
    """Return the destination folder for *tag*, chosen by its prefix.

    The first entry of ``PREFIX_TO_FOLDER`` whose key, followed by a
    dash, starts the tag wins. ``"OUTROS"`` is returned when no prefix
    matches.
    """
    candidates = (
        folder
        for prefix, folder in PREFIX_TO_FOLDER.items()
        if tag.startswith(prefix + "-")
    )
    return next(candidates, "OUTROS")
60
+
61
+
62
def format_tagged_filename(tag, original_name):
    """Build the tagged file name: ``[TAG]`` followed by the sanitized name."""
    return f"[{tag}] {sanitize_filename(original_name)}"
66
+
67
+
68
def prepare_download_operations(tagged_entries):
    """Turn tagged spreadsheet entries into download operations for MCP.

    Args:
        tagged_entries: Iterable of dict-like entries that were already
            tagged, e.g.
            ``[{'tag': 'JM-0001', 'name': '...', 'file_id': '...', ...}]``.
            ``original_name`` and ``drive_id`` are used as fallbacks for
            ``name`` and ``file_id``.

    Returns:
        A list with one operation dict per entry that has both a tag and
        a file id; entries missing either one are skipped.
    """
    operations = []

    for entry in tagged_entries:
        tag = entry.get('tag', '')
        fallback_name = entry.get('original_name', 'arquivo')
        original_name = entry.get('name', fallback_name)
        fallback_id = entry.get('drive_id', '')
        file_id = entry.get('file_id', fallback_id)

        # Both values are required to build a download operation.
        if not (tag and file_id):
            continue

        folder = get_folder_for_tag(tag)
        dest_folder = os.path.join(INBOX, folder)
        tagged_name = format_tagged_filename(tag, original_name)

        operation = {
            'type': 'download',
            'file_id': file_id,
            'original_name': original_name,
            'tagged_name': tagged_name,
            'tag': tag,
            'dest_folder': dest_folder,
            'dest_path': os.path.join(dest_folder, tagged_name),
            # Top-level folder name, i.e. the part before the first '/'.
            'source': folder.partition('/')[0],
        }
        operations.append(operation)

    return operations
106
+
107
+
108
def ensure_folders_exist(operations):
    """Create the destination folders of *operations* that do not exist yet.

    Returns:
        The list of folders that were missing when checked and were
        therefore created by this call.
    """
    created = []

    # Check and create one folder at a time: creating a nested folder
    # may also create a parent that appears later in the set.
    for folder in {op['dest_folder'] for op in operations}:
        if os.path.exists(folder):
            continue
        os.makedirs(folder, exist_ok=True)
        created.append(folder)

    return created
119
+
120
+
121
def generate_download_report(operations):
    """Summarize *operations* into a report dict.

    Returns:
        A dict with the current local time in ISO format
        (``timestamp``), the number of operations (``total_downloads``),
        a count of operations per ``source`` (``by_source``) and the
        operations themselves (``operations``).
    """
    timestamp = datetime.now().isoformat()

    per_source = {}
    for op in operations:
        key = op['source']
        per_source[key] = per_source.get(key, 0) + 1

    return {
        'timestamp': timestamp,
        'total_downloads': len(operations),
        'by_source': per_source,
        'operations': operations,
    }
137
+
138
+
139
def log_downloads(operations, success_ids):
    """Append the successful downloads to the JSON download log.

    The log at ``DOWNLOAD_LOG`` is loaded if it exists (otherwise a new
    one is started), one record is appended per operation whose
    ``file_id`` is in *success_ids*, the total and per-day counters are
    updated, and the log is written back.

    Args:
        operations: Operation dicts with ``file_id``, ``tag`` and
            ``dest_path`` keys.
        success_ids: Container of the file ids that were downloaded
            successfully.

    Returns:
        The updated log dict, as written to disk.
    """
    log_path = DOWNLOAD_LOG

    if os.path.exists(log_path):
        with open(log_path, 'r', encoding='utf-8') as f:
            log = json.load(f)
    else:
        log = {}

    # Fill in any missing section so a new or partial log file does not
    # raise KeyError below. Key order matches a freshly created log.
    downloads = log.setdefault('downloads', [])
    stats = log.setdefault('stats', {})
    stats.setdefault('total', 0)
    by_date = stats.setdefault('by_date', {})

    today = datetime.now().strftime('%Y-%m-%d')

    for op in operations:
        if op['file_id'] not in success_ids:
            continue

        downloads.append({
            'timestamp': datetime.now().isoformat(),
            'tag': op['tag'],
            'file_id': op['file_id'],
            'dest_path': op['dest_path']
        })
        stats['total'] += 1
        by_date[today] = by_date.get(today, 0) + 1

    # On a first run the log directory may not exist yet; open(..., 'w')
    # would then fail with FileNotFoundError.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log, f, indent=2, ensure_ascii=False)

    return log
169
+
170
+
171
def update_planilha_index(operations, success_ids):
    """Mark successfully downloaded entries in the spreadsheet index file.

    Does nothing when ``PLANILHA_INDEX`` does not exist. Otherwise every
    index entry whose ``tag`` belongs to an operation with a ``file_id``
    in *success_ids* gets ``downloaded = True`` and a ``download_date``
    timestamp, and the file is rewritten.
    """
    index_path = PLANILHA_INDEX
    if not os.path.exists(index_path):
        return

    with open(index_path, 'r', encoding='utf-8') as f:
        index = json.load(f)

    # Tags of the operations that actually succeeded.
    downloaded_tags = set()
    for op in operations:
        if op['file_id'] in success_ids:
            downloaded_tags.add(op['tag'])

    for entry in index.get('entries', []):
        if entry.get('tag') not in downloaded_tags:
            continue
        entry['downloaded'] = True
        entry['download_date'] = datetime.now().isoformat()

    with open(index_path, 'w', encoding='utf-8') as f:
        json.dump(index, f, indent=2, ensure_ascii=False)
188
+
189
+
190
def main(tagged_entries=None, preview=True):
    """Entry point of the downloader.

    Args:
        tagged_entries: List of already tagged entries. When ``None``,
            only a usage hint is printed.
        preview: When true, print a summary and create nothing.

    Returns:
        ``None`` when no entries were passed; ``[]`` when there is
        nothing to download; the report dict when ``preview`` is true;
        otherwise the list of operations for MCP (after creating the
        destination folders).
    """
    rule = "=" * 60
    print(rule)
    print("ORGANIZED DOWNLOADER - Download com TAG")
    print(rule)
    print()

    if tagged_entries is None:
        print("[MODO STANDALONE]")
        print("Passe tagged_entries para executar.")
        return None

    operations = prepare_download_operations(tagged_entries)
    if not operations:
        print("✅ Nenhum download pendente!")
        return []

    report = generate_download_report(operations)

    print(f"Downloads preparados: {report['total_downloads']}")
    print()
    print("Por fonte:")
    for source, count in sorted(report['by_source'].items()):
        print(f" {source}: {count}")
    print()

    if not preview:
        # Real run: make sure every destination folder exists, then hand
        # the operations back to the caller.
        created_folders = ensure_folders_exist(operations)
        if created_folders:
            print(f"Pastas criadas: {len(created_folders)}")
        return operations

    # Preview: list at most the first five files, then summarize the rest.
    print("Arquivos:")
    for i, op in enumerate(operations[:5], 1):
        print(f" {i}. [{op['tag']}] → {op['source']}")
    if len(operations) > 5:
        print(f" ... e mais {len(operations) - 5}")
    print()
    print("MODO PREVIEW - Nenhum download realizado.")
    return report


if __name__ == '__main__':
    main()