mega-brain-ai 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mega-brain-ai might be problematic. Click here for more details.
- package/.claude/CLAUDE.md +155 -0
- package/.claude/commands/agents.md +161 -0
- package/.claude/commands/ask.md +117 -0
- package/.claude/commands/benchmark.md +224 -0
- package/.claude/commands/chat.md +343 -0
- package/.claude/commands/compare.md +116 -0
- package/.claude/commands/conclave.md +194 -0
- package/.claude/commands/config.md +133 -0
- package/.claude/commands/council.md +194 -0
- package/.claude/commands/create-agent.md +452 -0
- package/.claude/commands/debate.md +157 -0
- package/.claude/commands/documentation/create-architecture-documentation.md +175 -0
- package/.claude/commands/dossiers.md +180 -0
- package/.claude/commands/evolve.md +223 -0
- package/.claude/commands/extract-dna.md +170 -0
- package/.claude/commands/extract-knowledge.md +507 -0
- package/.claude/commands/inbox.md +296 -0
- package/.claude/commands/ingest-empresa.md +191 -0
- package/.claude/commands/ingest.md +182 -0
- package/.claude/commands/jarvis-briefing.md +67 -0
- package/.claude/commands/jarvis-control.md +169 -0
- package/.claude/commands/jarvis-full.md +181 -0
- package/.claude/commands/jarvis.md +212 -0
- package/.claude/commands/ler-drive.md +212 -0
- package/.claude/commands/log.md +158 -0
- package/.claude/commands/loop.md +133 -0
- package/.claude/commands/loops.md +73 -0
- package/.claude/commands/mission-autopilot.md +538 -0
- package/.claude/commands/mission.md +353 -0
- package/.claude/commands/process-inbox.md +148 -0
- package/.claude/commands/process-jarvis.md +3036 -0
- package/.claude/commands/process-video.md +131 -0
- package/.claude/commands/rag-search.md +78 -0
- package/.claude/commands/resume.md +33 -0
- package/.claude/commands/save.md +38 -0
- package/.claude/commands/scan-inbox.md +125 -0
- package/.claude/commands/setup.md +99 -0
- package/.claude/commands/system-digest.md +243 -0
- package/.claude/commands/verify.md +182 -0
- package/.claude/commands/view-dna.md +169 -0
- package/.claude/hooks/agent_doctor.py +433 -0
- package/.claude/hooks/agent_memory_persister.py +203 -0
- package/.claude/hooks/auto_formatter.py +158 -0
- package/.claude/hooks/checkpoint_writer.py +244 -0
- package/.claude/hooks/claude_md_guard.py +146 -0
- package/.claude/hooks/creation_validator.py +357 -0
- package/.claude/hooks/enforce_dual_location.py +501 -0
- package/.claude/hooks/enforce_plan_mode.py +220 -0
- package/.claude/hooks/inbox_age_alert.py +367 -0
- package/.claude/hooks/jarvis_briefing.py +506 -0
- package/.claude/hooks/ledger_updater.py +301 -0
- package/.claude/hooks/memory_hints_injector.py +251 -0
- package/.claude/hooks/memory_updater.py +202 -0
- package/.claude/hooks/multi_agent_hook.py +464 -0
- package/.claude/hooks/notification_system.py +120 -0
- package/.claude/hooks/pattern_analyzer.py +526 -0
- package/.claude/hooks/pending_tracker.py +188 -0
- package/.claude/hooks/post_batch_cascading.py +1740 -0
- package/.claude/hooks/post_output_validator.py +358 -0
- package/.claude/hooks/post_tool_use.py +120 -0
- package/.claude/hooks/post_write_validator.py +200 -0
- package/.claude/hooks/quality_watchdog.py +394 -0
- package/.claude/hooks/ralph_wiggum.py +277 -0
- package/.claude/hooks/session-source-sync.py +218 -0
- package/.claude/hooks/session_autosave_v2.py +1135 -0
- package/.claude/hooks/session_end.py +203 -0
- package/.claude/hooks/session_start.py +939 -0
- package/.claude/hooks/skill_indexer.py +48 -0
- package/.claude/hooks/skill_router.py +358 -0
- package/.claude/hooks/stop_hook_completeness.py +178 -0
- package/.claude/hooks/subagent_tracker.py +163 -0
- package/.claude/hooks/token_checkpoint.py +584 -0
- package/.claude/hooks/user_prompt_submit.py +125 -0
- package/.claude/rules/ANTHROPIC-STANDARDS.md +384 -0
- package/.claude/rules/CLAUDE-LITE.md +201 -0
- package/.claude/rules/RULE-GROUP-1.md +320 -0
- package/.claude/rules/RULE-GROUP-2.md +307 -0
- package/.claude/rules/RULE-GROUP-3.md +248 -0
- package/.claude/rules/RULE-GROUP-4.md +427 -0
- package/.claude/rules/RULE-GROUP-5.md +388 -0
- package/.claude/rules/RULE-GROUP-6.md +387 -0
- package/.claude/rules/logging.md +53 -0
- package/.claude/rules/mcp-governance.md +128 -0
- package/.claude/rules/pipeline.md +60 -0
- package/.claude/rules/state-management.md +93 -0
- package/.claude/scripts/apply-tags.py +77 -0
- package/.claude/scripts/batch-extract-transcriptions.py +132 -0
- package/.claude/scripts/build-complete-index.py +250 -0
- package/.claude/scripts/build-planilha-index.py +170 -0
- package/.claude/scripts/complete-tag-matching.py +250 -0
- package/.claude/scripts/deduplicate-inbox.py +139 -0
- package/.claude/scripts/docx-xml-extractor.py +141 -0
- package/.claude/scripts/extract-docx-text.py +58 -0
- package/.claude/scripts/extract-single-transcription.py +74 -0
- package/.claude/scripts/extract_docx_from_gdrive.py +77 -0
- package/.claude/scripts/organized-downloader.py +246 -0
- package/.claude/scripts/planilha-tagger.py +187 -0
- package/.claude/scripts/revert-tags.py +70 -0
- package/.claude/scripts/source-sync.py +265 -0
- package/.claude/scripts/tag-inbox-files.py +276 -0
- package/.claude/scripts/tag-inbox-v2.py +253 -0
- package/.claude/scripts/test-extraction.py +35 -0
- package/.claude/scripts/test-full-extraction.py +74 -0
- package/.claude/skills/00-SKILL-CREATOR/SKILL.md +186 -0
- package/.claude/skills/01-SKILL-DOCS-MEGABRAIN/SKILL.md +251 -0
- package/.claude/skills/02-SKILL-PYTHON-MEGABRAIN/SKILL.md +323 -0
- package/.claude/skills/03-SKILL-AGENT-CREATION/SKILL.md +374 -0
- package/.claude/skills/04-SKILL-KNOWLEDGE-EXTRACTION/SKILL.md +318 -0
- package/.claude/skills/05-SKILL-PIPELINE-JARVIS/SKILL.md +430 -0
- package/.claude/skills/06-SKILL-BRAINSTORMING/SKILL.md +72 -0
- package/.claude/skills/07-SKILL-DISPATCHING-PARALLEL-AGENTS/SKILL.md +193 -0
- package/.claude/skills/08-SKILL-EXECUTING-PLANS/SKILL.md +114 -0
- package/.claude/skills/09-SKILL-WRITING-PLANS/SKILL.md +184 -0
- package/.claude/skills/10-SKILL-VERIFICATION-BEFORE-COMPLETION/SKILL.md +130 -0
- package/.claude/skills/11-SKILL-USING-SUPERPOWERS/SKILL.md +105 -0
- package/.claude/skills/DETECTION-PROTOCOL.md +217 -0
- package/.claude/skills/README.md +240 -0
- package/.claude/skills/SKILL-REGISTRY.md +284 -0
- package/.claude/skills/SKILL-SUGGESTIONS.md +114 -0
- package/.claude/skills/_TEMPLATES/SKILL-WRITER-GUIDE.md +385 -0
- package/.claude/skills/chronicler/SKILL.md +146 -0
- package/.claude/skills/chronicler/chronicler_core.py +468 -0
- package/.claude/skills/code-review/SKILL.md +160 -0
- package/.claude/skills/council/SKILL.md +210 -0
- package/.claude/skills/executor/SKILL.md +161 -0
- package/.claude/skills/fase-2-5-tagging/SKILL.md +182 -0
- package/.claude/skills/feature-dev/SKILL.md +154 -0
- package/.claude/skills/finance-agent/SKILL.md +137 -0
- package/.claude/skills/frontend-design/SKILL.md +165 -0
- package/.claude/skills/gdrive-transcription-downloader/SKILL.md +249 -0
- package/.claude/skills/gemini-fallback/SKILL.md +67 -0
- package/.claude/skills/gemini-fallback/gemini_fetch.py +0 -0
- package/.claude/skills/gha/SKILL.md +96 -0
- package/.claude/skills/gha/gha_diagnostic.py +227 -0
- package/.claude/skills/github-workflow/SKILL.md +190 -0
- package/.claude/skills/hookify/SKILL.md +134 -0
- package/.claude/skills/hybrid-source-reading/SKILL.md +265 -0
- package/.claude/skills/jarvis/SKILL.md +546 -0
- package/.claude/skills/jarvis-briefing/SKILL.md +340 -0
- package/.claude/skills/ler-planilha/SKILL.md +281 -0
- package/.claude/skills/plugin-dev/SKILL.md +176 -0
- package/.claude/skills/pr-review-toolkit/SKILL.md +178 -0
- package/.claude/skills/resume/SKILL.md +61 -0
- package/.claude/skills/save/SKILL.md +87 -0
- package/.claude/skills/skill-writer/SKILL.md +153 -0
- package/.claude/skills/skill-writer/examples.md +191 -0
- package/.claude/skills/skill-writer/troubleshooting.md +205 -0
- package/.claude/skills/smart-download-tagger/SKILL.md +148 -0
- package/.claude/skills/source-sync/SKILL.md +240 -0
- package/.claude/skills/sync-docs/SKILL.md +193 -0
- package/.claude/skills/sync-docs/config.json +37 -0
- package/.claude/skills/sync-docs/gdrive_sync.py +358 -0
- package/.claude/skills/sync-docs/reauth.py +71 -0
- package/.claude/skills/talent-agent/SKILL.md +183 -0
- package/.claude/skills/verify/SKILL.md +154 -0
- package/.claude/skills/verify/verify_runner.py +0 -0
- package/.claude/skills/verify-6-levels/SKILL.md +234 -0
- package/.claude/templates/BATCH-LOG-TEMPLATE.md +221 -0
- package/.claudeignore +9 -0
- package/.gitattributes +4 -0
- package/.github/layer1-allowlist.txt +80 -0
- package/.github/layer2-manifest.txt +40 -0
- package/.gitignore +219 -0
- package/README.md +1210 -0
- package/agents/_templates/INDEX.md +741 -0
- package/agents/_templates/TEMPLATE-AGENT-MD-ULTRA-ROBUSTO-V3.md +2399 -0
- package/agents/boardroom/CHECKLIST-MASTER.md +281 -0
- package/agents/boardroom/INTEGRATION-GUIDE.md +406 -0
- package/agents/boardroom/README.md +238 -0
- package/agents/boardroom/config/BOARDROOM-CONFIG.md +186 -0
- package/agents/boardroom/config/TTS-INTEGRATION.md +258 -0
- package/agents/boardroom/config/VOICE-PROFILES.md +624 -0
- package/agents/boardroom/config/voice_mapping.json +128 -0
- package/agents/boardroom/scripts/audio_generator.py +375 -0
- package/agents/boardroom/scripts/audio_generator_edge.py +353 -0
- package/agents/boardroom/scripts/jarvis_boardroom_hook.py +415 -0
- package/agents/boardroom/scripts/notebooklm_generator.py +578 -0
- package/agents/boardroom/templates/EPISODE-TEMPLATE.md +367 -0
- package/agents/boardroom/templates/scene-templates/SCENE-AGENT-DEBATE.md +252 -0
- package/agents/boardroom/templates/scene-templates/SCENE-COUNCIL.md +270 -0
- package/agents/boardroom/templates/scene-templates/SCENE-DNA-CONSULTATION.md +126 -0
- package/agents/boardroom/templates/scene-templates/SCENE-QUESTION.md +174 -0
- package/agents/boardroom/workflows/WORKFLOW-AUDIO-GENERATION.md +421 -0
- package/agents/constitution/BASE-CONSTITUTION.md +254 -0
- package/agents/council/CRITIC.md +197 -0
- package/agents/council/DEVILS-ADVOCATE.md +274 -0
- package/agents/council/SYNTHESIZER.md +293 -0
- package/agents/council/advogado-do-diabo/AGENT.md +489 -0
- package/agents/council/advogado-do-diabo/SOUL.md +100 -0
- package/agents/council/critico-metodologico/AGENT.md +670 -0
- package/agents/council/critico-metodologico/SOUL.md +107 -0
- package/agents/council/sintetizador/AGENT.md +558 -0
- package/agents/council/sintetizador/SOUL.md +94 -0
- package/agents/persons/_example/AGENT-EXAMPLE.md +42 -0
- package/agents/persons/_example/DNA-EXAMPLE.yaml +61 -0
- package/agents/protocols/AGENT-COGNITION-PROTOCOL.md +779 -0
- package/agents/protocols/AGENT-INTEGRITY-PROTOCOL.md +692 -0
- package/agents/protocols/BATCH-VISUAL-PROTOCOL.md +841 -0
- package/agents/protocols/DNA-CONFIG-TEMPLATE.yaml +181 -0
- package/agents/protocols/DNA-EXTRACTION-PROTOCOL.md +370 -0
- package/agents/protocols/EPISTEMIC-PROTOCOL.md +333 -0
- package/agents/protocols/LOG-STRUCTURE-PROTOCOL.md +65 -0
- package/agents/protocols/MEMORY-PROTOCOL.md +567 -0
- package/agents/protocols/NARRATIVE-SYNTHESIS-PROTOCOL.md +278 -0
- package/agents/protocols/PHASE-4-VERIFICATION-CHECKPOINT.md +146 -0
- package/agents/protocols/SOUL-TEMPLATE.md +416 -0
- package/agents/protocols/TEMPLATE-EVOLUTION-PROTOCOL.md +544 -0
- package/agents/protocols/VISUAL-DIFF-PROTOCOL.md +159 -0
- package/agents/sua-empresa/README.md +44 -0
- package/agents/sua-empresa/_example/jds/EXAMPLE-JD.md +42 -0
- package/agents/sua-empresa/_example/org/EXAMPLE-ORG.md +32 -0
- package/agents/sua-empresa/_example/roles/EXAMPLE-ROLE.md +38 -0
- package/bin/cli.js +2 -0
- package/bin/lib/ascii-art.js +234 -0
- package/bin/lib/installer.js +402 -0
- package/bin/lib/setup-wizard.js +95 -0
- package/bin/lib/validate-email.js +109 -0
- package/bin/mega-brain.js +97 -0
- package/bin/push.js +342 -0
- package/bin/templates/env.example +38 -0
- package/inbox/.gitkeep +0 -0
- package/integrations/README.md +46 -0
- package/integrations/mcps/MCP-REGISTRY.md +56 -0
- package/integrations/mcps/excalidraw/CONFIG.md +56 -0
- package/integrations/mcps/gdrive/CONFIG.md +38 -0
- package/knowledge/dna/.gitkeep +0 -0
- package/knowledge/dossiers/persons/.gitkeep +0 -0
- package/knowledge/dossiers/persons/DOSSIER-EXAMPLE.md +49 -0
- package/knowledge/dossiers/system/.gitkeep +0 -0
- package/knowledge/dossiers/themes/.gitkeep +0 -0
- package/knowledge/playbooks/.gitkeep +0 -0
- package/knowledge/playbooks/PLAYBOOK-EXAMPLE.md +50 -0
- package/knowledge/sources/.gitkeep +0 -0
- package/logs/.gitkeep +0 -0
- package/package.json +128 -0
- package/processing/canonical/.gitkeep +0 -0
- package/processing/chunks/.gitkeep +0 -0
- package/processing/insights/.gitkeep +0 -0
- package/processing/narratives/.gitkeep +0 -0
- package/reference/CONSELHO.md +337 -0
- package/reference/CONTEXT7_README.md +28 -0
- package/reference/JARVIS-LOGGING-PROTOCOL.md +380 -0
- package/reference/QUICK-START.md +197 -0
- package/reference/README-RALPH-CASCATEAMENTO.md +207 -0
- package/reference/TEMPLATE-MASTER.md +727 -0
- package/reference/prds/prd-jarvis-mega-brain-v3.md +1305 -0
- package/reference/templates/phase5/IMPLEMENTATION-GUIDE.md +355 -0
- package/reference/templates/phase5/MOGA-BRAIN-PHASE5-TEMPLATES.md +1284 -0
- package/reference/templates/phase5/README.md +165 -0
- package/reference/workflow-claude-code-boris-cherny-continuous-claude.md +2232 -0
- package/system/database/001_moneyclub_buyers.sql +160 -0
- package/system/database/002_premium_token.sql +97 -0
- package/system/database/apply-migration.mjs +129 -0
- package/system/docs/MEGA-BRAIN-DEMO-COMPLETA.md +1226 -0
- package/system/docs/MEGA-BRAIN-MANIFESTO-COMPLETO.md +1054 -0
- package/system/docs/MOGA-BRAIN-EXPLICACAO-COMPLETA.md +791 -0
- package/system/docs/STRATEGIC-INTEGRATION-GUIDE.md +725 -0
- package/system/docs/architecture/01-system-context.md +136 -0
- package/system/docs/architecture/02-components.md +225 -0
- package/system/docs/architecture/03-data-flow.md +235 -0
- package/system/docs/architecture/04-integrations.md +283 -0
- package/system/docs/architecture/README.md +71 -0
- package/system/docs/architecture/diagrams/component-diagram.mmd +50 -0
- package/system/docs/architecture/diagrams/data-flow.mmd +39 -0
- package/system/docs/architecture/diagrams/system-overview.mmd +68 -0
- package/system/protocols/AGENT-AUTHORITY.md +217 -0
- package/system/protocols/CONSTITUICAO-BASE.md +115 -0
- package/system/protocols/CONSTITUTION.md +231 -0
- package/system/protocols/GOVERNANCE-MAP.md +123 -0
- package/system/protocols/HOOK-SECURITY-THREAT-MODEL.md +152 -0
- package/system/protocols/ORQUESTRACAO-PROTOCOL.md +215 -0
- package/system/protocols/_archive/CHUNKING-PROTOCOL.md +207 -0
- package/system/protocols/_archive/ENTITY-RESOLUTION-PROTOCOL.md +269 -0
- package/system/protocols/_archive/INSIGHT-EXTRACTION-PROTOCOL.md +257 -0
- package/system/protocols/_archive/NARRATIVE-SYNTHESIS-PROTOCOL.md +290 -0
- package/system/protocols/agents/AGENT-INTERACTION.md +315 -0
- package/system/protocols/agents/CORTEX-PROTOCOL.md +520 -0
- package/system/protocols/agents/EPISTEMIC-PROTOCOL.md +465 -0
- package/system/protocols/agents/MEMORY-PROTOCOL.md +366 -0
- package/system/protocols/agents/WAR-ROOM.md +355 -0
- package/system/protocols/company/COMPANY-DOCUMENT-PROTOCOL.md +793 -0
- package/system/protocols/company/COMPANY-ENRICHMENT-PROTOCOL.md +679 -0
- package/system/protocols/conclave/CONCLAVE-LOG-TEMPLATE-v2.md +309 -0
- package/system/protocols/conclave/CONCLAVE-PROTOCOL.md +518 -0
- package/system/protocols/conclave/DEBATE-DYNAMICS-CONFIG.yaml +322 -0
- package/system/protocols/conclave/DEBATE-DYNAMICS-PROTOCOL.md +613 -0
- package/system/protocols/conclave/DEBATE-PROTOCOL.md +323 -0
- package/system/protocols/council/COUNCIL-LOG-TEMPLATE-v2.md +309 -0
- package/system/protocols/council/COUNCIL-PROTOCOL.md +518 -0
- package/system/protocols/council/DEBATE-DYNAMICS-CONFIG.yaml +322 -0
- package/system/protocols/council/DEBATE-DYNAMICS-PROTOCOL.md +613 -0
- package/system/protocols/council/DEBATE-PROTOCOL.md +323 -0
- package/system/protocols/dna/DNA-EXTRACTION-PROTOCOL.md +1214 -0
- package/system/protocols/dna/ENRICHMENT-PROTOCOL.md +408 -0
- package/system/protocols/dna/REASONING-MODEL-PROTOCOL.md +331 -0
- package/system/protocols/pipeline/DOSSIER-COMPILATION-PROTOCOL.md +790 -0
- package/system/protocols/pipeline/NARRATIVE-METABOLISM-PROTOCOL.md +292 -0
- package/system/protocols/pipeline/PIPELINE-JARVIS-v2.1.md +606 -0
- package/system/protocols/pipeline/PROMPT-1.1-CHUNKING.md +154 -0
- package/system/protocols/pipeline/PROMPT-1.2-ENTITY-RESOLUTION.md +186 -0
- package/system/protocols/pipeline/PROMPT-2.1-DNA-TAGS-INCREMENT.md +208 -0
- package/system/protocols/pipeline/PROMPT-2.1-INSIGHT-EXTRACTION.md +191 -0
- package/system/protocols/pipeline/PROMPT-3.1-NARRATIVE-SYNTHESIS.md +331 -0
- package/system/protocols/pipeline/SOURCES-COMPILATION-PROTOCOL.md +340 -0
- package/system/protocols/system/AUTO-LOG-PROTOCOL.md +369 -0
- package/system/protocols/system/CHECKPOINT-ENFORCEMENT.md +176 -0
- package/system/protocols/system/ENFORCEMENT.md +435 -0
- package/system/protocols/system/LOG-TEMPLATES.md +1068 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
DOCX XML Text Extractor for Mega Brain
|
|
4
|
+
---------------------------------------
|
|
5
|
+
Extrai texto de arquivos .docx usando parsing XML direto.
|
|
6
|
+
Não requer pandoc ou outras dependências externas.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import tempfile
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
import re
|
|
14
|
+
import zipfile
|
|
15
|
+
from xml.etree import ElementTree as ET
|
|
16
|
+
|
|
17
|
+
# OOXML namespace
WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'

def extract_text_from_xml(xml_content: str) -> str:
    """Pull plain text out of a Word ``document.xml`` payload.

    Walks every element in document order, collecting ``<w:t>`` text runs
    and inserting newlines for paragraph (``<w:p>``) and line-break
    (``<w:br>``) elements, then normalizes whitespace. If the XML is
    malformed, falls back to a regex scan over the raw markup.
    """
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        # Malformed XML: scrape the <w:t> runs straight from the markup.
        return ' '.join(re.findall(r'<w:t[^>]*>([^<]*)</w:t>', xml_content))

    pieces = []
    for node in root.iter():
        tag = node.tag
        if tag == WORD_NAMESPACE + 't':
            # A text run; it may legitimately be empty.
            if node.text:
                pieces.append(node.text)
        elif tag == WORD_NAMESPACE + 'p':
            # Paragraph boundary: separate from the preceding text once.
            if pieces and not pieces[-1].endswith('\n'):
                pieces.append('\n')
        elif tag == WORD_NAMESPACE + 'br':
            # Explicit line break inside a paragraph.
            pieces.append('\n')

    # Collapse space/tab runs, squeeze blank lines, trim the edges.
    joined = ''.join(pieces)
    joined = re.sub(r'[ \t]+', ' ', joined)
    joined = re.sub(r'\n\s*\n', '\n\n', joined)
    return joined.strip()
|
|
54
|
+
|
|
55
|
+
def extract_from_docx_bytes(docx_bytes: bytes) -> str:
    """Extract plain text from the raw bytes of a .docx file.

    A .docx file is a ZIP archive whose main text lives in
    ``word/document.xml``. The archive is inspected entirely in memory
    (previously the bytes were round-tripped through a temp file whose
    cleanup used a bare ``except:``).

    Returns the extracted text, or an ``[ERROR] ...`` string when the
    bytes are not a valid .docx archive.
    """
    import io  # local: keeps the module's original import surface intact

    buffer = io.BytesIO(docx_bytes)
    # A docx must first of all be a well-formed ZIP archive.
    if not zipfile.is_zipfile(buffer):
        return "[ERROR] Invalid docx file (not a valid ZIP)"
    buffer.seek(0)  # is_zipfile moves the read position

    with zipfile.ZipFile(buffer, 'r') as z:
        if 'word/document.xml' not in z.namelist():
            return "[ERROR] Invalid docx file (no document.xml)"
        xml_content = z.read('word/document.xml').decode('utf-8')

    return extract_text_from_xml(xml_content)
|
|
79
|
+
|
|
80
|
+
def process_base64_file(input_file: str) -> str:
    """Read a file holding base64-encoded .docx data and extract its text.

    Whitespace (newlines, carriage returns, spaces) inside the base64
    payload is tolerated. Returns the document text, or an
    ``[ERROR] ...`` string if decoding fails.
    """
    with open(input_file, 'r') as handle:
        raw = handle.read().strip()

    # Strip any whitespace that crept into the base64 stream.
    cleaned = raw.replace('\n', '').replace('\r', '').replace(' ', '')

    try:
        payload = base64.b64decode(cleaned)
    except Exception as exc:
        return f"[ERROR] Base64 decode failed: {exc}"

    return extract_from_docx_bytes(payload)
|
|
96
|
+
|
|
97
|
+
def save_transcription(text: str, tag: str, name: str, output_dir: str) -> str:
    """Write *text* to ``output_dir`` as ``[TAG] <clean name>.txt``.

    Known media/document extensions are stripped from *name*, as is any
    leading/trailing ``' -.'`` punctuation. The output directory is
    created on demand. Returns the full path of the written file.
    """
    # Drop known extensions anywhere in the name, then tidy the edges.
    cleaned = name
    for extension in ('.docx', '.mp4', '.txt'):
        cleaned = cleaned.replace(extension, '')
    cleaned = cleaned.strip(' -.')

    destination = os.path.join(output_dir, f"[{tag}] {cleaned}.txt")
    os.makedirs(output_dir, exist_ok=True)

    with open(destination, 'w', encoding='utf-8') as out:
        out.write(text)

    return destination
|
|
116
|
+
|
|
117
|
+
# CLI entry point: extract text from a base64-encoded .docx file and
# either save it as a tagged transcription or dump it to stdout.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Extract text from base64-encoded docx')
    parser.add_argument('base64_file', help='File containing base64 content')
    parser.add_argument('--tag', help='TAG for output file (e.g., JM-0003)')
    parser.add_argument('--name', help='Original filename for output')
    parser.add_argument('--output-dir', help='Output directory')

    args = parser.parse_args()

    text = process_base64_file(args.base64_file)

    # Extraction failures are reported on stderr with a non-zero exit code.
    if text.startswith('[ERROR]'):
        print(text, file=sys.stderr)
        sys.exit(1)

    # With full metadata, persist as "[TAG] name.txt" and print stats;
    # all three flags are required together for the save path.
    if args.tag and args.name and args.output_dir:
        filepath = save_transcription(text, args.tag, args.name, args.output_dir)
        print(f"Saved: {filepath}")
        print(f"Characters: {len(text)}")
        print(f"Words: {len(text.split())}")
    else:
        # Just print the text
        print(text)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Extract text from base64-encoded .docx files from Google Drive
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import subprocess
|
|
8
|
+
import tempfile
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
def extract_text_from_base64_docx(base64_content: str) -> str:
    """Decode a base64 .docx payload and convert it to plain text.

    The bytes are written to a temporary file and handed to the external
    ``pandoc`` binary. Raises ``binascii.Error`` on bad base64 and
    ``subprocess.CalledProcessError`` when pandoc exits non-zero.
    """
    payload = base64.b64decode(base64_content)

    # pandoc needs a real file on disk, so stage the bytes temporarily.
    with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as staging:
        staging.write(payload)
        staging_path = staging.name

    try:
        completed = subprocess.run(
            ['pandoc', staging_path, '-t', 'plain', '--wrap=none'],
            capture_output=True,
            text=True,
            check=True
        )
    finally:
        # Always remove the staging file, even when pandoc fails.
        os.unlink(staging_path)

    return completed.stdout
|
|
36
|
+
|
|
37
|
+
def save_transcription(text: str, tag: str, name: str, output_dir: str) -> str:
    """Save extracted text as ``[TAG] <clean name>.txt`` inside *output_dir*.

    Strips known source extensions from *name*. The output directory is
    created if it does not exist (previously this function crashed with
    ``FileNotFoundError`` when the directory was missing, unlike its
    sibling in docx-xml-extractor.py). Returns the written file path.
    """
    # Clean filename: drop source extensions, trim surrounding whitespace.
    clean_name = name.replace('.docx', '').replace('.mp4', '').strip()
    filename = f"[{tag}] {clean_name}.txt"
    filepath = os.path.join(output_dir, filename)

    # Fix: ensure the destination directory exists before writing.
    os.makedirs(output_dir, exist_ok=True)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(text)

    return filepath
|
|
50
|
+
|
|
51
|
+
# CLI entry point: takes the base64 payload as the first argument and
# prints the extracted text to stdout.
if __name__ == "__main__":
    # Require the base64 payload argument.
    if len(sys.argv) < 2:
        print("Usage: python extract-docx-text.py <base64_content>")
        sys.exit(1)

    base64_content = sys.argv[1]
    text = extract_text_from_base64_docx(base64_content)
    print(text)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Extract a single transcription directly from Google Drive
|
|
4
|
+
Uses the MCP gdrive API output format
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import base64
|
|
8
|
+
import subprocess
|
|
9
|
+
import tempfile
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
import re
|
|
13
|
+
import zipfile
|
|
14
|
+
|
|
15
|
+
def extract_text_from_docx_bytes(docx_bytes: bytes) -> str:
    """Extract text from .docx bytes via pandoc, with an XML fallback.

    Tries the external ``pandoc`` binary first; if pandoc is absent,
    times out, or exits non-zero, falls back to scraping the ``<w:t>``
    text runs out of ``word/document.xml`` inside the archive.
    Returns an ``[ERROR] ...`` string instead of raising.
    """
    # pandoc works on files, so stage the bytes in a temp .docx.
    with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as tmp:
        tmp.write(docx_bytes)
        tmp_path = tmp.name

    try:
        # First try pandoc. Bug fix: a missing pandoc binary raised
        # FileNotFoundError, which the broad handler below swallowed and
        # reported as "[ERROR] ..." instead of reaching the XML fallback.
        try:
            result = subprocess.run(
                ['pandoc', tmp_path, '-t', 'plain', '--wrap=none'],
                capture_output=True,
                text=True,
                timeout=30
            )
            if result.returncode == 0:
                return result.stdout
        except (FileNotFoundError, subprocess.TimeoutExpired):
            pass  # pandoc unavailable or too slow: use the XML fallback

        # Fallback: extract the raw text runs from the OOXML payload.
        with zipfile.ZipFile(tmp_path, 'r') as z:
            if 'word/document.xml' in z.namelist():
                xml_content = z.read('word/document.xml').decode('utf-8')
                texts = re.findall(r'<w:t[^>]*>([^<]*)</w:t>', xml_content)
                return ' '.join(texts)

        return "[ERROR] Could not extract text"

    except Exception as e:
        # Anything else (bad ZIP, decode error) is reported, not raised.
        return f"[ERROR] {e}"
    finally:
        # Best-effort cleanup of the staging file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
|
|
53
|
+
|
|
54
|
+
def main():
    """CLI entry point: read base64 docx from stdin, print extracted text."""
    payload = sys.stdin.read().strip()

    # Guard clause: nothing was piped in.
    if not payload:
        print("[ERROR] No base64 content provided via stdin")
        sys.exit(1)

    try:
        decoded = base64.b64decode(payload)
        # Progress note goes to stderr so stdout stays clean text.
        print(f"[INFO] Decoded {len(decoded)} bytes", file=sys.stderr)
        print(extract_text_from_docx_bytes(decoded))
    except Exception as e:
        print(f"[ERROR] {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Script para extrair texto de arquivos .docx baixados do Google Drive (base64)
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import zipfile
|
|
8
|
+
import io
|
|
9
|
+
import re
|
|
10
|
+
import sys
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
def decode_base64_docx(base64_content):
    """Decode base64 content (optionally wrapped in a data URI) to bytes."""
    # Strip a possible "data:...;base64," prefix.
    if ',' in base64_content:
        base64_content = base64_content.split(',')[1]

    # Remove whitespace the transport may have introduced.
    normalized = base64_content.strip().replace('\n', '').replace('\r', '')
    return base64.b64decode(normalized)
|
|
23
|
+
|
|
24
|
+
def extract_text_from_docx_bytes(docx_bytes):
    """Extract text from a .docx file's bytes (a .docx is a ZIP archive)."""
    try:
        # Open the archive in memory and read the main document part.
        with zipfile.ZipFile(io.BytesIO(docx_bytes)) as archive:
            with archive.open('word/document.xml') as part:
                markup = part.read().decode('utf-8')

        # Keep only the text runs found between <w:t> tags.
        runs = re.findall(r'<w:t[^>]*>([^<]*)</w:t>', markup)
        merged = ''.join(runs)

        # Collapse runs of spaces, then start a new line before anything
        # that looks like a mm:ss timestamp to recover some structure.
        merged = re.sub(r' +', ' ', merged)
        merged = re.sub(r'(\d{1,2}:\d{2})', r'\n\1', merged)
        return merged.strip()

    except Exception as e:
        # Error message kept verbatim (callers may match on it).
        return f"ERRO ao extrair texto: {str(e)}"
|
|
52
|
+
|
|
53
|
+
def process_file(base64_content, output_path):
    """Process one file end to end: decode base64, extract text, save.

    Returns ``(True, char_count)`` on success or ``(False, error_message)``
    on failure; never raises.
    """
    try:
        # Decode the base64 payload into raw .docx bytes.
        docx_bytes = decode_base64_docx(base64_content)

        # Extract the plain text from the document.
        text = extract_text_from_docx_bytes(docx_bytes)

        # Create the parent directory if needed. Bug fix: when
        # output_path has no directory component, dirname() returns ''
        # and os.makedirs('') raises — only create when a parent exists.
        parent = os.path.dirname(output_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # Save as UTF-8 .txt.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

        return True, len(text)

    except Exception as e:
        # Report failures as data instead of raising.
        return False, str(e)
|
|
73
|
+
|
|
74
|
+
# Script entry point. NOTE(review): running this directly only prints a
# readiness notice (Portuguese runtime string kept verbatim); the
# functions above are meant to be imported and driven by the pipeline.
if __name__ == "__main__":
    # Basic self-check only; no processing happens from the CLI.
    if len(sys.argv) > 1:
        print(f"Script pronto. Use as funções: decode_base64_docx, extract_text_from_docx_bytes, process_file")
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ORGANIZED DOWNLOADER - Download com TAG
|
|
4
|
+
Mega Brain - Sistema de Inteligência de Negócios
|
|
5
|
+
|
|
6
|
+
Baixa arquivos do Google Drive JÁ com [TAG] no nome.
|
|
7
|
+
Organiza automaticamente em inbox/[SOURCE]/
|
|
8
|
+
|
|
9
|
+
USO: Este script é chamado pelo JARVIS via MCP, não diretamente.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
# Configuration: all paths are relative to the Mega Brain root.
MEGA_BRAIN = "."
INBOX = f"{MEGA_BRAIN}/inbox"  # download destination root
MISSION_CONTROL = f"{MEGA_BRAIN}/.claude/mission-control"  # control-plane state dir
PLANILHA_INDEX = f"{MISSION_CONTROL}/PLANILHA-INDEX.json"  # spreadsheet index file
DOWNLOAD_LOG = f"{MISSION_CONTROL}/DOWNLOAD-LOG.json"  # download history log

# Maps a TAG prefix to its destination folder under the INBOX.
# Lookup is by "PREFIX-" match on the tag (see get_folder_for_tag).
PREFIX_TO_FOLDER = {
    "JM": "JEREMY MINER",
    "JH-ST": "JEREMY HAYNES/SALES TRAINING",
    "JH-IC": "JEREMY HAYNES/INNER CIRCLE",
    "JH-WK": "JEREMY HAYNES/WEEKLY CALLS",
    "AOBA": "JEREMY HAYNES/AOBA",
    "PCVP": "JEREMY HAYNES/PCVP",
    "LYFC": "JEREMY HAYNES/LYFC",
    "MMM": "JEREMY HAYNES/MMM",
    "30DC": "JEREMY HAYNES/30DC",
    "STA": "JEREMY HAYNES/STA",
    "UHTC": "JEREMY HAYNES/UHTC",
    "CG": "COLE GORDON",
    "TSC": "COLE GORDON/TSC",
    "EDC": "COLE GORDON/EAD",  # NOTE(review): key "EDC" maps to folder "EAD" — confirm intentional
    "AH": "ALEX HORMOZI",
    "CA": "JEREMY HAYNES PROGRAM"
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def sanitize_filename(name):
    """Replace filesystem-hostile characters with underscores and trim."""
    # Single translate pass instead of chained .replace() calls.
    table = str.maketrans({bad: '_' for bad in '<>:"/\\|?*'})
    return name.translate(table).strip()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_folder_for_tag(tag):
    """Return the INBOX subfolder mapped to this TAG's prefix.

    A prefix matches only when followed by '-' in the tag (e.g. 'JM-0001'
    matches the 'JM' prefix). Unknown prefixes fall back to "OUTROS".
    """
    return next(
        (dest for prefix, dest in PREFIX_TO_FOLDER.items()
         if tag.startswith(f"{prefix}-")),
        "OUTROS",
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def format_tagged_filename(tag, original_name):
    """Build the final file name: sanitized original prefixed with [TAG]."""
    safe_name = sanitize_filename(original_name)
    return f"[{tag}] {safe_name}"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def prepare_download_operations(tagged_entries):
    """Build the MCP download operations for already-tagged entries.

    Args:
        tagged_entries: list of dicts from the spreadsheet index. Each entry
            should carry a 'tag' (e.g. 'JM-0001'), a display name under
            'name' (or legacy 'original_name'), and a Drive id under
            'file_id' (or legacy 'drive_id').

    Returns:
        List of operation dicts with keys: type, file_id, original_name,
        tagged_name, tag, dest_folder, dest_path, source. Entries missing
        a tag or file id are silently skipped.
    """
    operations = []

    for entry in tagged_entries:
        tag = entry.get('tag', '')
        original_name = entry.get('name', entry.get('original_name', 'arquivo'))
        file_id = entry.get('file_id', entry.get('drive_id', ''))

        # Entries without a tag or a Drive id cannot be downloaded; skip.
        if not tag or not file_id:
            continue

        folder = get_folder_for_tag(tag)
        dest_folder = os.path.join(INBOX, folder)
        tagged_name = format_tagged_filename(tag, original_name)

        operations.append({
            'type': 'download',
            'file_id': file_id,
            'original_name': original_name,
            'tagged_name': tagged_name,
            'tag': tag,
            'dest_folder': dest_folder,
            'dest_path': os.path.join(dest_folder, tagged_name),
            # Top-level source, e.g. "JEREMY HAYNES" for "JEREMY HAYNES/AOBA".
            # str.split('/')[0] already returns the whole string when there is
            # no '/', so the previous `if '/' in folder` guard was redundant.
            'source': folder.split('/')[0],
        })

    return operations
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def ensure_folders_exist(operations):
    """Create each operation's destination folder when missing.

    Returns the list of folders that had to be created (order follows set
    iteration and is not guaranteed).
    """
    created = []
    for folder in {op['dest_folder'] for op in operations}:
        if os.path.exists(folder):
            continue
        os.makedirs(folder, exist_ok=True)
        created.append(folder)
    return created
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def generate_download_report(operations):
    """Summarize pending download operations.

    Returns a dict with an ISO timestamp, total count, a per-source
    breakdown ('by_source'), and the operations list itself.
    """
    by_source = {}
    for op in operations:
        by_source[op['source']] = by_source.get(op['source'], 0) + 1

    return {
        'timestamp': datetime.now().isoformat(),
        'total_downloads': len(operations),
        'by_source': by_source,
        'operations': operations,
    }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def log_downloads(operations, success_ids):
    """Append successfully downloaded operations to DOWNLOAD-LOG.json.

    Args:
        operations: operation dicts from prepare_download_operations().
        success_ids: collection of file_ids confirmed as downloaded.

    Returns:
        The updated log dict (also persisted to disk).
    """
    if os.path.exists(DOWNLOAD_LOG):
        with open(DOWNLOAD_LOG, 'r', encoding='utf-8') as fh:
            log = json.load(fh)
    else:
        # First run: start a fresh log structure.
        log = {'downloads': [], 'stats': {'total': 0, 'by_date': {}}}

    today = datetime.now().strftime('%Y-%m-%d')
    stats = log['stats']

    for op in (o for o in operations if o['file_id'] in success_ids):
        log['downloads'].append({
            'timestamp': datetime.now().isoformat(),
            'tag': op['tag'],
            'file_id': op['file_id'],
            'dest_path': op['dest_path'],
        })
        stats['total'] += 1
        stats['by_date'][today] = stats['by_date'].get(today, 0) + 1

    with open(DOWNLOAD_LOG, 'w', encoding='utf-8') as fh:
        json.dump(log, fh, indent=2, ensure_ascii=False)

    return log
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def update_planilha_index(operations, success_ids):
    """Mark successfully downloaded tags in PLANILHA-INDEX.json.

    Silently returns when the index file does not exist yet.
    """
    if not os.path.exists(PLANILHA_INDEX):
        return

    with open(PLANILHA_INDEX, 'r', encoding='utf-8') as fh:
        index = json.load(fh)

    completed_tags = {op['tag'] for op in operations if op['file_id'] in success_ids}

    for entry in index.get('entries', []):
        if entry.get('tag') in completed_tags:
            entry['downloaded'] = True
            entry['download_date'] = datetime.now().isoformat()

    with open(PLANILHA_INDEX, 'w', encoding='utf-8') as fh:
        json.dump(index, fh, indent=2, ensure_ascii=False)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def main(tagged_entries=None, preview=True):
    """Entry point for the organized downloader.

    Args:
        tagged_entries: already-tagged spreadsheet entries; when None a
            standalone-mode notice is printed and None is returned.
        preview: when True, only print a report without touching disk.

    Returns:
        None (no entries given), [] (nothing to download), the report dict
        (preview mode), or the list of operations for MCP execution.
    """
    banner = "=" * 60
    print(banner)
    print("ORGANIZED DOWNLOADER - Download com TAG")
    print(banner)
    print()

    if tagged_entries is None:
        print("[MODO STANDALONE]")
        print("Passe tagged_entries para executar.")
        return None

    operations = prepare_download_operations(tagged_entries)

    if not operations:
        print("✅ Nenhum download pendente!")
        return []

    report = generate_download_report(operations)

    print(f"Downloads preparados: {report['total_downloads']}")
    print()
    print("Por fonte:")
    for source, count in sorted(report['by_source'].items()):
        print(f"  {source}: {count}")
    print()

    if preview:
        print("Arquivos:")
        for position, op in enumerate(operations[:5], start=1):
            print(f"  {position}. [{op['tag']}] → {op['source']}")
        remaining = len(operations) - 5
        if remaining > 0:
            print(f"  ... e mais {remaining}")
        print()
        print("MODO PREVIEW - Nenhum download realizado.")
        return report

    # Execute mode: make sure destination folders exist before handing the
    # operations back to MCP for the actual downloads.
    created_folders = ensure_folders_exist(operations)
    if created_folders:
        print(f"Pastas criadas: {len(created_folders)}")

    return operations
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# Direct invocation runs main() without entries, which only prints the
# standalone-mode notice (this script is meant to be driven by JARVIS/MCP).
if __name__ == '__main__':
    main()
|