npm - agroplan-ai-cli - Versions diffs - 1.0.15 → 1.0.18 - Mend

agroplan-ai-cli 1.0.15 → 1.0.18

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (7)

package/backend-template/.env.example +19 -0
package/backend-template/api.py +128 -65
package/backend-template/core/zarc_adapter.py +290 -0
package/backend-template/data/zarc/zarc_index_2025-2026.json +1612 -0
package/backend-template/providers/zarc_provider.py +294 -130
package/backend-template/scripts/build_zarc_index.py +256 -0
package/package.json +1 -1

package/backend-template/providers/zarc_provider.py CHANGED

@@ -16,6 +16,11 @@ ZARC_CACHE_DIR = os.path.join(os.path.dirname(__file__), '..', 'data', 'zarc')
 ZARC_CACHE_TTL = int(os.getenv("ZARC_CACHE_TTL", "86400"))  # 24 horas
 ZARC_SOURCE = os.getenv("ZARC_SOURCE", "official")  # official, fallback
 ZARC_SAFRA_DEFAULT = os.getenv("ZARC_SAFRA", "2025/2026")
+ZARC_FAST_INDEX_ENABLED = os.getenv("ZARC_FAST_INDEX_ENABLED", "true").lower() == "true"
+ZARC_ALLOW_FULL_SCAN = os.getenv("ZARC_ALLOW_FULL_SCAN", "false").lower() == "true"
+# Cache do índice em memória (pequeno, pode ficar em RAM)
+_zarc_index_cache = {}
 # URLs oficiais do Portal de Dados Abertos do Ministério da Agricultura
 ZARC_URLS = {
@@ -314,163 +319,275 @@ def is_cache_valid(cache_path: str) -> bool:
     file_age = datetime.now() - datetime.fromtimestamp(os.path.getmtime(cache_path))
     return file_age.total_seconds() < ZARC_CACHE_TTL
-def download_zarc_dataset(safra: str) -> Optional[str]:
+def get_zarc_status(safra: str = ZARC_SAFRA_DEFAULT) -> Dict[str, Any]:
     """
-    Baixa dataset ZARC oficial
+    Retorna status do ZARC sem carregar dados
+    MEMORY SAFE: Não carrega CSV, apenas verifica arquivos
     Returns:
-        Caminho do arquivo baixado ou None se falhar
+        Status do ZARC (configuração, cache, etc)
     """
+    cache_path = get_cache_path(safra)
+    # Verificar índice
+    safra_filename = safra.replace("/", "-")
+    index_path = os.path.join(ZARC_CACHE_DIR, f"zarc_index_{safra_filename}.json")
+    status = {
+        "status": "configured",
+        "safra": safra,
+        "source": ZARC_SOURCE,
+        "fast_index": ZARC_FAST_INDEX_ENABLED,
+        "full_scan": ZARC_ALLOW_FULL_SCAN,
+        "index_exists": os.path.exists(index_path),
+        "cache_exists": os.path.exists(cache_path),
+        "cache_valid": False,
+        "cache_size_mb": 0
+    }
+    if os.path.exists(cache_path):
+        try:
+            # Tamanho do arquivo em MB
+            size_bytes = os.path.getsize(cache_path)
+            status["cache_size_mb"] = round(size_bytes / (1024 * 1024), 2)
+            # Verificar se cache é válido
+            status["cache_valid"] = is_cache_valid(cache_path)
+        except Exception:
+            pass
+    return status
+def load_zarc_index(safra: str = ZARC_SAFRA_DEFAULT) -> Optional[Dict[str, Any]]:
+    """
+    Carrega índice ZARC compacto em memória
+    MEMORY SAFE: Índice é pequeno (~35KB), pode ficar em RAM
+    Returns:
+        Índice ZARC ou None se não existir
+    """
+    global _zarc_index_cache
+    # Verificar cache em memória
+    if safra in _zarc_index_cache:
+        return _zarc_index_cache[safra]
+    # Carregar do arquivo
+    safra_filename = safra.replace("/", "-")
+    index_path = os.path.join(ZARC_CACHE_DIR, f"zarc_index_{safra_filename}.json")
+    if not os.path.exists(index_path):
+        return None
     try:
-        url = ZARC_URLS.get(safra)
-        if not url:
-            print(f"URL não disponível para safra {safra}")
-            return None
+        with open(index_path, 'r', encoding='utf-8') as f:
+            index = json.load(f)
-        cache_path = get_cache_path(safra)
+        # Cachear em memória
+        _zarc_index_cache[safra] = index
-        print(f"Baixando ZARC oficial de {url}...")
-        # Criar request com User-Agent
-        req = urllib.request.Request(
-            url,
-            headers={
-                'User-Agent': 'AgroPlan-AI/1.0 (https://github.com/Kuuhaku-Allan/agroplan-ai)'
-            }
-        )
+        return index
+    except Exception as e:
+        print(f"Erro ao carregar índice ZARC: {e}")
+        return None
+def buscar_zarc_indexado(
+    cultura: str,
+    uf: Optional[str] = None,
+    municipio: Optional[str] = None,
+    solo: Optional[str] = None,
+    safra: str = ZARC_SAFRA_DEFAULT
+) -> Optional[Dict[str, Any]]:
+    """
+    Busca ZARC no índice compacto (rápido)
+    PERFORMANCE: Lookup O(1) em vez de O(n) no CSV
+    Returns:
+        Dados ZARC ou None se não encontrar no índice
+    """
+    if not ZARC_FAST_INDEX_ENABLED:
+        return None
+    index = load_zarc_index(safra)
+    if not index:
+        return None
+    # Normalizar parâmetros
+    cultura_norm = normalizar_cultura(cultura)
+    if not uf or not municipio:
+        return None
+    uf_norm = normalizar_uf(uf)
+    municipio_norm = normalizar_municipio(municipio)
+    # Tentar diferentes combinações de solo
+    solos_tentar = []
+    if solo:
+        solo_norm = normalizar_solo(solo)
+        # Tentar o solo especificado primeiro, depois outros como fallback
+        solos_tentar = [solo_norm, "medio", "arenoso", "argiloso", "misto"]
+        # Remover duplicatas mantendo ordem
+        seen = set()
+        solos_tentar = [s for s in solos_tentar if not (s in seen or seen.add(s))]
+    else:
+        # Se não especificou solo, tentar todos (preferir medio/argiloso)
+        solos_tentar = ["medio", "argiloso", "arenoso", "misto"]
+    # Buscar no índice
+    for solo_test in solos_tentar:
+        # Chave: UF|municipio|cultura|solo
+        chave = f"{uf_norm}|{municipio_norm}|{cultura_norm}|{solo_test}"
-        # Download
-        with urllib.request.urlopen(req, timeout=30) as response:
-            content = response.read().decode('utf-8')
+        if chave in index["records"]:
+            return index["records"][chave]
+    return None
+def iter_zarc_records(file_path: str):
+    """
+    Itera sobre registros ZARC em streaming
+    MEMORY SAFE: Usa yield para processar linha por linha
+    Args:
+        file_path: Caminho do arquivo CSV
+    Yields:
+        Dicionário com dados de cada linha
+    """
+    with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
+        # Detectar delimitador
+        primeira_linha = f.readline()
+        f.seek(0)
-        # Salvar
-        with open(cache_path, 'w', encoding='utf-8') as f:
-            f.write(content)
+        delimiter = ';' if ';' in primeira_linha else ','
-        print(f"ZARC oficial baixado e salvo em {cache_path}")
-        return cache_path
+        reader = csv.DictReader(f, delimiter=delimiter)
-    except Exception as e:
-        print(f"Erro ao baixar ZARC oficial: {e}")
-        return None
+        for row in reader:
+            yield row
-def get_zarc_dataset(safra: str = ZARC_SAFRA_DEFAULT) -> Dict[str, Any]:
+def ensure_zarc_file(safra: str = ZARC_SAFRA_DEFAULT) -> Optional[Dict[str, Any]]:
     """
-    Obtém dataset ZARC (cache ou download)
+    Garante que arquivo ZARC existe, baixando se necessário
+    MEMORY SAFE: Não carrega registros, apenas gerencia arquivo
     Returns:
-        Dicionário com:
-        - records: Lista de registros ZARC
-        - source: "zarc-oficial" | "zarc-cache" | "zarc-fallback"
-        - fallback: bool
-        - cache_path: str ou None
-        - error: str ou None
+        Metadata do arquivo ou None se não disponível
     """
     cache_path = get_cache_path(safra)
     # Verificar cache válido
     if is_cache_valid(cache_path):
-        try:
-            records = load_zarc_from_file(cache_path)
-            return {
-                "records": records,
-                "source": "zarc-cache",
-                "fallback": False,
-                "cache_path": cache_path,
-                "error": None
-            }
-        except Exception as e:
-            print(f"Erro ao carregar cache ZARC: {e}")
+        return {
+            "file_path": cache_path,
+            "source": "zarc-cache",
+            "fallback": False,
+            "error": None
+        }
     # Tentar download se source for official
     if ZARC_SOURCE == "official":
-        downloaded_path = download_zarc_dataset(safra)
-        if downloaded_path:
+        url = ZARC_URLS.get(safra)
+        if url:
             try:
-                records = load_zarc_from_file(downloaded_path)
+                # Criar request com User-Agent
+                req = urllib.request.Request(
+                    url,
+                    headers={
+                        'User-Agent': 'AgroPlan-AI/1.0 (https://github.com/Kuuhaku-Allan/agroplan-ai)'
+                    }
+                )
+                # Download
+                with urllib.request.urlopen(req, timeout=30) as response:
+                    content = response.read().decode('utf-8')
+                # Salvar
+                with open(cache_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
                 return {
-                    "records": records,
+                    "file_path": cache_path,
                     "source": "zarc-oficial",
                     "fallback": False,
-                    "cache_path": downloaded_path,
                     "error": None
                 }
             except Exception as e:
-                print(f"Erro ao carregar ZARC baixado: {e}")
+                # Se download falhar, tentar usar cache antigo
+                if os.path.exists(cache_path):
+                    return {
+                        "file_path": cache_path,
+                        "source": "zarc-cache",
+                        "fallback": False,
+                        "error": f"Download falhou, usando cache antigo: {str(e)}"
+                    }
     # Usar cache antigo se existir (mesmo expirado)
     if os.path.exists(cache_path):
-        try:
-            records = load_zarc_from_file(cache_path)
-            return {
-                "records": records,
-                "source": "zarc-cache",
-                "fallback": False,
-                "cache_path": cache_path,
-                "error": "Cache expirado mas usado"
-            }
-        except Exception as e:
-            print(f"Erro ao carregar cache antigo: {e}")
+        return {
+            "file_path": cache_path,
+            "source": "zarc-cache",
+            "fallback": False,
+            "error": "Cache expirado mas usado"
+        }
-    # Fallback para dados simplificados
-    print("Usando fallback ZARC simplificado")
-    return {
-        "records": get_zarc_fallback(),
-        "source": "zarc-fallback",
-        "fallback": True,
-        "cache_path": None,
-        "error": "CSV oficial não disponível, usando dados simplificados"
-    }
+    # Nenhum arquivo disponível
+    return None
+# REMOVIDO: Funções obsoletas que carregavam CSV inteiro em memória
+# Essas funções foram removidas para evitar problemas de memória
+# Use: ensure_zarc_file() + iter_zarc_records() + buscar_zarc_indexado()
-def load_zarc_from_file(file_path: str) -> List[Dict[str, Any]]:
-    """Carrega dados ZARC de arquivo CSV"""
-    registros = []
+def download_zarc_dataset(*args, **kwargs):
+    """
+    REMOVIDO: Esta função carregava o CSV inteiro em memória (214 MB).
-    with open(file_path, 'r', encoding='utf-8-sig') as f:  # utf-8-sig remove BOM
-        # Detectar delimitador (CSV oficial usa ponto-e-vírgula)
-        primeira_linha = f.readline()
-        f.seek(0)
-        delimiter = ';' if ';' in primeira_linha else ','
-        reader = csv.DictReader(f, delimiter=delimiter)
-        # Log das colunas encontradas (primeira vez)
-        if reader.fieldnames:
-            print(f"Colunas ZARC encontradas ({len(reader.fieldnames)} colunas, delimiter='{delimiter}')")
-        for row in reader:
-            registros.append(row)
+    Use: ensure_zarc_file() para gerenciar o arquivo sem carregar dados
+    """
+    raise RuntimeError(
+        "download_zarc_dataset() foi removido por causar problemas de memória. "
+        "Use ensure_zarc_file() para gerenciar o arquivo ZARC."
+    )
+def get_zarc_dataset(*args, **kwargs):
+    """
+    REMOVIDO: Esta função carregava 1M+ registros em memória.
-    return registros
+    Use: buscar_zarc_indexado() para lookup rápido O(1)
+    Use: iter_zarc_records() para processar em streaming
+    """
+    raise RuntimeError(
+        "get_zarc_dataset() foi removido por carregar 1M+ registros em memória. "
+        "Use buscar_zarc_indexado() para lookup rápido ou iter_zarc_records() para streaming."
+    )
-def inspect_zarc_columns(safra: str = ZARC_SAFRA_DEFAULT) -> Optional[List[str]]:
+def load_zarc_from_file(*args, **kwargs):
     """
-    Inspeciona colunas do CSV ZARC oficial
+    REMOVIDO: Esta função carregava o CSV inteiro em uma lista.
-    Returns:
-        Lista de nomes de colunas ou None se falhar
+    Use: iter_zarc_records() para processar linha por linha
     """
-    try:
-        dataset = get_zarc_dataset(safra)
-        if not dataset or not dataset.get("records"):
-            print("Nenhum registro ZARC disponível")
-            return None
-        # Pegar colunas do primeiro registro
-        if dataset["records"]:
-            colunas = list(dataset["records"][0].keys())
-            print(f"\nColunas do CSV ZARC ({dataset['source']}):")
-            for i, col in enumerate(colunas, 1):
-                print(f"  {i}. {col}")
-            return colunas
-        return None
-    except Exception as e:
-        print(f"Erro ao inspecionar colunas ZARC: {e}")
-        return None
+    raise RuntimeError(
+        "load_zarc_from_file() foi removido por carregar CSV inteiro em lista. "
+        "Use iter_zarc_records() para processar linha por linha."
+    )
+def inspect_zarc_columns(*args, **kwargs):
+    """
+    REMOVIDO: Esta função dependia de get_zarc_dataset().
+    Use: iter_zarc_records() e inspecione a primeira linha
+    """
+    raise RuntimeError(
+        "inspect_zarc_columns() foi removido por depender de get_zarc_dataset(). "
+        "Use iter_zarc_records() e inspecione a primeira linha."
+    )
 def get_zarc_fallback() -> List[Dict[str, Any]]:
     """
@@ -576,6 +693,8 @@ def buscar_zarc(
     """
     Busca dados ZARC para cultura/região específica
+    PERFORMANCE: Tenta índice primeiro (rápido), depois streaming (lento)
     Args:
         cultura: Nome da cultura
         uf: Unidade Federativa (opcional)
@@ -586,27 +705,51 @@ def buscar_zarc(
     Returns:
         Dicionário com dados ZARC ou None se não encontrar
     """
-    dataset_info = get_zarc_dataset(safra)
-    if not dataset_info or not dataset_info.get("records"):
-        return None
-    dataset = dataset_info["records"]
-    source = dataset_info["source"]
-    is_fallback = dataset_info["fallback"]
+    # FAST PATH: Tentar índice primeiro (O(1) lookup)
+    if ZARC_FAST_INDEX_ENABLED:
+        resultado_indexado = buscar_zarc_indexado(cultura, uf, municipio, solo, safra)
+        if resultado_indexado:
+            return resultado_indexado
+    # SLOW PATH: Full scan no CSV (apenas se permitido)
+    if not ZARC_ALLOW_FULL_SCAN:
+        # Não encontrou no índice e full scan não é permitido
+        # Tentar fallback
+        return buscar_zarc_fallback(cultura, uf, municipio, solo, safra)
+    # Full scan permitido (desenvolvimento local)
+    return buscar_zarc_streaming(cultura, uf, municipio, solo, safra)
+def buscar_zarc_streaming(
+    cultura: str,
+    uf: Optional[str] = None,
+    municipio: Optional[str] = None,
+    solo: Optional[str] = None,
+    safra: str = ZARC_SAFRA_DEFAULT
+) -> Optional[Dict[str, Any]]:
+    """
+    Busca ZARC usando streaming no CSV (LENTO mas memory-safe)
+    PERFORMANCE: O(n) - varre todo o CSV
+    Usar apenas em desenvolvimento ou quando índice não disponível
+    """
     # Normalizar parâmetros de busca
     cultura_norm = normalizar_cultura(cultura)
     uf_norm = normalizar_uf(uf) if uf else None
     municipio_norm = normalizar_municipio(municipio) if municipio else None
     solo_norm = normalizar_solo(solo) if solo else None
-    # Se estiver usando dados oficiais, processar decêndios
-    if not is_fallback:
-        # Buscar no CSV oficial
+    # Tentar obter arquivo ZARC
+    file_info = ensure_zarc_file(safra)
+    if file_info:
+        # Usar arquivo oficial/cache com streaming
+        source = file_info["source"]
         melhor_match = None
         melhor_score = 0
-        for registro in dataset:
+        # Processar CSV em streaming (linha por linha)
+        for registro in iter_zarc_records(file_info["file_path"]):
             score = 0
             # Cultura deve bater
@@ -628,9 +771,10 @@ def buscar_zarc(
                 if normalizar_solo(solo_registro) == solo_norm:
                     score += 2
+            # Manter apenas o melhor match (não acumula lista)
             if score > melhor_score:
                 melhor_score = score
-                melhor_match = registro
+                melhor_match = registro.copy()  # Copia apenas este registro
         if melhor_match:
             # Extrair janelas de plantio dos decêndios
@@ -684,11 +828,31 @@ def buscar_zarc(
                 "message": "Nenhuma recomendação ZARC encontrada para os parâmetros informados."
             }
-    # Fallback: usar dados simplificados
+    # Arquivo não disponível, usar fallback
+    return buscar_zarc_fallback(cultura, uf, municipio, solo, safra)
+def buscar_zarc_fallback(
+    cultura: str,
+    uf: Optional[str] = None,
+    municipio: Optional[str] = None,
+    solo: Optional[str] = None,
+    safra: str = ZARC_SAFRA_DEFAULT
+) -> Optional[Dict[str, Any]]:
+    """
+    Busca ZARC em dados simplificados (fallback)
+    """
+    # Normalizar parâmetros
+    cultura_norm = normalizar_cultura(cultura)
+    uf_norm = normalizar_uf(uf) if uf else None
+    municipio_norm = normalizar_municipio(municipio) if municipio else None
+    solo_norm = normalizar_solo(solo) if solo else None
+    # Fallback: usar dados simplificados (lista pequena em memória)
+    fallback_data = get_zarc_fallback()
     melhor_match = None
     melhor_score = 0
-    for registro in dataset:
+    for registro in fallback_data:
         score = 0
         # Cultura deve bater