own-rag-cli 0.0.7-snapshot → 0.0.9-snapshot
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/indexer_full.py +46 -1
- package/bin/mcp_server.py +43 -1
- package/bin/postinstall.sh +17 -1
- package/package.json +1 -1
- package/rag-setup-macos.run +8 -3
- package/rag-setup.run +8 -3
package/README.md
CHANGED
package/bin/indexer_full.py
CHANGED
|
@@ -1176,6 +1176,7 @@ def index_file(
|
|
|
1176
1176
|
splitter: RecursiveCharacterTextSplitter,
|
|
1177
1177
|
root_path: Path,
|
|
1178
1178
|
embedding_batch_size: int,
|
|
1179
|
+
raise_on_stop_iteration: bool = False,
|
|
1179
1180
|
) -> int:
|
|
1180
1181
|
"""
|
|
1181
1182
|
Indexa um único arquivo: lê, divide em chunks, gera embeddings e faz upsert.
|
|
@@ -1320,6 +1321,10 @@ def index_file(
|
|
|
1320
1321
|
_warn_stop_iteration(
|
|
1321
1322
|
f" [AVISO] {filepath.name}: {skipped_chunks} chunk(s) vazio(s)/inválido(s) foram ignorados."
|
|
1322
1323
|
)
|
|
1324
|
+
if raise_on_stop_iteration and inserted_chunks == 0 and stop_iteration_warnings > 0:
|
|
1325
|
+
raise RuntimeError(
|
|
1326
|
+
f"stop_iteration_all_chunks:{filepath.name}:{stop_iteration_warnings}"
|
|
1327
|
+
)
|
|
1323
1328
|
return inserted_chunks
|
|
1324
1329
|
|
|
1325
1330
|
|
|
@@ -1532,8 +1537,8 @@ def main():
|
|
|
1532
1537
|
if target is None:
|
|
1533
1538
|
continue
|
|
1534
1539
|
|
|
1540
|
+
target_quantization = jina_quantization if target.model_choice == "jina" else "default"
|
|
1535
1541
|
if target.model_choice not in loaded_models:
|
|
1536
|
-
target_quantization = jina_quantization if target.model_choice == "jina" else "default"
|
|
1537
1542
|
try:
|
|
1538
1543
|
loaded_models[target.model_choice] = load_embedding_model(
|
|
1539
1544
|
target.model_choice,
|
|
@@ -1568,6 +1573,7 @@ def main():
|
|
|
1568
1573
|
splitter,
|
|
1569
1574
|
root_path,
|
|
1570
1575
|
embedding_batch_size=effective_batch_size,
|
|
1576
|
+
raise_on_stop_iteration=(target.model_choice == "bge"),
|
|
1571
1577
|
)
|
|
1572
1578
|
total_chunks += n_chunks
|
|
1573
1579
|
files_processed_total += 1
|
|
@@ -1591,6 +1597,45 @@ def main():
|
|
|
1591
1597
|
gc.collect()
|
|
1592
1598
|
continue
|
|
1593
1599
|
|
|
1600
|
+
if target.model_choice == "bge":
|
|
1601
|
+
fallback_target = target_by_model.get("jina-v2") or target_by_model.get("jina")
|
|
1602
|
+
if fallback_target is not None:
|
|
1603
|
+
fallback_quantization = (
|
|
1604
|
+
jina_quantization if fallback_target.model_choice == "jina" else "default"
|
|
1605
|
+
)
|
|
1606
|
+
try:
|
|
1607
|
+
if fallback_target.model_choice not in loaded_models:
|
|
1608
|
+
loaded_models[fallback_target.model_choice] = load_embedding_model(
|
|
1609
|
+
fallback_target.model_choice,
|
|
1610
|
+
fallback_quantization,
|
|
1611
|
+
)
|
|
1612
|
+
fallback_model = loaded_models[fallback_target.model_choice]
|
|
1613
|
+
fallback_collection = collections[fallback_target.collection_name]
|
|
1614
|
+
tqdm.write(
|
|
1615
|
+
f" [FALLBACK] {filepath.name}: BGE falhou; "
|
|
1616
|
+
f"reindexando com {fallback_target.label}."
|
|
1617
|
+
)
|
|
1618
|
+
fallback_chunks = index_file(
|
|
1619
|
+
filepath,
|
|
1620
|
+
fallback_collection,
|
|
1621
|
+
fallback_model,
|
|
1622
|
+
splitter,
|
|
1623
|
+
root_path,
|
|
1624
|
+
embedding_batch_size=effective_batch_size,
|
|
1625
|
+
raise_on_stop_iteration=False,
|
|
1626
|
+
)
|
|
1627
|
+
total_chunks += fallback_chunks
|
|
1628
|
+
files_processed_total += 1
|
|
1629
|
+
files_eligible_by_collection[fallback_target.collection_name] += 1
|
|
1630
|
+
chunks_by_collection[fallback_target.collection_name] += fallback_chunks
|
|
1631
|
+
files_by_collection[fallback_target.collection_name] += 1
|
|
1632
|
+
break
|
|
1633
|
+
except Exception as fallback_error:
|
|
1634
|
+
tqdm.write(
|
|
1635
|
+
f" [ERRO] {filepath} [fallback {fallback_target.label}]: "
|
|
1636
|
+
f"{_format_exception(fallback_error)}"
|
|
1637
|
+
)
|
|
1638
|
+
|
|
1594
1639
|
if (
|
|
1595
1640
|
_is_dimension_mismatch_error(e)
|
|
1596
1641
|
and not collection_dimension_reset_done[target.collection_name]
|
package/bin/mcp_server.py
CHANGED
|
@@ -1021,6 +1021,10 @@ def _index_single_file_for_branch(
|
|
|
1021
1021
|
_warn_stop_iteration(
|
|
1022
1022
|
f"{filepath.name} [{branch.key}] ignorou {skipped_chunks} chunk(s) vazio(s)/inválido(s)."
|
|
1023
1023
|
)
|
|
1024
|
+
if branch.key == "bge_doc" and inserted_chunks == 0 and stop_iteration_warnings > 0:
|
|
1025
|
+
raise RuntimeError(
|
|
1026
|
+
f"stop_iteration_all_chunks:{filepath.name}:{stop_iteration_warnings}"
|
|
1027
|
+
)
|
|
1024
1028
|
return inserted_chunks
|
|
1025
1029
|
|
|
1026
1030
|
|
|
@@ -1139,6 +1143,7 @@ def _rrf_fuse(hits_by_branch: dict[str, list[RetrievedHit]], top_limit: int) ->
|
|
|
1139
1143
|
|
|
1140
1144
|
|
|
1141
1145
|
def _apply_rerank(query: str, fused_hits: list[FusedHit], top_k: int) -> tuple[list[FusedHit], bool, str | None]:
|
|
1146
|
+
global _reranker, _reranker_error
|
|
1142
1147
|
if not fused_hits:
|
|
1143
1148
|
return [], False, None
|
|
1144
1149
|
|
|
@@ -1163,7 +1168,9 @@ def _apply_rerank(query: str, fused_hits: list[FusedHit], top_k: int) -> tuple[l
|
|
|
1163
1168
|
)
|
|
1164
1169
|
return fused_hits[:top_k], True, None
|
|
1165
1170
|
except Exception as e:
|
|
1166
|
-
|
|
1171
|
+
_reranker = None
|
|
1172
|
+
_reranker_error = f"runtime_error:{e}"
|
|
1173
|
+
return fused_hits[:top_k], False, f"fallback_sem_rerank:{e}"
|
|
1167
1174
|
|
|
1168
1175
|
|
|
1169
1176
|
def _format_similarity(similarity: float | None) -> str:
|
|
@@ -1483,6 +1490,23 @@ def update_file_index(file_path: str) -> str:
|
|
|
1483
1490
|
)
|
|
1484
1491
|
inserted_per_branch[branch.key] = inserted
|
|
1485
1492
|
except Exception as e:
|
|
1493
|
+
if branch.key == "bge_doc" and "jina_code" not in inserted_per_branch:
|
|
1494
|
+
fallback_branch = BRANCH_SPECS["jina_code"]
|
|
1495
|
+
try:
|
|
1496
|
+
inserted = _index_single_file_for_branch(
|
|
1497
|
+
filepath,
|
|
1498
|
+
fallback_branch,
|
|
1499
|
+
splitter,
|
|
1500
|
+
delete_existing=False,
|
|
1501
|
+
)
|
|
1502
|
+
inserted_per_branch[fallback_branch.key] = inserted
|
|
1503
|
+
index_errors.append(f"{branch.key}: {e} (fallback para {fallback_branch.key} aplicado)")
|
|
1504
|
+
continue
|
|
1505
|
+
except Exception as fallback_error:
|
|
1506
|
+
index_errors.append(
|
|
1507
|
+
f"{branch.key}: {e} | fallback {fallback_branch.key}: {fallback_error}"
|
|
1508
|
+
)
|
|
1509
|
+
continue
|
|
1486
1510
|
index_errors.append(f"{branch.key}: {e}")
|
|
1487
1511
|
|
|
1488
1512
|
success_branches = [k for k, v in inserted_per_branch.items() if v > 0]
|
|
@@ -1611,13 +1635,31 @@ def index_specific_folder(folder_path: str) -> str:
|
|
|
1611
1635
|
for filepath in _scan_folder(folder):
|
|
1612
1636
|
processed_files += 1
|
|
1613
1637
|
targets = _classify_file_targets(filepath)
|
|
1638
|
+
indexed_branches_for_file: set[str] = set()
|
|
1614
1639
|
|
|
1615
1640
|
for branch in targets:
|
|
1616
1641
|
try:
|
|
1617
1642
|
n_chunks = _index_single_file_for_branch(filepath, branch, splitter)
|
|
1618
1643
|
branch_file_counts[branch.key] += 1
|
|
1619
1644
|
branch_chunk_counts[branch.key] += n_chunks
|
|
1645
|
+
indexed_branches_for_file.add(branch.key)
|
|
1620
1646
|
except Exception as e:
|
|
1647
|
+
if branch.key == "bge_doc" and "jina_code" not in indexed_branches_for_file:
|
|
1648
|
+
fallback_branch = BRANCH_SPECS["jina_code"]
|
|
1649
|
+
try:
|
|
1650
|
+
n_chunks = _index_single_file_for_branch(filepath, fallback_branch, splitter)
|
|
1651
|
+
branch_file_counts[fallback_branch.key] += 1
|
|
1652
|
+
branch_chunk_counts[fallback_branch.key] += n_chunks
|
|
1653
|
+
indexed_branches_for_file.add(fallback_branch.key)
|
|
1654
|
+
error_count += 1
|
|
1655
|
+
if len(error_samples) < 10:
|
|
1656
|
+
error_samples.append(
|
|
1657
|
+
f"{filepath.name} [{branch.key}]: {e} "
|
|
1658
|
+
f"(fallback para {fallback_branch.key} aplicado)"
|
|
1659
|
+
)
|
|
1660
|
+
continue
|
|
1661
|
+
except Exception as fallback_error:
|
|
1662
|
+
e = RuntimeError(f"{e} | fallback {fallback_branch.key}: {fallback_error}")
|
|
1621
1663
|
error_count += 1
|
|
1622
1664
|
if len(error_samples) < 10:
|
|
1623
1665
|
error_samples.append(f"{filepath.name} [{branch.key}]: {e}")
|
package/bin/postinstall.sh
CHANGED
|
@@ -35,6 +35,14 @@ ALIAS_LINE="alias rag='~/.local/bin/rag-wrapper.sh'"
|
|
|
35
35
|
log_info() { printf "[+] %s\n" "$*"; }
|
|
36
36
|
log_warn() { printf "[!] %s\n" "$*" >&2; }
|
|
37
37
|
|
|
38
|
+
ask_yes_no_default_no() {
|
|
39
|
+
local prompt="$1"
|
|
40
|
+
local answer=""
|
|
41
|
+
read -r -p "${prompt} [y/N] " answer || return 1
|
|
42
|
+
answer="$(printf "%s" "${answer}" | tr '[:upper:]' '[:lower:]')"
|
|
43
|
+
[[ "${answer}" == "y" || "${answer}" == "yes" || "${answer}" == "s" || "${answer}" == "sim" ]]
|
|
44
|
+
}
|
|
45
|
+
|
|
38
46
|
ensure_own_rag_cli_config() {
|
|
39
47
|
python3 - "${OWN_RAG_CONFIG_FILE}" <<'PYEOF'
|
|
40
48
|
import json
|
|
@@ -309,7 +317,15 @@ if command -v claude >/dev/null 2>&1 || [[ -d "${HOME}/.claude" ]] || [[ -f "${C
|
|
|
309
317
|
fi
|
|
310
318
|
|
|
311
319
|
if command -v codex >/dev/null 2>&1 || [[ -d "${HOME}/.codex" ]] || [[ -f "${CODEX_CONFIG_FILE}" ]]; then
|
|
312
|
-
|
|
320
|
+
if [[ -t 0 ]]; then
|
|
321
|
+
if ask_yes_no_default_no "Deseja adicionar/atualizar o MCP 'rag-codebase' no Codex?"; then
|
|
322
|
+
ensure_codex_mcp_config || true
|
|
323
|
+
else
|
|
324
|
+
log_info "Configuração MCP do Codex ignorada por escolha do usuário."
|
|
325
|
+
fi
|
|
326
|
+
else
|
|
327
|
+
log_info "Sem terminal interativo: pulando configuração automática do MCP no Codex."
|
|
328
|
+
fi
|
|
313
329
|
fi
|
|
314
330
|
|
|
315
331
|
if [[ -f "${WRAPPER_SRC}" ]]; then
|