own-rag-cli 0.0.3-snapshot → 0.0.5-snapshot
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -23
- package/README.pt-br.md +112 -19
- package/bin/indexer_full.py +128 -21
- package/bin/mcp_server.py +146 -22
- package/bin/postinstall.sh +234 -0
- package/bin/rag-wrapper.sh +151 -10
- package/package.json +1 -1
- package/rag-setup-macos.run +2 -2
- package/rag-setup.run +2 -2
package/bin/mcp_server.py
CHANGED
|
@@ -399,7 +399,7 @@ IGNORED_EXTENSIONS = {
|
|
|
399
399
|
".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".webp", ".bmp",
|
|
400
400
|
".mp4", ".mp3", ".wav", ".ogg", ".avi", ".mov",
|
|
401
401
|
".zip", ".tar", ".gz", ".rar", ".7z", ".jar", ".war",
|
|
402
|
-
".pyc", ".pyo", ".so", ".dll", ".exe", ".bin",
|
|
402
|
+
".pyc", ".pyo", ".so", ".dll", ".exe", ".bin", ".run",
|
|
403
403
|
".lock", ".sum", ".sqlite", ".db", ".sqlite3",
|
|
404
404
|
".ttf", ".woff", ".woff2", ".eot",
|
|
405
405
|
".pdf", ".docx", ".xlsx", ".pptx",
|
|
@@ -804,16 +804,43 @@ def _delete_file_chunks(collection: chromadb.Collection, file_path: str) -> int:
|
|
|
804
804
|
|
|
805
805
|
|
|
806
806
|
def _read_file_safe(filepath: Path) -> str | None:
    """Read *filepath* as text, or return ``None`` when it cannot be used.

    The file is read once as raw bytes. ``None`` is returned when the read
    raises ``OSError``, when ``_looks_binary_content`` flags the bytes as
    binary, or when none of the candidate encodings can decode them.
    """
    try:
        raw = filepath.read_bytes()
    except OSError:
        return None

    if _looks_binary_content(raw):
        return None

    # Order matters: latin-1 maps every possible byte and therefore never
    # fails, so it has to be the last resort. With latin-1 listed before
    # cp1252, the cp1252 attempt could never be reached.
    for encoding in ("utf-8", "cp1252", "latin-1"):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    return None
|
|
815
821
|
|
|
816
822
|
|
|
823
|
+
def _looks_binary_content(raw: bytes) -> bool:
|
|
824
|
+
if not raw:
|
|
825
|
+
return False
|
|
826
|
+
|
|
827
|
+
sample = raw[:4096]
|
|
828
|
+
if b"\x00" in sample:
|
|
829
|
+
return True
|
|
830
|
+
|
|
831
|
+
non_text_bytes = 0
|
|
832
|
+
for byte in sample:
|
|
833
|
+
if byte in (9, 10, 13): # \t \n \r
|
|
834
|
+
continue
|
|
835
|
+
if 32 <= byte <= 126: # ASCII imprimivel
|
|
836
|
+
continue
|
|
837
|
+
if 160 <= byte <= 255: # Latin-1 estendido
|
|
838
|
+
continue
|
|
839
|
+
non_text_bytes += 1
|
|
840
|
+
|
|
841
|
+
return (non_text_bytes / len(sample)) > 0.30
|
|
842
|
+
|
|
843
|
+
|
|
817
844
|
def _scan_folder(folder_path: Path) -> Iterator[Path]:
|
|
818
845
|
for dirpath, dirnames, filenames in os.walk(folder_path):
|
|
819
846
|
dirnames[:] = [
|
|
@@ -871,32 +898,108 @@ def _index_single_file_for_branch(
|
|
|
871
898
|
_delete_file_chunks(collection, abs_path)
|
|
872
899
|
|
|
873
900
|
inserted_chunks = 0
|
|
901
|
+
skipped_chunks = 0
|
|
902
|
+
stop_iteration_warnings = 0
|
|
874
903
|
batch_ids: list[str] = []
|
|
875
904
|
batch_docs: list[str] = []
|
|
876
905
|
batch_metadatas: list[dict[str, object]] = []
|
|
877
906
|
|
|
907
|
+
def _warn_stop_iteration(message: str) -> None:
|
|
908
|
+
nonlocal stop_iteration_warnings
|
|
909
|
+
if stop_iteration_warnings < 3:
|
|
910
|
+
log.warning(message)
|
|
911
|
+
stop_iteration_warnings += 1
|
|
912
|
+
|
|
913
|
+
def _to_embedding_rows(encoded_embeddings: object) -> list[list[float]]:
|
|
914
|
+
if hasattr(encoded_embeddings, "tolist"):
|
|
915
|
+
rows = encoded_embeddings.tolist()
|
|
916
|
+
if isinstance(rows, list):
|
|
917
|
+
if rows and isinstance(rows[0], (int, float)):
|
|
918
|
+
return [list(rows)]
|
|
919
|
+
return rows
|
|
920
|
+
return [list(row) for row in encoded_embeddings] # type: ignore[arg-type]
|
|
921
|
+
|
|
878
922
|
def _flush_batch() -> None:
|
|
879
|
-
nonlocal inserted_chunks
|
|
923
|
+
nonlocal inserted_chunks, skipped_chunks
|
|
880
924
|
if not batch_ids:
|
|
881
925
|
return
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
926
|
+
|
|
927
|
+
pending_ids = list(batch_ids)
|
|
928
|
+
pending_docs = list(batch_docs)
|
|
929
|
+
pending_metadatas = list(batch_metadatas)
|
|
930
|
+
|
|
931
|
+
try:
|
|
932
|
+
encoded = model.encode(
|
|
933
|
+
pending_docs,
|
|
934
|
+
show_progress_bar=False,
|
|
935
|
+
batch_size=EMBEDDING_BATCH_SIZE,
|
|
936
|
+
)
|
|
937
|
+
embeddings = _to_embedding_rows(encoded)
|
|
938
|
+
collection.upsert(
|
|
939
|
+
ids=pending_ids,
|
|
940
|
+
embeddings=embeddings,
|
|
941
|
+
documents=pending_docs,
|
|
942
|
+
metadatas=pending_metadatas,
|
|
943
|
+
)
|
|
944
|
+
inserted_chunks += len(pending_ids)
|
|
945
|
+
del embeddings
|
|
946
|
+
except StopIteration:
|
|
947
|
+
_warn_stop_iteration(
|
|
948
|
+
f"{filepath.name} [{branch.key}] StopIteration no batch; aplicando fallback por chunk."
|
|
949
|
+
)
|
|
950
|
+
for chunk_id, chunk_doc, chunk_metadata in zip(pending_ids, pending_docs, pending_metadatas):
|
|
951
|
+
candidate_doc = chunk_doc.strip()
|
|
952
|
+
if not candidate_doc:
|
|
953
|
+
skipped_chunks += 1
|
|
954
|
+
continue
|
|
955
|
+
try:
|
|
956
|
+
encoded_single = model.encode(
|
|
957
|
+
[candidate_doc],
|
|
958
|
+
show_progress_bar=False,
|
|
959
|
+
batch_size=1,
|
|
960
|
+
)
|
|
961
|
+
single_embeddings = _to_embedding_rows(encoded_single)
|
|
962
|
+
collection.upsert(
|
|
963
|
+
ids=[chunk_id],
|
|
964
|
+
embeddings=single_embeddings,
|
|
965
|
+
documents=[candidate_doc],
|
|
966
|
+
metadatas=[chunk_metadata],
|
|
967
|
+
)
|
|
968
|
+
inserted_chunks += 1
|
|
969
|
+
del single_embeddings
|
|
970
|
+
except StopIteration:
|
|
971
|
+
compact_doc = " ".join(candidate_doc.split())
|
|
972
|
+
if not compact_doc:
|
|
973
|
+
skipped_chunks += 1
|
|
974
|
+
continue
|
|
975
|
+
try:
|
|
976
|
+
encoded_single = model.encode(
|
|
977
|
+
[compact_doc],
|
|
978
|
+
show_progress_bar=False,
|
|
979
|
+
batch_size=1,
|
|
980
|
+
)
|
|
981
|
+
single_embeddings = _to_embedding_rows(encoded_single)
|
|
982
|
+
collection.upsert(
|
|
983
|
+
ids=[chunk_id],
|
|
984
|
+
embeddings=single_embeddings,
|
|
985
|
+
documents=[compact_doc],
|
|
986
|
+
metadatas=[chunk_metadata],
|
|
987
|
+
)
|
|
988
|
+
inserted_chunks += 1
|
|
989
|
+
del single_embeddings
|
|
990
|
+
except StopIteration:
|
|
991
|
+
skipped_chunks += 1
|
|
992
|
+
_warn_stop_iteration(
|
|
993
|
+
f"{filepath.name} [{branch.key}] chunk ignorado após StopIteration repetido."
|
|
994
|
+
)
|
|
895
995
|
batch_ids.clear()
|
|
896
996
|
batch_docs.clear()
|
|
897
997
|
batch_metadatas.clear()
|
|
898
998
|
|
|
899
999
|
for i, chunk in enumerate(chunks):
|
|
1000
|
+
if not chunk or not chunk.strip():
|
|
1001
|
+
skipped_chunks += 1
|
|
1002
|
+
continue
|
|
900
1003
|
batch_ids.append(_make_chunk_id(abs_path, i))
|
|
901
1004
|
batch_docs.append(chunk)
|
|
902
1005
|
batch_metadatas.append(
|
|
@@ -914,6 +1017,10 @@ def _index_single_file_for_branch(
|
|
|
914
1017
|
_flush_batch()
|
|
915
1018
|
|
|
916
1019
|
_flush_batch()
|
|
1020
|
+
if skipped_chunks:
|
|
1021
|
+
_warn_stop_iteration(
|
|
1022
|
+
f"{filepath.name} [{branch.key}] ignorou {skipped_chunks} chunk(s) vazio(s)/inválido(s)."
|
|
1023
|
+
)
|
|
917
1024
|
return inserted_chunks
|
|
918
1025
|
|
|
919
1026
|
|
|
@@ -1196,15 +1303,32 @@ def _run_ensemble_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str
|
|
|
1196
1303
|
# Servidor MCP via FastMCP
|
|
1197
1304
|
# ---------------------------------------------------------------------------
|
|
1198
1305
|
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1306
|
+
FORCE_SEMANTIC_FALLBACK = (
|
|
1307
|
+
os.environ.get("MCP_FORCE_SEMANTIC_FALLBACK", "true").strip().lower()
|
|
1308
|
+
in {"1", "true", "yes", "on"}
|
|
1309
|
+
)
|
|
1310
|
+
|
|
1311
|
+
|
|
1312
|
+
def _build_server_instructions() -> str:
|
|
1313
|
+
instructions = (
|
|
1202
1314
|
"Servidor RAG para busca semântica em código-fonte local com suporte a ensemble híbrido. "
|
|
1203
1315
|
"No modo hybrid, a branch de código usa Jina v2 e a de documentação usa BGE. "
|
|
1204
1316
|
"Use semantic_search_code(query, top_k, mode='ensemble') para combinar Jina+BGE com RRF e reranking. "
|
|
1205
1317
|
"Use update_file_index após editar um arquivo para manter as duas coleções sincronizadas. "
|
|
1206
1318
|
"Use index_specific_folder para indexação recursiva sob demanda."
|
|
1207
|
-
)
|
|
1319
|
+
)
|
|
1320
|
+
if FORCE_SEMANTIC_FALLBACK:
|
|
1321
|
+
instructions += (
|
|
1322
|
+
" Regra obrigatória: quando a pergunta mencionar função/classe/componente/arquivo "
|
|
1323
|
+
"e isso não estiver no contexto imediato, chame semantic_search_code(mode='ensemble') "
|
|
1324
|
+
"antes de responder que não encontrou. Só conclua ausência após ao menos uma busca semântica."
|
|
1325
|
+
)
|
|
1326
|
+
return instructions
|
|
1327
|
+
|
|
1328
|
+
|
|
1329
|
+
# Module-level FastMCP server instance. The instruction text is computed here,
# at import time, so it reflects the environment as it was when this module
# was loaded.
mcp = FastMCP(
    name="rag-codebase",
    instructions=_build_server_instructions(),
)
|
|
1209
1333
|
|
|
1210
1334
|
|
package/bin/postinstall.sh
CHANGED
|
@@ -17,6 +17,14 @@ MONITOR_DEST="${LOCAL_BIN_DIR}/chroma_monitor.sh"
|
|
|
17
17
|
REMOVE_SRC="${PACKAGE_ROOT}/bin/rag-remove.sh"
|
|
18
18
|
REMOVE_DEST="${LOCAL_BIN_DIR}/rag-remove.sh"
|
|
19
19
|
OWN_RAG_CONFIG_FILE="${HOME}/.own-rag-cli.json"
|
|
20
|
+
MCP_SERVER_COMMAND="${HOME}/.local/bin/mcp-rag-server"
|
|
21
|
+
CLAUDE_CONFIG_FILE="${HOME}/.claude.json"
|
|
22
|
+
CURSOR_MCP_CONFIG_1="${HOME}/.cursor/mcp.json"
|
|
23
|
+
CURSOR_MCP_CONFIG_2="${HOME}/.config/Cursor/User/mcp.json"
|
|
24
|
+
CURSOR_RULE_DIR_1="${HOME}/.cursor/rules"
|
|
25
|
+
CURSOR_RULE_DIR_2="${HOME}/.config/Cursor/User/rules"
|
|
26
|
+
CURSOR_RULE_FILE_NAME="own-rag-force-semantic-search.mdc"
|
|
27
|
+
CODEX_CONFIG_FILE="${HOME}/.codex/config.toml"
|
|
20
28
|
|
|
21
29
|
COMPOSE_SOURCE="${PACKAGE_ROOT}/bin/docker-compose.yml"
|
|
22
30
|
COMPOSE_DIR="${HOME}/docker-chromadb"
|
|
@@ -52,10 +60,236 @@ cfg.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encodin
|
|
|
52
60
|
PYEOF
|
|
53
61
|
}
|
|
54
62
|
|
|
63
|
+
# Register the "rag-codebase" MCP server in a JSON client config file.
#
# Arguments:
#   $1 - path of the JSON config file to create or update
#   $2 - client label used in log messages (e.g. "Cursor", "Claude")
# Returns:
#   0 when the file was updated, was already correct, or was deliberately left
#   untouched because its current content could not be interpreted;
#   1 when the embedded Python helper itself fails.
ensure_mcp_json_config() {
  local cfg_file="$1"
  local cfg_label="$2"
  local update_result

  if ! update_result="$(
    python3 - "${cfg_file}" "${MCP_SERVER_COMMAND}" "${OWN_RAG_CONFIG_FILE}" <<'PYEOF'
import json
import sys
from pathlib import Path

cfg_path = Path(sys.argv[1]).expanduser()
mcp_server_command = sys.argv[2]
own_rag_config = sys.argv[3]

data = {}
if cfg_path.exists():
    try:
        raw = cfg_path.read_text(encoding="utf-8")
        # An empty file is treated like a missing one; anything else must parse.
        if raw.strip():
            data = json.loads(raw)
    except (OSError, ValueError):
        # Never overwrite a file we cannot read or parse: it may hold
        # unrelated user settings that would be lost.
        print("skip:unreadable")
        raise SystemExit(0)

if not isinstance(data, dict):
    # Same reasoning: an unexpected root type is left for the user to fix.
    print("skip:invalid_root")
    raise SystemExit(0)

mcp_servers = data.get("mcpServers")
if not isinstance(mcp_servers, dict):
    mcp_servers = {}

desired = {
    "command": mcp_server_command,
    "args": [],
    "env": {
        "OWN_RAG_CLI_CONFIG_FILE": own_rag_config,
        "TOKENIZERS_PARALLELISM": "false",
        "MCP_FORCE_SEMANTIC_FALLBACK": "true",
    },
}

current = mcp_servers.get("rag-codebase")
if current == desired:
    print("skip:already_set")
    raise SystemExit(0)

mcp_servers["rag-codebase"] = desired
data["mcpServers"] = mcp_servers
cfg_path.parent.mkdir(parents=True, exist_ok=True)
cfg_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
print("updated")
PYEOF
  )"; then
    log_warn "Falha ao atualizar ${cfg_label}: ${cfg_file}"
    return 1
  fi

  case "${update_result}" in
    updated)
      log_info "${cfg_label} MCP configurado: ${cfg_file}"
      ;;
    skip:unreadable|skip:invalid_root)
      log_warn "${cfg_label}: configuracao existente invalida, arquivo mantido sem alteracoes: ${cfg_file}"
      ;;
  esac
  return 0
}
|
|
124
|
+
|
|
125
|
+
# Register the "rag-codebase" MCP server in the Codex TOML config
# (${CODEX_CONFIG_FILE}), creating the file and its directory when missing.
#
# The operation is idempotent: an existing [mcp_servers.rag-codebase] section,
# together with its sub-tables, is replaced as a whole instead of being
# appended to. Returns 0 on success (updated or already set), 1 when the
# embedded Python helper fails.
ensure_codex_mcp_config() {
  local update_result
  if ! update_result="$(
    python3 - "${CODEX_CONFIG_FILE}" "${MCP_SERVER_COMMAND}" "${OWN_RAG_CONFIG_FILE}" <<'PYEOF'
import json
import re
import sys
from pathlib import Path

cfg_path = Path(sys.argv[1]).expanduser()
mcp_server_command = sys.argv[2]
own_rag_config = sys.argv[3]

cfg_path.parent.mkdir(parents=True, exist_ok=True)
text = cfg_path.read_text(encoding="utf-8") if cfg_path.exists() else ""


def toml_string(value):
    # JSON string escapes (\" \\ \n \uXXXX ...) are also valid inside a TOML
    # basic string, so quotes or backslashes in a path cannot break the file.
    return json.dumps(value, ensure_ascii=False)


block = (
    "[mcp_servers.rag-codebase]\n"
    f"command = {toml_string(mcp_server_command)}\n"
    "args = []\n\n"
    "[mcp_servers.rag-codebase.env]\n"
    f"OWN_RAG_CLI_CONFIG_FILE = {toml_string(own_rag_config)}\n"
    'TOKENIZERS_PARALLELISM = "false"\n'
    'MCP_FORCE_SEMANTIC_FALLBACK = "true"\n'
)

# Match from the section header up to the next table that does NOT belong to
# rag-codebase, or to the end of the text. \Z is required here: under (?m) a
# plain "$" matches at every line end and would stop the match after one line,
# and the section's own ".env" sub-table must not terminate the match either.
pattern = re.compile(
    r"(?ms)^\[mcp_servers\.rag-codebase\][^\n]*(?:\n|\Z).*?"
    r"(?=^\[(?!mcp_servers\.rag-codebase[.\]])|\Z)"
)

match = pattern.search(text)
if match:
    rest = text[match.end():]
    # Plain slicing instead of re.sub: paths must not be read as templates.
    # Keep one blank line before a following table, nothing at end of file.
    new_text = text[:match.start()] + block + ("\n" if rest else "") + rest
elif text:
    separator = "\n" if text.endswith("\n") else "\n\n"
    new_text = f"{text}{separator}{block}"
else:
    new_text = block

if new_text == text:
    print("skip:already_set")
    raise SystemExit(0)

cfg_path.write_text(new_text, encoding="utf-8")
print("updated")
PYEOF
  )"; then
    log_warn "Falha ao configurar MCP no Codex: ${CODEX_CONFIG_FILE}"
    return 1
  fi

  if [[ "${update_result}" == "updated" ]]; then
    log_info "Codex MCP configurado: ${CODEX_CONFIG_FILE}"
  fi
  return 0
}
|
|
180
|
+
|
|
181
|
+
# Write the "force semantic search" Cursor rule file into both known Cursor
# rule directories. Failures are logged per directory and do not stop the loop.
ensure_cursor_semantic_rule_files() {
  local target_dir target_file

  for target_dir in "${CURSOR_RULE_DIR_1}" "${CURSOR_RULE_DIR_2}"; do
    mkdir -p "${target_dir}" 2>/dev/null || {
      log_warn "Nao foi possivel criar diretorio de regras do Cursor: ${target_dir}"
      continue
    }

    target_file="${target_dir}/${CURSOR_RULE_FILE_NAME}"
    # Quoted heredoc delimiter: the rule text is written verbatim (no expansion).
    if cat > "${target_file}" <<'EOF'
---
description: Force semantic_search_code fallback when local context is missing
alwaysApply: true
---
When the prompt asks about a function, class, component, or file that is not present in the current local context,
call MCP server `rag-codebase` tool `semantic_search_code` with `mode="ensemble"` before saying "not found".
Only conclude "not found" after at least one semantic_search_code call returns no relevant result.
If `rag-codebase` appears in available tools, do not claim MCP access is unavailable.
EOF
    then
      log_info "Regra do Cursor instalada: ${target_file}"
    else
      log_warn "Nao foi possivel escrever regra do Cursor em: ${target_file}"
    fi
  done
}
|
|
209
|
+
|
|
210
|
+
# For each existing Cursor MCP config, make sure the already-registered
# "rag-codebase" server has MCP_FORCE_SEMANTIC_FALLBACK enabled in its env.
# Files that are missing, unparsable, or lack a rag-codebase entry are left
# untouched; only a failure of the Python helper itself is logged.
ensure_cursor_force_fallback_env() {
  local cfg_file
  local update_result

  for cfg_file in "${CURSOR_MCP_CONFIG_1}" "${CURSOR_MCP_CONFIG_2}"; do
    [[ -f "${cfg_file}" ]] || continue

    if update_result="$(
      python3 - "${cfg_file}" <<'PYEOF'
import json
import sys
from pathlib import Path

TRUTHY = {"1", "true", "yes", "on"}


def bail(reason):
    # Report why nothing was changed and stop with a success status.
    print(f"skip:{reason}")
    raise SystemExit(0)


config_file = Path(sys.argv[1]).expanduser()

try:
    document = json.loads(config_file.read_text(encoding="utf-8"))
except Exception:
    bail("invalid_json")

if not isinstance(document, dict):
    bail("invalid_root")

servers = document.get("mcpServers")
if not isinstance(servers, dict):
    bail("no_mcp_servers")

entry = servers.get("rag-codebase")
if not isinstance(entry, dict):
    bail("no_rag_codebase")

env_vars = entry.get("env")
if not isinstance(env_vars, dict):
    env_vars = {}

flag = str(env_vars.get("MCP_FORCE_SEMANTIC_FALLBACK", "")).strip().lower()
if flag in TRUTHY:
    bail("already_set")

# entry and servers are the same objects held by document, so updating the
# env mapping in place is enough before serialising.
env_vars["MCP_FORCE_SEMANTIC_FALLBACK"] = "true"
entry["env"] = env_vars

config_file.write_text(json.dumps(document, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
print("updated")
PYEOF
    )"; then
      if [[ "${update_result}" == "updated" ]]; then
        log_info "Cursor MCP atualizado com MCP_FORCE_SEMANTIC_FALLBACK=true: ${cfg_file}"
      fi
    else
      log_warn "Falha ao ajustar fallback MCP no Cursor: ${cfg_file}"
    fi
  done
}
|
|
273
|
+
|
|
55
274
|
mkdir -p "${LOCAL_BIN_DIR}"
|
|
56
275
|
ensure_own_rag_cli_config
|
|
57
276
|
log_info "Config criada/ok: ${OWN_RAG_CONFIG_FILE}"
|
|
58
277
|
|
|
278
|
+
if command -v cursor >/dev/null 2>&1 || [[ -d "${HOME}/.cursor" ]] || [[ -d "${HOME}/.config/Cursor" ]]; then
|
|
279
|
+
ensure_mcp_json_config "${CURSOR_MCP_CONFIG_1}" "Cursor"
|
|
280
|
+
ensure_mcp_json_config "${CURSOR_MCP_CONFIG_2}" "Cursor"
|
|
281
|
+
ensure_cursor_semantic_rule_files
|
|
282
|
+
ensure_cursor_force_fallback_env
|
|
283
|
+
fi
|
|
284
|
+
|
|
285
|
+
if command -v claude >/dev/null 2>&1 || [[ -d "${HOME}/.claude" ]] || [[ -f "${CLAUDE_CONFIG_FILE}" ]]; then
|
|
286
|
+
ensure_mcp_json_config "${CLAUDE_CONFIG_FILE}" "Claude"
|
|
287
|
+
fi
|
|
288
|
+
|
|
289
|
+
if command -v codex >/dev/null 2>&1 || [[ -d "${HOME}/.codex" ]] || [[ -f "${CODEX_CONFIG_FILE}" ]]; then
|
|
290
|
+
ensure_codex_mcp_config
|
|
291
|
+
fi
|
|
292
|
+
|
|
59
293
|
if [[ -f "${WRAPPER_SRC}" ]]; then
|
|
60
294
|
cp "${WRAPPER_SRC}" "${WRAPPER_DEST}"
|
|
61
295
|
chmod +x "${WRAPPER_DEST}"
|
package/bin/rag-wrapper.sh
CHANGED
|
@@ -12,6 +12,8 @@ PACKAGE_MONITOR_SCRIPT="${SCRIPT_DIR}/../chroma_monitor.sh"
|
|
|
12
12
|
LOCAL_REMOVE_SCRIPT="${LOCAL_BIN_DIR}/rag-remove.sh"
|
|
13
13
|
PACKAGE_REMOVE_SCRIPT="${SCRIPT_DIR}/../bin/rag-remove.sh"
|
|
14
14
|
|
|
15
|
+
URL_TEMP_DIR=""
|
|
16
|
+
|
|
15
17
|
detect_os() {
|
|
16
18
|
case "$(uname -s)" in
|
|
17
19
|
Darwin) echo "macos" ;;
|
|
@@ -99,16 +101,140 @@ find_remove_runner() {
|
|
|
99
101
|
return 1
|
|
100
102
|
}
|
|
101
103
|
|
|
104
|
+
# Remove the temporary download directory recorded in URL_TEMP_DIR, if any.
# Does nothing when the variable is empty or no longer names a directory;
# a failing removal is ignored.
cleanup_url_temp_dir() {
  [[ -n "${URL_TEMP_DIR}" ]] || return 0
  [[ -d "${URL_TEMP_DIR}" ]] || return 0
  rm -rf "${URL_TEMP_DIR}" || true
}
|
|
109
|
+
|
|
110
|
+
# Run the given command line with elevated privileges when possible:
# through sudo for a non-root user that has sudo available, directly otherwise
# (already root, or no sudo to fall back on). The command's status is returned.
run_with_privilege() {
  if [[ "$(id -u)" -ne 0 ]] && command -v sudo >/dev/null 2>&1; then
    sudo "$@"
  else
    "$@"
  fi
}
|
|
121
|
+
|
|
122
|
+
# Make sure the command named $1 is available, installing a package of the
# same name with the platform's package manager when it is not.
#
# Arguments:
#   $1 - command / package name
#   $2 - platform identifier ("linux" or "macos"; anything else is unsupported)
# Returns 0 when the command is available afterwards, 1 otherwise.
install_package_if_missing() {
  local package_name="$1" os_name="$2"

  if command -v "${package_name}" >/dev/null 2>&1; then
    return 0
  fi

  echo "Aviso: '${package_name}' não encontrado. Tentando instalar..." >&2

  if [[ "${os_name}" == "linux" ]]; then
    # The first package manager found on PATH wins.
    if command -v apt-get >/dev/null 2>&1; then
      run_with_privilege apt-get update -y
      run_with_privilege apt-get install -y "${package_name}"
    elif command -v dnf >/dev/null 2>&1; then
      run_with_privilege dnf install -y "${package_name}"
    elif command -v yum >/dev/null 2>&1; then
      run_with_privilege yum install -y "${package_name}"
    elif command -v pacman >/dev/null 2>&1; then
      run_with_privilege pacman -Sy --noconfirm "${package_name}"
    elif command -v zypper >/dev/null 2>&1; then
      run_with_privilege zypper --non-interactive install "${package_name}"
    elif command -v apk >/dev/null 2>&1; then
      run_with_privilege apk add --no-cache "${package_name}"
    else
      echo "Erro: não foi possível instalar '${package_name}' automaticamente nesta distro Linux." >&2
      return 1
    fi
  elif [[ "${os_name}" == "macos" ]]; then
    if ! command -v brew >/dev/null 2>&1; then
      echo "Erro: '${package_name}' ausente e Homebrew não encontrado no macOS." >&2
      echo "Instale manualmente e tente novamente." >&2
      return 1
    fi
    brew install "${package_name}"
  else
    echo "Erro: sistema operacional não suportado para instalação automática de '${package_name}'." >&2
    return 1
  fi

  # Trust the result, not the installer's exit status.
  if command -v "${package_name}" >/dev/null 2>&1; then
    return 0
  fi
  echo "Erro: instalação de '${package_name}' falhou." >&2
  return 1
}
|
|
173
|
+
|
|
174
|
+
# Succeed (status 0) when $1 starts with "http://" or "https://".
is_url_input() {
  case "$1" in
    http://*|https://*) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
178
|
+
|
|
179
|
+
# Succeed (status 0) when the first four bytes of file $1 are one of the ZIP
# signatures "PK\x03\x04", "PK\x05\x06" or "PK\x07\x08".
looks_like_zip_file() {
  local magic
  # Hex-dump the first four bytes with all whitespace removed; errors while
  # reading simply yield a signature that matches nothing.
  magic="$(LC_ALL=C head -c 4 "$1" | od -An -tx1 | tr -d ' \n' || true)"
  if [[ "${magic}" == "504b0304" || "${magic}" == "504b0506" || "${magic}" == "504b0708" ]]; then
    return 0
  fi
  return 1
}
|
|
188
|
+
|
|
189
|
+
# Download a remote file into a fresh temporary directory and print, on
# stdout, the directory that should be indexed: the extraction directory when
# the download is a ZIP archive, the temporary directory itself otherwise.
# Progress and error messages go to stderr.
#
# Arguments:
#   $1 - http(s) URL to download
#   $2 - platform identifier, forwarded to install_package_if_missing
# Returns 0 on success, 1 when a prerequisite, the download or the extraction
# fails (the temporary directory is removed in that case).
#
# NOTE: when this function is called through command substitution, the
# URL_TEMP_DIR assignment stays in the subshell, so the caller cannot rely on
# cleanup_url_temp_dir to remove the directory afterwards.
prepare_url_source() {
  local remote_url="$1"
  local os_name="$2"
  local file_hint
  local download_name
  local download_path
  local extract_dir

  # Every step is checked explicitly: inside `if ! x="$(prepare_url_source ...)"`
  # errexit is ignored, so an unchecked failure would silently fall through.
  install_package_if_missing "curl" "${os_name}" >/dev/null || return 1

  URL_TEMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/own-rag-url.XXXXXX")" || return 1

  # An EXIT trap registered inside a command-substitution subshell fires as
  # soon as the substitution ends and would delete the download before the
  # caller can use it. Only register the trap when running in the main shell.
  if [[ "${BASH_SUBSHELL:-0}" -eq 0 ]]; then
    trap cleanup_url_temp_dir EXIT
  fi

  # Derive a local file name from the URL path, ignoring query and fragment.
  file_hint="${remote_url%%\?*}"
  file_hint="${file_hint%%\#*}"
  download_name="$(basename "${file_hint}")"
  case "${download_name}" in
    ""|"/"|"."|"..") download_name="downloaded-content" ;;
  esac
  download_path="${URL_TEMP_DIR}/${download_name}"

  echo "Baixando conteúdo remoto: ${remote_url}" >&2
  if ! curl -fL --retry 2 --connect-timeout 20 --max-time 600 \
      -o "${download_path}" "${remote_url}"; then
    cleanup_url_temp_dir
    return 1
  fi

  if looks_like_zip_file "${download_path}"; then
    extract_dir="${URL_TEMP_DIR}/extracted"
    if ! install_package_if_missing "unzip" "${os_name}" >/dev/null; then
      cleanup_url_temp_dir
      return 1
    fi
    if ! mkdir -p "${extract_dir}" || ! unzip -q "${download_path}" -d "${extract_dir}"; then
      cleanup_url_temp_dir
      return 1
    fi
    echo "Conteúdo ZIP extraído para indexação temporária." >&2
    echo "${extract_dir}"
    return 0
  fi

  echo "${URL_TEMP_DIR}"
}
|
|
225
|
+
|
|
102
226
|
usage() {
|
|
103
227
|
cat <<'EOF'
|
|
104
228
|
Uso:
|
|
105
|
-
rag run [path]
|
|
229
|
+
rag run [path|url]
|
|
106
230
|
rag monitor [command]
|
|
107
231
|
rag remove
|
|
108
232
|
|
|
109
233
|
Exemplos:
|
|
110
234
|
rag run .
|
|
111
235
|
rag run /caminho/do/projeto
|
|
236
|
+
rag run https://exemplo.com/documentacao.md
|
|
237
|
+
rag run https://exemplo.com/pacote.zip
|
|
112
238
|
rag monitor
|
|
113
239
|
rag monitor full
|
|
114
240
|
rag remove
|
|
@@ -132,24 +258,21 @@ fi
|
|
|
132
258
|
|
|
133
259
|
case "${command_name}" in
|
|
134
260
|
run)
|
|
135
|
-
|
|
136
|
-
|
|
261
|
+
target_input="${1:-}"
|
|
262
|
+
shift || true
|
|
263
|
+
|
|
264
|
+
if [[ -z "${target_input}" ]]; then
|
|
137
265
|
printf "⚠️ Pasta não informada. Deseja usar a pasta atual: %s? (s/n)\n" "${PWD}"
|
|
138
266
|
read -r answer
|
|
139
267
|
answer="$(echo "${answer}" | tr '[:upper:]' '[:lower:]')"
|
|
140
268
|
if [[ "${answer}" == "s" || "${answer}" == "sim" || "${answer}" == "y" || "${answer}" == "yes" ]]; then
|
|
141
|
-
|
|
269
|
+
target_input="${PWD}"
|
|
142
270
|
else
|
|
143
271
|
echo "Dica: use 'rag run /caminho/do/projeto'."
|
|
144
272
|
exit 1
|
|
145
273
|
fi
|
|
146
274
|
fi
|
|
147
275
|
|
|
148
|
-
if [[ ! -d "${target_path}" ]]; then
|
|
149
|
-
echo "Erro: caminho não encontrado ou não é diretório: ${target_path}" >&2
|
|
150
|
-
exit 1
|
|
151
|
-
fi
|
|
152
|
-
|
|
153
276
|
if ! setup_runner="$(find_setup_runner "${os_name}")"; then
|
|
154
277
|
if [[ "${os_name}" == "macos" ]]; then
|
|
155
278
|
echo "Erro: rag-setup-macos.run não encontrado." >&2
|
|
@@ -161,7 +284,25 @@ case "${command_name}" in
|
|
|
161
284
|
exit 1
|
|
162
285
|
fi
|
|
163
286
|
|
|
164
|
-
|
|
287
|
+
if is_url_input "${target_input}"; then
|
|
288
|
+
if ! target_path="$(prepare_url_source "${target_input}" "${os_name}")"; then
|
|
289
|
+
echo "Erro: falha ao preparar conteúdo remoto para indexação: ${target_input}" >&2
|
|
290
|
+
exit 1
|
|
291
|
+
fi
|
|
292
|
+
echo "Indexando conteúdo baixado temporariamente em: ${target_path}" >&2
|
|
293
|
+
"${setup_runner}" "${target_path}" "$@"
|
|
294
|
+
cleanup_url_temp_dir
|
|
295
|
+
trap - EXIT
|
|
296
|
+
exit 0
|
|
297
|
+
fi
|
|
298
|
+
|
|
299
|
+
target_path="${target_input}"
|
|
300
|
+
if [[ ! -d "${target_path}" ]]; then
|
|
301
|
+
echo "Erro: caminho não encontrado ou não é diretório: ${target_path}" >&2
|
|
302
|
+
exit 1
|
|
303
|
+
fi
|
|
304
|
+
|
|
305
|
+
exec "${setup_runner}" "${target_path}" "$@"
|
|
165
306
|
;;
|
|
166
307
|
monitor)
|
|
167
308
|
if ! monitor_runner="$(find_monitor_runner "${os_name}")"; then
|