own-rag-cli 0.0.3-snapshot → 0.0.5-snapshot

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/mcp_server.py CHANGED
@@ -399,7 +399,7 @@ IGNORED_EXTENSIONS = {
399
399
  ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".webp", ".bmp",
400
400
  ".mp4", ".mp3", ".wav", ".ogg", ".avi", ".mov",
401
401
  ".zip", ".tar", ".gz", ".rar", ".7z", ".jar", ".war",
402
- ".pyc", ".pyo", ".so", ".dll", ".exe", ".bin",
402
+ ".pyc", ".pyo", ".so", ".dll", ".exe", ".bin", ".run",
403
403
  ".lock", ".sum", ".sqlite", ".db", ".sqlite3",
404
404
  ".ttf", ".woff", ".woff2", ".eot",
405
405
  ".pdf", ".docx", ".xlsx", ".pptx",
@@ -804,16 +804,43 @@ def _delete_file_chunks(collection: chromadb.Collection, file_path: str) -> int:
804
804
 
805
805
 
806
806
  def _read_file_safe(filepath: Path) -> str | None:
807
+ try:
808
+ raw = filepath.read_bytes()
809
+ except OSError:
810
+ return None
811
+
812
+ if _looks_binary_content(raw):
813
+ return None
814
+
807
815
  for encoding in ("utf-8", "latin-1", "cp1252"):
808
816
  try:
809
- return filepath.read_text(encoding=encoding)
817
+ return raw.decode(encoding)
810
818
  except UnicodeDecodeError:
811
819
  continue
812
- except OSError:
813
- return None
814
820
  return None
815
821
 
816
822
 
823
+ def _looks_binary_content(raw: bytes) -> bool:
824
+ if not raw:
825
+ return False
826
+
827
+ sample = raw[:4096]
828
+ if b"\x00" in sample:
829
+ return True
830
+
831
+ non_text_bytes = 0
832
+ for byte in sample:
833
+ if byte in (9, 10, 13): # \t \n \r
834
+ continue
835
+ if 32 <= byte <= 126: # ASCII imprimivel
836
+ continue
837
+ if 160 <= byte <= 255: # Latin-1 estendido
838
+ continue
839
+ non_text_bytes += 1
840
+
841
+ return (non_text_bytes / len(sample)) > 0.30
842
+
843
+
817
844
  def _scan_folder(folder_path: Path) -> Iterator[Path]:
818
845
  for dirpath, dirnames, filenames in os.walk(folder_path):
819
846
  dirnames[:] = [
@@ -871,32 +898,108 @@ def _index_single_file_for_branch(
871
898
  _delete_file_chunks(collection, abs_path)
872
899
 
873
900
  inserted_chunks = 0
901
+ skipped_chunks = 0
902
+ stop_iteration_warnings = 0
874
903
  batch_ids: list[str] = []
875
904
  batch_docs: list[str] = []
876
905
  batch_metadatas: list[dict[str, object]] = []
877
906
 
907
+ def _warn_stop_iteration(message: str) -> None:
908
+ nonlocal stop_iteration_warnings
909
+ if stop_iteration_warnings < 3:
910
+ log.warning(message)
911
+ stop_iteration_warnings += 1
912
+
913
+ def _to_embedding_rows(encoded_embeddings: object) -> list[list[float]]:
914
+ if hasattr(encoded_embeddings, "tolist"):
915
+ rows = encoded_embeddings.tolist()
916
+ if isinstance(rows, list):
917
+ if rows and isinstance(rows[0], (int, float)):
918
+ return [list(rows)]
919
+ return rows
920
+ return [list(row) for row in encoded_embeddings] # type: ignore[arg-type]
921
+
878
922
  def _flush_batch() -> None:
879
- nonlocal inserted_chunks
923
+ nonlocal inserted_chunks, skipped_chunks
880
924
  if not batch_ids:
881
925
  return
882
- embeddings = model.encode(
883
- batch_docs,
884
- show_progress_bar=False,
885
- batch_size=EMBEDDING_BATCH_SIZE,
886
- ).tolist()
887
- collection.upsert(
888
- ids=batch_ids,
889
- embeddings=embeddings,
890
- documents=batch_docs,
891
- metadatas=batch_metadatas,
892
- )
893
- inserted_chunks += len(batch_ids)
894
- del embeddings
926
+
927
+ pending_ids = list(batch_ids)
928
+ pending_docs = list(batch_docs)
929
+ pending_metadatas = list(batch_metadatas)
930
+
931
+ try:
932
+ encoded = model.encode(
933
+ pending_docs,
934
+ show_progress_bar=False,
935
+ batch_size=EMBEDDING_BATCH_SIZE,
936
+ )
937
+ embeddings = _to_embedding_rows(encoded)
938
+ collection.upsert(
939
+ ids=pending_ids,
940
+ embeddings=embeddings,
941
+ documents=pending_docs,
942
+ metadatas=pending_metadatas,
943
+ )
944
+ inserted_chunks += len(pending_ids)
945
+ del embeddings
946
+ except StopIteration:
947
+ _warn_stop_iteration(
948
+ f"{filepath.name} [{branch.key}] StopIteration no batch; aplicando fallback por chunk."
949
+ )
950
+ for chunk_id, chunk_doc, chunk_metadata in zip(pending_ids, pending_docs, pending_metadatas):
951
+ candidate_doc = chunk_doc.strip()
952
+ if not candidate_doc:
953
+ skipped_chunks += 1
954
+ continue
955
+ try:
956
+ encoded_single = model.encode(
957
+ [candidate_doc],
958
+ show_progress_bar=False,
959
+ batch_size=1,
960
+ )
961
+ single_embeddings = _to_embedding_rows(encoded_single)
962
+ collection.upsert(
963
+ ids=[chunk_id],
964
+ embeddings=single_embeddings,
965
+ documents=[candidate_doc],
966
+ metadatas=[chunk_metadata],
967
+ )
968
+ inserted_chunks += 1
969
+ del single_embeddings
970
+ except StopIteration:
971
+ compact_doc = " ".join(candidate_doc.split())
972
+ if not compact_doc:
973
+ skipped_chunks += 1
974
+ continue
975
+ try:
976
+ encoded_single = model.encode(
977
+ [compact_doc],
978
+ show_progress_bar=False,
979
+ batch_size=1,
980
+ )
981
+ single_embeddings = _to_embedding_rows(encoded_single)
982
+ collection.upsert(
983
+ ids=[chunk_id],
984
+ embeddings=single_embeddings,
985
+ documents=[compact_doc],
986
+ metadatas=[chunk_metadata],
987
+ )
988
+ inserted_chunks += 1
989
+ del single_embeddings
990
+ except StopIteration:
991
+ skipped_chunks += 1
992
+ _warn_stop_iteration(
993
+ f"{filepath.name} [{branch.key}] chunk ignorado após StopIteration repetido."
994
+ )
895
995
  batch_ids.clear()
896
996
  batch_docs.clear()
897
997
  batch_metadatas.clear()
898
998
 
899
999
  for i, chunk in enumerate(chunks):
1000
+ if not chunk or not chunk.strip():
1001
+ skipped_chunks += 1
1002
+ continue
900
1003
  batch_ids.append(_make_chunk_id(abs_path, i))
901
1004
  batch_docs.append(chunk)
902
1005
  batch_metadatas.append(
@@ -914,6 +1017,10 @@ def _index_single_file_for_branch(
914
1017
  _flush_batch()
915
1018
 
916
1019
  _flush_batch()
1020
+ if skipped_chunks:
1021
+ _warn_stop_iteration(
1022
+ f"{filepath.name} [{branch.key}] ignorou {skipped_chunks} chunk(s) vazio(s)/inválido(s)."
1023
+ )
917
1024
  return inserted_chunks
918
1025
 
919
1026
 
@@ -1196,15 +1303,32 @@ def _run_ensemble_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str
1196
1303
  # Servidor MCP via FastMCP
1197
1304
  # ---------------------------------------------------------------------------
1198
1305
 
1199
- mcp = FastMCP(
1200
- name="rag-codebase",
1201
- instructions=(
1306
+ FORCE_SEMANTIC_FALLBACK = (
1307
+ os.environ.get("MCP_FORCE_SEMANTIC_FALLBACK", "true").strip().lower()
1308
+ in {"1", "true", "yes", "on"}
1309
+ )
1310
+
1311
+
1312
+ def _build_server_instructions() -> str:
1313
+ instructions = (
1202
1314
  "Servidor RAG para busca semântica em código-fonte local com suporte a ensemble híbrido. "
1203
1315
  "No modo hybrid, a branch de código usa Jina v2 e a de documentação usa BGE. "
1204
1316
  "Use semantic_search_code(query, top_k, mode='ensemble') para combinar Jina+BGE com RRF e reranking. "
1205
1317
  "Use update_file_index após editar um arquivo para manter as duas coleções sincronizadas. "
1206
1318
  "Use index_specific_folder para indexação recursiva sob demanda."
1207
- ),
1319
+ )
1320
+ if FORCE_SEMANTIC_FALLBACK:
1321
+ instructions += (
1322
+ " Regra obrigatória: quando a pergunta mencionar função/classe/componente/arquivo "
1323
+ "e isso não estiver no contexto imediato, chame semantic_search_code(mode='ensemble') "
1324
+ "antes de responder que não encontrou. Só conclua ausência após ao menos uma busca semântica."
1325
+ )
1326
+ return instructions
1327
+
1328
+
1329
+ mcp = FastMCP(
1330
+ name="rag-codebase",
1331
+ instructions=_build_server_instructions(),
1208
1332
  )
1209
1333
 
1210
1334
 
@@ -17,6 +17,14 @@ MONITOR_DEST="${LOCAL_BIN_DIR}/chroma_monitor.sh"
17
17
  REMOVE_SRC="${PACKAGE_ROOT}/bin/rag-remove.sh"
18
18
  REMOVE_DEST="${LOCAL_BIN_DIR}/rag-remove.sh"
19
19
  OWN_RAG_CONFIG_FILE="${HOME}/.own-rag-cli.json"
20
+ MCP_SERVER_COMMAND="${HOME}/.local/bin/mcp-rag-server"
21
+ CLAUDE_CONFIG_FILE="${HOME}/.claude.json"
22
+ CURSOR_MCP_CONFIG_1="${HOME}/.cursor/mcp.json"
23
+ CURSOR_MCP_CONFIG_2="${HOME}/.config/Cursor/User/mcp.json"
24
+ CURSOR_RULE_DIR_1="${HOME}/.cursor/rules"
25
+ CURSOR_RULE_DIR_2="${HOME}/.config/Cursor/User/rules"
26
+ CURSOR_RULE_FILE_NAME="own-rag-force-semantic-search.mdc"
27
+ CODEX_CONFIG_FILE="${HOME}/.codex/config.toml"
20
28
 
21
29
  COMPOSE_SOURCE="${PACKAGE_ROOT}/bin/docker-compose.yml"
22
30
  COMPOSE_DIR="${HOME}/docker-chromadb"
@@ -52,10 +60,236 @@ cfg.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encodin
52
60
  PYEOF
53
61
  }
54
62
 
63
+ ensure_mcp_json_config() {
64
+ local cfg_file="$1"
65
+ local cfg_label="$2"
66
+ local update_result
67
+
68
+ if ! update_result="$(
69
+ python3 - "${cfg_file}" "${MCP_SERVER_COMMAND}" "${OWN_RAG_CONFIG_FILE}" <<'PYEOF'
70
+ import json
71
+ import sys
72
+ from pathlib import Path
73
+
74
+ cfg_path = Path(sys.argv[1]).expanduser()
75
+ mcp_server_command = sys.argv[2]
76
+ own_rag_config = sys.argv[3]
77
+
78
+ if cfg_path.exists():
79
+ try:
80
+ data = json.loads(cfg_path.read_text(encoding="utf-8"))
81
+ except Exception:
82
+ data = {}
83
+ else:
84
+ data = {}
85
+
86
+ if not isinstance(data, dict):
87
+ data = {}
88
+
89
+ mcp_servers = data.get("mcpServers")
90
+ if not isinstance(mcp_servers, dict):
91
+ mcp_servers = {}
92
+
93
+ desired = {
94
+ "command": mcp_server_command,
95
+ "args": [],
96
+ "env": {
97
+ "OWN_RAG_CLI_CONFIG_FILE": own_rag_config,
98
+ "TOKENIZERS_PARALLELISM": "false",
99
+ "MCP_FORCE_SEMANTIC_FALLBACK": "true",
100
+ },
101
+ }
102
+
103
+ current = mcp_servers.get("rag-codebase")
104
+ if current == desired:
105
+ print("skip:already_set")
106
+ raise SystemExit(0)
107
+
108
+ mcp_servers["rag-codebase"] = desired
109
+ data["mcpServers"] = mcp_servers
110
+ cfg_path.parent.mkdir(parents=True, exist_ok=True)
111
+ cfg_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
112
+ print("updated")
113
+ PYEOF
114
+ )"; then
115
+ log_warn "Falha ao atualizar ${cfg_label}: ${cfg_file}"
116
+ return 1
117
+ fi
118
+
119
+ if [[ "${update_result}" == "updated" ]]; then
120
+ log_info "${cfg_label} MCP configurado: ${cfg_file}"
121
+ fi
122
+ return 0
123
+ }
124
+
125
+ ensure_codex_mcp_config() {
126
+ local update_result
127
+ if ! update_result="$(
128
+ python3 - "${CODEX_CONFIG_FILE}" "${MCP_SERVER_COMMAND}" "${OWN_RAG_CONFIG_FILE}" <<'PYEOF'
129
+ import re
130
+ import sys
131
+ from pathlib import Path
132
+
133
+ cfg_path = Path(sys.argv[1]).expanduser()
134
+ mcp_server_command = sys.argv[2]
135
+ own_rag_config = sys.argv[3]
136
+
137
+ cfg_path.parent.mkdir(parents=True, exist_ok=True)
138
+ if cfg_path.exists():
139
+ text = cfg_path.read_text(encoding="utf-8")
140
+ else:
141
+ text = ""
142
+
143
+ block = (
144
+ "[mcp_servers.rag-codebase]\n"
145
+ f'command = "{mcp_server_command}"\n'
146
+ "args = []\n\n"
147
+ "[mcp_servers.rag-codebase.env]\n"
148
+ f'OWN_RAG_CLI_CONFIG_FILE = "{own_rag_config}"\n'
149
+ 'TOKENIZERS_PARALLELISM = "false"\n'
150
+ 'MCP_FORCE_SEMANTIC_FALLBACK = "true"\n'
151
+ )
152
+
153
+ pattern = re.compile(
154
+ r"(?ms)^\[mcp_servers\.rag-codebase\]\n.*?(?=^\[mcp_servers\.|^\[[^\]]+\]|$)"
155
+ )
156
+
157
+ if pattern.search(text):
158
+ new_text = pattern.sub(block + "\n", text, count=1)
159
+ else:
160
+ suffix = "" if not text or text.endswith("\n") else "\n"
161
+ new_text = f"{text}{suffix}\n{block}"
162
+
163
+ if new_text == text:
164
+ print("skip:already_set")
165
+ raise SystemExit(0)
166
+
167
+ cfg_path.write_text(new_text, encoding="utf-8")
168
+ print("updated")
169
+ PYEOF
170
+ )"; then
171
+ log_warn "Falha ao configurar MCP no Codex: ${CODEX_CONFIG_FILE}"
172
+ return 1
173
+ fi
174
+
175
+ if [[ "${update_result}" == "updated" ]]; then
176
+ log_info "Codex MCP configurado: ${CODEX_CONFIG_FILE}"
177
+ fi
178
+ return 0
179
+ }
180
+
181
+ ensure_cursor_semantic_rule_files() {
182
+ local rule_dir
183
+ local rule_path
184
+ for rule_dir in "${CURSOR_RULE_DIR_1}" "${CURSOR_RULE_DIR_2}"; do
185
+ if ! mkdir -p "${rule_dir}" 2>/dev/null; then
186
+ log_warn "Nao foi possivel criar diretorio de regras do Cursor: ${rule_dir}"
187
+ continue
188
+ fi
189
+
190
+ rule_path="${rule_dir}/${CURSOR_RULE_FILE_NAME}"
191
+ if ! cat > "${rule_path}" <<'EOF'
192
+ ---
193
+ description: Force semantic_search_code fallback when local context is missing
194
+ alwaysApply: true
195
+ ---
196
+ When the prompt asks about a function, class, component, or file that is not present in the current local context,
197
+ call MCP server `rag-codebase` tool `semantic_search_code` with `mode="ensemble"` before saying "not found".
198
+ Only conclude "not found" after at least one semantic_search_code call returns no relevant result.
199
+ If `rag-codebase` appears in available tools, do not claim MCP access is unavailable.
200
+ EOF
201
+ then
202
+ log_warn "Nao foi possivel escrever regra do Cursor em: ${rule_path}"
203
+ continue
204
+ fi
205
+
206
+ log_info "Regra do Cursor instalada: ${rule_path}"
207
+ done
208
+ }
209
+
210
+ ensure_cursor_force_fallback_env() {
211
+ local cfg_file
212
+ local update_result
213
+
214
+ for cfg_file in "${CURSOR_MCP_CONFIG_1}" "${CURSOR_MCP_CONFIG_2}"; do
215
+ if [[ ! -f "${cfg_file}" ]]; then
216
+ continue
217
+ fi
218
+
219
+ if ! update_result="$(
220
+ python3 - "${cfg_file}" <<'PYEOF'
221
+ import json
222
+ import sys
223
+ from pathlib import Path
224
+
225
+ cfg_path = Path(sys.argv[1]).expanduser()
226
+
227
+ try:
228
+ data = json.loads(cfg_path.read_text(encoding="utf-8"))
229
+ except Exception:
230
+ print("skip:invalid_json")
231
+ raise SystemExit(0)
232
+
233
+ if not isinstance(data, dict):
234
+ print("skip:invalid_root")
235
+ raise SystemExit(0)
236
+
237
+ mcp_servers = data.get("mcpServers")
238
+ if not isinstance(mcp_servers, dict):
239
+ print("skip:no_mcp_servers")
240
+ raise SystemExit(0)
241
+
242
+ rag_cfg = mcp_servers.get("rag-codebase")
243
+ if not isinstance(rag_cfg, dict):
244
+ print("skip:no_rag_codebase")
245
+ raise SystemExit(0)
246
+
247
+ env = rag_cfg.get("env")
248
+ if not isinstance(env, dict):
249
+ env = {}
250
+
251
+ if str(env.get("MCP_FORCE_SEMANTIC_FALLBACK", "")).strip().lower() in {"1", "true", "yes", "on"}:
252
+ print("skip:already_set")
253
+ raise SystemExit(0)
254
+
255
+ env["MCP_FORCE_SEMANTIC_FALLBACK"] = "true"
256
+ rag_cfg["env"] = env
257
+ mcp_servers["rag-codebase"] = rag_cfg
258
+ data["mcpServers"] = mcp_servers
259
+
260
+ cfg_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
261
+ print("updated")
262
+ PYEOF
263
+ )"; then
264
+ log_warn "Falha ao ajustar fallback MCP no Cursor: ${cfg_file}"
265
+ continue
266
+ fi
267
+
268
+ if [[ "${update_result}" == "updated" ]]; then
269
+ log_info "Cursor MCP atualizado com MCP_FORCE_SEMANTIC_FALLBACK=true: ${cfg_file}"
270
+ fi
271
+ done
272
+ }
273
+
55
274
  mkdir -p "${LOCAL_BIN_DIR}"
56
275
  ensure_own_rag_cli_config
57
276
  log_info "Config criada/ok: ${OWN_RAG_CONFIG_FILE}"
58
277
 
278
+ if command -v cursor >/dev/null 2>&1 || [[ -d "${HOME}/.cursor" ]] || [[ -d "${HOME}/.config/Cursor" ]]; then
279
+ ensure_mcp_json_config "${CURSOR_MCP_CONFIG_1}" "Cursor"
280
+ ensure_mcp_json_config "${CURSOR_MCP_CONFIG_2}" "Cursor"
281
+ ensure_cursor_semantic_rule_files
282
+ ensure_cursor_force_fallback_env
283
+ fi
284
+
285
+ if command -v claude >/dev/null 2>&1 || [[ -d "${HOME}/.claude" ]] || [[ -f "${CLAUDE_CONFIG_FILE}" ]]; then
286
+ ensure_mcp_json_config "${CLAUDE_CONFIG_FILE}" "Claude"
287
+ fi
288
+
289
+ if command -v codex >/dev/null 2>&1 || [[ -d "${HOME}/.codex" ]] || [[ -f "${CODEX_CONFIG_FILE}" ]]; then
290
+ ensure_codex_mcp_config
291
+ fi
292
+
59
293
  if [[ -f "${WRAPPER_SRC}" ]]; then
60
294
  cp "${WRAPPER_SRC}" "${WRAPPER_DEST}"
61
295
  chmod +x "${WRAPPER_DEST}"
@@ -12,6 +12,8 @@ PACKAGE_MONITOR_SCRIPT="${SCRIPT_DIR}/../chroma_monitor.sh"
12
12
  LOCAL_REMOVE_SCRIPT="${LOCAL_BIN_DIR}/rag-remove.sh"
13
13
  PACKAGE_REMOVE_SCRIPT="${SCRIPT_DIR}/../bin/rag-remove.sh"
14
14
 
15
+ URL_TEMP_DIR=""
16
+
15
17
  detect_os() {
16
18
  case "$(uname -s)" in
17
19
  Darwin) echo "macos" ;;
@@ -99,16 +101,140 @@ find_remove_runner() {
99
101
  return 1
100
102
  }
101
103
 
104
+ cleanup_url_temp_dir() {
105
+ if [[ -n "${URL_TEMP_DIR}" && -d "${URL_TEMP_DIR}" ]]; then
106
+ rm -rf "${URL_TEMP_DIR}" || true
107
+ fi
108
+ }
109
+
110
+ run_with_privilege() {
111
+ if [[ "$(id -u)" -eq 0 ]]; then
112
+ "$@"
113
+ return
114
+ fi
115
+ if command -v sudo >/dev/null 2>&1; then
116
+ sudo "$@"
117
+ return
118
+ fi
119
+ "$@"
120
+ }
121
+
122
+ install_package_if_missing() {
123
+ local package_name="$1"
124
+ local os_name="$2"
125
+
126
+ if command -v "${package_name}" >/dev/null 2>&1; then
127
+ return 0
128
+ fi
129
+
130
+ echo "Aviso: '${package_name}' não encontrado. Tentando instalar..." >&2
131
+
132
+ case "${os_name}" in
133
+ linux)
134
+ if command -v apt-get >/dev/null 2>&1; then
135
+ run_with_privilege apt-get update -y
136
+ run_with_privilege apt-get install -y "${package_name}"
137
+ elif command -v dnf >/dev/null 2>&1; then
138
+ run_with_privilege dnf install -y "${package_name}"
139
+ elif command -v yum >/dev/null 2>&1; then
140
+ run_with_privilege yum install -y "${package_name}"
141
+ elif command -v pacman >/dev/null 2>&1; then
142
+ run_with_privilege pacman -Sy --noconfirm "${package_name}"
143
+ elif command -v zypper >/dev/null 2>&1; then
144
+ run_with_privilege zypper --non-interactive install "${package_name}"
145
+ elif command -v apk >/dev/null 2>&1; then
146
+ run_with_privilege apk add --no-cache "${package_name}"
147
+ else
148
+ echo "Erro: não foi possível instalar '${package_name}' automaticamente nesta distro Linux." >&2
149
+ return 1
150
+ fi
151
+ ;;
152
+ macos)
153
+ if command -v brew >/dev/null 2>&1; then
154
+ brew install "${package_name}"
155
+ else
156
+ echo "Erro: '${package_name}' ausente e Homebrew não encontrado no macOS." >&2
157
+ echo "Instale manualmente e tente novamente." >&2
158
+ return 1
159
+ fi
160
+ ;;
161
+ *)
162
+ echo "Erro: sistema operacional não suportado para instalação automática de '${package_name}'." >&2
163
+ return 1
164
+ ;;
165
+ esac
166
+
167
+ if ! command -v "${package_name}" >/dev/null 2>&1; then
168
+ echo "Erro: instalação de '${package_name}' falhou." >&2
169
+ return 1
170
+ fi
171
+ return 0
172
+ }
173
+
174
+ is_url_input() {
175
+ local value="$1"
176
+ [[ "${value}" =~ ^https?:// ]]
177
+ }
178
+
179
+ looks_like_zip_file() {
180
+ local file_path="$1"
181
+ local signature
182
+ signature="$(LC_ALL=C head -c 4 "${file_path}" | od -An -tx1 | tr -d ' \n' || true)"
183
+ case "${signature}" in
184
+ 504b0304|504b0506|504b0708) return 0 ;;
185
+ esac
186
+ return 1
187
+ }
188
+
189
+ prepare_url_source() {
190
+ local remote_url="$1"
191
+ local os_name="$2"
192
+ local file_hint
193
+ local download_name
194
+ local download_path
195
+ local extract_dir
196
+
197
+ install_package_if_missing "curl" "${os_name}" >/dev/null
198
+
199
+ URL_TEMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/own-rag-url.XXXXXX")"
200
+ trap cleanup_url_temp_dir EXIT
201
+
202
+ file_hint="${remote_url%%\?*}"
203
+ download_name="$(basename "${file_hint}")"
204
+ if [[ -z "${download_name}" || "${download_name}" == "/" || "${download_name}" == "." ]]; then
205
+ download_name="downloaded-content"
206
+ fi
207
+ download_path="${URL_TEMP_DIR}/${download_name}"
208
+
209
+ echo "Baixando conteúdo remoto: ${remote_url}" >&2
210
+ curl -fL --retry 2 --connect-timeout 20 --max-time 600 \
211
+ -o "${download_path}" "${remote_url}"
212
+
213
+ if looks_like_zip_file "${download_path}"; then
214
+ install_package_if_missing "unzip" "${os_name}" >/dev/null
215
+ extract_dir="${URL_TEMP_DIR}/extracted"
216
+ mkdir -p "${extract_dir}"
217
+ unzip -q "${download_path}" -d "${extract_dir}"
218
+ echo "Conteúdo ZIP extraído para indexação temporária." >&2
219
+ echo "${extract_dir}"
220
+ return 0
221
+ fi
222
+
223
+ echo "${URL_TEMP_DIR}"
224
+ }
225
+
102
226
  usage() {
103
227
  cat <<'EOF'
104
228
  Uso:
105
- rag run [path]
229
+ rag run [path|url]
106
230
  rag monitor [command]
107
231
  rag remove
108
232
 
109
233
  Exemplos:
110
234
  rag run .
111
235
  rag run /caminho/do/projeto
236
+ rag run https://exemplo.com/documentacao.md
237
+ rag run https://exemplo.com/pacote.zip
112
238
  rag monitor
113
239
  rag monitor full
114
240
  rag remove
@@ -132,24 +258,21 @@ fi
132
258
 
133
259
  case "${command_name}" in
134
260
  run)
135
- target_path="${1:-}"
136
- if [[ -z "${target_path}" ]]; then
261
+ target_input="${1:-}"
262
+ shift || true
263
+
264
+ if [[ -z "${target_input}" ]]; then
137
265
  printf "⚠️ Pasta não informada. Deseja usar a pasta atual: %s? (s/n)\n" "${PWD}"
138
266
  read -r answer
139
267
  answer="$(echo "${answer}" | tr '[:upper:]' '[:lower:]')"
140
268
  if [[ "${answer}" == "s" || "${answer}" == "sim" || "${answer}" == "y" || "${answer}" == "yes" ]]; then
141
- target_path="${PWD}"
269
+ target_input="${PWD}"
142
270
  else
143
271
  echo "Dica: use 'rag run /caminho/do/projeto'."
144
272
  exit 1
145
273
  fi
146
274
  fi
147
275
 
148
- if [[ ! -d "${target_path}" ]]; then
149
- echo "Erro: caminho não encontrado ou não é diretório: ${target_path}" >&2
150
- exit 1
151
- fi
152
-
153
276
  if ! setup_runner="$(find_setup_runner "${os_name}")"; then
154
277
  if [[ "${os_name}" == "macos" ]]; then
155
278
  echo "Erro: rag-setup-macos.run não encontrado." >&2
@@ -161,7 +284,25 @@ case "${command_name}" in
161
284
  exit 1
162
285
  fi
163
286
 
164
- exec "${setup_runner}" "${target_path}"
287
+ if is_url_input "${target_input}"; then
288
+ if ! target_path="$(prepare_url_source "${target_input}" "${os_name}")"; then
289
+ echo "Erro: falha ao preparar conteúdo remoto para indexação: ${target_input}" >&2
290
+ exit 1
291
+ fi
292
+ echo "Indexando conteúdo baixado temporariamente em: ${target_path}" >&2
293
+ "${setup_runner}" "${target_path}" "$@"
294
+ cleanup_url_temp_dir
295
+ trap - EXIT
296
+ exit 0
297
+ fi
298
+
299
+ target_path="${target_input}"
300
+ if [[ ! -d "${target_path}" ]]; then
301
+ echo "Erro: caminho não encontrado ou não é diretório: ${target_path}" >&2
302
+ exit 1
303
+ fi
304
+
305
+ exec "${setup_runner}" "${target_path}" "$@"
165
306
  ;;
166
307
  monitor)
167
308
  if ! monitor_runner="$(find_monitor_runner "${os_name}")"; then
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "own-rag-cli",
3
- "version": "0.0.3-snapshot",
3
+ "version": "0.0.5-snapshot",
4
4
  "description": "Local RAG setup with ChromaDB + MCP server (Jina/BGE hybrid support).",
5
5
  "license": "MIT",
6
6
  "private": false,