contextcore 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. activity/__init__.py +1 -0
  2. activity/recent_sync.py +56 -0
  3. audio_search_implementation_v2/__init__.py +1 -0
  4. audio_search_implementation_v2/audio_index.py +67 -0
  5. audio_search_implementation_v2/scanner.py +47 -0
  6. audio_search_implementation_v2/worker.py +17 -0
  7. auth_manager.py +57 -0
  8. cli/__init__.py +2 -0
  9. cli/commands/__init__.py +1 -0
  10. cli/commands/doctor.py +238 -0
  11. cli/commands/helpers.py +500 -0
  12. cli/commands/init.py +1022 -0
  13. cli/commands/report.py +185 -0
  14. cli/commands/search.py +172 -0
  15. cli/commands/status.py +296 -0
  16. cli/commands/update.py +122 -0
  17. cli/constants.py +5 -0
  18. cli/env.py +60 -0
  19. cli/lifecycle.py +353 -0
  20. cli/main.py +293 -0
  21. cli/paths.py +53 -0
  22. cli/server.py +216 -0
  23. cli/ui.py +165 -0
  24. config.py +500 -0
  25. contextcore-0.1.0.dist-info/METADATA +693 -0
  26. contextcore-0.1.0.dist-info/RECORD +63 -0
  27. contextcore-0.1.0.dist-info/WHEEL +5 -0
  28. contextcore-0.1.0.dist-info/entry_points.txt +2 -0
  29. contextcore-0.1.0.dist-info/licenses/LICENSE +661 -0
  30. contextcore-0.1.0.dist-info/top_level.txt +17 -0
  31. core/__init__.py +16 -0
  32. core/sdk.py +233 -0
  33. detect_paths.py +346 -0
  34. image_search_implementation_v2/__init__.py +1 -0
  35. image_search_implementation_v2/annoy_store.py +210 -0
  36. image_search_implementation_v2/config.py +34 -0
  37. image_search_implementation_v2/db.py +277 -0
  38. image_search_implementation_v2/embedder.py +55 -0
  39. image_search_implementation_v2/index_worker.py +114 -0
  40. image_search_implementation_v2/main.py +42 -0
  41. image_search_implementation_v2/ocr.py +30 -0
  42. image_search_implementation_v2/search.py +258 -0
  43. index_controller/__init__.py +1 -0
  44. index_controller/ignore.py +27 -0
  45. index_controller/thumbnail_manager.py +112 -0
  46. mcp_registration.py +360 -0
  47. mcp_server.py +1391 -0
  48. rclone_service.py +55 -0
  49. register_mcp.py +710 -0
  50. run_index_pipeline.py +614 -0
  51. text_search_implementation_v2/__init__.py +1 -0
  52. text_search_implementation_v2/config.py +8 -0
  53. text_search_implementation_v2/db.py +148 -0
  54. text_search_implementation_v2/extract.py +191 -0
  55. text_search_implementation_v2/index_controller.py +30 -0
  56. text_search_implementation_v2/index_worker.py +237 -0
  57. text_search_implementation_v2/main.py +56 -0
  58. text_search_implementation_v2/search.py +264 -0
  59. unimain.py +3868 -0
  60. video_search_implementation_v2/__init__.py +1 -0
  61. video_search_implementation_v2/runtime.py +216 -0
  62. video_search_implementation_v2/video_index.py +659 -0
  63. video_search_implementation_v2/watcher.py +181 -0
activity/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """ContextCore activity helpers package."""
@@ -0,0 +1,56 @@
1
+ # activity/recent_sync.py
2
+
3
+ import json
4
+ import time
5
+ from pathlib import Path
6
+ from threading import Lock
7
+
8
+ import sys
9
+ _ROOT = Path(__file__).resolve().parent.parent
10
+ sys.path.insert(0, str(_ROOT))
11
+ from config import get_organized_root
12
+
13
+ MAX_ITEMS = 5
14
+ STATE_PATH = get_organized_root() / ".radxa_state" / "recent_sync.json"
15
+
16
+ _lock = Lock()
17
+
18
+
19
def _load():
    """Return the persisted recent-sync entries, or ``[]`` when unavailable.

    A missing, unreadable, or malformed state file is treated as an empty
    history. So is a file whose top-level JSON value is not a list, because
    callers filter, insert into, and slice the result as a list.
    """
    try:
        data = json.loads(STATE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON or undecodable bytes.
        # RecursionError: pathologically nested JSON.
        return []
    return data if isinstance(data, list) else []
26
+
27
+
28
def _save(items):
    """Persist *items* to ``STATE_PATH`` as indented JSON.

    The payload is first written to a sibling temporary file and then renamed
    over the target, so an interruption mid-write cannot leave a truncated
    state file behind.
    """
    STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = STATE_PATH.with_name(STATE_PATH.name + ".tmp")
    tmp_path.write_text(json.dumps(items, indent=2), encoding="utf-8")
    # Path.replace overwrites an existing destination on every platform.
    tmp_path.replace(STATE_PATH)
31
+
32
+
33
def record_sync(path: str, category: str):
    """Record *path* as the most recently synced file.

    The new entry is placed at the front of the persisted list, any earlier
    entry for the same path is removed, and the list is truncated to
    ``MAX_ITEMS`` before being saved. The whole read-modify-write cycle runs
    under the module lock.

    Args:
        path: Path of the synced file; its final component is stored as
            ``filename``.
        category: Category label stored alongside the entry.
    """
    with _lock:
        items = _load()

        # Remove duplicates (same path). Entries that are not well-formed
        # dicts are kept untouched rather than raising, so one bad persisted
        # entry cannot block every future recording.
        kept = [
            item
            for item in items
            if not (isinstance(item, dict) and item.get("path") == path)
        ]

        entry = {
            "path": path,
            "filename": Path(path).name,
            "category": category,
            "synced_at": int(time.time()),
        }

        _save([entry, *kept][:MAX_ITEMS])
52
+
53
+
54
def get_recent_syncs():
    """Return the persisted recent-sync entries, read under the module lock."""
    with _lock:
        recent = _load()
    return recent
@@ -0,0 +1 @@
1
+ """Audio indexing and transcription package for ContextCore."""
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+ from pathlib import Path
5
+
6
+ from faster_whisper import WhisperModel
7
+
8
+ from text_search_implementation_v2.db import get_file_mtime, upsert_file
9
+
10
+ AUDIO_EXTS = {".mp3", ".wav", ".m4a", ".aac", ".flac", ".ogg"}
11
+
12
+ _whisper_model = None
13
+ _whisper_lock = threading.Lock()
14
+
15
+
16
def get_whisper():
    """Return the shared Whisper model, loading it on first use.

    Loading is guarded by ``_whisper_lock`` so that concurrent first calls
    construct the model only once. After the model is cached, calls return it
    without taking the lock.
    """
    global _whisper_model
    if _whisper_model is None:
        with _whisper_lock:
            # Re-check under the lock: another thread may have finished
            # loading while this one was waiting.
            if _whisper_model is None:
                print("Loading Whisper model...")
                _whisper_model = WhisperModel("small.en", device="cpu", compute_type="int8")
    return _whisper_model
22
+
23
+
24
def prewarm_whisper() -> tuple[bool, str | None]:
    """Load the Whisper model ahead of time.

    Returns:
        ``(True, None)`` when the model loaded, otherwise ``(False, message)``
        where ``message`` is the string form of the exception raised.
    """
    try:
        get_whisper()
    except Exception as exc:
        return False, str(exc)
    else:
        return True, None
30
+
31
+
32
def transcribe_audio(path: Path):
    """Transcribe the audio file at *path* and return the text.

    Segment texts are stripped, blank segments are skipped, and the rest are
    joined with single spaces.
    """
    segments, _info = get_whisper().transcribe(str(path))
    pieces = []
    for segment in segments:
        text = segment.text.strip()
        if text:
            pieces.append(text)
    return " ".join(pieces)
36
+
37
+
38
def scan_audio_index(audio_root: Path):
    """Transcribe and index new or modified audio files under *audio_root*.

    Files whose stored mtime is within 1 ms of the on-disk mtime are skipped.
    Files that cannot be stat'ed, fail to transcribe, or yield an empty
    transcript are skipped as well.

    Returns:
        ``{"status": "ok", "new_audio_indexed": <count>}``.
    """
    indexed = 0

    for candidate in audio_root.rglob("*"):
        is_audio = candidate.is_file() and candidate.suffix.lower() in AUDIO_EXTS
        if not is_audio:
            continue

        try:
            modified = candidate.stat().st_mtime
        except Exception:
            continue

        known = get_file_mtime(str(candidate))
        unchanged = known is not None and abs(known - modified) < 0.001
        if unchanged:
            continue

        print("Transcribing:", candidate)
        try:
            text = transcribe_audio(candidate)
        except Exception as exc:
            print("Transcription failed:", exc)
            continue

        if text:
            upsert_file(str(candidate), candidate.name, "audio", modified, text)
            indexed += 1

    return {"status": "ok", "new_audio_indexed": indexed}
@@ -0,0 +1,47 @@
1
+ # audio_search_implementation_v2/scanner.py
2
+
3
+ from pathlib import Path
4
+ from faster_whisper import WhisperModel
5
+ from text_search_implementation_v2.db import upsert_file, get_file_mtime
6
+
7
+ AUDIO_EXTS = {".mp3", ".wav", ".m4a", ".aac", ".flac", ".ogg"}
8
+
9
def run_audio_scan(audio_root: Path):
    """Transcribe and index new or modified audio files under *audio_root*.

    Files whose stored mtime is within 1 ms of the on-disk mtime are skipped.
    The Whisper model is loaded only when the first file actually needs
    transcription, so a scan that finds nothing new does not load it.

    Args:
        audio_root: Directory searched recursively for files whose suffix is
            in ``AUDIO_EXTS``.

    Returns:
        The number of files indexed during this scan.
    """
    model = None
    total_new = 0

    for p in audio_root.rglob("*"):
        if not p.is_file() or p.suffix.lower() not in AUDIO_EXTS:
            continue

        try:
            mtime = p.stat().st_mtime
        except Exception:
            continue

        existing_mtime = get_file_mtime(str(p))
        if existing_mtime is not None and abs(existing_mtime - mtime) < 0.001:
            continue

        if model is None:
            # Kept outside the per-file try block so a model-load failure
            # still aborts the scan instead of being retried for every file.
            print("🎧 Loading Whisper model...")
            model = WhisperModel("small.en", device="cpu", compute_type="int8")

        print("🎧 Transcribing:", p)

        try:
            segments, _ = model.transcribe(str(p))
            # Skip blank segments: joining them would produce a transcript of
            # only spaces, which is truthy and would be indexed as content.
            transcript = " ".join(
                text for text in (seg.text.strip() for seg in segments) if text
            )
        except Exception as e:
            print("⚠️ Transcription failed:", e)
            continue

        if not transcript:
            continue

        upsert_file(str(p), p.name, "audio", mtime, transcript)
        total_new += 1

    print("🎧 Audio indexing complete. New files:", total_new)
    return total_new
@@ -0,0 +1,17 @@
1
+ # audio_search_implementation_v2/worker.py
2
+
3
+ import sys
4
+ sys.path.insert(0, str(__import__("pathlib").Path(__file__).resolve().parent.parent))
5
+
6
+ from pathlib import Path
7
+ from audio_search_implementation_v2.scanner import run_audio_scan
8
+ from config import get_audio_directories
9
+
10
if __name__ == "__main__":
    # Scan every configured audio directory that exists and report the
    # combined number of files indexed.
    audio_dirs = get_audio_directories()
    print("🚀 Audio worker started")
    total = sum(run_audio_scan(root) for root in audio_dirs if root.is_dir())
    print("🚀 Audio worker exiting. Indexed:", total)
auth_manager.py ADDED
@@ -0,0 +1,57 @@
1
+ import subprocess
2
+ import threading
3
+ import uuid
4
+ import re
5
+ import json
6
+
7
+ auth_sessions = {}
8
+
9
def start_auth(storage_type):
    """Start an ``rclone authorize`` session for *storage_type*.

    Launches rclone with its stderr merged into a text-mode stdout pipe,
    registers a new entry in ``auth_sessions`` keyed by a random UUID, and
    starts a daemon thread running ``_monitor_process`` for that session.

    Returns:
        The session id string.
    """
    command = ["rclone", "authorize", storage_type, "--auth-no-open-browser"]
    session_id = str(uuid.uuid4())

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    session = {
        "process": process,
        "status": "starting",
        "verification_url": None,
        "user_code": None,
        "token": None,
    }
    auth_sessions[session_id] = session

    monitor = threading.Thread(
        target=_monitor_process,
        args=(session_id,),
        daemon=True,
    )
    monitor.start()

    return session_id
34
+
35
+
36
def _monitor_process(session_id):
    """Follow an auth process's output and mirror progress into its session.

    Reads the process's stdout line by line and updates the matching
    ``auth_sessions`` entry:

    * a line containing both ``http`` and ``google`` is stored as
      ``verification_url`` and sets the status to ``waiting_for_user``;
    * a line matching ``XXXX-XXXX`` is stored as ``user_code``;
    * a line starting with ``{`` and containing ``access_token`` is stored as
      ``token`` and sets the status to ``completed``.

    When the process exits without a token having been captured, the status is
    set to ``failed`` so that pollers do not wait forever.
    """
    session = auth_sessions[session_id]
    process = session["process"]

    # The with-block closes the pipe once the process's output is exhausted,
    # even if handling a line raises.
    with process.stdout as output:
        for raw_line in output:
            line = raw_line.strip()
            is_token = line.startswith("{") and "access_token" in line

            # Never echo the token line: it carries OAuth credentials.
            print("AUTH OUTPUT:", "<token redacted>" if is_token else line)

            # Capture verification URL
            if "http" in line and "google" in line:
                session["verification_url"] = line
                session["status"] = "waiting_for_user"

            # Capture user code
            if re.search(r"[A-Z0-9]{4}-[A-Z0-9]{4}", line):
                session["user_code"] = line

            # Capture token JSON
            if is_token:
                session["token"] = line
                session["status"] = "completed"

    process.wait()

    if session["status"] != "completed":
        session["status"] = "failed"
cli/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # cli/__init__.py
2
+ # ContextCore CLI package
@@ -0,0 +1 @@
1
+ # cli/commands/__init__.py
cli/commands/doctor.py ADDED
@@ -0,0 +1,238 @@
1
+ # cli/commands/doctor.py
2
+ #
3
+ # contextcore doctor - diagnostic check of the local installation.
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import platform
9
+ import shutil
10
+ import sqlite3
11
+ import subprocess
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ from cli.constants import DEFAULT_PORT
16
+ from cli.lifecycle import autostart_status, get_port_usage, index_lock_active, read_index_state
17
+ from cli.paths import get_sdk_root
18
+ from cli.ui import console, error, header, hint, section, success, warning
19
+
20
+
21
def _check(label: str, ok: bool, fix_label: str = "", fix_cmd: str = "") -> bool:
    """Report one diagnostic result and return *ok* unchanged.

    A passing check is reported via ``success``. A failing check is reported
    via ``error``, followed by a ``hint`` when *fix_cmd* is non-empty.
    """
    if not ok:
        error(label)
        if fix_cmd:
            hint(fix_label, fix_cmd)
        return ok

    success(label)
    return ok
29
+
30
+
31
def run_doctor() -> None:
    """Run the ``contextcore doctor`` diagnostics and print a summary.

    Checks, in order: the Python/SQLite runtime, the user configuration file,
    autostart, the index lock, the MCP server module, the FastAPI server port,
    Claude Desktop registration, and optional model/OCR/ANN dependencies.
    Failures of required checks are counted and summarised at the end; missing
    optional components only produce warnings.
    """
    header("ContextCore Doctor")
    console.print("[dim]Checking your ContextCore setup...[/dim]")

    sdk_root = get_sdk_root()
    issues = 0

    # --- Runtime ---------------------------------------------------------
    section("Runtime")
    py_ok = sys.version_info >= (3, 10)
    if not _check(
        f"Python {sys.version.split()[0]} {'(OK)' if py_ok else '(requires 3.10+)'}",
        py_ok,
        "upgrade python",
        "https://python.org/downloads",
    ):
        issues += 1

    try:
        # sqlite3's connection context manager only manages transactions, so
        # the connection is closed explicitly.
        conn = sqlite3.connect(":memory:")
        try:
            conn.execute("SELECT 1").fetchone()
        finally:
            conn.close()
        _check("SQLite accessible", True)
    except Exception as e:
        _check(f"SQLite error: {e}", False)
        issues += 1

    try:
        import sqlite_vec  # noqa: F401
        _check("sqlite-vec installed", True)
    except ImportError:
        # Use the running interpreter so the hint works on every platform and
        # targets the environment actually being diagnosed.
        _check(
            "sqlite-vec not installed",
            False,
            "install sqlite-vec",
            f"{sys.executable} -m pip install sqlite-vec",
        )
        issues += 1

    # --- Configuration ---------------------------------------------------
    section("Configuration")
    cfg = Path.home() / ".contextcore" / "contextcore.yaml"
    if _check(
        f"Config file at {cfg}",
        cfg.exists(),
        "run init to create config",
        "contextcore init",
    ):
        # Only the first organized_root entry is inspected.
        for line in cfg.read_text(encoding="utf-8").splitlines():
            if line.startswith("organized_root:"):
                val = line.split(":", 1)[1].strip().strip("'\"")
                root = Path(val)
                _check(
                    f"organized_root exists: {root}",
                    root.exists(),
                    "create the directory or update the config",
                    f"mkdir \"{root}\"",
                )
                if not root.exists():
                    issues += 1
                break
    else:
        issues += 1

    # --- Autostart -------------------------------------------------------
    section("Autostart")
    auto = autostart_status()
    installed = bool(auto.get("installed"))
    if _check(
        f"Autostart {'installed' if installed else 'not installed'}",
        installed,
        "repair autostart",
        "contextcore init",
    ):
        if auto.get("target"):
            success(f"Autostart target: {auto.get('target')}")
    else:
        issues += 1

    # --- Index lock (informational; never counted as an issue) -----------
    section("Index Lock")
    active_lock, state = index_lock_active()
    if active_lock:
        success("A full index job is active")
        if state.get("source"):
            success(f"Source: {state.get('source')}")
    else:
        success("No active full index lock")
        if state.get("stale_lock_recovered_at"):
            warning(f"Recovered stale lock at {state.get('stale_lock_recovered_at')}")
        elif read_index_state().get("active"):
            warning("Index state says active, but no live lock was found")

    # --- MCP server ------------------------------------------------------
    section("MCP Server")
    mcp = sdk_root / "mcp_server.py"
    if _check("mcp_server.py found", mcp.exists(), "reinstall contextcore", "pip install --force-reinstall contextcore"):
        try:
            r = subprocess.run(
                [sys.executable, "-c", "import mcp_server"],
                capture_output=True,
                text=True,  # decode stderr so it prints as text, not a bytes repr
                errors="replace",
                cwd=str(sdk_root),
                timeout=10,
            )
            import_ok = r.returncode == 0
            import_err = r.stderr.strip()[-400:]
        except subprocess.TimeoutExpired:
            # A hanging import is reported as a failed check instead of
            # crashing the whole doctor run.
            import_ok = False
            import_err = "import did not finish within 10 seconds"

        _check(
            "mcp_server imports cleanly",
            import_ok,
            "retry MCP import check",
            f"cd \"{sdk_root}\" && \"{sys.executable}\" -c \"import mcp_server\"",
        )
        if not import_ok:
            console.print(f" [dim]{import_err}[/dim]")
            issues += 1
    else:
        issues += 1

    # --- FastAPI server --------------------------------------------------
    section("FastAPI Server")
    usage = get_port_usage(DEFAULT_PORT)
    if usage.get("is_contextcore"):
        success(f"Server listening on port {DEFAULT_PORT}")
    elif usage.get("in_use"):
        pid = usage.get("pid")
        name = usage.get("process_name") or "unknown"
        on_windows = platform.system() == "Windows"
        pid_suffix = f" (PID {pid})" if pid else ""
        if not pid:
            inspect_cmd = ""
        elif on_windows:
            inspect_cmd = f"tasklist /FI \"PID eq {pid}\""
        else:
            inspect_cmd = f"ps -p {pid} -o pid,comm,args"
        _check(
            f"Port {DEFAULT_PORT} is occupied by {name}{pid_suffix}",
            False,
            "inspect the conflicting process",
            inspect_cmd,
        )
        if pid:
            stop_cmd = f"taskkill /F /PID {pid}" if on_windows else f"kill {pid}"
            hint("stop it if appropriate", stop_cmd)
        issues += 1
    else:
        _check(
            f"Server listening on port {DEFAULT_PORT}",
            False,
            "start the server",
            "contextcore serve",
        )
        issues += 1

    # --- Claude Desktop --------------------------------------------------
    section("Claude Desktop")
    plat = platform.system().lower()
    if plat == "windows":
        import os
        claude_cfg = Path(os.environ.get("APPDATA", "~")) / "Claude" / "claude_desktop_config.json"
    elif plat == "darwin":
        claude_cfg = Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json"
    else:
        claude_cfg = Path.home() / ".config" / "Claude" / "claude_desktop_config.json"

    # Expands the "~" fallback used when APPDATA is unset.
    claude_cfg = claude_cfg.expanduser()
    if _check(
        f"Claude Desktop config found at {claude_cfg}",
        claude_cfg.exists(),
        "install Claude Desktop or open it once to create the config",
        "https://claude.ai/download",
    ):
        try:
            data = json.loads(claude_cfg.read_text(encoding="utf-8"))
            # "or {}" treats an explicit null mcpServers as "nothing registered".
            has_cc = "contextcore" in (data.get("mcpServers") or {})
            _check("ContextCore registered in Claude Desktop", has_cc, "re-register", "contextcore register claude-desktop")
            if not has_cc:
                issues += 1
        except Exception as e:
            error(f"Could not read Claude config: {e}")
            issues += 1
    else:
        issues += 1

    # --- Optional components (warnings only) -----------------------------
    section("Optional Models")
    try:
        import torch  # noqa: F401
        success("torch installed (image/video search available)")
    except ImportError:
        warning("torch not installed - image/video search unavailable")
        hint("install vision model", "contextcore install clip")

    try:
        import faster_whisper  # noqa: F401
        success("faster-whisper installed (audio search available)")
    except ImportError:
        warning("faster-whisper not installed - audio search unavailable")
        hint("install audio model", "contextcore install audio")

    section("Image Search Capabilities")
    try:
        import pytesseract  # noqa: F401
        success("pytesseract installed (OCR Python package available)")
    except ImportError:
        warning("pytesseract not installed - OCR text extraction disabled")
        hint("install OCR package", f"{sys.executable} -m pip install pytesseract")

    tesseract_ok = bool(shutil.which("tesseract"))
    if tesseract_ok:
        success("tesseract binary found (OCR runtime available)")
    else:
        warning("tesseract binary not found - OCR text extraction disabled")
        if platform.system() == "Windows":
            hint("install tesseract", "winget install UB-Mannheim.TesseractOCR")
        elif platform.system() == "Darwin":
            hint("install tesseract", "brew install tesseract")
        else:
            hint("install tesseract", "sudo apt-get install tesseract-ocr")

    try:
        import annoy  # noqa: F401
        success("annoy installed (semantic image ANN backend available)")
    except ImportError:
        warning("annoy not installed - semantic image search disabled (OCR/filename still works)")
        hint("install annoy", f"{sys.executable} -m pip install annoy")

    # --- Summary ---------------------------------------------------------
    console.print()
    if issues == 0:
        console.print("[bold green]All checks passed[/bold green] ContextCore is healthy.")
    else:
        console.print(f"[bold red]{issues} issue{'s' if issues > 1 else ''} found.[/bold red] Follow the Fix: suggestions above.")
    console.print()