nia-sync 0.1.1.tar.gz → 0.1.2.tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {nia_sync-0.1.1 → nia_sync-0.1.2}/PKG-INFO +2 -2
- {nia_sync-0.1.1 → nia_sync-0.1.2}/extractor.py +24 -14
- {nia_sync-0.1.1 → nia_sync-0.1.2}/main.py +71 -3
- {nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/PKG-INFO +2 -2
- {nia_sync-0.1.1 → nia_sync-0.1.2}/pyproject.toml +2 -2
- nia_sync-0.1.2/sync.py +305 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/watcher.py +11 -4
- nia_sync-0.1.1/sync.py +0 -187
- {nia_sync-0.1.1 → nia_sync-0.1.2}/auth.py +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/config.py +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/SOURCES.txt +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/dependency_links.txt +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/entry_points.txt +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/requires.txt +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/top_level.txt +0 -0
- {nia_sync-0.1.1 → nia_sync-0.1.2}/setup.cfg +0 -0
{nia_sync-0.1.1 → nia_sync-0.1.2}/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: nia-sync
-Version: 0.1.1
-Summary: Keep your local files in sync with Nia
+Version: 0.1.2
+Summary: Keep your local files in sync with Nia Cloud
 Requires-Python: >=3.10
 Requires-Dist: typer>=0.9.0
 Requires-Dist: rich>=13.0.0
{nia_sync-0.1.1 → nia_sync-0.1.2}/extractor.py
@@ -136,6 +136,11 @@ MAX_ROWS = 100_000
 MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10MB per file


+def _connect_sqlite_readonly(db_path: str) -> sqlite3.Connection:
+    """Open SQLite database in read-only mode to avoid lock issues."""
+    return sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=1)
+
+
 def detect_source_type(path: str) -> str:
     """
     Auto-detect the type of source based on path and file structure.
@@ -177,7 +182,7 @@ def detect_source_type(path: str) -> str:
         return TYPE_FOLDER

     try:
-        conn =
+        conn = _connect_sqlite_readonly(path)
         cursor = conn.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
         tables = {row[0].lower() for row in cursor.fetchall()}
@@ -252,7 +257,7 @@ def _extract_imessage(
     max_timestamp = cursor.get("last_timestamp", 0)
     since_rowid = cursor.get("last_rowid")

-    conn =
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -348,7 +353,7 @@ def _extract_safari_history(
     max_visit_time = cursor.get("last_visit_time", 0)
     since_visit_time = cursor.get("last_visit_time")

-    conn =
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -439,7 +444,7 @@ def _extract_chrome_history(
     max_visit_time = cursor.get("last_visit_time", 0)
     since_visit_time = cursor.get("last_visit_time")

-    conn =
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -534,7 +539,7 @@ def _extract_firefox_history(
     max_visit_date = cursor.get("last_visit_date", 0)
     since_visit_date = cursor.get("last_visit_date")

-    conn =
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -760,7 +765,9 @@ def _extract_folder(
     """Extract text files from a regular folder with proper exclusion patterns."""
     files = []
     last_mtime = cursor.get("last_mtime", 0)
+    last_path = cursor.get("last_path", "")
     max_mtime = last_mtime
+    max_path = last_path
     extracted_count = 0

     # Allowed text file extensions
@@ -783,6 +790,8 @@ def _extract_folder(
             and not d.startswith(".")
             and not d.endswith(".egg-info")
         ]
+        dirs.sort()
+        filenames.sort()

         for filename in filenames:
             if extracted_count >= limit:
@@ -816,9 +825,12 @@ def _extract_folder(
             try:
                 stat = os.stat(file_path)
                 mtime = stat.st_mtime
+                rel_path = os.path.relpath(file_path, folder_path)

-                # Skip if not modified since last sync
-                if mtime
+                # Skip if not modified since last sync (tie-break by path)
+                if mtime < last_mtime:
+                    continue
+                if mtime == last_mtime and rel_path <= last_path:
                     continue

                 # Skip large files
@@ -831,11 +843,6 @@ def _extract_folder(
                 if not content.strip():
                     continue

-                max_mtime = max(max_mtime, mtime)
-
-                # Relative path from folder root
-                rel_path = os.path.relpath(file_path, folder_path)
-
                 files.append({
                     "path": rel_path,
                     "content": content,
@@ -846,6 +853,9 @@ def _extract_folder(
                     },
                 })
                 extracted_count += 1
+                if mtime > max_mtime or (mtime == max_mtime and rel_path > max_path):
+                    max_mtime = mtime
+                    max_path = rel_path

             except (PermissionError, IOError, OSError, UnicodeDecodeError) as e:
                 logger.warning(f"Could not read {file_path}: {e}")
@@ -855,7 +865,7 @@ def _extract_folder(

     return {
         "files": files,
-        "cursor": {"last_mtime": max_mtime},
+        "cursor": {"last_mtime": max_mtime, "last_path": max_path},
         "stats": {"extracted": len(files), "db_type": TYPE_FOLDER},
     }

@@ -871,7 +881,7 @@ def _extract_generic_db(

     skip_tables = {"sqlite_sequence", "sqlite_stat1", "sqlite_stat4"}

-    conn =
+    conn = _connect_sqlite_readonly(db_path)
     cur = conn.cursor()

     cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
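
Two changes in extractor.py are worth calling out. First, every SQLite source is now opened through the new _connect_sqlite_readonly helper, which uses SQLite's URI syntax to open the database read-only, so the daemon never takes write locks on a live database such as the iMessage store or a browser history file. The following is a minimal, self-contained sketch of that pattern; the demo path and table are made up for illustration and are not part of the package:

import sqlite3

def open_readonly(db_path: str) -> sqlite3.Connection:
    # mode=ro refuses writes, uri=True enables the file: URI form, and a short
    # timeout keeps the caller from blocking on a database another app is using.
    return sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=1)

path = "/tmp/nia_readonly_demo.db"  # illustrative path
with sqlite3.connect(path) as rw:   # create something to read back
    rw.execute("CREATE TABLE IF NOT EXISTS notes (body TEXT)")

ro = open_readonly(path)
print([row[0] for row in ro.execute("SELECT name FROM sqlite_master WHERE type='table'")])
try:
    ro.execute("INSERT INTO notes VALUES ('nope')")
except sqlite3.OperationalError as exc:
    print(exc)  # "attempt to write a readonly database"
ro.close()

Second, the folder cursor now records both last_mtime and last_path, and the walk visits directories and filenames in sorted order, so files that share the same modification time are neither re-uploaded on every pass nor silently skipped.
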
{nia_sync-0.1.1 → nia_sync-0.1.2}/main.py
@@ -13,14 +13,25 @@ Usage:
 """
 import os
 import typer
+import httpx
+import logging
 from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table

 from auth import login as do_login, logout as do_logout, is_authenticated, get_api_key
-from config import get_sources, add_source, remove_source, enable_source_sync, NIA_SYNC_DIR, find_folder_path
+from config import get_sources, add_source, remove_source, enable_source_sync, NIA_SYNC_DIR, find_folder_path, API_BASE_URL, get_api_key
 from sync import sync_all_sources
-from extractor import
+from extractor import (
+    detect_source_type,
+    TYPE_FOLDER,
+    TYPE_TELEGRAM,
+    TYPE_GENERIC_DB,
+    TYPE_IMESSAGE,
+    TYPE_SAFARI_HISTORY,
+    TYPE_CHROME_HISTORY,
+    TYPE_FIREFOX_HISTORY,
+)

 app = typer.Typer(
     name="nia",
@@ -30,6 +41,7 @@ app = typer.Typer(
     epilog="[dim]Quick start: [cyan]nia login[/cyan] → [cyan]nia status[/cyan] → [cyan]nia[/cyan][/dim]",
 )
 console = Console()
+logger = logging.getLogger(__name__)


 @app.callback(invoke_without_command=True)
@@ -71,6 +83,14 @@ KNOWN_PATHS = {
     "firefox_history": "~/Library/Application Support/Firefox/Profiles/*/places.sqlite",
 }

+DB_SOURCE_TYPES = {
+    TYPE_IMESSAGE,
+    TYPE_SAFARI_HISTORY,
+    TYPE_CHROME_HISTORY,
+    TYPE_FIREFOX_HISTORY,
+    TYPE_GENERIC_DB,
+}
+

 def _check_local_sources():
     """Check for indexed sources that exist locally and can be synced."""
@@ -381,6 +401,18 @@ def _resolve_sources(sources: list[dict], log_discoveries: bool = False) -> list
     return resolved


+def _get_watched_files(source: dict) -> set[str] | None:
+    path = source.get("path")
+    detected_type = source.get("detected_type")
+    if not path or not detected_type:
+        return None
+    if detected_type in DB_SOURCE_TYPES:
+        expanded = os.path.abspath(os.path.expanduser(path))
+        watched = {expanded, f"{expanded}-wal", f"{expanded}-shm"}
+        return watched
+    return None
+
+
 @app.command(name="start", hidden=True)
 def daemon(
     watch: bool = typer.Option(True, "--watch/--poll", help="File watching (default) or polling"),
@@ -401,6 +433,9 @@ def daemon(
     pending_syncs: set[str] = set()  # source_ids pending sync
     sync_lock = threading.Lock()
     sources_by_id: dict[str, dict] = {}
+    last_sync_times: dict[str, float] = {}
+    last_heartbeat_time = 0.0
+    heartbeat_interval = 30

     def handle_signal(signum, frame):
         nonlocal running
@@ -442,6 +477,7 @@ def daemon(
             if added > 0:
                 total_added += added
                 console.print(f"[green]✓ {src.get('display_name', 'Unknown')}[/green] - {added} items synced")
+                last_sync_times[source_id] = time.time()
             else:
                 error = result.get("error", "unknown error")
                 errors.append(f"{src.get('display_name', 'Unknown')}: {error}")
@@ -473,7 +509,8 @@ def daemon(
         # Add new watchers
         for source_id in new_source_ids - current_watching:
             src = new_sources_by_id[source_id]
-            if watcher.watch(source_id, src["path"], on_source_changed):
+            watched_files = _get_watched_files(src)
+            if watcher.watch(source_id, src["path"], on_source_changed, watched_files=watched_files):
                 console.print(f" [dim]+ Watching {src.get('display_name', 'Unknown')}[/dim]")
                 newly_added.append(source_id)

@@ -559,6 +596,19 @@ def daemon(
             # Process any pending syncs from file watcher
             sync_pending_sources()

+            # Heartbeat to backend to mark daemon online
+            now = time.time()
+            if now - last_heartbeat_time >= heartbeat_interval:
+                _send_heartbeat(list(sources_by_id.keys()))
+                last_heartbeat_time = now
+
+            # Sanity sync to catch missed events
+            if fallback_interval > 0:
+                for source_id in list(sources_by_id.keys()):
+                    last_sync = last_sync_times.get(source_id, 0)
+                    if now - last_sync >= fallback_interval:
+                        pending_syncs.add(source_id)
+
             # Instant refresh if new folder detected matching an unlinked source
             if refresh_triggered.is_set():
                 refresh_triggered.clear()
@@ -604,6 +654,7 @@ def daemon(
         sync_count = 0
         while running:
             resolved, _ = refresh_sources()
+            _send_heartbeat([src["local_folder_id"] for src in resolved])

             sync_count += 1
             console.print(f"\n[bold]Sync #{sync_count}[/bold] - {len(resolved)} source(s)")
@@ -633,5 +684,22 @@ def daemon(
     console.print("[green]✓ Stopped[/green]")


+def _send_heartbeat(source_ids: list[str]) -> None:
+    if not source_ids:
+        return
+    api_key = get_api_key()
+    if not api_key:
+        return
+    try:
+        with httpx.Client(timeout=10) as client:
+            client.post(
+                f"{API_BASE_URL}/v2/daemon/heartbeat",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"source_ids": source_ids},
+            )
+    except Exception:
+        logger.debug("Heartbeat failed", exc_info=True)
+
+
 if __name__ == "__main__":
     app()
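
The daemon loop now performs two time-based chores on top of event-driven syncs: it POSTs a heartbeat every 30 seconds so the backend can show the daemon as online, and it queues a fallback ("sanity") sync for any source whose last successful sync is older than the fallback interval, catching events the file watcher missed. A small, self-contained sketch of just that gating logic; the constants and the print stand-in are illustrative, not the package's exact values:

import time

HEARTBEAT_INTERVAL = 30           # mirrors heartbeat_interval in the daemon
FALLBACK_INTERVAL = 300           # illustrative; the real value comes from the CLI options

last_heartbeat = 0.0
last_sync: dict[str, float] = {}  # source_id -> time of last successful sync

def tick(source_ids: list[str], pending: set[str]) -> None:
    """One pass of the loop: heartbeat if due, queue sources that look stale."""
    global last_heartbeat
    now = time.time()
    if now - last_heartbeat >= HEARTBEAT_INTERVAL:
        print(f"heartbeat for {len(source_ids)} source(s)")  # stands in for the HTTP POST
        last_heartbeat = now
    for sid in source_ids:
        if now - last_sync.get(sid, 0) >= FALLBACK_INTERVAL:
            pending.add(sid)  # picked up by the next sync pass

pending: set[str] = set()
tick(["folder-1", "imessage"], pending)
print(sorted(pending))  # both queued on the first pass, since neither has ever synced
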
{nia_sync-0.1.1 → nia_sync-0.1.2}/nia_sync.egg-info/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: nia-sync
-Version: 0.1.1
-Summary: Keep your local files in sync with Nia
+Version: 0.1.2
+Summary: Keep your local files in sync with Nia Cloud
 Requires-Python: >=3.10
 Requires-Dist: typer>=0.9.0
 Requires-Dist: rich>=13.0.0
{nia_sync-0.1.1 → nia_sync-0.1.2}/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "nia-sync"
-version = "0.1.1"
-description = "Keep your local files in sync with Nia"
+version = "0.1.2"
+description = "Keep your local files in sync with Nia Cloud"
 requires-python = ">=3.10"
 dependencies = [
     "typer>=0.9.0",
nia_sync-0.1.2/sync.py (ADDED)
@@ -0,0 +1,305 @@
+"""
+Sync engine for Nia Local Sync CLI.
+
+Handles:
+- Extracting data from local sources (databases, folders)
+- Uploading to cloud API
+- Cursor management for incremental sync
+"""
+import os
+import logging
+import random
+import time
+from pathlib import Path
+from typing import Any
+import httpx
+
+from config import API_BASE_URL, get_api_key
+from extractor import extract_incremental, detect_source_type
+
+logger = logging.getLogger(__name__)
+
+SYNC_TIMEOUT = 60  # 1 minute per sync request (reduced from 2 min)
+CONNECT_TIMEOUT = 10  # 10 second connection timeout
+MAX_FILES_PER_BATCH = 500  # Keep below backend limit (1000)
+MAX_RETRIES = 4
+RETRY_BASE_DELAY = 1.5
+RETRY_MAX_DELAY = 15.0
+
+# Reusable client for connection pooling
+_http_client: httpx.Client | None = None
+
+def get_http_client() -> httpx.Client:
+    """Get or create HTTP client with connection pooling."""
+    global _http_client
+    if _http_client is None:
+        _http_client = httpx.Client(
+            timeout=httpx.Timeout(SYNC_TIMEOUT, connect=CONNECT_TIMEOUT),
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
+        )
+    return _http_client
+
+
+def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """
+    Sync all configured sources.
+
+    Args:
+        sources: List of source configs from cloud API
+
+    Returns:
+        List of results for each source
+    """
+    results = []
+
+    for source in sources:
+        result = sync_source(source)
+        results.append(result)
+
+    return results
+
+
+def sync_source(source: dict[str, Any]) -> dict[str, Any]:
+    """
+    Sync a single source.
+
+    Args:
+        source: Source config from cloud API with:
+            - local_folder_id: UUID of the local folder
+            - path: Local path to sync
+            - detected_type: Type of source
+            - cursor: Current sync cursor
+
+    Returns:
+        Result dict with status, path, and stats
+    """
+    local_folder_id = source.get("local_folder_id")
+    path = source.get("path", "")
+    detected_type = source.get("detected_type")
+    cursor = source.get("cursor", {})
+
+    # Expand ~ in path
+    path = os.path.expanduser(path)
+
+    # Validate path exists
+    if not os.path.exists(path):
+        error_message = f"Path does not exist: {path}"
+        report_sync_error(local_folder_id, error_message, path)
+        return {
+            "path": path,
+            "status": "error",
+            "error": error_message,
+        }
+
+    # Auto-detect type if not specified
+    if not detected_type:
+        detected_type = detect_source_type(path)
+
+    logger.info(f"Syncing {path} (type={detected_type})")
+
+    try:
+        # Extract data incrementally
+        extraction_result = extract_incremental(
+            path=path,
+            source_type=detected_type,
+            cursor=cursor,
+        )
+
+        files = extraction_result.get("files", [])
+        new_cursor = extraction_result.get("cursor", {})
+        stats = extraction_result.get("stats", {})
+
+        if not files:
+            logger.info(f"No new data to sync for {path}")
+            return {
+                "path": path,
+                "status": "success",
+                "added": 0,
+                "message": "No new data",
+            }
+
+        # Upload to backend in batches
+        upload_result = upload_sync_batches(
+            local_folder_id=local_folder_id,
+            files=files,
+            cursor=new_cursor,
+            stats=stats,
+        )
+
+        if upload_result.get("status") == "ok":
+            # Update source cursor in-place so subsequent syncs use it
+            source["cursor"] = new_cursor
+            return {
+                "path": path,
+                "status": "success",
+                "added": len(files),
+                "chunks_indexed": upload_result.get("chunks_indexed", 0),
+                "new_cursor": new_cursor,
+            }
+        else:
+            report_sync_error(local_folder_id, upload_result.get("message", "Upload failed"), path)
+            return {
+                "path": path,
+                "status": "error",
+                "error": upload_result.get("message", "Upload failed"),
+            }
+
+    except PermissionError:
+        error_message = "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security."
+        report_sync_error(local_folder_id, error_message, path)
+        return {
+            "path": path,
+            "status": "error",
+            "error": error_message,
+        }
+    except Exception as e:
+        logger.error(f"Error syncing {path}: {e}", exc_info=True)
+        report_sync_error(local_folder_id, str(e), path)
+        return {
+            "path": path,
+            "status": "error",
+            "error": str(e),
+        }
+
+
+def upload_sync_data(
+    local_folder_id: str,
+    files: list[dict[str, Any]],
+    cursor: dict[str, Any],
+    stats: dict[str, Any],
+    is_final_batch: bool = True,
+) -> dict[str, Any]:
+    """
+    Upload extracted data to the cloud API.
+
+    Args:
+        local_folder_id: UUID of the local folder
+        files: List of extracted files with path, content, metadata
+        cursor: New cursor after extraction
+        stats: Extraction stats
+
+    Returns:
+        API response dict
+    """
+    api_key = get_api_key()
+    if not api_key:
+        return {"status": "error", "message": "Not authenticated"}
+
+    try:
+        client = get_http_client()
+        response = _post_with_retries(
+            client=client,
+            url=f"{API_BASE_URL}/v2/daemon/sync",
+            headers={"Authorization": f"Bearer {api_key}"},
+            payload={
+                "local_folder_id": local_folder_id,
+                "files": files,
+                "cursor": cursor,
+                "stats": stats,
+                "is_final_batch": is_final_batch,
+            },
+        )
+
+        if response is None:
+            return {"status": "error", "message": "Request failed after retries"}
+
+        if response.status_code == 200:
+            return response.json()
+        elif response.status_code == 401:
+            return {"status": "error", "message": "Authentication failed"}
+        elif response.status_code == 404:
+            return {"status": "error", "message": "Local folder not found"}
+        else:
+            try:
+                detail = response.json().get("detail", response.text)
+            except ValueError:
+                detail = response.text or f"HTTP {response.status_code}"
+            return {"status": "error", "message": f"API error: {detail}"}
+
+    except httpx.TimeoutException:
+        return {"status": "error", "message": "Request timeout"}
+    except httpx.RequestError as e:
+        return {"status": "error", "message": f"Network error: {e}"}
+
+
+def upload_sync_batches(
+    local_folder_id: str,
+    files: list[dict[str, Any]],
+    cursor: dict[str, Any],
+    stats: dict[str, Any],
+) -> dict[str, Any]:
+    """Upload files in batches and only advance cursor after all succeed."""
+    if not files:
+        return {"status": "ok", "chunks_indexed": 0}
+
+    total_batches = max(1, (len(files) + MAX_FILES_PER_BATCH - 1) // MAX_FILES_PER_BATCH)
+    chunks_indexed = 0
+
+    for batch_index, batch in enumerate(_iter_batches(files, MAX_FILES_PER_BATCH), start=1):
+        is_last_batch = batch_index == total_batches
+        result = upload_sync_data(
+            local_folder_id=local_folder_id,
+            files=batch,
+            cursor=cursor if is_last_batch else {},
+            stats=stats if is_last_batch else {},
+            is_final_batch=is_last_batch,
+        )
+
+        if result.get("status") != "ok":
+            return result
+
+        chunks_indexed += result.get("chunks_indexed", 0)
+
+    return {"status": "ok", "chunks_indexed": chunks_indexed}
+
+
+def report_sync_error(local_folder_id: str | None, error: str, path: str | None = None) -> None:
+    """Report local sync errors to backend for UI visibility."""
+    if not local_folder_id:
+        return
+    api_key = get_api_key()
+    if not api_key:
+        return
+
+    try:
+        client = get_http_client()
+        _post_with_retries(
+            client=client,
+            url=f"{API_BASE_URL}/v2/daemon/sources/{local_folder_id}/error",
+            headers={"Authorization": f"Bearer {api_key}"},
+            payload={"error": error, "path": path},
+        )
+    except Exception:
+        logger.debug("Failed to report sync error", exc_info=True)
+
+
+def _iter_batches(items: list[dict[str, Any]], size: int):
+    for i in range(0, len(items), size):
+        yield items[i:i + size]
+
+
+def _post_with_retries(
+    client: httpx.Client,
+    url: str,
+    headers: dict[str, str],
+    payload: dict[str, Any],
+) -> httpx.Response | None:
+    delay = RETRY_BASE_DELAY
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = client.post(url, headers=headers, json=payload)
+            if response.status_code in {429} or response.status_code >= 500:
+                raise httpx.HTTPStatusError(
+                    f"Retryable status {response.status_code}",
+                    request=response.request,
+                    response=response,
+                )
+            return response
+        except (httpx.TimeoutException, httpx.RequestError, httpx.HTTPStatusError) as e:
+            is_last_attempt = attempt >= MAX_RETRIES - 1
+            if is_last_attempt:
+                logger.warning(f"POST failed after retries: {e}")
+                return None
+            jitter = random.uniform(0.8, 1.2)
+            time.sleep(min(RETRY_MAX_DELAY, delay) * jitter)
+            delay *= 2
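
Compared with the deleted 0.1.1 version of sync.py (shown at the end of this diff), the rewrite pools HTTP connections, retries transient failures with exponential backoff and jitter, and splits large uploads into batches of at most MAX_FILES_PER_BATCH, sending the cursor and stats only with the final batch so the server cannot advance the cursor past data it never received. A self-contained sketch of that final-batch cursor contract with a stubbed uploader; the stub stands in for the real POST to /v2/daemon/sync and is not part of the package:

from typing import Any

MAX_FILES_PER_BATCH = 500

def iter_batches(items: list[Any], size: int):
    for i in range(0, len(items), size):
        yield items[i:i + size]

def upload_stub(files: list[Any], cursor: dict, is_final_batch: bool) -> dict:
    # Stand-in for the real HTTP upload.
    print(f"batch of {len(files)} files, final={is_final_batch}, cursor={cursor}")
    return {"status": "ok", "chunks_indexed": len(files)}

def upload_in_batches(files: list[Any], cursor: dict) -> dict:
    total = max(1, (len(files) + MAX_FILES_PER_BATCH - 1) // MAX_FILES_PER_BATCH)
    indexed = 0
    for i, batch in enumerate(iter_batches(files, MAX_FILES_PER_BATCH), start=1):
        final = i == total
        # The cursor rides only on the final batch; a failure mid-way leaves it
        # untouched, so the next run re-extracts from the old cursor instead of
        # losing the un-uploaded remainder.
        result = upload_stub(batch, cursor if final else {}, final)
        if result.get("status") != "ok":
            return result
        indexed += result.get("chunks_indexed", 0)
    return {"status": "ok", "chunks_indexed": indexed}

print(upload_in_batches([{"path": f"f{i}.txt"} for i in range(1200)], {"last_mtime": 123}))
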
{nia_sync-0.1.1 → nia_sync-0.1.2}/watcher.py
@@ -41,6 +41,7 @@ class SyncEventHandler(FileSystemEventHandler):
         source_path: str,
         on_change: Callable[[str], None],
         debounce_sec: float = 2.0,
+        watched_files: set[str] | None = None,
     ):
         super().__init__()
         self.source_id = source_id
@@ -51,8 +52,13 @@ class SyncEventHandler(FileSystemEventHandler):
         self._lock = threading.Lock()
         self._pending_changes = 0

+        # Watch specific files if provided (e.g., DB files without extensions)
+        if watched_files:
+            self._watched_files = {
+                os.path.abspath(os.path.expanduser(p)) for p in watched_files
+            }
         # For database files, also watch the WAL/SHM files
-        if self.source_path.endswith('.db'):
+        elif self.source_path.endswith('.db'):
             self._watched_files = {
                 self.source_path,
                 self.source_path + '-wal',
@@ -157,6 +163,7 @@ class FileWatcher:
         source_id: str,
         path: str,
         on_change: Callable[[str], None],
+        watched_files: set[str] | None = None,
     ) -> bool:
         """
         Add a path to watch.
@@ -178,9 +185,8 @@ class FileWatcher:
         # Expand path
         expanded = os.path.expanduser(path)

-        # For database files, watch the parent directory
-
-        if expanded.endswith('.db'):
+        # For database files (or explicit watched files), watch the parent directory
+        if watched_files or expanded.endswith('.db'):
             watch_path = os.path.dirname(expanded)
         else:
             watch_path = expanded
@@ -196,6 +202,7 @@ class FileWatcher:
             source_path=expanded,
             on_change=on_change,
             debounce_sec=self.debounce_sec,
+            watched_files=watched_files,
        )

         # Schedule watch
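
The watcher changes let the daemon pass an explicit watched_files set, so database-backed sources are monitored by watching the parent directory and filtering events down to the database plus its -wal/-shm companions (SQLite in WAL mode often writes to the companion files first). A minimal sketch of that filtering idea using watchdog, which is the library the FileSystemEventHandler and Observer names in this diff come from; the temp-directory demo is illustrative and not the package's code:

import os
import tempfile
import time

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class FilteredHandler(FileSystemEventHandler):
    """Fire a callback only for an explicit set of files inside the watched directory."""

    def __init__(self, watched_files: set[str], on_change) -> None:
        super().__init__()
        self._watched = {os.path.abspath(os.path.expanduser(p)) for p in watched_files}
        self._on_change = on_change

    def on_modified(self, event):
        if not event.is_directory and os.path.abspath(event.src_path) in self._watched:
            self._on_change(event.src_path)

# Demo with a throwaway directory standing in for a real SQLite source location.
tmp = tempfile.mkdtemp()
db = os.path.join(tmp, "example.db")
open(db, "w").close()

observer = Observer()
observer.schedule(FilteredHandler({db, f"{db}-wal", f"{db}-shm"}, print), tmp, recursive=False)
observer.start()
with open(db, "a") as fh:   # modifying the watched file triggers on_modified
    fh.write("x")
time.sleep(1)
observer.stop()
observer.join()
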
nia_sync-0.1.1/sync.py (DELETED)
@@ -1,187 +0,0 @@
-"""
-Sync engine for Nia Local Sync CLI.
-
-Handles:
-- Extracting data from local sources (databases, folders)
-- Uploading to cloud API
-- Cursor management for incremental sync
-"""
-import os
-import logging
-from pathlib import Path
-from typing import Any
-import httpx
-
-from config import API_BASE_URL, get_api_key
-from extractor import extract_incremental, detect_source_type
-
-logger = logging.getLogger(__name__)
-
-SYNC_TIMEOUT = 120  # 2 minutes per sync request
-
-
-def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """
-    Sync all configured sources.
-
-    Args:
-        sources: List of source configs from cloud API
-
-    Returns:
-        List of results for each source
-    """
-    results = []
-
-    for source in sources:
-        result = sync_source(source)
-        results.append(result)
-
-    return results
-
-
-def sync_source(source: dict[str, Any]) -> dict[str, Any]:
-    """
-    Sync a single source.
-
-    Args:
-        source: Source config from cloud API with:
-            - local_folder_id: UUID of the local folder
-            - path: Local path to sync
-            - detected_type: Type of source
-            - cursor: Current sync cursor
-
-    Returns:
-        Result dict with status, path, and stats
-    """
-    local_folder_id = source.get("local_folder_id")
-    path = source.get("path", "")
-    detected_type = source.get("detected_type")
-    cursor = source.get("cursor", {})
-
-    # Expand ~ in path
-    path = os.path.expanduser(path)
-
-    # Validate path exists
-    if not os.path.exists(path):
-        return {
-            "path": path,
-            "status": "error",
-            "error": f"Path does not exist: {path}",
-        }
-
-    # Auto-detect type if not specified
-    if not detected_type:
-        detected_type = detect_source_type(path)
-
-    logger.info(f"Syncing {path} (type={detected_type})")
-
-    try:
-        # Extract data incrementally
-        extraction_result = extract_incremental(
-            path=path,
-            source_type=detected_type,
-            cursor=cursor,
-        )
-
-        files = extraction_result.get("files", [])
-        new_cursor = extraction_result.get("cursor", {})
-        stats = extraction_result.get("stats", {})
-
-        if not files:
-            logger.info(f"No new data to sync for {path}")
-            return {
-                "path": path,
-                "status": "success",
-                "added": 0,
-                "message": "No new data",
-            }
-
-        # Upload to backend
-        upload_result = upload_sync_data(
-            local_folder_id=local_folder_id,
-            files=files,
-            cursor=new_cursor,
-            stats=stats,
-        )
-
-        if upload_result.get("status") == "ok":
-            # Update source cursor in-place so subsequent syncs use it
-            source["cursor"] = new_cursor
-            return {
-                "path": path,
-                "status": "success",
-                "added": len(files),
-                "chunks_indexed": upload_result.get("chunks_indexed", 0),
-                "new_cursor": new_cursor,
-            }
-        else:
-            return {
-                "path": path,
-                "status": "error",
-                "error": upload_result.get("message", "Upload failed"),
-            }
-
-    except PermissionError:
-        return {
-            "path": path,
-            "status": "error",
-            "error": "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security.",
-        }
-    except Exception as e:
-        logger.error(f"Error syncing {path}: {e}", exc_info=True)
-        return {
-            "path": path,
-            "status": "error",
-            "error": str(e),
-        }
-
-
-def upload_sync_data(
-    local_folder_id: str,
-    files: list[dict[str, Any]],
-    cursor: dict[str, Any],
-    stats: dict[str, Any],
-) -> dict[str, Any]:
-    """
-    Upload extracted data to the cloud API.
-
-    Args:
-        local_folder_id: UUID of the local folder
-        files: List of extracted files with path, content, metadata
-        cursor: New cursor after extraction
-        stats: Extraction stats
-
-    Returns:
-        API response dict
-    """
-    api_key = get_api_key()
-    if not api_key:
-        return {"status": "error", "message": "Not authenticated"}
-
-    try:
-        with httpx.Client(timeout=SYNC_TIMEOUT) as client:
-            response = client.post(
-                f"{API_BASE_URL}/v2/daemon/sync",
-                headers={"Authorization": f"Bearer {api_key}"},
-                json={
-                    "local_folder_id": local_folder_id,
-                    "files": files,
-                    "cursor": cursor,
-                    "stats": stats,
-                },
-            )
-
-        if response.status_code == 200:
-            return response.json()
-        elif response.status_code == 401:
-            return {"status": "error", "message": "Authentication failed"}
-        elif response.status_code == 404:
-            return {"status": "error", "message": "Local folder not found"}
-        else:
-            detail = response.json().get("detail", response.text)
-            return {"status": "error", "message": f"API error: {detail}"}
-
-    except httpx.TimeoutException:
-        return {"status": "error", "message": "Request timeout"}
-    except httpx.RequestError as e:
-        return {"status": "error", "message": f"Network error: {e}"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|