nia-sync 0.1.0-py3-none-any.whl → 0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
auth.py CHANGED
@@ -104,64 +104,64 @@ def login() -> bool:
 def _poll_for_api_key(session_id: str, user_code: str) -> str | None:
     """Poll the exchange endpoint until authentication completes."""
     with httpx.Client(timeout=30) as client:
-        for attempt in range(MAX_POLL_ATTEMPTS):
-            try:
-                response = client.post(
-                    f"{API_BASE_URL}/public/mcp-device/exchange",
-                    json={
-                        "authorization_session_id": session_id,
-                        "user_code": user_code,
-                    }
-                )
-
-                if response.status_code == 200:
-                    data = response.json()
-                    console.print("[green]Authentication successful![/green]")
-                    return data.get("api_key")
-
-                elif response.status_code == 400:
-                    # Not ready yet - still pending or authorized but not ready
-                    detail = response.json().get("detail", "")
-                    if "not yet authorized" in detail.lower() or "complete the setup" in detail.lower():
-                        # Still waiting for user to complete in browser
-                        _show_waiting_indicator(attempt)
-                        time.sleep(POLL_INTERVAL_SECONDS)
-                        continue
-                    else:
-                        console.print(f"[red]Error: {detail}[/red]")
+        with console.status("[dim]Waiting for browser authentication...[/dim]") as status:
+            for attempt in range(MAX_POLL_ATTEMPTS):
+                try:
+                    response = client.post(
+                        f"{API_BASE_URL}/public/mcp-device/exchange",
+                        json={
+                            "authorization_session_id": session_id,
+                            "user_code": user_code,
+                        }
+                    )
+
+                    if response.status_code == 200:
+                        data = response.json()
+                        status.stop()
+                        console.print("[green]Authentication successful![/green]")
+                        return data.get("api_key")
+
+                    elif response.status_code == 400:
+                        # Not ready yet - still pending or authorized but not ready
+                        detail = response.json().get("detail", "")
+                        if "not yet authorized" in detail.lower() or "complete the setup" in detail.lower():
+                            # Still waiting for user to complete in browser
+                            time.sleep(POLL_INTERVAL_SECONDS)
+                            continue
+                        else:
+                            status.stop()
+                            console.print(f"[red]Error: {detail}[/red]")
+                            return None
+
+                    elif response.status_code == 410:
+                        status.stop()
+                        console.print("[red]Session expired. Please try again.[/red]")
                         return None
 
-                elif response.status_code == 410:
-                    console.print("[red]Session expired. Please try again.[/red]")
-                    return None
-
-                elif response.status_code == 409:
-                    console.print("[red]Session already used. Please try again.[/red]")
-                    return None
+                    elif response.status_code == 409:
+                        status.stop()
+                        console.print("[red]Session already used. Please try again.[/red]")
+                        return None
 
-                elif response.status_code == 404:
-                    console.print("[red]Invalid session. Please try again.[/red]")
-                    return None
+                    elif response.status_code == 404:
+                        status.stop()
+                        console.print("[red]Invalid session. Please try again.[/red]")
+                        return None
 
-                else:
-                    console.print(f"[red]Unexpected error: {response.status_code}[/red]")
-                    return None
+                    else:
+                        status.stop()
+                        console.print(f"[red]Unexpected error: {response.status_code}[/red]")
+                        return None
 
-            except httpx.RequestError as e:
-                console.print(f"[yellow]Network error, retrying... ({e})[/yellow]")
-                time.sleep(POLL_INTERVAL_SECONDS)
-                continue
+                except httpx.RequestError as e:
+                    console.print(f"[yellow]Network error, retrying... ({e})[/yellow]")
+                    time.sleep(POLL_INTERVAL_SECONDS)
+                    continue
 
     console.print("[red]Timeout waiting for authentication. Please try again.[/red]")
     return None
 
 
-def _show_waiting_indicator(attempt: int):
-    """Show a waiting indicator."""
-    dots = "." * ((attempt % 3) + 1)
-    console.print(f"\r[dim]Waiting for browser authentication{dots} [/dim]", end="")
-
-
 def logout():
     """Clear stored credentials."""
     clear_config()
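The change above drops the hand-rolled _show_waiting_indicator dots in favor of rich's built-in spinner. A minimal standalone sketch of that pattern follows; poll_once, the attempt count, and the demo key are hypothetical stand-ins, not the package's real values.

# Sketch of the rich console.status spinner pattern used in 0.1.2.
import time
from rich.console import Console

console = Console()

def wait_for_approval(poll_once, max_attempts=5, interval=0.5):
    with console.status("[dim]Waiting for browser authentication...[/dim]") as status:
        for _ in range(max_attempts):
            api_key = poll_once()       # one exchange attempt; None means "not ready yet"
            if api_key is not None:
                status.stop()           # stop the spinner before printing the outcome
                console.print("[green]Authentication successful![/green]")
                return api_key
            time.sleep(interval)
    console.print("[red]Timeout waiting for authentication.[/red]")
    return None

# Example: succeeds on the third poll.
attempts = iter([None, None, "nia_demo_key"])
print(wait_for_approval(lambda: next(attempts)))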
config.py CHANGED
@@ -16,7 +16,7 @@ NIA_SYNC_DIR = Path.home() / ".nia-sync"
 CONFIG_FILE = NIA_SYNC_DIR / "config.json"
 
 # API configuration
-API_BASE_URL = os.getenv("NIA_API_URL", "https://api.trynia.ai")
+API_BASE_URL = os.getenv("NIA_API_URL", "https://apigcp.trynia.ai")
 
 # Default directories to search for folders (no config needed)
 DEFAULT_WATCH_DIRS = [
extractor.py CHANGED
@@ -136,6 +136,11 @@ MAX_ROWS = 100_000
 MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10MB per file
 
 
+def _connect_sqlite_readonly(db_path: str) -> sqlite3.Connection:
+    """Open SQLite database in read-only mode to avoid lock issues."""
+    return sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=1)
+
+
 def detect_source_type(path: str) -> str:
     """
     Auto-detect the type of source based on path and file structure.
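The new _connect_sqlite_readonly helper uses sqlite3's standard URI syntax: mode=ro means the daemon never takes a write lock on a database owned by another app, and timeout=1 makes a momentarily locked file fail fast instead of blocking. A small self-contained sketch of the same pattern; the throwaway database path is made up for illustration.

# Demonstrate the read-only URI connection against a scratch database.
import os, sqlite3, tempfile

path = os.path.join(tempfile.mkdtemp(), "demo.db")   # hypothetical path
with sqlite3.connect(path) as conn:
    conn.execute("CREATE TABLE t (x)")
    conn.execute("INSERT INTO t VALUES (1)")

ro = sqlite3.connect(f"file:{path}?mode=ro", uri=True, timeout=1)
print(ro.execute("SELECT count(*) FROM t").fetchone())   # reads work: (1,)
try:
    ro.execute("INSERT INTO t VALUES (2)")                # writes are rejected
except sqlite3.OperationalError as exc:
    print(exc)                                            # "attempt to write a readonly database"
ro.close()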
@@ -177,7 +182,7 @@ def detect_source_type(path: str) -> str:
         return TYPE_FOLDER
 
     try:
-        conn = sqlite3.connect(path)
+        conn = _connect_sqlite_readonly(path)
         cursor = conn.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
         tables = {row[0].lower() for row in cursor.fetchall()}
@@ -252,7 +257,7 @@ def _extract_imessage(
     max_timestamp = cursor.get("last_timestamp", 0)
     since_rowid = cursor.get("last_rowid")
 
-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()
 
@@ -348,7 +353,7 @@ def _extract_safari_history(
     max_visit_time = cursor.get("last_visit_time", 0)
     since_visit_time = cursor.get("last_visit_time")
 
-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()
 
@@ -439,7 +444,7 @@ def _extract_chrome_history(
     max_visit_time = cursor.get("last_visit_time", 0)
     since_visit_time = cursor.get("last_visit_time")
 
-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()
 
@@ -534,7 +539,7 @@ def _extract_firefox_history(
     max_visit_date = cursor.get("last_visit_date", 0)
     since_visit_date = cursor.get("last_visit_date")
 
-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()
 
@@ -760,7 +765,9 @@ def _extract_folder(
     """Extract text files from a regular folder with proper exclusion patterns."""
     files = []
     last_mtime = cursor.get("last_mtime", 0)
+    last_path = cursor.get("last_path", "")
     max_mtime = last_mtime
+    max_path = last_path
     extracted_count = 0
 
     # Allowed text file extensions
@@ -783,6 +790,8 @@ def _extract_folder(
             and not d.startswith(".")
             and not d.endswith(".egg-info")
         ]
+        dirs.sort()
+        filenames.sort()
 
         for filename in filenames:
             if extracted_count >= limit:
@@ -816,9 +825,12 @@ def _extract_folder(
             try:
                 stat = os.stat(file_path)
                 mtime = stat.st_mtime
+                rel_path = os.path.relpath(file_path, folder_path)
 
-                # Skip if not modified since last sync
-                if mtime <= last_mtime:
+                # Skip if not modified since last sync (tie-break by path)
+                if mtime < last_mtime:
+                    continue
+                if mtime == last_mtime and rel_path <= last_path:
                     continue
 
                 # Skip large files
@@ -831,11 +843,6 @@ def _extract_folder(
                 if not content.strip():
                     continue
 
-                max_mtime = max(max_mtime, mtime)
-
-                # Relative path from folder root
-                rel_path = os.path.relpath(file_path, folder_path)
-
                 files.append({
                     "path": rel_path,
                     "content": content,
@@ -846,8 +853,11 @@ def _extract_folder(
                     },
                 })
                 extracted_count += 1
+                if mtime > max_mtime or (mtime == max_mtime and rel_path > max_path):
+                    max_mtime = mtime
+                    max_path = rel_path
 
-            except (PermissionError, IOError) as e:
+            except (PermissionError, IOError, OSError, UnicodeDecodeError) as e:
                 logger.warning(f"Could not read {file_path}: {e}")
                 continue
 
@@ -855,7 +865,7 @@ def _extract_folder(
 
     return {
         "files": files,
-        "cursor": {"last_mtime": max_mtime},
+        "cursor": {"last_mtime": max_mtime, "last_path": max_path},
         "stats": {"extracted": len(files), "db_type": TYPE_FOLDER},
     }
 
@@ -871,7 +881,7 @@ def _extract_generic_db(
 
     skip_tables = {"sqlite_sequence", "sqlite_stat1", "sqlite_stat4"}
 
-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     cur = conn.cursor()
 
     cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
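The folder-extraction changes above turn the sync cursor into a (last_mtime, last_path) pair and sort directory listings, so files sharing the same modification time are neither skipped nor re-uploaded forever. A tiny sketch of that ordering with illustrative names and values:

# Only entries strictly "after" the cursor in (mtime, path) order are re-sent.
def is_after_cursor(mtime: float, rel_path: str, last_mtime: float, last_path: str) -> bool:
    if mtime < last_mtime:
        return False
    if mtime == last_mtime and rel_path <= last_path:
        return False
    return True

cursor = {"last_mtime": 100.0, "last_path": "b.txt"}
for rel_path, mtime in [("a.txt", 100.0), ("c.txt", 100.0), ("d.txt", 101.0)]:
    print(rel_path, is_after_cursor(mtime, rel_path, cursor["last_mtime"], cursor["last_path"]))
# a.txt False (same mtime, path not after "b.txt"), c.txt True, d.txt True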
main.py CHANGED
@@ -13,14 +13,25 @@ Usage:
 """
 import os
 import typer
+import httpx
+import logging
 from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
 
 from auth import login as do_login, logout as do_logout, is_authenticated, get_api_key
-from config import get_sources, add_source, remove_source, enable_source_sync, NIA_SYNC_DIR, find_folder_path
+from config import get_sources, add_source, remove_source, enable_source_sync, NIA_SYNC_DIR, find_folder_path, API_BASE_URL, get_api_key
 from sync import sync_all_sources
-from extractor import detect_source_type
+from extractor import (
+    detect_source_type,
+    TYPE_FOLDER,
+    TYPE_TELEGRAM,
+    TYPE_GENERIC_DB,
+    TYPE_IMESSAGE,
+    TYPE_SAFARI_HISTORY,
+    TYPE_CHROME_HISTORY,
+    TYPE_FIREFOX_HISTORY,
+)
 
 app = typer.Typer(
     name="nia",
@@ -30,6 +41,7 @@ app = typer.Typer(
     epilog="[dim]Quick start: [cyan]nia login[/cyan] → [cyan]nia status[/cyan] → [cyan]nia[/cyan][/dim]",
 )
 console = Console()
+logger = logging.getLogger(__name__)
 
 
 @app.callback(invoke_without_command=True)
@@ -71,6 +83,14 @@ KNOWN_PATHS = {
     "firefox_history": "~/Library/Application Support/Firefox/Profiles/*/places.sqlite",
 }
 
+DB_SOURCE_TYPES = {
+    TYPE_IMESSAGE,
+    TYPE_SAFARI_HISTORY,
+    TYPE_CHROME_HISTORY,
+    TYPE_FIREFOX_HISTORY,
+    TYPE_GENERIC_DB,
+}
+
 
 def _check_local_sources():
     """Check for indexed sources that exist locally and can be synced."""
@@ -261,8 +281,10 @@ def add(path: str = typer.Argument(..., help="Path to sync (folder or database)"
     result = add_source(path, detected_type)
 
     if result:
+        folder_id = result.get('local_folder_id', '')
+        short_id = folder_id[:8] if folder_id else 'unknown'
         console.print(f"[green]✓ Added:[/green] {result.get('display_name', path)}")
-        console.print(f"[dim]ID: {result.get('local_folder_id')[:8]}[/dim]")
+        console.print(f"[dim]ID: {short_id}[/dim]")
         console.print("\n[dim]Run [cyan]nia[/cyan] to start syncing.[/dim]")
     else:
         console.print("[red]Failed to add source.[/red]")
@@ -379,6 +401,18 @@ def _resolve_sources(sources: list[dict], log_discoveries: bool = False) -> list
     return resolved
 
 
+def _get_watched_files(source: dict) -> set[str] | None:
+    path = source.get("path")
+    detected_type = source.get("detected_type")
+    if not path or not detected_type:
+        return None
+    if detected_type in DB_SOURCE_TYPES:
+        expanded = os.path.abspath(os.path.expanduser(path))
+        watched = {expanded, f"{expanded}-wal", f"{expanded}-shm"}
+        return watched
+    return None
+
+
 @app.command(name="start", hidden=True)
 def daemon(
     watch: bool = typer.Option(True, "--watch/--poll", help="File watching (default) or polling"),
@@ -399,6 +433,9 @@ def daemon(
     pending_syncs: set[str] = set()  # source_ids pending sync
     sync_lock = threading.Lock()
     sources_by_id: dict[str, dict] = {}
+    last_sync_times: dict[str, float] = {}
+    last_heartbeat_time = 0.0
+    heartbeat_interval = 30
 
     def handle_signal(signum, frame):
         nonlocal running
@@ -440,6 +477,7 @@ def daemon(
             if added > 0:
                 total_added += added
                 console.print(f"[green]✓ {src.get('display_name', 'Unknown')}[/green] - {added} items synced")
+                last_sync_times[source_id] = time.time()
             else:
                 error = result.get("error", "unknown error")
                 errors.append(f"{src.get('display_name', 'Unknown')}: {error}")
@@ -471,15 +509,19 @@ def daemon(
         # Add new watchers
         for source_id in new_source_ids - current_watching:
             src = new_sources_by_id[source_id]
-            if watcher.watch(source_id, src["path"], on_source_changed):
+            watched_files = _get_watched_files(src)
+            if watcher.watch(source_id, src["path"], on_source_changed, watched_files=watched_files):
                 console.print(f" [dim]+ Watching {src.get('display_name', 'Unknown')}[/dim]")
                 newly_added.append(source_id)
 
         # Remove old watchers (source deleted from UI)
         for source_id in current_watching - new_source_ids:
             old_name = sources_by_id.get(source_id, {}).get("display_name", source_id[:8])
-            watcher.unwatch(source_id)
-            console.print(f" [dim]- Stopped watching {old_name}[/dim]")
+            try:
+                watcher.unwatch(source_id)
+                console.print(f" [dim]- Stopped watching {old_name}[/dim]")
+            except Exception as e:
+                logger.warning(f"Failed to unwatch {old_name}: {e}")
 
         sources_by_id = new_sources_by_id
         return resolved, newly_added
@@ -554,6 +596,19 @@ def daemon(
         # Process any pending syncs from file watcher
         sync_pending_sources()
 
+        # Heartbeat to backend to mark daemon online
+        now = time.time()
+        if now - last_heartbeat_time >= heartbeat_interval:
+            _send_heartbeat(list(sources_by_id.keys()))
+            last_heartbeat_time = now
+
+        # Sanity sync to catch missed events
+        if fallback_interval > 0:
+            for source_id in list(sources_by_id.keys()):
+                last_sync = last_sync_times.get(source_id, 0)
+                if now - last_sync >= fallback_interval:
+                    pending_syncs.add(source_id)
+
         # Instant refresh if new folder detected matching an unlinked source
         if refresh_triggered.is_set():
             refresh_triggered.clear()
@@ -599,6 +654,7 @@ def daemon(
     sync_count = 0
     while running:
         resolved, _ = refresh_sources()
+        _send_heartbeat([src["local_folder_id"] for src in resolved])
 
         sync_count += 1
         console.print(f"\n[bold]Sync #{sync_count}[/bold] - {len(resolved)} source(s)")
@@ -628,5 +684,22 @@ def daemon(
     console.print("[green]✓ Stopped[/green]")
 
 
+def _send_heartbeat(source_ids: list[str]) -> None:
+    if not source_ids:
+        return
+    api_key = get_api_key()
+    if not api_key:
+        return
+    try:
+        with httpx.Client(timeout=10) as client:
+            client.post(
+                f"{API_BASE_URL}/v2/daemon/heartbeat",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"source_ids": source_ids},
+            )
+    except Exception:
+        logger.debug("Heartbeat failed", exc_info=True)
+
+
 if __name__ == "__main__":
     app()
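_get_watched_files maps a database-type source to the exact files SQLite writes in WAL mode: the database itself plus its -wal and -shm siblings, which is what lets the watcher react to extension-less files such as Chrome's History. A sketch of the resulting set, assuming a typical (illustrative) Chrome profile path:

# Illustrative reconstruction of the watched-file set for a DB source.
import os

def watched_files_for(path: str) -> set[str]:
    expanded = os.path.abspath(os.path.expanduser(path))
    return {expanded, f"{expanded}-wal", f"{expanded}-shm"}

print(watched_files_for("~/Library/Application Support/Google/Chrome/Default/History"))
# e.g. {'.../History', '.../History-wal', '.../History-shm'}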
METADATA CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: nia-sync
-Version: 0.1.0
-Summary: Keep your local files in sync with Nia
+Version: 0.1.2
+Summary: Keep your local files in sync with Nia Cloud
 Requires-Python: >=3.10
 Requires-Dist: typer>=0.9.0
 Requires-Dist: rich>=13.0.0
nia_sync-0.1.2.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+auth.py,sha256=n0ezRqIbz3kZYcyIjH47_MDKgwNgAaVr0Y2NEfRxa50,5704
+config.py,sha256=JWxdL8INKo23lam4F49ZbllxbW3yorqczm7aaqepQCc,7507
+extractor.py,sha256=Y5Gc0AThTH9qMey4fYEqv4RosAqZl7GleKNMYs5ogxY,29181
+main.py,sha256=NtAkI083bZIXZp9WVGaCunveV8UrR0ZBctBkBhrVf24,25168
+sync.py,sha256=OrbjptK223zbzeelSTdZIF3O4gNQd5DLK8XXv-rnMXU,9583
+watcher.py,sha256=JmsN9uR7Ss1mDC-kApXL6Hg_wQZWTsO7rRIFkQu8GbM,9978
+nia_sync-0.1.2.dist-info/METADATA,sha256=Ihc9yJ1P_J0e28skRXVUgnh6PXdltk59Zcijw9YZrt8,246
+nia_sync-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nia_sync-0.1.2.dist-info/entry_points.txt,sha256=Fx8TIOgXqWdZzZEkEateDtcNfgnwuPW4jZTqlEUrHVs,33
+nia_sync-0.1.2.dist-info/top_level.txt,sha256=_ZWBugSHWwSpLXYJAcF6TlWmzECu18k0y1-EX27jtBw,40
+nia_sync-0.1.2.dist-info/RECORD,,
sync.py CHANGED
@@ -8,16 +8,36 @@ Handles:
 """
 import os
 import logging
+import random
+import time
 from pathlib import Path
 from typing import Any
 import httpx
 
-from config import API_BASE_URL, get_api_key, enable_source_sync
+from config import API_BASE_URL, get_api_key
 from extractor import extract_incremental, detect_source_type
 
 logger = logging.getLogger(__name__)
 
-SYNC_TIMEOUT = 120  # 2 minutes per sync request
+SYNC_TIMEOUT = 60  # 1 minute per sync request (reduced from 2 min)
+CONNECT_TIMEOUT = 10  # 10 second connection timeout
+MAX_FILES_PER_BATCH = 500  # Keep below backend limit (1000)
+MAX_RETRIES = 4
+RETRY_BASE_DELAY = 1.5
+RETRY_MAX_DELAY = 15.0
+
+# Reusable client for connection pooling
+_http_client: httpx.Client | None = None
+
+def get_http_client() -> httpx.Client:
+    """Get or create HTTP client with connection pooling."""
+    global _http_client
+    if _http_client is None:
+        _http_client = httpx.Client(
+            timeout=httpx.Timeout(SYNC_TIMEOUT, connect=CONNECT_TIMEOUT),
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
+        )
+    return _http_client
 
 
 def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -63,17 +83,14 @@ def sync_source(source: dict[str, Any]) -> dict[str, Any]:
 
     # Validate path exists
     if not os.path.exists(path):
+        error_message = f"Path does not exist: {path}"
+        report_sync_error(local_folder_id, error_message, path)
         return {
             "path": path,
             "status": "error",
-            "error": f"Path does not exist: {path}",
+            "error": error_message,
         }
 
-    # Auto-enable sync if source exists locally but sync not enabled
-    if not source.get("sync_enabled", False):
-        logger.info(f"Auto-enabling sync for {path}")
-        enable_source_sync(local_folder_id, path)
-
     # Auto-detect type if not specified
     if not detected_type:
         detected_type = detect_source_type(path)
@@ -101,8 +118,8 @@ def sync_source(source: dict[str, Any]) -> dict[str, Any]:
                 "message": "No new data",
             }
 
-        # Upload to backend
-        upload_result = upload_sync_data(
+        # Upload to backend in batches
+        upload_result = upload_sync_batches(
             local_folder_id=local_folder_id,
             files=files,
             cursor=new_cursor,
@@ -120,6 +137,7 @@ def sync_source(source: dict[str, Any]) -> dict[str, Any]:
                 "new_cursor": new_cursor,
             }
         else:
+            report_sync_error(local_folder_id, upload_result.get("message", "Upload failed"), path)
             return {
                 "path": path,
                 "status": "error",
@@ -127,13 +145,16 @@ def sync_source(source: dict[str, Any]) -> dict[str, Any]:
             }
 
     except PermissionError:
+        error_message = "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security."
+        report_sync_error(local_folder_id, error_message, path)
         return {
             "path": path,
             "status": "error",
-            "error": "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security.",
+            "error": error_message,
         }
     except Exception as e:
         logger.error(f"Error syncing {path}: {e}", exc_info=True)
+        report_sync_error(local_folder_id, str(e), path)
         return {
             "path": path,
             "status": "error",
@@ -146,6 +167,7 @@ def upload_sync_data(
     files: list[dict[str, Any]],
     cursor: dict[str, Any],
     stats: dict[str, Any],
+    is_final_batch: bool = True,
 ) -> dict[str, Any]:
     """
     Upload extracted data to the cloud API.
@@ -164,29 +186,120 @@ def upload_sync_data(
         return {"status": "error", "message": "Not authenticated"}
 
     try:
-        with httpx.Client(timeout=SYNC_TIMEOUT) as client:
-            response = client.post(
-                f"{API_BASE_URL}/v2/daemon/sync",
-                headers={"Authorization": f"Bearer {api_key}"},
-                json={
-                    "local_folder_id": local_folder_id,
-                    "files": files,
-                    "cursor": cursor,
-                    "stats": stats,
-                },
-            )
-
-            if response.status_code == 200:
-                return response.json()
-            elif response.status_code == 401:
-                return {"status": "error", "message": "Authentication failed"}
-            elif response.status_code == 404:
-                return {"status": "error", "message": "Local folder not found"}
-            else:
+        client = get_http_client()
+        response = _post_with_retries(
+            client=client,
+            url=f"{API_BASE_URL}/v2/daemon/sync",
+            headers={"Authorization": f"Bearer {api_key}"},
+            payload={
+                "local_folder_id": local_folder_id,
+                "files": files,
+                "cursor": cursor,
+                "stats": stats,
+                "is_final_batch": is_final_batch,
+            },
+        )
+
+        if response is None:
+            return {"status": "error", "message": "Request failed after retries"}
+
+        if response.status_code == 200:
+            return response.json()
+        elif response.status_code == 401:
+            return {"status": "error", "message": "Authentication failed"}
+        elif response.status_code == 404:
+            return {"status": "error", "message": "Local folder not found"}
+        else:
+            try:
                 detail = response.json().get("detail", response.text)
-                return {"status": "error", "message": f"API error: {detail}"}
+            except ValueError:
+                detail = response.text or f"HTTP {response.status_code}"
+            return {"status": "error", "message": f"API error: {detail}"}
 
     except httpx.TimeoutException:
         return {"status": "error", "message": "Request timeout"}
     except httpx.RequestError as e:
         return {"status": "error", "message": f"Network error: {e}"}
+
+
+def upload_sync_batches(
+    local_folder_id: str,
+    files: list[dict[str, Any]],
+    cursor: dict[str, Any],
+    stats: dict[str, Any],
+) -> dict[str, Any]:
+    """Upload files in batches and only advance cursor after all succeed."""
+    if not files:
+        return {"status": "ok", "chunks_indexed": 0}
+
+    total_batches = max(1, (len(files) + MAX_FILES_PER_BATCH - 1) // MAX_FILES_PER_BATCH)
+    chunks_indexed = 0
+
+    for batch_index, batch in enumerate(_iter_batches(files, MAX_FILES_PER_BATCH), start=1):
+        is_last_batch = batch_index == total_batches
+        result = upload_sync_data(
+            local_folder_id=local_folder_id,
+            files=batch,
+            cursor=cursor if is_last_batch else {},
+            stats=stats if is_last_batch else {},
+            is_final_batch=is_last_batch,
+        )
+
+        if result.get("status") != "ok":
+            return result
+
+        chunks_indexed += result.get("chunks_indexed", 0)
+
+    return {"status": "ok", "chunks_indexed": chunks_indexed}
+
+
+def report_sync_error(local_folder_id: str | None, error: str, path: str | None = None) -> None:
+    """Report local sync errors to backend for UI visibility."""
+    if not local_folder_id:
+        return
+    api_key = get_api_key()
+    if not api_key:
+        return
+
+    try:
+        client = get_http_client()
+        _post_with_retries(
+            client=client,
+            url=f"{API_BASE_URL}/v2/daemon/sources/{local_folder_id}/error",
+            headers={"Authorization": f"Bearer {api_key}"},
+            payload={"error": error, "path": path},
+        )
+    except Exception:
+        logger.debug("Failed to report sync error", exc_info=True)
+
+
+def _iter_batches(items: list[dict[str, Any]], size: int):
+    for i in range(0, len(items), size):
+        yield items[i:i + size]
+
+
+def _post_with_retries(
+    client: httpx.Client,
+    url: str,
+    headers: dict[str, str],
+    payload: dict[str, Any],
+) -> httpx.Response | None:
+    delay = RETRY_BASE_DELAY
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = client.post(url, headers=headers, json=payload)
+            if response.status_code in {429} or response.status_code >= 500:
+                raise httpx.HTTPStatusError(
+                    f"Retryable status {response.status_code}",
+                    request=response.request,
+                    response=response,
+                )
+            return response
+        except (httpx.TimeoutException, httpx.RequestError, httpx.HTTPStatusError) as e:
+            is_last_attempt = attempt >= MAX_RETRIES - 1
+            if is_last_attempt:
+                logger.warning(f"POST failed after retries: {e}")
+                return None
+            jitter = random.uniform(0.8, 1.2)
+            time.sleep(min(RETRY_MAX_DELAY, delay) * jitter)
+            delay *= 2
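With the constants introduced above, _post_with_retries makes up to MAX_RETRIES attempts and sleeps between failures with exponential backoff, capped at RETRY_MAX_DELAY and scaled by ±20% jitter. A worked sketch of the schedule those numbers imply (jitter omitted):

# Backoff schedule for MAX_RETRIES=4, RETRY_BASE_DELAY=1.5, RETRY_MAX_DELAY=15.0.
MAX_RETRIES = 4
RETRY_BASE_DELAY = 1.5
RETRY_MAX_DELAY = 15.0

delay = RETRY_BASE_DELAY
for attempt in range(MAX_RETRIES - 1):   # no sleep after the final attempt
    print(f"attempt {attempt + 1} failed -> sleep {min(RETRY_MAX_DELAY, delay):.1f}s (±20% jitter)")
    delay *= 2
# attempt 1 -> 1.5s, attempt 2 -> 3.0s, attempt 3 -> 6.0s; attempt 4 gives up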
watcher.py CHANGED
@@ -7,7 +7,7 @@ with debouncing to prevent rapid-fire updates.
 import os
 import threading
 import logging
-from typing import Callable
+from typing import Any, Callable
 from pathlib import Path
 
 from watchdog.observers import Observer
@@ -41,6 +41,7 @@ class SyncEventHandler(FileSystemEventHandler):
         source_path: str,
         on_change: Callable[[str], None],
         debounce_sec: float = 2.0,
+        watched_files: set[str] | None = None,
     ):
         super().__init__()
         self.source_id = source_id
@@ -51,8 +52,13 @@ class SyncEventHandler(FileSystemEventHandler):
         self._lock = threading.Lock()
         self._pending_changes = 0
 
+        # Watch specific files if provided (e.g., DB files without extensions)
+        if watched_files:
+            self._watched_files = {
+                os.path.abspath(os.path.expanduser(p)) for p in watched_files
+            }
         # For database files, also watch the WAL/SHM files
-        if self.source_path.endswith('.db'):
+        elif self.source_path.endswith('.db'):
             self._watched_files = {
                 self.source_path,
                 self.source_path + '-wal',
@@ -148,7 +154,7 @@ class FileWatcher:
         self.debounce_sec = debounce_sec
         self.observer = Observer()
         self.handlers: dict[str, SyncEventHandler] = {}
-        self._watches: dict[str, any] = {}
+        self._watches: dict[str, Any] = {}
         self._lock = threading.Lock()
         self._started = False
 
@@ -157,6 +163,7 @@ class FileWatcher:
         source_id: str,
         path: str,
         on_change: Callable[[str], None],
+        watched_files: set[str] | None = None,
     ) -> bool:
         """
         Add a path to watch.
@@ -178,9 +185,8 @@ class FileWatcher:
         # Expand path
         expanded = os.path.expanduser(path)
 
-        # For database files, watch the parent directory
-        # to catch .db-wal changes
-        if expanded.endswith('.db'):
+        # For database files (or explicit watched files), watch the parent directory
+        if watched_files or expanded.endswith('.db'):
             watch_path = os.path.dirname(expanded)
         else:
             watch_path = expanded
@@ -196,6 +202,7 @@ class FileWatcher:
             source_path=expanded,
             on_change=on_change,
             debounce_sec=self.debounce_sec,
+            watched_files=watched_files,
         )
 
         # Schedule watch
nia_sync-0.1.0.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-auth.py,sha256=_Q-wzLLtinoy8qtTihZNqdRAECPEzVjC5NgCXOLftJ8,5487
-config.py,sha256=tJD3k3mBP9KORg4tX692uKk3Z92tEeYRI8jmgPjY5P0,7504
-extractor.py,sha256=GxKmBTq04AxCjhtimlHZ2Gh3auvEEUyMHT-vM4YqWzI,28570
-main.py,sha256=-ZWWs-5pQx_gR2QLwMrhbmuTKZo_8PQnVoExi85lqUU,22744
-sync.py,sha256=iH9N22NEr2Nt5O6GeDP_X--G6IXhr8Hx3Z2xfJWkXc0,5667
-watcher.py,sha256=BfdGwcfNDJiddUBMIPx61En2tSkLb3YnfxePKjSFQTc,9593
-nia_sync-0.1.0.dist-info/METADATA,sha256=Qm4BGkM9fO0tiZg1AgKmA8-S_8f9674DtZZXBY_u0hc,240
-nia_sync-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nia_sync-0.1.0.dist-info/entry_points.txt,sha256=Fx8TIOgXqWdZzZEkEateDtcNfgnwuPW4jZTqlEUrHVs,33
-nia_sync-0.1.0.dist-info/top_level.txt,sha256=_ZWBugSHWwSpLXYJAcF6TlWmzECu18k0y1-EX27jtBw,40
-nia_sync-0.1.0.dist-info/RECORD,,