nia-sync 0.1.1.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: nia-sync
-Version: 0.1.1
-Summary: Keep your local files in sync with Nia
+Version: 0.1.3
+Summary: Keep your local files in sync with Nia Cloud
 Requires-Python: >=3.10
 Requires-Dist: typer>=0.9.0
 Requires-Dist: rich>=13.0.0
@@ -136,6 +136,11 @@ MAX_ROWS = 100_000
 MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024 # 10MB per file


+def _connect_sqlite_readonly(db_path: str) -> sqlite3.Connection:
+    """Open SQLite database in read-only mode to avoid lock issues."""
+    return sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=1)
+
+
 def detect_source_type(path: str) -> str:
     """
     Auto-detect the type of source based on path and file structure.
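The helper above relies on SQLite's URI filename syntax. A standalone sketch (illustrative only, not part of the package; the temporary path is made up) of why mode=ro avoids lock contention with the application that owns the database: reads still work, while any write fails immediately instead of taking a lock.

import sqlite3

# Seed a throwaway database, then reopen it the way _connect_sqlite_readonly does.
path = "/tmp/example.db"  # hypothetical path
seed = sqlite3.connect(path)
seed.executescript("CREATE TABLE IF NOT EXISTS t (x); INSERT INTO t VALUES (1);")
seed.close()

ro = sqlite3.connect(f"file:{path}?mode=ro", uri=True, timeout=1)
print(ro.execute("SELECT count(*) FROM t").fetchone())  # reads succeed
try:
    ro.execute("INSERT INTO t VALUES (2)")  # writes are rejected up front
except sqlite3.OperationalError as err:
    print(err)  # attempt to write a readonly database
ro.close()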
@@ -177,7 +182,7 @@ def detect_source_type(path: str) -> str:
         return TYPE_FOLDER

     try:
-        conn = sqlite3.connect(path)
+        conn = _connect_sqlite_readonly(path)
         cursor = conn.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
         tables = {row[0].lower() for row in cursor.fetchall()}
@@ -252,7 +257,7 @@ def _extract_imessage(
     max_timestamp = cursor.get("last_timestamp", 0)
     since_rowid = cursor.get("last_rowid")

-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -348,7 +353,7 @@ def _extract_safari_history(
     max_visit_time = cursor.get("last_visit_time", 0)
     since_visit_time = cursor.get("last_visit_time")

-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -439,7 +444,7 @@ def _extract_chrome_history(
     max_visit_time = cursor.get("last_visit_time", 0)
     since_visit_time = cursor.get("last_visit_time")

-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -534,7 +539,7 @@ def _extract_firefox_history(
     max_visit_date = cursor.get("last_visit_date", 0)
     since_visit_date = cursor.get("last_visit_date")

-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
     conn.row_factory = sqlite3.Row
     cur = conn.cursor()

@@ -760,7 +765,9 @@ def _extract_folder(
     """Extract text files from a regular folder with proper exclusion patterns."""
     files = []
     last_mtime = cursor.get("last_mtime", 0)
+    last_path = cursor.get("last_path", "")
     max_mtime = last_mtime
+    max_path = last_path
     extracted_count = 0

     # Allowed text file extensions
@@ -783,6 +790,8 @@
             and not d.startswith(".")
             and not d.endswith(".egg-info")
         ]
+        dirs.sort()
+        filenames.sort()

         for filename in filenames:
             if extracted_count >= limit:
@@ -816,9 +825,12 @@
             try:
                 stat = os.stat(file_path)
                 mtime = stat.st_mtime
+                rel_path = os.path.relpath(file_path, folder_path)

-                # Skip if not modified since last sync
-                if mtime <= last_mtime:
+                # Skip if not modified since last sync (tie-break by path)
+                if mtime < last_mtime:
+                    continue
+                if mtime == last_mtime and rel_path <= last_path:
                     continue

                 # Skip large files
@@ -831,11 +843,6 @@
                 if not content.strip():
                     continue

-                max_mtime = max(max_mtime, mtime)
-
-                # Relative path from folder root
-                rel_path = os.path.relpath(file_path, folder_path)
-
                 files.append({
                     "path": rel_path,
                     "content": content,
@@ -846,6 +853,9 @@
                     },
                 })
                 extracted_count += 1
+                if mtime > max_mtime or (mtime == max_mtime and rel_path > max_path):
+                    max_mtime = mtime
+                    max_path = rel_path

             except (PermissionError, IOError, OSError, UnicodeDecodeError) as e:
                 logger.warning(f"Could not read {file_path}: {e}")
@@ -855,7 +865,7 @@

     return {
         "files": files,
-        "cursor": {"last_mtime": max_mtime},
+        "cursor": {"last_mtime": max_mtime, "last_path": max_path},
         "stats": {"extracted": len(files), "db_type": TYPE_FOLDER},
     }

@@ -871,7 +881,7 @@ def _extract_generic_db(

    skip_tables = {"sqlite_sequence", "sqlite_stat1", "sqlite_stat4"}

-    conn = sqlite3.connect(db_path)
+    conn = _connect_sqlite_readonly(db_path)
    cur = conn.cursor()

    cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
@@ -13,14 +13,25 @@ Usage:
 """
 import os
 import typer
+import httpx
+import logging
 from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table

 from auth import login as do_login, logout as do_logout, is_authenticated, get_api_key
-from config import get_sources, add_source, remove_source, enable_source_sync, NIA_SYNC_DIR, find_folder_path
+from config import get_sources, add_source, remove_source, enable_source_sync, NIA_SYNC_DIR, find_folder_path, API_BASE_URL, get_api_key
 from sync import sync_all_sources
-from extractor import detect_source_type
+from extractor import (
+    detect_source_type,
+    TYPE_FOLDER,
+    TYPE_TELEGRAM,
+    TYPE_GENERIC_DB,
+    TYPE_IMESSAGE,
+    TYPE_SAFARI_HISTORY,
+    TYPE_CHROME_HISTORY,
+    TYPE_FIREFOX_HISTORY,
+)

 app = typer.Typer(
     name="nia",
@@ -30,6 +41,7 @@ app = typer.Typer(
     epilog="[dim]Quick start: [cyan]nia login[/cyan] → [cyan]nia status[/cyan] → [cyan]nia[/cyan][/dim]",
 )
 console = Console()
+logger = logging.getLogger(__name__)


 @app.callback(invoke_without_command=True)
@@ -71,6 +83,14 @@ KNOWN_PATHS = {
     "firefox_history": "~/Library/Application Support/Firefox/Profiles/*/places.sqlite",
 }

+DB_SOURCE_TYPES = {
+    TYPE_IMESSAGE,
+    TYPE_SAFARI_HISTORY,
+    TYPE_CHROME_HISTORY,
+    TYPE_FIREFOX_HISTORY,
+    TYPE_GENERIC_DB,
+}
+

 def _check_local_sources():
     """Check for indexed sources that exist locally and can be synced."""
@@ -381,6 +401,18 @@ def _resolve_sources(sources: list[dict], log_discoveries: bool = False) -> list
     return resolved


+def _get_watched_files(source: dict) -> set[str] | None:
+    path = source.get("path")
+    detected_type = source.get("detected_type")
+    if not path or not detected_type:
+        return None
+    if detected_type in DB_SOURCE_TYPES:
+        expanded = os.path.abspath(os.path.expanduser(path))
+        watched = {expanded, f"{expanded}-wal", f"{expanded}-shm"}
+        return watched
+    return None
+
+
 @app.command(name="start", hidden=True)
 def daemon(
     watch: bool = typer.Option(True, "--watch/--poll", help="File watching (default) or polling"),
@@ -401,6 +433,9 @@
     pending_syncs: set[str] = set() # source_ids pending sync
     sync_lock = threading.Lock()
     sources_by_id: dict[str, dict] = {}
+    last_sync_times: dict[str, float] = {}
+    last_heartbeat_time = 0.0
+    heartbeat_interval = 30

     def handle_signal(signum, frame):
         nonlocal running
@@ -442,6 +477,7 @@
            if added > 0:
                total_added += added
                console.print(f"[green]✓ {src.get('display_name', 'Unknown')}[/green] - {added} items synced")
+                last_sync_times[source_id] = time.time()
            else:
                error = result.get("error", "unknown error")
                errors.append(f"{src.get('display_name', 'Unknown')}: {error}")
@@ -473,7 +509,8 @@
         # Add new watchers
         for source_id in new_source_ids - current_watching:
             src = new_sources_by_id[source_id]
-            if watcher.watch(source_id, src["path"], on_source_changed):
+            watched_files = _get_watched_files(src)
+            if watcher.watch(source_id, src["path"], on_source_changed, watched_files=watched_files):
                 console.print(f" [dim]+ Watching {src.get('display_name', 'Unknown')}[/dim]")
                 newly_added.append(source_id)

@@ -559,6 +596,19 @@
            # Process any pending syncs from file watcher
            sync_pending_sources()

+            # Heartbeat to backend to mark daemon online
+            now = time.time()
+            if now - last_heartbeat_time >= heartbeat_interval:
+                _send_heartbeat(list(sources_by_id.keys()))
+                last_heartbeat_time = now
+
+            # Sanity sync to catch missed events
+            if fallback_interval > 0:
+                for source_id in list(sources_by_id.keys()):
+                    last_sync = last_sync_times.get(source_id, 0)
+                    if now - last_sync >= fallback_interval:
+                        pending_syncs.add(source_id)
+

            # Instant refresh if new folder detected matching an unlinked source
            if refresh_triggered.is_set():
@@ -604,6 +654,7 @@
        sync_count = 0
        while running:
            resolved, _ = refresh_sources()
+            _send_heartbeat([src["local_folder_id"] for src in resolved])

            sync_count += 1
            console.print(f"\n[bold]Sync #{sync_count}[/bold] - {len(resolved)} source(s)")
@@ -633,5 +684,22 @@
     console.print("[green]✓ Stopped[/green]")


+def _send_heartbeat(source_ids: list[str]) -> None:
+    if not source_ids:
+        return
+    api_key = get_api_key()
+    if not api_key:
+        return
+    try:
+        with httpx.Client(timeout=10) as client:
+            client.post(
+                f"{API_BASE_URL}/v2/daemon/heartbeat",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={"source_ids": source_ids},
+            )
+    except Exception:
+        logger.debug("Heartbeat failed", exc_info=True)
+
+
 if __name__ == "__main__":
     app()
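For orientation, the daemon loop now keeps two timers: a roughly 30-second heartbeat and an optional fallback interval that re-queues sources whose last successful sync is too old. A self-contained sketch of that bookkeeping (names mirror the diff; the fallback value and source IDs are invented):

import time

heartbeat_interval = 30      # seconds, as in the diff
fallback_interval = 300      # hypothetical value; 0 would disable the sanity sync
last_heartbeat_time = 0.0
last_sync_times = {"src-a": time.time() - 400, "src-b": time.time() - 10}
pending_syncs: set[str] = set()

now = time.time()
if now - last_heartbeat_time >= heartbeat_interval:
    # the real loop calls _send_heartbeat(list(sources_by_id.keys())) here
    last_heartbeat_time = now

if fallback_interval > 0:
    for source_id, last_sync in last_sync_times.items():
        if now - last_sync >= fallback_interval:
            pending_syncs.add(source_id)

print(pending_syncs)  # {'src-a'}: the stale source is re-queued for sync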
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: nia-sync
-Version: 0.1.1
-Summary: Keep your local files in sync with Nia
+Version: 0.1.3
+Summary: Keep your local files in sync with Nia Cloud
 Requires-Python: >=3.10
 Requires-Dist: typer>=0.9.0
 Requires-Dist: rich>=13.0.0
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "nia-sync"
-version = "0.1.1"
-description = "Keep your local files in sync with Nia"
+version = "0.1.3"
+description = "Keep your local files in sync with Nia Cloud"
 requires-python = ">=3.10"
 dependencies = [
     "typer>=0.9.0",
nia_sync-0.1.3/sync.py ADDED
@@ -0,0 +1,305 @@
+"""
+Sync engine for Nia Local Sync CLI.
+
+Handles:
+- Extracting data from local sources (databases, folders)
+- Uploading to cloud API
+- Cursor management for incremental sync
+"""
+import os
+import logging
+import random
+import time
+from pathlib import Path
+from typing import Any
+import httpx
+
+from config import API_BASE_URL, get_api_key
+from extractor import extract_incremental, detect_source_type
+
+logger = logging.getLogger(__name__)
+
+SYNC_TIMEOUT = 60 # 1 minute per sync request (reduced from 2 min)
+CONNECT_TIMEOUT = 10 # 10 second connection timeout
+MAX_FILES_PER_BATCH = 500 # Keep below backend limit (1000)
+MAX_RETRIES = 4
+RETRY_BASE_DELAY = 1.5
+RETRY_MAX_DELAY = 15.0
+
+# Reusable client for connection pooling
+_http_client: httpx.Client | None = None
+
+def get_http_client() -> httpx.Client:
+    """Get or create HTTP client with connection pooling."""
+    global _http_client
+    if _http_client is None:
+        _http_client = httpx.Client(
+            timeout=httpx.Timeout(SYNC_TIMEOUT, connect=CONNECT_TIMEOUT),
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
+        )
+    return _http_client
+
+
+def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """
+    Sync all configured sources.
+
+    Args:
+        sources: List of source configs from cloud API
+
+    Returns:
+        List of results for each source
+    """
+    results = []
+
+    for source in sources:
+        result = sync_source(source)
+        results.append(result)
+
+    return results
+
+
+def sync_source(source: dict[str, Any]) -> dict[str, Any]:
+    """
+    Sync a single source.
+
+    Args:
+        source: Source config from cloud API with:
+            - local_folder_id: UUID of the local folder
+            - path: Local path to sync
+            - detected_type: Type of source
+            - cursor: Current sync cursor
+
+    Returns:
+        Result dict with status, path, and stats
+    """
+    local_folder_id = source.get("local_folder_id")
+    path = source.get("path", "")
+    detected_type = source.get("detected_type")
+    cursor = source.get("cursor", {})
+
+    # Expand ~ in path
+    path = os.path.expanduser(path)
+
+    # Validate path exists
+    if not os.path.exists(path):
+        error_message = f"Path does not exist: {path}"
+        report_sync_error(local_folder_id, error_message, path)
+        return {
+            "path": path,
+            "status": "error",
+            "error": error_message,
+        }
+
+    # Auto-detect type if not specified
+    if not detected_type:
+        detected_type = detect_source_type(path)
+
+    logger.info(f"Syncing {path} (type={detected_type})")
+
+    try:
+        # Extract data incrementally
+        extraction_result = extract_incremental(
+            path=path,
+            source_type=detected_type,
+            cursor=cursor,
+        )
+
+        files = extraction_result.get("files", [])
+        new_cursor = extraction_result.get("cursor", {})
+        stats = extraction_result.get("stats", {})
+
+        if not files:
+            logger.info(f"No new data to sync for {path}")
+            return {
+                "path": path,
+                "status": "success",
+                "added": 0,
+                "message": "No new data",
+            }
+
+        # Upload to backend in batches
+        upload_result = upload_sync_batches(
+            local_folder_id=local_folder_id,
+            files=files,
+            cursor=new_cursor,
+            stats=stats,
+        )
+
+        if upload_result.get("status") == "ok":
+            # Update source cursor in-place so subsequent syncs use it
+            source["cursor"] = new_cursor
+            return {
+                "path": path,
+                "status": "success",
+                "added": len(files),
+                "chunks_indexed": upload_result.get("chunks_indexed", 0),
+                "new_cursor": new_cursor,
+            }
+        else:
+            report_sync_error(local_folder_id, upload_result.get("message", "Upload failed"), path)
+            return {
+                "path": path,
+                "status": "error",
+                "error": upload_result.get("message", "Upload failed"),
+            }
+
+    except PermissionError:
+        error_message = "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security."
+        report_sync_error(local_folder_id, error_message, path)
+        return {
+            "path": path,
+            "status": "error",
+            "error": error_message,
+        }
+    except Exception as e:
+        logger.error(f"Error syncing {path}: {e}", exc_info=True)
+        report_sync_error(local_folder_id, str(e), path)
+        return {
+            "path": path,
+            "status": "error",
+            "error": str(e),
+        }
+
+
+def upload_sync_data(
+    local_folder_id: str,
+    files: list[dict[str, Any]],
+    cursor: dict[str, Any],
+    stats: dict[str, Any],
+    is_final_batch: bool = True,
+) -> dict[str, Any]:
+    """
+    Upload extracted data to the cloud API.
+
+    Args:
+        local_folder_id: UUID of the local folder
+        files: List of extracted files with path, content, metadata
+        cursor: New cursor after extraction
+        stats: Extraction stats
+
+    Returns:
+        API response dict
+    """
+    api_key = get_api_key()
+    if not api_key:
+        return {"status": "error", "message": "Not authenticated"}
+
+    try:
+        client = get_http_client()
+        response = _post_with_retries(
+            client=client,
+            url=f"{API_BASE_URL}/v2/daemon/sync",
+            headers={"Authorization": f"Bearer {api_key}"},
+            payload={
+                "local_folder_id": local_folder_id,
+                "files": files,
+                "cursor": cursor,
+                "stats": stats,
+                "is_final_batch": is_final_batch,
+            },
+        )
+
+        if response is None:
+            return {"status": "error", "message": "Request failed after retries"}
+
+        if response.status_code == 200:
+            return response.json()
+        elif response.status_code == 401:
+            return {"status": "error", "message": "Authentication failed"}
+        elif response.status_code == 404:
+            return {"status": "error", "message": "Local folder not found"}
+        else:
+            try:
+                detail = response.json().get("detail", response.text)
+            except ValueError:
+                detail = response.text or f"HTTP {response.status_code}"
+            return {"status": "error", "message": f"API error: {detail}"}
+
+    except httpx.TimeoutException:
+        return {"status": "error", "message": "Request timeout"}
+    except httpx.RequestError as e:
+        return {"status": "error", "message": f"Network error: {e}"}
+
+
+def upload_sync_batches(
+    local_folder_id: str,
+    files: list[dict[str, Any]],
+    cursor: dict[str, Any],
+    stats: dict[str, Any],
+) -> dict[str, Any]:
+    """Upload files in batches and only advance cursor after all succeed."""
+    if not files:
+        return {"status": "ok", "chunks_indexed": 0}
+
+    total_batches = max(1, (len(files) + MAX_FILES_PER_BATCH - 1) // MAX_FILES_PER_BATCH)
+    chunks_indexed = 0
+
+    for batch_index, batch in enumerate(_iter_batches(files, MAX_FILES_PER_BATCH), start=1):
+        is_last_batch = batch_index == total_batches
+        result = upload_sync_data(
+            local_folder_id=local_folder_id,
+            files=batch,
+            cursor=cursor if is_last_batch else {},
+            stats=stats if is_last_batch else {},
+            is_final_batch=is_last_batch,
+        )
+
+        if result.get("status") != "ok":
+            return result
+
+        chunks_indexed += result.get("chunks_indexed", 0)
+
+    return {"status": "ok", "chunks_indexed": chunks_indexed}
+
+
+def report_sync_error(local_folder_id: str | None, error: str, path: str | None = None) -> None:
+    """Report local sync errors to backend for UI visibility."""
+    if not local_folder_id:
+        return
+    api_key = get_api_key()
+    if not api_key:
+        return
+
+    try:
+        client = get_http_client()
+        _post_with_retries(
+            client=client,
+            url=f"{API_BASE_URL}/v2/daemon/sources/{local_folder_id}/error",
+            headers={"Authorization": f"Bearer {api_key}"},
+            payload={"error": error, "path": path},
+        )
+    except Exception:
+        logger.debug("Failed to report sync error", exc_info=True)
+
+
+def _iter_batches(items: list[dict[str, Any]], size: int):
+    for i in range(0, len(items), size):
+        yield items[i:i + size]
+
+
+def _post_with_retries(
+    client: httpx.Client,
+    url: str,
+    headers: dict[str, str],
+    payload: dict[str, Any],
+) -> httpx.Response | None:
+    delay = RETRY_BASE_DELAY
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = client.post(url, headers=headers, json=payload)
+            if response.status_code in {429} or response.status_code >= 500:
+                raise httpx.HTTPStatusError(
+                    f"Retryable status {response.status_code}",
+                    request=response.request,
+                    response=response,
+                )
+            return response
+        except (httpx.TimeoutException, httpx.RequestError, httpx.HTTPStatusError) as e:
+            is_last_attempt = attempt >= MAX_RETRIES - 1
+            if is_last_attempt:
+                logger.warning(f"POST failed after retries: {e}")
+                return None
+            jitter = random.uniform(0.8, 1.2)
+            time.sleep(min(RETRY_MAX_DELAY, delay) * jitter)
+            delay *= 2
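Rough illustration of the retry policy defined by the constants above: a persistently failing POST sleeps about 1.5 s, 3 s and 6 s (each with roughly ±20% jitter, capped at 15 s) before the fourth and final attempt returns None.

import random

MAX_RETRIES = 4
RETRY_BASE_DELAY = 1.5
RETRY_MAX_DELAY = 15.0

delay = RETRY_BASE_DELAY
for attempt in range(MAX_RETRIES - 1):  # no sleep after the last attempt
    jitter = random.uniform(0.8, 1.2)
    print(f"attempt {attempt + 1} failed -> sleeping {min(RETRY_MAX_DELAY, delay) * jitter:.1f}s")
    delay *= 2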
@@ -41,6 +41,7 @@ class SyncEventHandler(FileSystemEventHandler):
         source_path: str,
         on_change: Callable[[str], None],
         debounce_sec: float = 2.0,
+        watched_files: set[str] | None = None,
     ):
         super().__init__()
         self.source_id = source_id
@@ -51,8 +52,13 @@ class SyncEventHandler(FileSystemEventHandler):
         self._lock = threading.Lock()
         self._pending_changes = 0

+        # Watch specific files if provided (e.g., DB files without extensions)
+        if watched_files:
+            self._watched_files = {
+                os.path.abspath(os.path.expanduser(p)) for p in watched_files
+            }
         # For database files, also watch the WAL/SHM files
-        if self.source_path.endswith('.db'):
+        elif self.source_path.endswith('.db'):
             self._watched_files = {
                 self.source_path,
                 self.source_path + '-wal',
@@ -157,6 +163,7 @@ class FileWatcher:
         source_id: str,
         path: str,
         on_change: Callable[[str], None],
+        watched_files: set[str] | None = None,
     ) -> bool:
         """
         Add a path to watch.
@@ -178,9 +185,8 @@ class FileWatcher:
         # Expand path
         expanded = os.path.expanduser(path)

-        # For database files, watch the parent directory
-        # to catch .db-wal changes
-        if expanded.endswith('.db'):
+        # For database files (or explicit watched files), watch the parent directory
+        if watched_files or expanded.endswith('.db'):
             watch_path = os.path.dirname(expanded)
         else:
             watch_path = expanded
@@ -196,6 +202,7 @@ class FileWatcher:
             source_path=expanded,
             on_change=on_change,
             debounce_sec=self.debounce_sec,
+            watched_files=watched_files,
         )

         # Schedule watch
nia_sync-0.1.1/sync.py DELETED
@@ -1,187 +0,0 @@
-"""
-Sync engine for Nia Local Sync CLI.
-
-Handles:
-- Extracting data from local sources (databases, folders)
-- Uploading to cloud API
-- Cursor management for incremental sync
-"""
-import os
-import logging
-from pathlib import Path
-from typing import Any
-import httpx
-
-from config import API_BASE_URL, get_api_key
-from extractor import extract_incremental, detect_source_type
-
-logger = logging.getLogger(__name__)
-
-SYNC_TIMEOUT = 120 # 2 minutes per sync request
-
-
-def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """
-    Sync all configured sources.
-
-    Args:
-        sources: List of source configs from cloud API
-
-    Returns:
-        List of results for each source
-    """
-    results = []
-
-    for source in sources:
-        result = sync_source(source)
-        results.append(result)
-
-    return results
-
-
-def sync_source(source: dict[str, Any]) -> dict[str, Any]:
-    """
-    Sync a single source.
-
-    Args:
-        source: Source config from cloud API with:
-            - local_folder_id: UUID of the local folder
-            - path: Local path to sync
-            - detected_type: Type of source
-            - cursor: Current sync cursor
-
-    Returns:
-        Result dict with status, path, and stats
-    """
-    local_folder_id = source.get("local_folder_id")
-    path = source.get("path", "")
-    detected_type = source.get("detected_type")
-    cursor = source.get("cursor", {})
-
-    # Expand ~ in path
-    path = os.path.expanduser(path)
-
-    # Validate path exists
-    if not os.path.exists(path):
-        return {
-            "path": path,
-            "status": "error",
-            "error": f"Path does not exist: {path}",
-        }
-
-    # Auto-detect type if not specified
-    if not detected_type:
-        detected_type = detect_source_type(path)
-
-    logger.info(f"Syncing {path} (type={detected_type})")
-
-    try:
-        # Extract data incrementally
-        extraction_result = extract_incremental(
-            path=path,
-            source_type=detected_type,
-            cursor=cursor,
-        )
-
-        files = extraction_result.get("files", [])
-        new_cursor = extraction_result.get("cursor", {})
-        stats = extraction_result.get("stats", {})
-
-        if not files:
-            logger.info(f"No new data to sync for {path}")
-            return {
-                "path": path,
-                "status": "success",
-                "added": 0,
-                "message": "No new data",
-            }
-
-        # Upload to backend
-        upload_result = upload_sync_data(
-            local_folder_id=local_folder_id,
-            files=files,
-            cursor=new_cursor,
-            stats=stats,
-        )
-
-        if upload_result.get("status") == "ok":
-            # Update source cursor in-place so subsequent syncs use it
-            source["cursor"] = new_cursor
-            return {
-                "path": path,
-                "status": "success",
-                "added": len(files),
-                "chunks_indexed": upload_result.get("chunks_indexed", 0),
-                "new_cursor": new_cursor,
-            }
-        else:
-            return {
-                "path": path,
-                "status": "error",
-                "error": upload_result.get("message", "Upload failed"),
-            }
-
-    except PermissionError:
-        return {
-            "path": path,
-            "status": "error",
-            "error": "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security.",
-        }
-    except Exception as e:
-        logger.error(f"Error syncing {path}: {e}", exc_info=True)
-        return {
-            "path": path,
-            "status": "error",
-            "error": str(e),
-        }
-
-
-def upload_sync_data(
-    local_folder_id: str,
-    files: list[dict[str, Any]],
-    cursor: dict[str, Any],
-    stats: dict[str, Any],
-) -> dict[str, Any]:
-    """
-    Upload extracted data to the cloud API.
-
-    Args:
-        local_folder_id: UUID of the local folder
-        files: List of extracted files with path, content, metadata
-        cursor: New cursor after extraction
-        stats: Extraction stats
-
-    Returns:
-        API response dict
-    """
-    api_key = get_api_key()
-    if not api_key:
-        return {"status": "error", "message": "Not authenticated"}
-
-    try:
-        with httpx.Client(timeout=SYNC_TIMEOUT) as client:
-            response = client.post(
-                f"{API_BASE_URL}/v2/daemon/sync",
-                headers={"Authorization": f"Bearer {api_key}"},
-                json={
-                    "local_folder_id": local_folder_id,
-                    "files": files,
-                    "cursor": cursor,
-                    "stats": stats,
-                },
-            )
-
-        if response.status_code == 200:
-            return response.json()
-        elif response.status_code == 401:
-            return {"status": "error", "message": "Authentication failed"}
-        elif response.status_code == 404:
-            return {"status": "error", "message": "Local folder not found"}
-        else:
-            detail = response.json().get("detail", response.text)
-            return {"status": "error", "message": f"API error: {detail}"}
-
-    except httpx.TimeoutException:
-        return {"status": "error", "message": "Request timeout"}
-    except httpx.RequestError as e:
-        return {"status": "error", "message": f"Network error: {e}"}