nia-sync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sync.py ADDED
@@ -0,0 +1,192 @@
1
+ """
2
+ Sync engine for Nia Local Sync CLI.
3
+
4
+ Handles:
5
+ - Extracting data from local sources (databases, folders)
6
+ - Uploading to cloud API
7
+ - Cursor management for incremental sync
8
+ """
9
+ import os
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Any
13
+ import httpx
14
+
15
+ from config import API_BASE_URL, get_api_key, enable_source_sync
16
+ from extractor import extract_incremental, detect_source_type
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ SYNC_TIMEOUT = 120 # 2 minutes per sync request
21
+
22
+
23
def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Run a sync for every configured source.

    Args:
        sources: Source config dicts as returned by the cloud API.

    Returns:
        One result dict per source, in the same order as the input.
    """
    return [sync_source(entry) for entry in sources]
40
+
41
+
42
def sync_source(source: dict[str, Any]) -> dict[str, Any]:
    """
    Sync a single source: extract new data locally, then push it to the cloud.

    Args:
        source: Source config from the cloud API with:
            - local_folder_id: UUID of the local folder
            - path: Local path to sync
            - detected_type: Type of source
            - cursor: Current sync cursor

    Returns:
        Result dict with status, path, and stats
    """
    folder_id = source.get("local_folder_id")
    source_type = source.get("detected_type")
    current_cursor = source.get("cursor", {})

    # Expand ~ in the configured path before touching the filesystem.
    sync_path = os.path.expanduser(source.get("path", ""))

    # Guard: nothing to do if the path is missing on this machine.
    if not os.path.exists(sync_path):
        return {
            "path": sync_path,
            "status": "error",
            "error": f"Path does not exist: {sync_path}",
        }

    # The source exists locally but sync isn't enabled yet — turn it on.
    if not source.get("sync_enabled", False):
        logger.info(f"Auto-enabling sync for {sync_path}")
        enable_source_sync(folder_id, sync_path)

    # Fall back to auto-detection when no type was specified.
    if not source_type:
        source_type = detect_source_type(sync_path)

    logger.info(f"Syncing {sync_path} (type={source_type})")

    try:
        # Pull only what changed since the last cursor.
        extraction = extract_incremental(
            path=sync_path,
            source_type=source_type,
            cursor=current_cursor,
        )

        extracted_files = extraction.get("files", [])
        next_cursor = extraction.get("cursor", {})
        extraction_stats = extraction.get("stats", {})

        if not extracted_files:
            logger.info(f"No new data to sync for {sync_path}")
            return {
                "path": sync_path,
                "status": "success",
                "added": 0,
                "message": "No new data",
            }

        # Ship the extracted batch to the backend.
        upload = upload_sync_data(
            local_folder_id=folder_id,
            files=extracted_files,
            cursor=next_cursor,
            stats=extraction_stats,
        )

        if upload.get("status") != "ok":
            return {
                "path": sync_path,
                "status": "error",
                "error": upload.get("message", "Upload failed"),
            }

        # Update source cursor in-place so subsequent syncs use it
        source["cursor"] = next_cursor
        return {
            "path": sync_path,
            "status": "success",
            "added": len(extracted_files),
            "chunks_indexed": upload.get("chunks_indexed", 0),
            "new_cursor": next_cursor,
        }

    except PermissionError:
        return {
            "path": sync_path,
            "status": "error",
            "error": "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security.",
        }
    except Exception as e:
        logger.error(f"Error syncing {sync_path}: {e}", exc_info=True)
        return {
            "path": sync_path,
            "status": "error",
            "error": str(e),
        }
142
+
143
+
144
def upload_sync_data(
    local_folder_id: str,
    files: list[dict[str, Any]],
    cursor: dict[str, Any],
    stats: dict[str, Any],
) -> dict[str, Any]:
    """
    Upload extracted data to the cloud API.

    Args:
        local_folder_id: UUID of the local folder
        files: List of extracted files with path, content, metadata
        cursor: New cursor after extraction
        stats: Extraction stats

    Returns:
        API response dict on success, or a dict with status "error" and a
        human-readable message on any failure.
    """
    api_key = get_api_key()
    if not api_key:
        return {"status": "error", "message": "Not authenticated"}

    try:
        with httpx.Client(timeout=SYNC_TIMEOUT) as client:
            response = client.post(
                f"{API_BASE_URL}/v2/daemon/sync",
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "local_folder_id": local_folder_id,
                    "files": files,
                    "cursor": cursor,
                    "stats": stats,
                },
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 401:
                return {"status": "error", "message": "Authentication failed"}
            elif response.status_code == 404:
                return {"status": "error", "message": "Local folder not found"}
            else:
                # BUGFIX: the error body is not guaranteed to be JSON —
                # proxies/load balancers often return HTML error pages, and
                # response.json() would raise json.JSONDecodeError (not an
                # httpx.RequestError, so it escaped the handlers below).
                try:
                    detail = response.json().get("detail", response.text)
                except ValueError:
                    detail = response.text
                return {"status": "error", "message": f"API error: {detail}"}

    except httpx.TimeoutException:
        return {"status": "error", "message": "Request timeout"}
    except httpx.RequestError as e:
        return {"status": "error", "message": f"Network error: {e}"}
watcher.py ADDED
@@ -0,0 +1,304 @@
1
+ """
2
+ File system watcher for real-time sync.
3
+
4
+ Uses watchdog library to monitor file changes and trigger syncs
5
+ with debouncing to prevent rapid-fire updates.
6
+ """
7
+ import os
8
+ import threading
9
+ import logging
10
+ from typing import Callable
11
+ from pathlib import Path
12
+
13
+ from watchdog.observers import Observer
14
+ from watchdog.events import FileSystemEventHandler, FileSystemEvent
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # File extensions to watch for changes
19
+ WATCHED_EXTENSIONS = {
20
+ # Database files
21
+ '.db', '.db-wal', '.db-shm', '.sqlite', '.sqlite3',
22
+ # Document files
23
+ '.txt', '.md', '.json', '.yaml', '.yml',
24
+ # Code files (for folder sync)
25
+ '.py', '.js', '.ts', '.tsx', '.jsx', '.html', '.css',
26
+ }
27
+
28
+
29
class SyncEventHandler(FileSystemEventHandler):
    """
    Handles file system events with debouncing.

    When a file changes, starts a timer. If more changes come in
    before the timer expires, the timer resets. When the timer
    finally expires, triggers the sync callback.
    """

    def __init__(
        self,
        source_id: str,
        source_path: str,
        on_change: Callable[[str], None],
        debounce_sec: float = 2.0,
    ):
        """
        Args:
            source_id: Identifier passed back to the callback on sync.
            source_path: File or directory being synced (~ is expanded).
            on_change: Called with source_id once changes have settled.
            debounce_sec: Quiet period required before the callback fires.
        """
        super().__init__()
        self.source_id = source_id
        self.source_path = os.path.abspath(os.path.expanduser(source_path))
        self.on_change = on_change
        self.debounce_sec = debounce_sec
        self._timer: threading.Timer | None = None
        self._lock = threading.Lock()
        self._pending_changes = 0

        # For database files, also watch the WAL/SHM sidecar files —
        # SQLite in WAL mode writes there before checkpointing into the .db.
        if self.source_path.endswith('.db'):
            self._watched_files = {
                self.source_path,
                self.source_path + '-wal',
                self.source_path + '-shm',
            }
        else:
            self._watched_files = None  # Watch all files in directory

    def _path_matches(self, event_path: str) -> bool:
        """Return True if a concrete path is relevant to THIS source."""
        event_path = os.path.abspath(event_path)

        # If we're watching specific files (database), only trigger for those
        if self._watched_files is not None:
            return event_path in self._watched_files

        # For directories, watch all relevant extensions
        return Path(event_path).suffix.lower() in WATCHED_EXTENSIONS

    def _should_handle(self, event: FileSystemEvent) -> bool:
        """Check if this event should trigger a sync for THIS source."""
        if event.is_directory:
            return False
        return self._path_matches(event.src_path)

    def on_modified(self, event: FileSystemEvent):
        """Called when a file is modified."""
        if self._should_handle(event):
            logger.debug(f"Modified: {event.src_path}")
            self._debounce()

    def on_created(self, event: FileSystemEvent):
        """Called when a file is created."""
        if self._should_handle(event):
            logger.debug(f"Created: {event.src_path}")
            self._debounce()

    def on_deleted(self, event: FileSystemEvent):
        """Called when a file is deleted."""
        if self._should_handle(event):
            logger.debug(f"Deleted: {event.src_path}")
            self._debounce()

    def on_moved(self, event: FileSystemEvent):
        """Called when a file is moved/renamed.

        BUGFIX: many editors save via atomic rename (write temp file, then
        rename over the target), which watchdog reports as a "moved" event
        rather than "modified". Without this handler those saves were
        silently missed. Trigger when either endpoint of the move matches.
        """
        if event.is_directory:
            return
        dest = getattr(event, "dest_path", "")
        if self._path_matches(event.src_path) or (dest and self._path_matches(dest)):
            logger.debug(f"Moved: {event.src_path} -> {dest}")
            self._debounce()

    def _debounce(self):
        """Reset the debounce timer."""
        with self._lock:
            self._pending_changes += 1

            # Cancel existing timer
            if self._timer is not None:
                self._timer.cancel()

            # Start new timer. Daemonize so a pending timer can't keep the
            # process alive during interpreter shutdown.
            self._timer = threading.Timer(self.debounce_sec, self._trigger_sync)
            self._timer.daemon = True
            self._timer.start()

    def _trigger_sync(self):
        """Called when debounce timer expires - triggers actual sync."""
        with self._lock:
            changes = self._pending_changes
            self._pending_changes = 0
            self._timer = None

        logger.info(f"Triggering sync for {self.source_id} ({changes} changes detected)")

        try:
            self.on_change(self.source_id)
        except Exception as e:
            logger.error(f"Error in sync callback: {e}")

    def cancel(self):
        """Cancel any pending timer."""
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None
133
+
134
+
135
class FileWatcher:
    """
    Watches multiple source paths for changes.

    Usage:
        watcher = FileWatcher()
        watcher.watch("source_id", "/path/to/file.db", on_change_callback)
        watcher.start()
        # ... later
        watcher.stop()
    """

    def __init__(self, debounce_sec: float = 2.0):
        """
        Args:
            debounce_sec: Quiet period passed to each per-source handler.
        """
        self.debounce_sec = debounce_sec
        self.observer = Observer()
        self.handlers: dict[str, SyncEventHandler] = {}
        # BUGFIX: was annotated dict[str, any] — `any` is the builtin
        # function, not a type. The values are opaque watchdog watch handles.
        self._watches: dict[str, object] = {}
        self._lock = threading.Lock()
        self._started = False

    def watch(
        self,
        source_id: str,
        path: str,
        on_change: Callable[[str], None],
    ) -> bool:
        """
        Add a path to watch.

        Args:
            source_id: Unique identifier for this source
            path: File or directory path to watch
            on_change: Callback when changes detected (receives source_id)

        Returns:
            True if successfully added, False otherwise
        """
        with self._lock:
            # Skip if already watching this source
            if source_id in self.handlers:
                logger.debug(f"Already watching {source_id}")
                return True

            # Expand path
            expanded = os.path.expanduser(path)

            # For database files, watch the parent directory
            # to catch .db-wal changes
            if expanded.endswith('.db'):
                watch_path = os.path.dirname(expanded)
            else:
                watch_path = expanded

            # Verify path exists
            if not os.path.exists(watch_path):
                logger.warning(f"Path does not exist: {watch_path}")
                return False

            # Create handler
            handler = SyncEventHandler(
                source_id=source_id,
                source_path=expanded,
                on_change=on_change,
                debounce_sec=self.debounce_sec,
            )

            # Schedule watch
            try:
                watch = self.observer.schedule(
                    handler,
                    watch_path,
                    recursive=os.path.isdir(watch_path),
                )
                self.handlers[source_id] = handler
                self._watches[source_id] = watch
                logger.info(f"Watching {source_id}: {watch_path}")
                return True
            except Exception as e:
                logger.error(f"Failed to watch {watch_path}: {e}")
                return False

    def unwatch(self, source_id: str):
        """Stop watching a source."""
        with self._lock:
            if source_id not in self.handlers:
                return

            handler = self.handlers.pop(source_id)
            handler.cancel()

            watch = self._watches.pop(source_id, None)
            if watch:
                self.observer.unschedule(watch)

            logger.info(f"Stopped watching {source_id}")

    def start(self):
        """Start the file watcher (observer thread)."""
        if not self._started:
            self.observer.start()
            self._started = True
            logger.info("File watcher started")

    def stop(self):
        """Stop the file watcher and cancel all pending debounce timers."""
        if not self._started:
            return

        # BUGFIX: snapshot under the lock — handlers is mutated by
        # watch()/unwatch() under self._lock, so iterating it lock-free
        # here could raise RuntimeError on concurrent modification.
        with self._lock:
            pending = list(self.handlers.values())
        for handler in pending:
            handler.cancel()

        self.observer.stop()
        self.observer.join(timeout=5.0)
        self._started = False
        logger.info("File watcher stopped")

    @property
    def watching(self) -> list[str]:
        """Get list of source IDs being watched."""
        with self._lock:
            return list(self.handlers.keys())
255
+
256
+
257
class NewFolderHandler(FileSystemEventHandler):
    """Detects new folders in watched directories."""

    def __init__(self, on_folder_created: Callable[[str, str], None]):
        """
        Args:
            on_folder_created: Callback receiving (folder_name, full_path).
        """
        super().__init__()
        self.on_folder_created = on_folder_created

    def on_created(self, event: FileSystemEvent):
        """Forward directory-creation events to the callback; ignore files."""
        if not event.is_directory:
            return
        created_path = event.src_path
        self.on_folder_created(os.path.basename(created_path), created_path)
268
+
269
+
270
class DirectoryWatcher:
    """
    Watches common directories for new folder creation.

    Used to instantly detect when user creates/clones a folder that
    matches an indexed source name.
    """

    def __init__(self):
        self.observer = Observer()
        self._started = False

    def watch(self, directories: list[str], on_folder_created: Callable[[str, str], None]):
        """Watch directories for new folders (non-recursive, top-level only)."""
        handler = NewFolderHandler(on_folder_created)

        for raw_path in directories:
            candidate = os.path.expanduser(raw_path)
            if not os.path.isdir(candidate):
                continue
            try:
                self.observer.schedule(handler, candidate, recursive=False)
            except Exception as e:
                logger.warning(f"Can't watch {candidate}: {e}")
            else:
                logger.debug(f"Watching directory for new folders: {candidate}")

    def start(self):
        """Start the observer thread if it is not already running."""
        if not self._started:
            self.observer.start()
            self._started = True

    def stop(self):
        """Stop the observer thread and wait briefly for it to exit."""
        if not self._started:
            return
        self.observer.stop()
        self.observer.join(timeout=5.0)
        self._started = False