nia-sync 0.1.0 (nia_sync-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auth.py +168 -0
- config.py +276 -0
- extractor.py +947 -0
- main.py +632 -0
- nia_sync-0.1.0.dist-info/METADATA +9 -0
- nia_sync-0.1.0.dist-info/RECORD +11 -0
- nia_sync-0.1.0.dist-info/WHEEL +5 -0
- nia_sync-0.1.0.dist-info/entry_points.txt +2 -0
- nia_sync-0.1.0.dist-info/top_level.txt +6 -0
- sync.py +192 -0
- watcher.py +304 -0
sync.py
ADDED
@@ -0,0 +1,192 @@
"""
Sync engine for Nia Local Sync CLI.

Handles:
- Extracting data from local sources (databases, folders)
- Uploading to cloud API
- Cursor management for incremental sync
"""
import os
import logging
from pathlib import Path
from typing import Any
import httpx

from config import API_BASE_URL, get_api_key, enable_source_sync
from extractor import extract_incremental, detect_source_type

logger = logging.getLogger(__name__)

SYNC_TIMEOUT = 120  # 2 minutes per sync request


def sync_all_sources(sources: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Sync all configured sources.

    Args:
        sources: List of source configs from cloud API

    Returns:
        List of results for each source
    """
    results = []

    for source in sources:
        result = sync_source(source)
        results.append(result)

    return results


def sync_source(source: dict[str, Any]) -> dict[str, Any]:
    """
    Sync a single source.

    Args:
        source: Source config from cloud API with:
            - local_folder_id: UUID of the local folder
            - path: Local path to sync
            - detected_type: Type of source
            - cursor: Current sync cursor

    Returns:
        Result dict with status, path, and stats
    """
    local_folder_id = source.get("local_folder_id")
    path = source.get("path", "")
    detected_type = source.get("detected_type")
    cursor = source.get("cursor", {})

    # Expand ~ in path
    path = os.path.expanduser(path)

    # Validate path exists
    if not os.path.exists(path):
        return {
            "path": path,
            "status": "error",
            "error": f"Path does not exist: {path}",
        }

    # Auto-enable sync if source exists locally but sync not enabled
    if not source.get("sync_enabled", False):
        logger.info(f"Auto-enabling sync for {path}")
        enable_source_sync(local_folder_id, path)

    # Auto-detect type if not specified
    if not detected_type:
        detected_type = detect_source_type(path)

    logger.info(f"Syncing {path} (type={detected_type})")

    try:
        # Extract data incrementally
        extraction_result = extract_incremental(
            path=path,
            source_type=detected_type,
            cursor=cursor,
        )

        files = extraction_result.get("files", [])
        new_cursor = extraction_result.get("cursor", {})
        stats = extraction_result.get("stats", {})

        if not files:
            logger.info(f"No new data to sync for {path}")
            return {
                "path": path,
                "status": "success",
                "added": 0,
                "message": "No new data",
            }

        # Upload to backend
        upload_result = upload_sync_data(
            local_folder_id=local_folder_id,
            files=files,
            cursor=new_cursor,
            stats=stats,
        )

        if upload_result.get("status") == "ok":
            # Update source cursor in-place so subsequent syncs use it
            source["cursor"] = new_cursor
            return {
                "path": path,
                "status": "success",
                "added": len(files),
                "chunks_indexed": upload_result.get("chunks_indexed", 0),
                "new_cursor": new_cursor,
            }
        else:
            return {
                "path": path,
                "status": "error",
                "error": upload_result.get("message", "Upload failed"),
            }

    except PermissionError:
        return {
            "path": path,
            "status": "error",
            "error": "Permission denied. Grant Full Disk Access in System Settings > Privacy & Security.",
        }
    except Exception as e:
        logger.error(f"Error syncing {path}: {e}", exc_info=True)
        return {
            "path": path,
            "status": "error",
            "error": str(e),
        }


def upload_sync_data(
    local_folder_id: str,
    files: list[dict[str, Any]],
    cursor: dict[str, Any],
    stats: dict[str, Any],
) -> dict[str, Any]:
    """
    Upload extracted data to the cloud API.

    Args:
        local_folder_id: UUID of the local folder
        files: List of extracted files with path, content, metadata
        cursor: New cursor after extraction
        stats: Extraction stats

    Returns:
        API response dict
    """
    api_key = get_api_key()
    if not api_key:
        return {"status": "error", "message": "Not authenticated"}

    try:
        with httpx.Client(timeout=SYNC_TIMEOUT) as client:
            response = client.post(
                f"{API_BASE_URL}/v2/daemon/sync",
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "local_folder_id": local_folder_id,
                    "files": files,
                    "cursor": cursor,
                    "stats": stats,
                },
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 401:
                return {"status": "error", "message": "Authentication failed"}
            elif response.status_code == 404:
                return {"status": "error", "message": "Local folder not found"}
            else:
                detail = response.json().get("detail", response.text)
                return {"status": "error", "message": f"API error: {detail}"}

    except httpx.TimeoutException:
        return {"status": "error", "message": "Request timeout"}
    except httpx.RequestError as e:
        return {"status": "error", "message": f"Network error: {e}"}
watcher.py
ADDED
@@ -0,0 +1,304 @@
"""
File system watcher for real-time sync.

Uses watchdog library to monitor file changes and trigger syncs
with debouncing to prevent rapid-fire updates.
"""
import os
import threading
import logging
from typing import Callable
from pathlib import Path

from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileSystemEvent

logger = logging.getLogger(__name__)

# File extensions to watch for changes
WATCHED_EXTENSIONS = {
    # Database files
    '.db', '.db-wal', '.db-shm', '.sqlite', '.sqlite3',
    # Document files
    '.txt', '.md', '.json', '.yaml', '.yml',
    # Code files (for folder sync)
    '.py', '.js', '.ts', '.tsx', '.jsx', '.html', '.css',
}


class SyncEventHandler(FileSystemEventHandler):
    """
    Handles file system events with debouncing.

    When a file changes, starts a timer. If more changes come in
    before the timer expires, the timer resets. When the timer
    finally expires, triggers the sync callback.
    """

    def __init__(
        self,
        source_id: str,
        source_path: str,
        on_change: Callable[[str], None],
        debounce_sec: float = 2.0,
    ):
        super().__init__()
        self.source_id = source_id
        self.source_path = os.path.abspath(os.path.expanduser(source_path))
        self.on_change = on_change
        self.debounce_sec = debounce_sec
        self._timer: threading.Timer | None = None
        self._lock = threading.Lock()
        self._pending_changes = 0

        # For database files, also watch the WAL/SHM files
        if self.source_path.endswith('.db'):
            self._watched_files = {
                self.source_path,
                self.source_path + '-wal',
                self.source_path + '-shm',
            }
        else:
            self._watched_files = None  # Watch all files in directory

    def _should_handle(self, event: FileSystemEvent) -> bool:
        """Check if this event should trigger a sync for THIS source."""
        if event.is_directory:
            return False

        event_path = os.path.abspath(event.src_path)

        # If we're watching specific files (database), only trigger for those
        if self._watched_files is not None:
            return event_path in self._watched_files

        # For directories, watch all relevant extensions
        ext = Path(event_path).suffix.lower()
        if ext in WATCHED_EXTENSIONS:
            return True

        return False

    def on_modified(self, event: FileSystemEvent):
        """Called when a file is modified."""
        if self._should_handle(event):
            logger.debug(f"Modified: {event.src_path}")
            self._debounce()

    def on_created(self, event: FileSystemEvent):
        """Called when a file is created."""
        if self._should_handle(event):
            logger.debug(f"Created: {event.src_path}")
            self._debounce()

    def on_deleted(self, event: FileSystemEvent):
        """Called when a file is deleted."""
        if self._should_handle(event):
            logger.debug(f"Deleted: {event.src_path}")
            self._debounce()

    def _debounce(self):
        """Reset the debounce timer."""
        with self._lock:
            self._pending_changes += 1

            # Cancel existing timer
            if self._timer is not None:
                self._timer.cancel()

            # Start new timer
            self._timer = threading.Timer(self.debounce_sec, self._trigger_sync)
            self._timer.start()

    def _trigger_sync(self):
        """Called when debounce timer expires - triggers actual sync."""
        with self._lock:
            changes = self._pending_changes
            self._pending_changes = 0
            self._timer = None

        logger.info(f"Triggering sync for {self.source_id} ({changes} changes detected)")

        try:
            self.on_change(self.source_id)
        except Exception as e:
            logger.error(f"Error in sync callback: {e}")

    def cancel(self):
        """Cancel any pending timer."""
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None


class FileWatcher:
    """
    Watches multiple source paths for changes.

    Usage:
        watcher = FileWatcher()
        watcher.watch("source_id", "/path/to/file.db", on_change_callback)
        watcher.start()
        # ... later
        watcher.stop()
    """

    def __init__(self, debounce_sec: float = 2.0):
        self.debounce_sec = debounce_sec
        self.observer = Observer()
        self.handlers: dict[str, SyncEventHandler] = {}
        self._watches: dict[str, any] = {}
        self._lock = threading.Lock()
        self._started = False

    def watch(
        self,
        source_id: str,
        path: str,
        on_change: Callable[[str], None],
    ) -> bool:
        """
        Add a path to watch.

        Args:
            source_id: Unique identifier for this source
            path: File or directory path to watch
            on_change: Callback when changes detected (receives source_id)

        Returns:
            True if successfully added, False otherwise
        """
        with self._lock:
            # Skip if already watching this source
            if source_id in self.handlers:
                logger.debug(f"Already watching {source_id}")
                return True

            # Expand path
            expanded = os.path.expanduser(path)

            # For database files, watch the parent directory
            # to catch .db-wal changes
            if expanded.endswith('.db'):
                watch_path = os.path.dirname(expanded)
            else:
                watch_path = expanded

            # Verify path exists
            if not os.path.exists(watch_path):
                logger.warning(f"Path does not exist: {watch_path}")
                return False

            # Create handler
            handler = SyncEventHandler(
                source_id=source_id,
                source_path=expanded,
                on_change=on_change,
                debounce_sec=self.debounce_sec,
            )

            # Schedule watch
            try:
                watch = self.observer.schedule(
                    handler,
                    watch_path,
                    recursive=os.path.isdir(watch_path),
                )
                self.handlers[source_id] = handler
                self._watches[source_id] = watch
                logger.info(f"Watching {source_id}: {watch_path}")
                return True
            except Exception as e:
                logger.error(f"Failed to watch {watch_path}: {e}")
                return False

    def unwatch(self, source_id: str):
        """Stop watching a source."""
        with self._lock:
            if source_id not in self.handlers:
                return

            handler = self.handlers.pop(source_id)
            handler.cancel()

            watch = self._watches.pop(source_id, None)
            if watch:
                self.observer.unschedule(watch)

            logger.info(f"Stopped watching {source_id}")

    def start(self):
        """Start the file watcher."""
        if not self._started:
            self.observer.start()
            self._started = True
            logger.info("File watcher started")

    def stop(self):
        """Stop the file watcher."""
        if self._started:
            # Cancel all pending timers
            for handler in self.handlers.values():
                handler.cancel()

            self.observer.stop()
            self.observer.join(timeout=5.0)
            self._started = False
            logger.info("File watcher stopped")

    @property
    def watching(self) -> list[str]:
        """Get list of source IDs being watched."""
        with self._lock:
            return list(self.handlers.keys())


class NewFolderHandler(FileSystemEventHandler):
    """Detects new folders in watched directories."""

    def __init__(self, on_folder_created: Callable[[str, str], None]):
        super().__init__()
        self.on_folder_created = on_folder_created

    def on_created(self, event: FileSystemEvent):
        if event.is_directory:
            folder_name = os.path.basename(event.src_path)
            self.on_folder_created(folder_name, event.src_path)


class DirectoryWatcher:
    """
    Watches common directories for new folder creation.

    Used to instantly detect when user creates/clones a folder that
    matches an indexed source name.
    """

    def __init__(self):
        self.observer = Observer()
        self._started = False

    def watch(self, directories: list[str], on_folder_created: Callable[[str, str], None]):
        """Watch directories for new folders (non-recursive, top-level only)."""
        handler = NewFolderHandler(on_folder_created)

        for dir_path in directories:
            expanded = os.path.expanduser(dir_path)
            if os.path.isdir(expanded):
                try:
                    self.observer.schedule(handler, expanded, recursive=False)
                    logger.debug(f"Watching directory for new folders: {expanded}")
                except Exception as e:
                    logger.warning(f"Can't watch {expanded}: {e}")

    def start(self):
        if not self._started:
            self.observer.start()
            self._started = True

    def stop(self):
        if self._started:
            self.observer.stop()
            self.observer.join(timeout=5.0)
            self._started = False