docspan 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. docspan/__init__.py +3 -0
  2. docspan/__main__.py +0 -0
  3. docspan/backends/__init__.py +19 -0
  4. docspan/backends/base.py +85 -0
  5. docspan/backends/confluence/__init__.py +0 -0
  6. docspan/backends/confluence/adf/__init__.py +14 -0
  7. docspan/backends/confluence/adf/comparator.py +427 -0
  8. docspan/backends/confluence/adf/converter.py +119 -0
  9. docspan/backends/confluence/adf/converters.py +1449 -0
  10. docspan/backends/confluence/adf/interfaces.py +191 -0
  11. docspan/backends/confluence/adf/nodes.py +2085 -0
  12. docspan/backends/confluence/adf/parser.py +400 -0
  13. docspan/backends/confluence/adf/validators.py +161 -0
  14. docspan/backends/confluence/adf/visitors.py +495 -0
  15. docspan/backends/confluence/backend.py +227 -0
  16. docspan/backends/confluence/client.py +44 -0
  17. docspan/backends/confluence/config/__init__.py +21 -0
  18. docspan/backends/confluence/config/loader.py +107 -0
  19. docspan/backends/confluence/config/models.py +167 -0
  20. docspan/backends/confluence/config/validation.py +297 -0
  21. docspan/backends/confluence/markdown/__init__.py +22 -0
  22. docspan/backends/confluence/markdown/ast.py +819 -0
  23. docspan/backends/confluence/markdown/extensions/__init__.py +5 -0
  24. docspan/backends/confluence/markdown/extensions/frontmatter.py +80 -0
  25. docspan/backends/confluence/markdown/extensions/mermaid.py +64 -0
  26. docspan/backends/confluence/markdown/extensions/wikilinks.py +179 -0
  27. docspan/backends/confluence/markdown/inline_parser.py +495 -0
  28. docspan/backends/confluence/markdown/parser.py +1006 -0
  29. docspan/backends/confluence/models/__init__.py +18 -0
  30. docspan/backends/confluence/models/markdown_file.py +402 -0
  31. docspan/backends/confluence/models/page.py +212 -0
  32. docspan/backends/confluence/models/path_utils.py +34 -0
  33. docspan/backends/confluence/models/results.py +28 -0
  34. docspan/backends/confluence/models/sync_status.py +382 -0
  35. docspan/backends/confluence/services/__init__.py +0 -0
  36. docspan/backends/confluence/services/confluence/__init__.py +40 -0
  37. docspan/backends/confluence/services/confluence/attachment_client.py +147 -0
  38. docspan/backends/confluence/services/confluence/base_client.py +420 -0
  39. docspan/backends/confluence/services/confluence/client.py +376 -0
  40. docspan/backends/confluence/services/confluence/comment_client.py +682 -0
  41. docspan/backends/confluence/services/confluence/crawler.py +587 -0
  42. docspan/backends/confluence/services/confluence/label_client.py +130 -0
  43. docspan/backends/confluence/services/confluence/page_client.py +1288 -0
  44. docspan/backends/confluence/services/confluence/space_client.py +179 -0
  45. docspan/backends/confluence/services/confluence/url_parser.py +106 -0
  46. docspan/backends/google_docs/__init__.py +0 -0
  47. docspan/backends/google_docs/auth.py +143 -0
  48. docspan/backends/google_docs/backend.py +140 -0
  49. docspan/backends/google_docs/client.py +665 -0
  50. docspan/backends/google_docs/converter.py +471 -0
  51. docspan/backends/google_docs/docs_request_builder.py +232 -0
  52. docspan/backends/google_docs/docs_structure_parser.py +120 -0
  53. docspan/backends/google_docs/markdown_to_paragraph_parser.py +145 -0
  54. docspan/cli/__init__.py +0 -0
  55. docspan/cli/main.py +408 -0
  56. docspan/config.py +62 -0
  57. docspan/core/__init__.py +49 -0
  58. docspan/core/merge.py +30 -0
  59. docspan/core/orchestrator.py +332 -0
  60. docspan/core/paths.py +8 -0
  61. docspan/core/state.py +53 -0
  62. docspan-0.1.0.dist-info/METADATA +273 -0
  63. docspan-0.1.0.dist-info/RECORD +65 -0
  64. docspan-0.1.0.dist-info/WHEEL +4 -0
  65. docspan-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,665 @@
1
+ """
2
+ Google Drive Client Module
3
+
4
+ Provides clients for interacting with Google APIs (Sheets, Docs, and Drive):
5
+ - GoogleDocsClient: For Google Docs operations (Account A)
6
+ - VaultDriveClient: For Markdown files in Obsidian vault (Account B)
7
+ """
8
+
9
+ import io
10
+ import logging
11
+ import random
12
+ import socket
13
+ import time
14
+ from datetime import datetime
15
+ from typing import Dict, List, Optional
16
+
17
+ from googleapiclient.discovery import build
18
+ from googleapiclient.errors import HttpError
19
+ from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Retry configuration (read by GoogleDocsClient)
24
+ MAX_RETRIES = 3  # number of attempts get_doc_content makes before giving up
25
+ RETRY_DELAY = 2 # seconds; get_doc_content multiplies this by the attempt number between tries
26
+ TIMEOUT = 60 # seconds; passed to socket.setdefaulttimeout in GoogleDocsClient.__init__
27
+
28
+
29
class GoogleSheetsClient:
    """Reads doc-to-vault mappings out of a Google Sheets spreadsheet."""

    def __init__(self, credentials):
        """
        Build the Sheets v4 service used for every read.

        Args:
            credentials: Google OAuth2 credentials
        """
        self.credentials = credentials
        self.sheets_service = build('sheets', 'v4', credentials=credentials)

    def get_mappings(self, sheet_id: str, sheet_range: str) -> List[Dict[str, str]]:
        """
        Fetch a range from the spreadsheet and turn it into mapping dicts.

        The first returned row is treated as the header; it is matched
        case-insensitively after trimming whitespace. Data rows in which
        either column is missing or blank are skipped.

        Args:
            sheet_id: Spreadsheet ID
            sheet_range: Range to read (e.g., 'Sheet1!A:B')

        Returns:
            list: [{ 'doc_id': ..., 'vault_path': ... }, ...]

        Raises:
            ValueError: If the header row lacks 'doc_id' or 'vault_path'
        """
        response = (
            self.sheets_service.spreadsheets()
            .values()
            .get(spreadsheetId=sheet_id, range=sheet_range)
            .execute()
        )
        rows = response.get('values', [])

        # A usable sheet needs the header row plus at least one data row.
        has_data_rows = bool(rows) and len(rows) >= 2
        if not has_data_rows:
            logger.warning("Sheet has no data rows for mappings")
            return []

        header = [title.strip().lower() for title in rows[0]]
        try:
            doc_col, vault_col = (header.index(name) for name in ('doc_id', 'vault_path'))
        except ValueError:
            raise ValueError("Sheet header must include 'doc_id' and 'vault_path'")

        def cell(row, col):
            # Rows may be shorter than the header, so guard the index before reading.
            if len(row) > col and row[col]:
                return row[col].strip()
            return ''

        mappings: List[Dict[str, str]] = []
        for row in rows[1:]:
            doc_id = cell(row, doc_col)
            vault_path = cell(row, vault_col)
            if doc_id and vault_path:
                mappings.append({'doc_id': doc_id, 'vault_path': vault_path})

        logger.info(f"Loaded {len(mappings)} mapping(s) from Google Sheet")
        return mappings
86
+
87
+
88
class GoogleDocsClient:
    """
    Client for Google Docs operations (Account A).

    Holds a Drive v3 service (exports and file metadata) and a Docs v1
    service (structured reads and batchUpdate writes), both built from the
    same credentials.
    """

    def __init__(self, credentials):
        """
        Initialize Google Docs client

        Args:
            credentials: Google OAuth2 credentials
        """
        self.credentials = credentials
        self.drive_service = build('drive', 'v3', credentials=credentials)
        self.docs_service = build('docs', 'v1', credentials=credentials)

        # NOTE: socket.setdefaulttimeout changes the default for every socket
        # created afterwards in this process, not only this client's.
        socket.setdefaulttimeout(TIMEOUT)

    def _with_backoff(self, fn, max_retries: int = 5, max_backoff: int = 64):
        """
        Execute fn() with truncated exponential backoff on HTTP 429 errors.

        Args:
            fn: Zero-argument callable performing the API call
            max_retries: Maximum number of times fn is called
            max_backoff: Upper bound, in seconds, for a single wait

        Returns:
            Whatever fn() returns on its first successful call

        Raises:
            HttpError: Immediately, for any status other than 429
            RuntimeError: When every attempt was rate limited; the last
                HttpError is attached as the cause
        """
        last_error = None
        for n in range(max_retries):
            try:
                return fn()
            except HttpError as e:
                if e.resp.status != 429:
                    raise
                last_error = e
                # Waiting after the final attempt would only delay the failure.
                if n < max_retries - 1:
                    time.sleep(min(2 ** n + random.random(), max_backoff))
        raise RuntimeError("Max retries exceeded after rate limit backoff") from last_error

    def get_document(self, doc_id: str) -> dict:
        """
        Get full Google Docs document JSON (including body structure).

        Args:
            doc_id: Google Doc ID

        Returns:
            dict: Full document resource including body.content
        """
        return self._with_backoff(
            lambda: self.docs_service.documents().get(documentId=doc_id).execute()
        )

    def batch_update(self, doc_id: str, requests: list) -> dict:
        """
        Submit a list of batchUpdate requests to a Google Doc.

        Applies exponential backoff on HTTP 429 (rate limit) errors.

        Args:
            doc_id: Google Doc ID
            requests: List of request dicts (e.g. insertText, deleteContentRange)

        Returns:
            dict: batchUpdate response
        """
        return self._with_backoff(
            lambda: self.docs_service.documents().batchUpdate(
                documentId=doc_id,
                body={"requests": requests},
            ).execute()
        )

    def _export_doc(self, doc_id: str, mime_type: str):
        """Export a doc through the Drive API and return the undecoded response body."""
        return self.drive_service.files().export_media(
            fileId=doc_id,
            mimeType=mime_type
        ).execute()

    def get_doc_content(self, doc_id: str) -> str:
        """
        Get Google Doc content as HTML with retry mechanism

        Up to MAX_RETRIES attempts are made (at least one). Between attempts
        the wait grows linearly: RETRY_DELAY * attempt number. HTTP 403 and
        404 are raised immediately; any other failure of the export call is
        retried.

        Args:
            doc_id: Google Doc ID

        Returns:
            str: Document content in HTML format

        Raises:
            Exception: The last error seen once all attempts have failed
        """
        # Guarantee at least one attempt so last_error is always set before
        # the final raise, even if MAX_RETRIES is configured as 0.
        attempts = max(1, MAX_RETRIES)
        last_error = None

        for attempt in range(attempts):
            try:
                content = self._export_doc(doc_id, 'text/html')

            except socket.timeout as e:
                last_error = e
                logger.warning(f"Timeout retrieving doc {doc_id} (attempt {attempt + 1}/{attempts})")

            except HttpError as e:
                # Don't retry on 404 or permission errors
                if e.resp.status in (404, 403):
                    logger.error(f"Error retrieving doc {doc_id}: {e}")
                    raise
                last_error = e
                logger.warning(f"HTTP error retrieving doc {doc_id} (attempt {attempt + 1}/{attempts}): {e}")

            except Exception as e:
                # Deliberate best-effort: any other failure of the export is retried too.
                last_error = e
                logger.warning(f"Error retrieving doc {doc_id} (attempt {attempt + 1}/{attempts}): {e}")

            else:
                logger.info(f"Retrieved content from Google Doc: {doc_id}")
                # Decoding happens outside the retried section: a payload
                # that is not valid UTF-8 will not improve on a retry.
                return content.decode('utf-8')

            if attempt < attempts - 1:
                time.sleep(RETRY_DELAY * (attempt + 1))  # linear backoff

        # All retries failed
        logger.error(f"Failed to retrieve doc {doc_id} after {attempts} attempts")
        raise last_error

    def get_doc_plain_text(self, doc_id: str) -> str:
        """
        Get Google Doc content as plain text

        Unlike get_doc_content this makes a single attempt.

        Args:
            doc_id: Google Doc ID

        Returns:
            str: Document content in plain text

        Raises:
            HttpError: If the export request fails
        """
        try:
            content = self._export_doc(doc_id, 'text/plain')
            logger.info(f"Retrieved plain text from Google Doc: {doc_id}")
            return content.decode('utf-8')
        except HttpError as e:
            logger.error(f"Error retrieving doc {doc_id}: {e}")
            raise

    def update_doc_content(self, doc_id: str, content: str) -> bool:
        """
        Update Google Doc content from plain text

        Replaces the whole body: existing text is deleted, then `content`
        is inserted at index 1. Requests that would be no-ops (deleting
        from an already empty doc, inserting an empty string) are left out,
        and no batchUpdate is sent when nothing remains to do.

        Args:
            doc_id: Google Doc ID
            content: New content as plain text

        Returns:
            bool: True if successful

        Raises:
            HttpError: If reading or updating the document fails
        """
        try:
            # Get current document to find the end index
            doc = self.docs_service.documents().get(documentId=doc_id).execute()
            body_content = (doc.get('body') or {}).get('content') or []
            # Stop one short of the last endIndex so the body's trailing
            # newline is kept (assumed undeletable - confirm against Docs API docs).
            end_index = body_content[-1].get('endIndex', 1) - 1 if body_content else 1

            requests = []
            # An empty doc gives end_index == 1; a 1..1 range is empty and
            # is expected to be rejected by the API, so skip the delete.
            if end_index > 1:
                requests.append({
                    'deleteContentRange': {
                        'range': {
                            'startIndex': 1,
                            'endIndex': end_index
                        }
                    }
                })
            # insertText with an empty string is likewise skipped.
            if content:
                requests.append({
                    'insertText': {
                        'location': {
                            'index': 1
                        },
                        'text': content
                    }
                })

            if requests:
                self.docs_service.documents().batchUpdate(
                    documentId=doc_id,
                    body={'requests': requests}
                ).execute()

            logger.info(f"Updated Google Doc: {doc_id}")
            return True
        except HttpError as e:
            logger.error(f"Error updating doc {doc_id}: {e}")
            raise

    def get_modified_time(self, doc_id: str) -> datetime:
        """
        Get last modified time of Google Doc

        Args:
            doc_id: Google Doc ID

        Returns:
            datetime: Last modified timestamp

        Raises:
            HttpError: If the metadata request fails; on 404 a sharing hint
                is logged first
        """
        try:
            file = self.drive_service.files().get(
                fileId=doc_id,
                fields='modifiedTime'
            ).execute()
            modified_time_str = file.get('modifiedTime')
            # fromisoformat on older Pythons rejects a trailing 'Z', so spell out the UTC offset.
            modified_time = datetime.fromisoformat(modified_time_str.replace('Z', '+00:00'))
            logger.debug(f"Doc {doc_id} modified at: {modified_time}")
            return modified_time
        except HttpError as e:
            if e.resp.status == 404:
                # Name the account actually in use instead of a hard-coded one.
                # Service-account credentials expose service_account_email;
                # other credential types fall back to a generic phrase.
                share_with = (
                    getattr(self.credentials, 'service_account_email', None)
                    or 'the account used by this client'
                )
                logger.error(
                    f"Google Doc not found: {doc_id}\n"
                    f" → Please share this document with: "
                    f"{share_with}\n"
                    f" → Document URL: https://docs.google.com/document/d/{doc_id}/edit"
                )
            else:
                logger.error(f"Error getting modified time for doc {doc_id}: {e}")
            raise

    def get_doc_info(self, doc_id: str) -> Dict:
        """
        Get Google Doc metadata

        Args:
            doc_id: Google Doc ID

        Returns:
            dict: Document metadata (id, name, modifiedTime, mimeType)

        Raises:
            HttpError: If the metadata request fails
        """
        try:
            return self.drive_service.files().get(
                fileId=doc_id,
                fields='id,name,modifiedTime,mimeType'
            ).execute()
        except HttpError as e:
            logger.error(f"Error getting info for doc {doc_id}: {e}")
            raise
326
+
327
+
328
class VaultDriveClient:
    """Client for Obsidian vault operations in Google Drive (Account B)"""

    def __init__(self, credentials, vault_folder_id: str):
        """
        Initialize Vault Drive client

        Args:
            credentials: Google OAuth2 credentials
            vault_folder_id: Google Drive folder ID containing the vault
        """
        self.credentials = credentials
        self.drive_service = build('drive', 'v3', credentials=credentials)
        self.vault_folder_id = vault_folder_id

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """
        Escape a string for use inside single quotes in a Drive `q` query.

        Backslashes are doubled first so the backslash added in front of
        each single quote is not itself escaped again.
        """
        return value.replace('\\', '\\\\').replace("'", "\\'")

    def _get_file_id_by_name(self, filename: str) -> Optional[str]:
        """
        Search for file by name in entire vault (recursively)

        Looks at the vault root's direct children first. If nothing matches,
        searches by name without a parent filter and keeps the first hit
        whose parent chain leads back to the vault folder.

        Args:
            filename: File name to search for (e.g., "SignalPlus Log.md")

        Returns:
            str: File ID or None if not found (also None on HttpError)
        """
        safe_name = self._escape_query_value(filename)

        try:
            # First try: direct children of the vault root.
            # 'in parents' matches immediate children only, not descendants.
            query = f"name='{safe_name}' and '{self.vault_folder_id}' in parents and trashed=false"
            results = self.drive_service.files().list(
                q=query,
                spaces='drive',
                fields='files(id, name, parents)',
                supportsAllDrives=True
            ).execute()
            files = results.get('files', [])

            if files:
                logger.info(f"Found file '{filename}' with ID: {files[0]['id']}")
                return files[0]['id']

            # Second try: match by name anywhere, then filter to the vault tree
            query = f"name='{safe_name}' and trashed=false"
            results = self.drive_service.files().list(
                q=query,
                spaces='drive',
                fields='files(id, name, parents)',
                supportsAllDrives=True
            ).execute()
            files = results.get('files', [])

            # Filter files that are within vault folder tree
            for file in files:
                if self._is_file_in_vault(file):
                    logger.info(f"Found file '{filename}' in subfolder with ID: {file['id']}")
                    return file['id']

            logger.warning(f"File not found in vault: {filename}")
            return None

        except HttpError as e:
            logger.error(f"Error searching for file {filename}: {e}")
            return None

    def _is_file_in_vault(self, file: Dict) -> bool:
        """
        Check if file is within vault folder tree

        Walks upward through the first listed parent of each folder until
        the vault folder is reached, a folder has no parents, a lookup
        fails, or 20 levels have been checked.

        Args:
            file: File metadata dict with 'parents' field

        Returns:
            bool: True if file is in vault tree
        """
        if 'parents' not in file:
            return False

        parents = file['parents']
        current_id = parents[0] if parents else None

        # Traverse up to check if we reach vault folder
        max_depth = 20  # Prevent infinite loop
        depth = 0

        while current_id and depth < max_depth:
            if current_id == self.vault_folder_id:
                return True

            try:
                parent = self.drive_service.files().get(
                    fileId=current_id,
                    fields='parents'
                ).execute()
            except HttpError:
                # An unreadable ancestor is treated as "not in the vault".
                break

            if 'parents' in parent and parent['parents']:
                current_id = parent['parents'][0]
            else:
                break

            depth += 1

        return False

    def _get_file_id_by_path(self, relative_path: str) -> Optional[str]:
        """
        Get file ID by relative path within vault

        This method first tries to find by filename only (flexible),
        then falls back to exact path matching (strict). Because the
        filename search runs first, a file with the same name in another
        vault folder can be returned.

        Args:
            relative_path: Path relative to vault root (e.g., "01. Inbox/note.md")

        Returns:
            str: File ID or None if not found
        """
        parts = relative_path.split('/')
        filename = parts[-1]

        # Try flexible search by filename first
        file_id = self._get_file_id_by_name(filename)
        if file_id:
            return file_id

        # Fallback to exact path matching
        current_folder_id = self.vault_folder_id

        # Traverse folders; empty segments (leading or doubled slashes) are
        # skipped, matching _ensure_folders_exist.
        for part in parts[:-1]:
            if not part:
                continue
            safe_part = self._escape_query_value(part)
            query = f"name='{safe_part}' and '{current_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false"
            try:
                results = self.drive_service.files().list(
                    q=query,
                    spaces='drive',
                    fields='files(id, name)'
                ).execute()
            except HttpError as e:
                logger.error(f"Error finding folder {part}: {e}")
                return None
            folders = results.get('files', [])
            if not folders:
                logger.warning(f"Folder not found: {part} in path {relative_path}")
                return None
            current_folder_id = folders[0]['id']

        # Find file in specific folder
        safe_name = self._escape_query_value(filename)
        query = f"name='{safe_name}' and '{current_folder_id}' in parents and trashed=false"
        try:
            results = self.drive_service.files().list(
                q=query,
                spaces='drive',
                fields='files(id, name)'
            ).execute()
        except HttpError as e:
            logger.error(f"Error finding file {filename}: {e}")
            return None
        files = results.get('files', [])
        if not files:
            logger.warning(f"File not found: {filename} in path {relative_path}")
            return None
        return files[0]['id']

    def read_file(self, relative_path: str) -> Optional[str]:
        """
        Read markdown file content from vault

        Args:
            relative_path: Path relative to vault root

        Returns:
            str: File content or None if not found

        Raises:
            HttpError: If the download fails
        """
        file_id = self._get_file_id_by_path(relative_path)
        if not file_id:
            return None

        try:
            request = self.drive_service.files().get_media(fileId=file_id)
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()

            content = fh.getvalue().decode('utf-8')
            logger.info(f"Read file from vault: {relative_path}")
            return content
        except HttpError as e:
            logger.error(f"Error reading file {relative_path}: {e}")
            raise

    def write_file(self, relative_path: str, content: str) -> bool:
        """
        Write or update markdown file in vault

        If the path resolves to an existing file its content is replaced;
        otherwise missing parent folders are created and a new file is
        uploaded.

        Args:
            relative_path: Path relative to vault root
            content: File content

        Returns:
            bool: True if successful

        Raises:
            HttpError: If the upload fails
        """
        file_id = self._get_file_id_by_path(relative_path)

        try:
            media = MediaIoBaseUpload(
                io.BytesIO(content.encode('utf-8')),
                mimetype='text/markdown',
                resumable=True
            )

            if file_id:
                # Update existing file
                self.drive_service.files().update(
                    fileId=file_id,
                    media_body=media,
                    supportsAllDrives=True
                ).execute()
                logger.info(f"Updated file in vault: {relative_path}")
            else:
                # Create new file
                parts = relative_path.split('/')
                filename = parts[-1]

                # Ensure parent folders exist
                parent_id = self._ensure_folders_exist('/'.join(parts[:-1]))

                file_metadata = {
                    'name': filename,
                    'parents': [parent_id],
                    'mimeType': 'text/markdown'
                }

                self.drive_service.files().create(
                    body=file_metadata,
                    media_body=media,
                    fields='id',
                    supportsAllDrives=True
                ).execute()
                logger.info(f"Created new file in vault: {relative_path}")

            return True
        except HttpError as e:
            logger.error(f"Error writing file {relative_path}: {e}")
            raise

    def _ensure_folders_exist(self, folder_path: str) -> str:
        """
        Ensure folder path exists, create if necessary

        Args:
            folder_path: Folder path relative to vault root

        Returns:
            str: Final folder ID (the vault folder ID for an empty path)
        """
        if not folder_path:
            return self.vault_folder_id

        current_folder_id = self.vault_folder_id

        for part in folder_path.split('/'):
            if not part:
                continue

            # Check if folder exists
            safe_part = self._escape_query_value(part)
            query = f"name='{safe_part}' and '{current_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false"
            results = self.drive_service.files().list(
                q=query,
                spaces='drive',
                fields='files(id, name)'
            ).execute()
            folders = results.get('files', [])

            if folders:
                current_folder_id = folders[0]['id']
            else:
                # Create folder (the metadata body is JSON, so the raw name is used unescaped)
                file_metadata = {
                    'name': part,
                    'parents': [current_folder_id],
                    'mimeType': 'application/vnd.google-apps.folder'
                }
                folder = self.drive_service.files().create(
                    body=file_metadata,
                    fields='id',
                    supportsAllDrives=True
                ).execute()
                current_folder_id = folder.get('id')
                logger.info(f"Created folder: {part}")

        return current_folder_id

    def get_modified_time(self, relative_path: str) -> Optional[datetime]:
        """
        Get last modified time of file

        Args:
            relative_path: Path relative to vault root

        Returns:
            datetime: Last modified timestamp or None if not found

        Raises:
            HttpError: If the metadata request fails
        """
        file_id = self._get_file_id_by_path(relative_path)
        if not file_id:
            return None

        try:
            file = self.drive_service.files().get(
                fileId=file_id,
                fields='modifiedTime'
            ).execute()
            modified_time_str = file.get('modifiedTime')
            # fromisoformat on older Pythons rejects a trailing 'Z', so spell out the UTC offset.
            modified_time = datetime.fromisoformat(modified_time_str.replace('Z', '+00:00'))
            logger.debug(f"File {relative_path} modified at: {modified_time}")
            return modified_time
        except HttpError as e:
            logger.error(f"Error getting modified time for {relative_path}: {e}")
            raise

    def file_exists(self, relative_path: str) -> bool:
        """
        Check if file exists in vault

        Args:
            relative_path: Path relative to vault root

        Returns:
            bool: True if file exists
        """
        return self._get_file_id_by_path(relative_path) is not None