gslides-automator 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,937 @@
1
#!/usr/bin/env python3
"""
Script to generate L1-Data from L0-Data for entities in Tamil Nadu.
Reads CSV files and images from L0-Data folder, clones entity data templates,
populates Google Sheets tabs with CSV data, and copies images to L1-Data folder.
"""
# The docstring must be the first statement to become the module's __doc__;
# a __future__ import is allowed to follow it.
from __future__ import annotations
8
+
9
+ from googleapiclient.discovery import build
10
+ from googleapiclient.errors import HttpError
11
+ from googleapiclient.http import MediaIoBaseDownload
12
+ import gspread
13
+ import os
14
+ import sys
15
+ import time
16
+ import csv
17
+ import io
18
+ import argparse
19
+ import re
20
+
21
+ # Add project root to path to import auth module
22
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
23
+ PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
24
+ sys.path.insert(0, PROJECT_ROOT)
25
+
26
+ from gslides_automator.drive_layout import load_entities, resolve_layout, DriveLayout
27
+ from gslides_automator.auth import get_oauth_credentials
28
+
29
def retry_with_exponential_backoff(func, max_retries=5, initial_delay=1, max_delay=60, backoff_factor=2):
    """
    Call ``func`` and retry it with exponential backoff on throttling/server errors.

    An ``HttpError`` is retried when its status is 429 or in the 5xx range. Any
    other exception is retried only when its text mentions "429", "rate limit"
    or "quota" (used to catch rate-limit errors raised by gspread).

    Args:
        func: Zero-argument callable to invoke.
        max_retries: Maximum number of retries after the first attempt (default: 5)
        initial_delay: Seconds to wait before the first retry (default: 1)
        max_delay: Upper bound, in seconds, for a single wait (default: 60)
        backoff_factor: Multiplier applied to the delay after each retry (default: 2)

    Returns:
        Whatever ``func()`` returns on the first successful attempt.

    Raises:
        HttpError: If the error is not retryable or the retries are exhausted.
        Exception: Any other exception raised by ``func()`` that is not
            retryable, or that persists after the retries are exhausted.
    """
    current_delay = initial_delay
    attempt = 0

    while attempt <= max_retries:
        try:
            return func()
        except HttpError as error:
            status = error.resp.status

            # Anything other than 429 / 5xx is not worth retrying.
            if status != 429 and not (500 <= status < 600):
                raise

            error_msg = "Rate limit exceeded (429)" if status == 429 else f"Server error ({status})"

            if attempt >= max_retries:
                print(f" ✗ {error_msg}. Max retries ({max_retries}) reached.")
                raise

            wait_time = min(current_delay, max_delay)
            print(f" ⚠️ {error_msg}. Retrying in {wait_time:.1f} seconds... (attempt {attempt + 1}/{max_retries})")
            time.sleep(wait_time)
            current_delay *= backoff_factor
        except Exception as e:
            # Non-HttpError exceptions: recognise rate limiting from the message text.
            error_str = str(e).lower()
            looks_rate_limited = '429' in error_str or 'rate limit' in error_str or 'quota' in error_str
            if not looks_rate_limited:
                raise

            if attempt >= max_retries:
                print(f" ✗ Rate limit error. Max retries ({max_retries}) reached.")
                raise

            wait_time = min(current_delay, max_delay)
            print(f" ⚠️ Rate limit error. Retrying in {wait_time:.1f} seconds... (attempt {attempt + 1}/{max_retries})")
            time.sleep(wait_time)
            current_delay *= backoff_factor

        attempt += 1
93
+
94
+
95
def read_entities_from_csv(csv_path):
    """
    Read entity names from the first column of a CSV file.

    Blank rows and rows whose first cell is empty after stripping are skipped.
    If the first remaining name is one of 'entity', 'entities', 'name' or
    'names' (case-insensitive) it is treated as a header and dropped.

    Args:
        csv_path: Path to the CSV file

    Returns:
        list: List of entity names (strings), or empty list if the file could
        not be read (the error and traceback are printed).
    """
    header_names = ('entity', 'entities', 'name', 'names')

    try:
        # 'utf-8-sig' transparently strips a UTF-8 BOM (common in spreadsheet
        # exports) that would otherwise be glued to the first name and defeat
        # header detection; newline='' is what the csv module expects.
        with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
            entities = [
                row[0].strip()
                for row in csv.reader(f)
                if row and row[0].strip()
            ]
    except Exception as e:
        # Deliberate best-effort: report the problem and return no entities.
        print(f"Error reading entities from CSV file: {e}")
        import traceback
        traceback.print_exc()
        return []

    # Remove header if it exists
    if entities and entities[0].lower() in header_names:
        entities = entities[1:]

    return entities
126
+
127
def find_existing_file(drive_service, file_name, folder_id):
    """
    Check if a file with the given name exists in the specified folder.

    Args:
        drive_service: Google Drive API service instance
        file_name: Name of the file to search for
        folder_id: ID of the folder to search in

    Returns:
        str: ID of the first matching file if found, None otherwise (also None
        when the search fails with an HttpError, which is printed).
    """
    # Drive query string literals are single-quoted: backslashes and single
    # quotes inside the name must be escaped or the query is malformed
    # (e.g. for a name such as "O'Brien").
    escaped_name = str(file_name).replace("\\", "\\\\").replace("'", "\\'")

    def _find():
        query = f"name='{escaped_name}' and '{folder_id}' in parents and trashed=false"
        results = drive_service.files().list(
            q=query,
            fields="files(id, name)",
            supportsAllDrives=True,
            includeItemsFromAllDrives=True
        ).execute()
        files = results.get('files', [])
        if files:
            return files[0]['id']
        return None

    try:
        return retry_with_exponential_backoff(_find)
    except HttpError as error:
        print(f"Error searching for existing file '{file_name}': {error}")
        return None
157
+
158
+
159
def _print_sharing_hint(headline):
    """
    Print ``headline`` followed by advice to share the file with the service account.

    The service account email is included when it can be looked up; otherwise a
    generic hint is printed instead.
    """
    try:
        # Absolute import, consistent with the module-level imports; a relative
        # import fails when this file is executed directly as a script.
        from gslides_automator.auth import get_service_account_email
        service_account_email = get_service_account_email()
    except Exception:
        # Best-effort lookup: fall back to the generic wording.
        print(headline)
        print(f" Please ensure the file is shared with your service account with 'Editor' permissions.")
        return

    print(headline)
    print(f" Service account email: {service_account_email}")
    print(f" Please ensure the file is shared with this service account with 'Editor' permissions.")


def delete_file(drive_service, file_id):
    """
    Delete a file from Google Drive.

    The file's metadata is fetched first so that an inaccessible file is
    reported clearly and its name can be used in later error messages.

    Args:
        drive_service: Google Drive API service instance
        file_id: ID of the file to delete

    Returns:
        bool: True if successful, False otherwise
    """
    def _get_metadata():
        return drive_service.files().get(
            fileId=file_id,
            fields='id, name',
            supportsAllDrives=True
        ).execute()

    # First, check if the file exists and is accessible. Retried like every
    # other Drive call so a transient 429/5xx does not abort the delete.
    try:
        file_metadata = retry_with_exponential_backoff(_get_metadata)
    except HttpError as check_error:
        if check_error.resp.status == 404:
            # File not found - might not be accessible to service account
            _print_sharing_hint(f" ⚠️ File not found or not accessible to service account.")
        else:
            print(f" ⚠️ Error checking file access: {check_error}")
        return False

    file_name = file_metadata.get('name', 'Unknown')

    def _delete():
        drive_service.files().delete(
            fileId=file_id,
            supportsAllDrives=True
        ).execute()
        return True

    try:
        return retry_with_exponential_backoff(_delete)
    except HttpError as error:
        status = error.resp.status
        if status == 404:
            _print_sharing_hint(f" ⚠️ Error deleting file '{file_name}': File not found or not accessible.")
        elif status == 403:
            _print_sharing_hint(f" ⚠️ Error deleting file '{file_name}': Permission denied.")
        else:
            print(f" ⚠️ Error deleting file '{file_name}': {error}")
        return False
228
+
229
+
230
def find_or_create_entity_folder(drive_service, entity_name, parent_folder_id):
    """
    Find entity subfolder in parent folder, create if doesn't exist.

    Args:
        drive_service: Google Drive API service instance
        entity_name: Name of the entity (folder name)
        parent_folder_id: ID of the parent folder

    Returns:
        str: Folder ID, or None if the lookup or creation failed with an
        HttpError (which is printed).
    """
    # Escape backslashes and single quotes so names such as "St. Mary's"
    # do not break the single-quoted Drive query literal.
    escaped_name = str(entity_name).replace("\\", "\\\\").replace("'", "\\'")

    def _find_folder():
        query = f"mimeType='application/vnd.google-apps.folder' and name='{escaped_name}' and '{parent_folder_id}' in parents and trashed=false"
        results = drive_service.files().list(
            q=query,
            fields='files(id, name)',
            supportsAllDrives=True,
            includeItemsFromAllDrives=True
        ).execute()
        files = results.get('files', [])
        if files:
            return files[0]['id']
        return None

    def _create_folder():
        # The raw (unescaped) name is used here: this is a JSON body, not a query.
        file_metadata = {
            'name': entity_name,
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_folder_id]
        }
        folder = drive_service.files().create(
            body=file_metadata,
            fields='id',
            supportsAllDrives=True
        ).execute()
        return folder.get('id')

    try:
        # Try to find existing folder
        folder_id = retry_with_exponential_backoff(_find_folder)
        if folder_id:
            return folder_id

        # Create new folder if not found
        return retry_with_exponential_backoff(_create_folder)
    except HttpError as error:
        print(f"Error finding/creating entity folder '{entity_name}': {error}")
        return None
280
+
281
+
282
def clone_template_to_entity(drive_service, template_id, entity_name, folder_id):
    """
    Clone template spreadsheet to entity folder, deleting existing if present.

    Args:
        drive_service: Google Drive API service instance
        template_id: ID of the template spreadsheet
        entity_name: Name of the entity (file name)
        folder_id: ID of the folder to place the file in

    Returns:
        str: ID of the copied file, or None if deleting the existing file or
        any Drive request failed.
    """
    file_name = f"{entity_name}"

    # Check if file already exists
    existing_file_id = find_existing_file(drive_service, file_name, folder_id)
    if existing_file_id:
        print(f" Found existing spreadsheet, deleting...")
        if delete_file(drive_service, existing_file_id):
            print(f" ✓ Deleted existing spreadsheet")
        else:
            print(f" ✗ Failed to delete existing spreadsheet")
            return None

    def _copy_template():
        copied_file = drive_service.files().copy(
            fileId=template_id,
            body={'name': file_name},
            supportsAllDrives=True
        ).execute()
        return copied_file.get('id')

    try:
        # Each step is retried on its own. Retrying copy + move as one unit
        # would create another copy whenever a later step hit a transient
        # error, leaving orphaned duplicates behind.
        new_file_id = retry_with_exponential_backoff(_copy_template)

        def _get_parents():
            file_metadata = drive_service.files().get(
                fileId=new_file_id,
                fields='parents',
                supportsAllDrives=True
            ).execute()
            return file_metadata.get('parents', [])

        previous_parents = ",".join(retry_with_exponential_backoff(_get_parents))

        # Move the file to the target folder; only detach old parents if any exist.
        move_kwargs = {
            'fileId': new_file_id,
            'addParents': folder_id,
            'fields': 'id, parents',
            'supportsAllDrives': True,
        }
        if previous_parents:
            move_kwargs['removeParents'] = previous_parents

        def _move_to_folder():
            return drive_service.files().update(**move_kwargs).execute()

        retry_with_exponential_backoff(_move_to_folder)
        return new_file_id
    except HttpError as error:
        if error.resp.status == 404:
            print(f"Error: Template file not found (404). The file may have been deleted or you don't have access.")
        elif error.resp.status == 403:
            print(f"Error: Permission denied (403). You may not have permission to copy this file.")
        else:
            print(f"Error copying template: {error}")
        return None
355
+
356
+
357
def list_csv_files_in_folder(drive_service, folder_id):
    """
    List all CSV files in a Google Drive folder.

    Follows ``nextPageToken`` so that folders holding more files than fit in
    one page are listed completely.

    Args:
        drive_service: Google Drive API service instance
        folder_id: ID of the folder to search

    Returns:
        list: List of tuples (file_id, file_name); empty list if the listing
        failed with an HttpError (which is printed).
    """
    query = f"mimeType='text/csv' and '{folder_id}' in parents and trashed=false"

    def _list_page(page_token):
        params = {
            'q': query,
            'fields': 'nextPageToken, files(id, name)',
            'pageSize': 1000,
            'supportsAllDrives': True,
            'includeItemsFromAllDrives': True,
        }
        if page_token:
            params['pageToken'] = page_token
        return drive_service.files().list(**params).execute()

    collected = []
    page_token = None
    try:
        while True:
            # Retry each page independently so earlier pages are not refetched.
            results = retry_with_exponential_backoff(lambda: _list_page(page_token))
            collected.extend((f['id'], f['name']) for f in results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break
    except HttpError as error:
        print(f"Error listing CSV files in folder: {error}")
        return []

    return collected
385
+
386
+
387
def download_csv_from_drive(drive_service, file_id):
    """
    Download CSV file content from Google Drive and parse it into rows.

    Rows are padded with empty strings so that every row has as many columns
    as the widest row.

    Args:
        drive_service: Google Drive API service instance
        file_id: ID of the CSV file

    Returns:
        list: List of rows (each row is a list of string values), or None if
        the download failed with an HttpError (which is printed). An empty
        file yields an empty list.
    """
    def _download():
        # supportsAllDrives matches every other Drive call in this module;
        # the source files live on a shared drive.
        request = drive_service.files().get_media(
            fileId=file_id,
            supportsAllDrives=True
        )
        # A fresh buffer per attempt, so a retry never appends to partial data.
        file_content = io.BytesIO()
        downloader = MediaIoBaseDownload(file_content, request)
        done = False
        while not done:
            _, done = downloader.next_chunk()

        # 'utf-8-sig' drops a leading BOM that would otherwise end up in the
        # first cell of the sheet.
        content_str = file_content.getvalue().decode('utf-8-sig')
        rows = list(csv.reader(io.StringIO(content_str)))
        if not rows:
            return rows

        # Pad rows to have the same number of columns
        max_cols = max(len(row) for row in rows)
        return [row + [''] * (max_cols - len(row)) for row in rows]

    try:
        return retry_with_exponential_backoff(_download)
    except HttpError as error:
        print(f"Error downloading CSV file: {error}")
        return None
427
+
428
+
429
def parse_csv_filename(filename):
    """
    Parse CSV filename to extract tab name.
    Example: s25-chart:pass_percentage.csv -> s25-chart:pass_percentage

    The extension is matched case-insensitively, so ``data.CSV`` also maps to
    ``data``. Names without a .csv extension are returned unchanged.

    Args:
        filename: CSV filename

    Returns:
        str: Tab name (without .csv extension)
    """
    # Compare only the last four characters so the slice below always removes
    # exactly the extension from the original string.
    if filename[-4:].lower() == '.csv':
        return filename[:-4]
    return filename
444
+
445
+
446
def find_existing_spreadsheet(drive_service, entity_name, folder_id):
    """
    Look up an entity's spreadsheet in a folder without creating anything.

    Args:
        drive_service: Google Drive API service instance
        entity_name: Name of the entity; the spreadsheet carries the same name
        folder_id: ID of the folder to search in

    Returns:
        str: Spreadsheet ID if found, None otherwise
    """
    # The lookup itself is delegated to the generic by-name file search.
    return find_existing_file(drive_service, f"{entity_name}", folder_id)
460
+
461
+
462
+
463
+
464
+ def _column_number_to_letter(n):
465
+ """
466
+ Convert a column number (1-based) to Excel column letter (A, B, ..., Z, AA, AB, ...).
467
+
468
+ Args:
469
+ n: Column number (1-based)
470
+
471
+ Returns:
472
+ str: Column letter(s)
473
+ """
474
+ result = ""
475
+ while n > 0:
476
+ n -= 1
477
+ result = chr(65 + (n % 26)) + result
478
+ n //= 26
479
+ return result
480
+
481
+
482
+ def _convert_value_to_proper_type(value):
483
+ """
484
+ Convert a CSV string value to its proper type (number, boolean, or string).
485
+ This prevents Google Sheets from adding apostrophes.
486
+
487
+ Args:
488
+ value: String value from CSV
489
+
490
+ Returns:
491
+ Value converted to appropriate type (int, float, bool, or str)
492
+ """
493
+ if value is None or value == '':
494
+ return ''
495
+
496
+ value_str = str(value).strip()
497
+
498
+ # Try to convert to number
499
+ try:
500
+ # Try integer first
501
+ if value_str.isdigit() or (value_str.startswith('-') and value_str[1:].isdigit()):
502
+ return int(value_str)
503
+ # Try float
504
+ return float(value_str)
505
+ except ValueError:
506
+ pass
507
+
508
+ # Try boolean
509
+ if value_str.lower() in ('true', 'false'):
510
+ return value_str.lower() == 'true'
511
+
512
+ # Return as string
513
+ return value_str
514
+
515
+
516
def write_csv_to_sheet_tab(gspread_client, spreadsheet_id, tab_name, csv_data, creds):
    """
    Write CSV data to specified tab starting from A1.
    Does not clear existing data - new data will overwrite starting from A1.

    Args:
        gspread_client: Authorized gspread client
        spreadsheet_id: ID of the spreadsheet
        tab_name: Name of the tab/worksheet
        csv_data: List of rows (each row is a list of values)
        creds: Credentials used to build the Sheets API service

    Returns:
        bool: True if successful, False if the tab does not exist, there is
        no data, or the write failed (the error is printed).
    """
    def _write_data():
        # Use Sheets API directly for better control over data types
        sheets_service = build('sheets', 'v4', credentials=creds)

        # Confirm the tab exists before writing to it
        spreadsheet = gspread_client.open_by_key(spreadsheet_id)
        try:
            spreadsheet.worksheet(tab_name)
        except gspread.exceptions.WorksheetNotFound:
            print(f" ⚠️ Tab '{tab_name}' not found in spreadsheet")
            return False

        if not csv_data:
            print(f" ⚠️ No data to write for tab '{tab_name}'")
            return False

        # Convert CSV cells to proper types for the Sheets API
        values = [
            [_convert_value_to_proper_type(cell) for cell in row]
            for row in csv_data
        ]

        # Tab names may contain characters such as '-', ':' or spaces, so the
        # sheet name is single-quoted in the A1 range (embedded quotes doubled).
        quoted_tab = "'" + str(tab_name).replace("'", "''") + "'"
        range_name = f"{quoted_tab}!A1"

        sheets_service.spreadsheets().values().update(
            spreadsheetId=spreadsheet_id,
            range=range_name,
            valueInputOption='RAW',  # RAW preserves exact values without interpretation
            body={'values': values}
        ).execute()

        return True

    try:
        return retry_with_exponential_backoff(_write_data)
    except Exception as e:
        print(f" ✗ Error writing data to tab '{tab_name}': {e}")
        return False
577
+
578
+
579
def list_image_files_in_folder(drive_service, folder_id):
    """
    List all image files in a Google Drive folder.

    Matches PNG, JPEG, GIF, BMP, WebP and SVG MIME types and follows
    ``nextPageToken`` so that large folders are listed completely.

    Args:
        drive_service: Google Drive API service instance
        folder_id: ID of the folder to search

    Returns:
        list: List of tuples (file_id, file_name); empty list if the listing
        failed with an HttpError (which is printed).
    """
    image_mime_types = [
        'image/png',
        'image/jpeg',
        'image/jpg',
        'image/gif',
        'image/bmp',
        'image/webp',
        'image/svg+xml'
    ]

    mime_query = " or ".join([f"mimeType='{mime}'" for mime in image_mime_types])
    query = f"'{folder_id}' in parents and trashed=false and ({mime_query})"

    def _list_page(page_token):
        params = {
            'q': query,
            'fields': 'nextPageToken, files(id, name)',
            'pageSize': 1000,
            'supportsAllDrives': True,
            'includeItemsFromAllDrives': True,
        }
        if page_token:
            params['pageToken'] = page_token
        return drive_service.files().list(**params).execute()

    collected = []
    page_token = None
    try:
        while True:
            # Retry each page independently so earlier pages are not refetched.
            results = retry_with_exponential_backoff(lambda: _list_page(page_token))
            collected.extend((f['id'], f['name']) for f in results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break
    except HttpError as error:
        print(f"Error listing image files in folder: {error}")
        return []

    return collected
619
+
620
+
621
def copy_image_to_folder(drive_service, source_file_id, destination_folder_id, file_name):
    """
    Copy image file from source to destination folder, deleting existing if present.

    Args:
        drive_service: Google Drive API service instance
        source_file_id: ID of the source image file
        destination_folder_id: ID of the destination folder
        file_name: Name for the copied file

    Returns:
        str: ID of the copied file, or None if deleting the existing file or
        any Drive request failed.
    """
    # Check if file already exists
    existing_file_id = find_existing_file(drive_service, file_name, destination_folder_id)
    if existing_file_id:
        print(f" Found existing image '{file_name}', deleting...")
        if delete_file(drive_service, existing_file_id):
            print(f" ✓ Deleted existing image")
        else:
            print(f" ✗ Failed to delete existing image")
            return None

    def _copy_file():
        copied_file = drive_service.files().copy(
            fileId=source_file_id,
            body={'name': file_name},
            supportsAllDrives=True
        ).execute()
        return copied_file.get('id')

    try:
        # Each step is retried on its own. Retrying copy + move as one unit
        # would create another copy whenever a later step hit a transient
        # error, leaving orphaned duplicates behind.
        new_file_id = retry_with_exponential_backoff(_copy_file)

        def _get_parents():
            file_metadata = drive_service.files().get(
                fileId=new_file_id,
                fields='parents',
                supportsAllDrives=True
            ).execute()
            return file_metadata.get('parents', [])

        previous_parents = ",".join(retry_with_exponential_backoff(_get_parents))

        # Move the file to the target folder; only detach old parents if any exist.
        move_kwargs = {
            'fileId': new_file_id,
            'addParents': destination_folder_id,
            'fields': 'id, parents',
            'supportsAllDrives': True,
        }
        if previous_parents:
            move_kwargs['removeParents'] = previous_parents

        def _move_to_folder():
            return drive_service.files().update(**move_kwargs).execute()

        retry_with_exponential_backoff(_move_to_folder)
        return new_file_id
    except HttpError as error:
        print(f" ✗ Error copying image '{file_name}': {error}")
        return None
687
+
688
+
689
def process_entity(entity_name, creds, layout: DriveLayout):
    """
    Run the full L0-Data -> L1-Data pipeline for one entity.

    Steps: locate (or create) the entity folder under both the L1 and L0
    roots, clone the data template into the L1 folder, write every L0 CSV
    into the spreadsheet tab named after the file, then copy every L0 image
    into the L1 folder.

    Args:
        entity_name: Name of the entity
        creds: Credentials used for the Drive and Sheets clients
        layout: DriveLayout object containing configuration

    Returns:
        bool: True when the folder and template steps succeeded (individual
        CSV/image failures are only counted and reported), False otherwise.
    """
    print(f"\n{'='*80}")
    print(f"Processing entity: {entity_name}")
    print(f"{'='*80}\n")

    drive_service = build('drive', 'v3', credentials=creds)
    gspread_client = gspread.authorize(creds)

    l1_root_id = layout.l1_data_id
    l0_root_id = layout.l0_data_id
    template_id = layout.data_template_id

    try:
        # Step 1: destination (L1-Data) folder
        print(f"Finding/creating L1-Data folder for {entity_name}...")
        l1_folder_id = find_or_create_entity_folder(drive_service, entity_name, l1_root_id)
        if not l1_folder_id:
            print(f"✗ Failed to find/create L1-Data folder for {entity_name}")
            return False
        print(f"✓ L1-Data folder ID: {l1_folder_id}")

        # Step 2: source (L0-Data) folder
        # NOTE(review): the helper also creates the folder when it is missing.
        print(f"Finding L0-Data folder for {entity_name}...")
        l0_folder_id = find_or_create_entity_folder(drive_service, entity_name, l0_root_id)
        if not l0_folder_id:
            print(f"✗ Failed to find L0-Data folder for {entity_name}")
            return False
        print(f"✓ L0-Data folder ID: {l0_folder_id}")

        # Step 3: always start from a fresh clone of the template
        print(f"Cloning template spreadsheet for {entity_name}...")
        spreadsheet_id = clone_template_to_entity(drive_service, template_id, entity_name, l1_folder_id)
        if not spreadsheet_id:
            print(f"✗ Failed to clone template spreadsheet for {entity_name}")
            return False
        print(f"✓ Cloned spreadsheet ID: {spreadsheet_id}")

        # Step 4: CSV files -> matching tabs
        print(f"Processing CSV files from L0-Data...")
        csv_files = list_csv_files_in_folder(drive_service, l0_folder_id)
        if csv_files:
            print(f" Found {len(csv_files)} CSV file(s)")
            csv_ok, csv_bad = 0, 0

            for csv_id, csv_name in csv_files:
                print(f" Processing: {csv_name}")
                tab_name = parse_csv_filename(csv_name)

                rows = download_csv_from_drive(drive_service, csv_id)
                if not rows:
                    print(f" ✗ Failed to download CSV file")
                    csv_bad += 1
                    continue

                written = write_csv_to_sheet_tab(gspread_client, spreadsheet_id, tab_name, rows, creds)
                if written:
                    print(f" ✓ Wrote data to tab '{tab_name}'")
                    csv_ok += 1
                else:
                    print(f" ✗ Failed to write data to tab '{tab_name}'")
                    csv_bad += 1

            print(f" CSV processing summary: {csv_ok} succeeded, {csv_bad} failed")
        else:
            print(f" ⚠️ No CSV files found in L0-Data folder for {entity_name}")

        # Step 5: images (existing copies are replaced)
        print(f"Copying image files from L0-Data to L1-Data...")
        image_files = list_image_files_in_folder(drive_service, l0_folder_id)
        if image_files:
            print(f" Found {len(image_files)} image file(s)")
            img_ok, img_bad = 0, 0

            for image_id, image_name in image_files:
                print(f" Copying: {image_name}")
                copied_id = copy_image_to_folder(drive_service, image_id, l1_folder_id, image_name)
                if copied_id:
                    print(f" ✓ Copied image '{image_name}'")
                    img_ok += 1
                else:
                    print(f" ✗ Failed to copy image '{image_name}'")
                    img_bad += 1

            print(f" Image copying summary: {img_ok} succeeded, {img_bad} failed")
        else:
            print(f" ⚠️ No image files found in L0-Data folder for {entity_name}")

        print(f"\n✓ Successfully processed entity: {entity_name}")
        return True

    except Exception as e:
        print(f"\n✗ Error processing entity '{entity_name}': {e}")
        import traceback
        traceback.print_exc()
        return False
802
+
803
+
804
def generate_data(creds=None, layout: DriveLayout = None):
    """
    Generate L1-Data from L0-Data for every entity returned by ``load_entities``.

    Each entity is processed in turn with a short pause between entities, and
    a summary of successes and failures is printed at the end.

    Args:
        creds: Google credentials. If None, obtained via ``get_oauth_credentials()``.
        layout: DriveLayout object containing configuration. Required.

    Returns:
        dict: Dictionary with 'successful' and 'failed' lists of entity names
        (both empty when the layout has no entities CSV or no entity is selected).

    Raises:
        ValueError: If layout is not provided
        Exception: Errors from credential lookup or entity loading propagate
            unchanged (their exact types are defined outside this function).
    """
    if layout is None:
        raise ValueError("layout (DriveLayout) is required. Pass it as a parameter.")

    if creds is None:
        creds = get_oauth_credentials()

    # Without an entities CSV there is nothing to process.
    if not layout.entities_csv_id:
        print("\n✗ No entities CSV ID found in layout.")
        return {'successful': [], 'failed': []}

    entities = load_entities(layout.entities_csv_id, creds)
    print(f"✓ Loaded {len(entities)} entities with generate=Y from entities.csv")
    if not entities:
        print("\n✗ No entities marked with generate=Y in entities.csv.")
        return {'successful': [], 'failed': []}

    total = len(entities)
    print(f"\n✓ Processing {total} entities")
    print(f" Entities: {', '.join(entities)}\n")

    successful = []
    failed = []

    position = 0
    for entity in entities:
        position += 1
        print(f"\n[{position}/{total}] Processing entity: {entity}")
        bucket = successful if process_entity(entity, creds, layout) else failed
        bucket.append(entity)

        # Brief pause between entities (not after the last) to ease rate limits
        if position < total:
            time.sleep(0.5)

    # Print summary
    rule = '=' * 80
    print(f"\n{rule}")
    print("PROCESSING SUMMARY")
    print(f"{rule}")
    print(f"Total entities: {total}")
    print(f"Successful: {len(successful)}")
    print(f"Failed: {len(failed)}")
    print()

    if successful:
        print("Successfully processed entities:")
        for name in successful:
            print(f" ✓ {name}")
        print()

    if failed:
        print("Failed entities:")
        for name in failed:
            print(f" ✗ {name}")
        print()

    print(rule)

    return {'successful': successful, 'failed': failed}
879
+
880
+
881
def main():
    """
    Main function to process entities (CLI entry point).

    Parses ``--shared-drive-url`` (required) and
    ``--service-account-credentials`` (optional), authenticates, resolves the
    drive layout and runs ``generate_data``. Errors are printed rather than
    raised.
    """
    parser = argparse.ArgumentParser(
        description='Generate L1-Data from L0-Data for entities with generate=Y in entities.csv'
    )
    parser.add_argument(
        '--shared-drive-url',
        required=True,
        help='Shared Drive root URL or ID that contains L0/L1 data and templates.',
    )
    parser.add_argument(
        '--service-account-credentials',
        default=None,
        help='Path to the service account JSON key file.',
    )
    args = parser.parse_args()

    print("Google Slide Automator")
    print("=" * 80)

    try:
        # Get credentials
        print("Authenticating...")
        creds = get_oauth_credentials(service_account_credentials=args.service_account_credentials)

        layout = resolve_layout(args.shared_drive_url, creds)

        # Call the main function
        generate_data(
            creds=creds,
            layout=layout
        )

    except ValueError as e:
        print(f"\nError: {e}")
    except FileNotFoundError as e:
        print(f"\nError: {e}")
        if "credentials file" in str(e):
            print("\nTo set up service account credentials:")
            print("1. Go to Google Cloud Console (https://console.cloud.google.com/)")
            print("2. Create a new project or select an existing one")
            print("3. Enable Google Sheets API and Google Drive API")
            print("4. Go to 'Credentials' → 'Create Credentials' → 'Service account'")
            print("5. Create a service account and download the JSON key file")
            # Absolute import, consistent with the module-level imports: a
            # relative import raises ImportError when this file is executed
            # directly as a script, which would hide these instructions.
            from gslides_automator.auth import PROJECT_ROOT as AUTH_PROJECT_ROOT
            print(f"6. Save the JSON key file as 'service-account-credentials.json' in: {AUTH_PROJECT_ROOT}")
    except Exception as e:
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
933
+
934
+
935
# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
937
+