gslides-automator 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gslides_automator/__init__.py +43 -0
- gslides_automator/__main__.py +7 -0
- gslides_automator/auth.py +103 -0
- gslides_automator/cli.py +96 -0
- gslides_automator/drive_layout.py +233 -0
- gslides_automator/generate_data.py +937 -0
- gslides_automator/generate_report.py +2761 -0
- gslides_automator-0.4.0.dist-info/METADATA +131 -0
- gslides_automator-0.4.0.dist-info/RECORD +13 -0
- gslides_automator-0.4.0.dist-info/WHEEL +5 -0
- gslides_automator-0.4.0.dist-info/entry_points.txt +2 -0
- gslides_automator-0.4.0.dist-info/licenses/LICENSE.txt +21 -0
- gslides_automator-0.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,937 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
"""
|
|
4
|
+
Script to generate L1-Data from L0-Data for entities in Tamil Nadu.
|
|
5
|
+
Reads CSV files and images from L0-Data folder, clones entity data templates,
|
|
6
|
+
populates Google Sheets tabs with CSV data, and copies images to L1-Data folder.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from googleapiclient.discovery import build
|
|
10
|
+
from googleapiclient.errors import HttpError
|
|
11
|
+
from googleapiclient.http import MediaIoBaseDownload
|
|
12
|
+
import gspread
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
import time
|
|
16
|
+
import csv
|
|
17
|
+
import io
|
|
18
|
+
import argparse
|
|
19
|
+
import re
|
|
20
|
+
|
|
21
|
+
# Add project root to path to import auth module
|
|
22
|
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
23
|
+
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
|
|
24
|
+
sys.path.insert(0, PROJECT_ROOT)
|
|
25
|
+
|
|
26
|
+
from gslides_automator.drive_layout import load_entities, resolve_layout, DriveLayout
|
|
27
|
+
from gslides_automator.auth import get_oauth_credentials
|
|
28
|
+
|
|
29
|
+
def retry_with_exponential_backoff(func, max_retries=5, initial_delay=1, max_delay=60, backoff_factor=2):
    """
    Call ``func`` and re-run it with exponentially growing pauses on throttling errors.

    An ``HttpError`` is retried when its status is 429 or lies in the 5xx range.
    Any other exception is retried only when its lower-cased message contains
    ``429``, ``rate limit`` or ``quota`` (heuristic for gspread rate limits).
    Everything else is re-raised immediately.

    Args:
        func: Zero-argument callable to invoke.
        max_retries: Number of retries allowed after the first attempt (default: 5)
        initial_delay: Seconds to wait before the first retry (default: 1)
        max_delay: Upper bound in seconds for a single wait (default: 60)
        backoff_factor: Multiplier applied to the delay after each retry (default: 2)

    Returns:
        The return value of the first successful ``func()`` call

    Raises:
        HttpError: If the error is not retryable or the retries are used up
        Exception: Any other exception raised by ``func`` under the same rules
    """
    delay = initial_delay

    for attempt in range(max_retries + 1):
        can_retry = attempt < max_retries
        try:
            return func()
        except HttpError as error:
            status = error.resp.status
            # Only 429 and 5xx responses are worth another attempt.
            if status != 429 and not (500 <= status < 600):
                raise
            label = "Rate limit exceeded (429)" if status == 429 else f"Server error ({status})"
            if not can_retry:
                print(f" ✗ {label}. Max retries ({max_retries}) reached.")
                raise
        except Exception as exc:
            # Non-HttpError failures are matched on their message text.
            message = str(exc).lower()
            if not any(marker in message for marker in ('429', 'rate limit', 'quota')):
                raise
            label = "Rate limit error"
            if not can_retry:
                print(f" ✗ {label}. Max retries ({max_retries}) reached.")
                raise

        # Reached only after a retryable failure with attempts remaining.
        wait_time = min(delay, max_delay)
        print(f" ⚠️ {label}. Retrying in {wait_time:.1f} seconds... (attempt {attempt + 1}/{max_retries})")
        time.sleep(wait_time)
        delay *= backoff_factor
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def read_entities_from_csv(csv_path):
    """
    Read entity names from the first column of a CSV file.

    Blank rows and rows whose first cell is empty after stripping are skipped.
    If the first remaining value is one of ``entity``, ``entities``, ``name``
    or ``names`` (case-insensitive) it is treated as a header and dropped.

    Args:
        csv_path: Path to the CSV file

    Returns:
        list: List of entity names (strings), or empty list if the file
        could not be read
    """
    header_names = ('entity', 'entities', 'name', 'names')

    try:
        # 'utf-8-sig' drops an optional byte-order mark so it cannot end up
        # glued to the first cell (which would defeat the header check below).
        # newline='' lets the csv module handle line endings itself.
        with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
            entities = [
                row[0].strip()
                for row in csv.reader(f)
                if row and row[0].strip()
            ]
    except Exception as e:
        # Best-effort reader: report the problem and hand back an empty list.
        print(f"Error reading entities from CSV file: {e}")
        import traceback
        traceback.print_exc()
        return []

    # Remove header if it exists
    if entities and entities[0].lower() in header_names:
        entities = entities[1:]

    return entities
|
|
126
|
+
|
|
127
|
+
def find_existing_file(drive_service, file_name, folder_id):
    """
    Check if a file with the given name exists in the specified folder.

    Args:
        drive_service: Google Drive API service instance
        file_name: Name of the file to search for
        folder_id: ID of the folder to search in

    Returns:
        str: ID of the first matching file, or None if nothing matched or
        the search failed with an HttpError
    """
    # Drive query values are delimited by single quotes, so backslashes and
    # quotes inside the name must be backslash-escaped or the query is invalid.
    escaped_name = str(file_name).replace("\\", "\\\\").replace("'", "\\'")
    query = f"name='{escaped_name}' and '{folder_id}' in parents and trashed=false"

    def _find():
        results = drive_service.files().list(
            q=query,
            fields="files(id, name)",
            supportsAllDrives=True,
            includeItemsFromAllDrives=True
        ).execute()
        files = results.get('files', [])
        return files[0]['id'] if files else None

    try:
        return retry_with_exponential_backoff(_find)
    except HttpError as error:
        print(f"Error searching for existing file '{file_name}': {error}")
        return None
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _report_drive_access_problem(headline):
    """Print ``headline`` followed by advice on sharing the file with the service account."""
    try:
        from .auth import get_service_account_email
        service_account_email = get_service_account_email()
    except Exception:
        # The address is only a convenience; fall back to generic advice.
        print(headline)
        print(f" Please ensure the file is shared with your service account with 'Editor' permissions.")
        return
    print(headline)
    print(f" Service account email: {service_account_email}")
    print(f" Please ensure the file is shared with this service account with 'Editor' permissions.")


def delete_file(drive_service, file_id):
    """
    Delete a file from Google Drive.

    The file's metadata is fetched first to confirm it is reachable and to
    obtain its name for messages; both the lookup and the delete go through
    ``retry_with_exponential_backoff``.

    Args:
        drive_service: Google Drive API service instance
        file_id: ID of the file to delete

    Returns:
        bool: True if successful, False otherwise
    """
    def _get_metadata():
        return drive_service.files().get(
            fileId=file_id,
            fields='id, name',
            supportsAllDrives=True
        ).execute()

    # First, check if the file exists and is accessible
    try:
        file_metadata = retry_with_exponential_backoff(_get_metadata)
    except HttpError as check_error:
        if check_error.resp.status == 404:
            _report_drive_access_problem(" ⚠️ File not found or not accessible to service account.")
        else:
            print(f" ⚠️ Error checking file access: {check_error}")
        return False
    file_name = file_metadata.get('name', 'Unknown')

    def _delete():
        drive_service.files().delete(
            fileId=file_id,
            supportsAllDrives=True
        ).execute()
        return True

    try:
        return retry_with_exponential_backoff(_delete)
    except HttpError as error:
        status = error.resp.status
        if status == 404:
            _report_drive_access_problem(
                f" ⚠️ Error deleting file '{file_name}': File not found or not accessible."
            )
        elif status == 403:
            _report_drive_access_problem(
                f" ⚠️ Error deleting file '{file_name}': Permission denied."
            )
        else:
            print(f" ⚠️ Error deleting file '{file_name}': {error}")
        return False
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def find_or_create_entity_folder(drive_service, entity_name, parent_folder_id):
    """
    Find entity subfolder in parent folder, create if doesn't exist.

    Args:
        drive_service: Google Drive API service instance
        entity_name: Name of the entity (folder name)
        parent_folder_id: ID of the parent folder

    Returns:
        str: Folder ID, or None if the lookup or creation failed with an HttpError
    """
    folder_mime = 'application/vnd.google-apps.folder'
    # Escape backslashes and single quotes so names containing them do not
    # terminate the quoted value inside the Drive query string.
    escaped_name = str(entity_name).replace("\\", "\\\\").replace("'", "\\'")
    query = (
        f"mimeType='{folder_mime}' and name='{escaped_name}' "
        f"and '{parent_folder_id}' in parents and trashed=false"
    )

    def _find_folder():
        results = drive_service.files().list(
            q=query,
            fields='files(id, name)',
            supportsAllDrives=True,
            includeItemsFromAllDrives=True
        ).execute()
        files = results.get('files', [])
        return files[0]['id'] if files else None

    def _create_folder():
        file_metadata = {
            'name': entity_name,
            'mimeType': folder_mime,
            'parents': [parent_folder_id]
        }
        folder = drive_service.files().create(
            body=file_metadata,
            fields='id',
            supportsAllDrives=True
        ).execute()
        return folder.get('id')

    try:
        # Try to find existing folder; create a new one only if none matched
        folder_id = retry_with_exponential_backoff(_find_folder)
        if folder_id:
            return folder_id
        return retry_with_exponential_backoff(_create_folder)
    except HttpError as error:
        print(f"Error finding/creating entity folder '{entity_name}': {error}")
        return None
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def clone_template_to_entity(drive_service, template_id, entity_name, folder_id):
    """
    Clone template spreadsheet to entity folder, deleting existing if present.

    The copy is created directly inside ``folder_id`` by passing ``parents``
    in the copy request. Keeping it to a single request means a retry can
    never leave an extra, already-created copy behind, which could happen
    when copy and move were retried together.

    Args:
        drive_service: Google Drive API service instance
        template_id: ID of the template spreadsheet
        entity_name: Name of the entity (file name)
        folder_id: ID of the folder to place the file in

    Returns:
        str: ID of the copied file, or None if failed
    """
    file_name = f"{entity_name}"

    # Remove a previous spreadsheet of the same name; give up if that fails
    existing_file_id = find_existing_file(drive_service, file_name, folder_id)
    if existing_file_id:
        print(f" Found existing spreadsheet, deleting...")
        if not delete_file(drive_service, existing_file_id):
            print(f" ✗ Failed to delete existing spreadsheet")
            return None
        print(f" ✓ Deleted existing spreadsheet")

    def _copy_template():
        copied_file = drive_service.files().copy(
            fileId=template_id,
            body={'name': file_name, 'parents': [folder_id]},
            fields='id',
            supportsAllDrives=True
        ).execute()
        return copied_file.get('id')

    try:
        return retry_with_exponential_backoff(_copy_template)
    except HttpError as error:
        status = error.resp.status
        if status == 404:
            print(f"Error: Template file not found (404). The file may have been deleted or you don't have access.")
        elif status == 403:
            print(f"Error: Permission denied (403). You may not have permission to copy this file.")
        else:
            print(f"Error copying template: {error}")
        return None
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def list_csv_files_in_folder(drive_service, folder_id):
    """
    List all CSV files in a Google Drive folder.

    Follows ``nextPageToken`` until the listing is exhausted, so folders
    whose results span several pages are returned completely.

    Args:
        drive_service: Google Drive API service instance
        folder_id: ID of the folder to search

    Returns:
        list: List of tuples (file_id, file_name); empty list if the
        listing failed with an HttpError
    """
    query = f"mimeType='text/csv' and '{folder_id}' in parents and trashed=false"

    def _list_page(page_token):
        params = {
            'q': query,
            'fields': 'nextPageToken, files(id, name)',
            'pageSize': 1000,
            'supportsAllDrives': True,
            'includeItemsFromAllDrives': True,
        }
        if page_token:
            params['pageToken'] = page_token
        return drive_service.files().list(**params).execute()

    collected = []
    page_token = None
    try:
        while True:
            # Each page is retried on its own so earlier pages are not refetched.
            page = retry_with_exponential_backoff(lambda token=page_token: _list_page(token))
            collected.extend((f['id'], f['name']) for f in page.get('files', []))
            page_token = page.get('nextPageToken')
            if not page_token:
                return collected
    except HttpError as error:
        print(f"Error listing CSV files in folder: {error}")
        return []
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def download_csv_from_drive(drive_service, file_id):
    """
    Download CSV file content from Google Drive.

    The bytes are decoded as UTF-8 (an optional byte-order mark is dropped),
    parsed with ``csv.reader`` and every row is padded with empty strings to
    the width of the widest row.

    Args:
        drive_service: Google Drive API service instance
        file_id: ID of the CSV file

    Returns:
        list: List of rows (each row is a list of values), or None if the
        download failed or the content could not be decoded/parsed
    """
    def _download():
        # supportsAllDrives matches every other Drive call in this module.
        request = drive_service.files().get_media(fileId=file_id, supportsAllDrives=True)
        file_content = io.BytesIO()
        downloader = MediaIoBaseDownload(file_content, request)
        done = False
        while not done:
            _progress, done = downloader.next_chunk()
        return file_content.getvalue()

    try:
        raw_bytes = retry_with_exponential_backoff(_download)
    except HttpError as error:
        print(f"Error downloading CSV file: {error}")
        return None

    # Parsing happens outside the retried call: a malformed file will not get
    # better on a second download, and it should fail only this one file.
    try:
        content_str = raw_bytes.decode('utf-8-sig')
        rows = list(csv.reader(io.StringIO(content_str), quoting=csv.QUOTE_MINIMAL))
    except (UnicodeDecodeError, csv.Error) as error:
        print(f"Error parsing CSV file: {error}")
        return None

    if not rows:
        return rows

    # Pad rows to have the same number of columns
    max_cols = max(len(row) for row in rows)
    return [row + [''] * (max_cols - len(row)) for row in rows]
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def parse_csv_filename(filename):
    """
    Derive the sheet tab name from a CSV file name.

    Example: s25-chart:pass_percentage.csv -> s25-chart:pass_percentage

    Args:
        filename: CSV filename

    Returns:
        str: The name with a trailing ``.csv`` removed; names without that
        exact (lower-case) suffix are returned unchanged
    """
    suffix = '.csv'
    has_suffix = filename.endswith(suffix)
    return filename[:-len(suffix)] if has_suffix else filename
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def find_existing_spreadsheet(drive_service, entity_name, folder_id):
    """
    Look up an entity's spreadsheet in a folder without creating anything.

    The spreadsheet is expected to be named exactly like the entity; the
    search itself is delegated to ``find_existing_file``.

    Args:
        drive_service: Google Drive API service instance
        entity_name: Name of the entity (file name)
        folder_id: ID of the folder to search in

    Returns:
        str: Spreadsheet ID if found, None otherwise
    """
    return find_existing_file(drive_service, f"{entity_name}", folder_id)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def _column_number_to_letter(n):
|
|
465
|
+
"""
|
|
466
|
+
Convert a column number (1-based) to Excel column letter (A, B, ..., Z, AA, AB, ...).
|
|
467
|
+
|
|
468
|
+
Args:
|
|
469
|
+
n: Column number (1-based)
|
|
470
|
+
|
|
471
|
+
Returns:
|
|
472
|
+
str: Column letter(s)
|
|
473
|
+
"""
|
|
474
|
+
result = ""
|
|
475
|
+
while n > 0:
|
|
476
|
+
n -= 1
|
|
477
|
+
result = chr(65 + (n % 26)) + result
|
|
478
|
+
n //= 26
|
|
479
|
+
return result
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _convert_value_to_proper_type(value):
|
|
483
|
+
"""
|
|
484
|
+
Convert a CSV string value to its proper type (number, boolean, or string).
|
|
485
|
+
This prevents Google Sheets from adding apostrophes.
|
|
486
|
+
|
|
487
|
+
Args:
|
|
488
|
+
value: String value from CSV
|
|
489
|
+
|
|
490
|
+
Returns:
|
|
491
|
+
Value converted to appropriate type (int, float, bool, or str)
|
|
492
|
+
"""
|
|
493
|
+
if value is None or value == '':
|
|
494
|
+
return ''
|
|
495
|
+
|
|
496
|
+
value_str = str(value).strip()
|
|
497
|
+
|
|
498
|
+
# Try to convert to number
|
|
499
|
+
try:
|
|
500
|
+
# Try integer first
|
|
501
|
+
if value_str.isdigit() or (value_str.startswith('-') and value_str[1:].isdigit()):
|
|
502
|
+
return int(value_str)
|
|
503
|
+
# Try float
|
|
504
|
+
return float(value_str)
|
|
505
|
+
except ValueError:
|
|
506
|
+
pass
|
|
507
|
+
|
|
508
|
+
# Try boolean
|
|
509
|
+
if value_str.lower() in ('true', 'false'):
|
|
510
|
+
return value_str.lower() == 'true'
|
|
511
|
+
|
|
512
|
+
# Return as string
|
|
513
|
+
return value_str
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def write_csv_to_sheet_tab(gspread_client, spreadsheet_id, tab_name, csv_data, creds):
    """
    Write CSV data to specified tab starting from A1.
    Does not clear existing data - new data will overwrite starting from A1.

    Args:
        gspread_client: Authorized gspread client
        spreadsheet_id: ID of the spreadsheet
        tab_name: Name of the tab/worksheet
        csv_data: List of rows (each row is a list of values)
        creds: Credentials used to build the Sheets API client

    Returns:
        bool: True if the data was written; False if there was no data, the
        tab does not exist, or any error occurred
    """
    # Nothing to write: bail out before spending any API calls.
    if not csv_data:
        print(f" ⚠️ No data to write for tab '{tab_name}'")
        return False

    # A1 notation needs sheet names with special characters (tab names here
    # contain '-' and ':') wrapped in single quotes, with embedded quotes doubled.
    quoted_tab = "'" + str(tab_name).replace("'", "''") + "'"
    range_name = f"{quoted_tab}!A1"

    def _write_data():
        # Confirm the tab exists so a missing tab is reported clearly
        spreadsheet = gspread_client.open_by_key(spreadsheet_id)
        try:
            spreadsheet.worksheet(tab_name)
        except gspread.exceptions.WorksheetNotFound:
            print(f" ⚠️ Tab '{tab_name}' not found in spreadsheet")
            return False

        sheets_service.spreadsheets().values().update(
            spreadsheetId=spreadsheet_id,
            range=range_name,
            valueInputOption='RAW',  # RAW preserves exact values without interpretation
            body={'values': values}
        ).execute()
        return True

    try:
        # Built once here rather than on every retry attempt.
        # Use Sheets API directly for better control over data types
        sheets_service = build('sheets', 'v4', credentials=creds)
        values = [
            [_convert_value_to_proper_type(cell) for cell in row]
            for row in csv_data
        ]
        return retry_with_exponential_backoff(_write_data)
    except Exception as e:
        print(f" ✗ Error writing data to tab '{tab_name}': {e}")
        return False
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def list_image_files_in_folder(drive_service, folder_id):
    """
    List all image files in a Google Drive folder.

    Matches PNG, JPEG, GIF, BMP, WebP and SVG MIME types and follows
    ``nextPageToken`` so listings that span several pages are complete.

    Args:
        drive_service: Google Drive API service instance
        folder_id: ID of the folder to search

    Returns:
        list: List of tuples (file_id, file_name); empty list if the
        listing failed with an HttpError
    """
    image_mime_types = [
        'image/png',
        'image/jpeg',
        'image/jpg',
        'image/gif',
        'image/bmp',
        'image/webp',
        'image/svg+xml'
    ]
    mime_query = " or ".join(f"mimeType='{mime}'" for mime in image_mime_types)
    query = f"'{folder_id}' in parents and trashed=false and ({mime_query})"

    def _list_page(page_token):
        params = {
            'q': query,
            'fields': 'nextPageToken, files(id, name)',
            'pageSize': 1000,
            'supportsAllDrives': True,
            'includeItemsFromAllDrives': True,
        }
        if page_token:
            params['pageToken'] = page_token
        return drive_service.files().list(**params).execute()

    collected = []
    page_token = None
    try:
        while True:
            # Each page is retried on its own so earlier pages are not refetched.
            page = retry_with_exponential_backoff(lambda token=page_token: _list_page(token))
            collected.extend((f['id'], f['name']) for f in page.get('files', []))
            page_token = page.get('nextPageToken')
            if not page_token:
                return collected
    except HttpError as error:
        print(f"Error listing image files in folder: {error}")
        return []
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def copy_image_to_folder(drive_service, source_file_id, destination_folder_id, file_name):
    """
    Copy image file from source to destination folder, deleting existing if present.

    The copy is created directly inside the destination by passing
    ``parents`` in the copy request, so a retry cannot leave an extra copy
    behind the way a retried copy-then-move sequence could.

    Args:
        drive_service: Google Drive API service instance
        source_file_id: ID of the source image file
        destination_folder_id: ID of the destination folder
        file_name: Name for the copied file

    Returns:
        str: ID of the copied file, or None if failed
    """
    # Remove a previous image of the same name; give up if that fails
    existing_file_id = find_existing_file(drive_service, file_name, destination_folder_id)
    if existing_file_id:
        print(f" Found existing image '{file_name}', deleting...")
        if not delete_file(drive_service, existing_file_id):
            print(f" ✗ Failed to delete existing image")
            return None
        print(f" ✓ Deleted existing image")

    def _copy_file():
        copied_file = drive_service.files().copy(
            fileId=source_file_id,
            body={'name': file_name, 'parents': [destination_folder_id]},
            fields='id',
            supportsAllDrives=True
        ).execute()
        return copied_file.get('id')

    try:
        return retry_with_exponential_backoff(_copy_file)
    except HttpError as error:
        print(f" ✗ Error copying image '{file_name}': {error}")
        return None
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def process_entity(entity_name, creds, layout: DriveLayout):
    """
    Build the L1-Data output for one entity from its L0-Data inputs.

    Steps: resolve the entity folder under the L1 and L0 roots (both lookups
    use ``find_or_create_entity_folder``, so a missing folder is created),
    clone the data template into the L1 folder, write every L0 CSV into the
    tab named after the file, then copy every L0 image into the L1 folder.
    Failures of individual CSVs or images are counted and reported but do
    not change the return value.

    Args:
        entity_name: Name of the entity
        creds: Credentials used to build the Drive and gspread clients
        layout: DriveLayout object containing configuration

    Returns:
        bool: False if a folder or the cloned spreadsheet could not be
        obtained or an exception occurred; True otherwise
    """
    print(f"\n{'='*80}")
    print(f"Processing entity: {entity_name}")
    print(f"{'='*80}\n")

    drive_service = build('drive', 'v3', credentials=creds)
    gspread_client = gspread.authorize(creds)

    l1_root_id, l0_root_id, template_id = (
        layout.l1_data_id,
        layout.l0_data_id,
        layout.data_template_id,
    )

    try:
        # 1. Find/create L1-Data entity folder
        print(f"Finding/creating L1-Data folder for {entity_name}...")
        l1_folder_id = find_or_create_entity_folder(drive_service, entity_name, l1_root_id)
        if not l1_folder_id:
            print(f"✗ Failed to find/create L1-Data folder for {entity_name}")
            return False
        print(f"✓ L1-Data folder ID: {l1_folder_id}")

        # 2. Find L0-Data entity folder
        print(f"Finding L0-Data folder for {entity_name}...")
        l0_folder_id = find_or_create_entity_folder(drive_service, entity_name, l0_root_id)
        if not l0_folder_id:
            print(f"✗ Failed to find L0-Data folder for {entity_name}")
            return False
        print(f"✓ L0-Data folder ID: {l0_folder_id}")

        # 3. Always start from a fresh clone of the template
        print(f"Cloning template spreadsheet for {entity_name}...")
        spreadsheet_id = clone_template_to_entity(drive_service, template_id, entity_name, l1_folder_id)
        if not spreadsheet_id:
            print(f"✗ Failed to clone template spreadsheet for {entity_name}")
            return False
        print(f"✓ Cloned spreadsheet ID: {spreadsheet_id}")

        # 4. Write each CSV into the tab that shares its name
        print(f"Processing CSV files from L0-Data...")
        csv_files = list_csv_files_in_folder(drive_service, l0_folder_id)
        if csv_files:
            print(f" Found {len(csv_files)} CSV file(s)")
            csv_success = csv_failed = 0

            for file_id, file_name in csv_files:
                print(f" Processing: {file_name}")
                tab_name = parse_csv_filename(file_name)

                csv_data = download_csv_from_drive(drive_service, file_id)
                if not csv_data:
                    print(f" ✗ Failed to download CSV file")
                    csv_failed += 1
                    continue

                written = write_csv_to_sheet_tab(gspread_client, spreadsheet_id, tab_name, csv_data, creds)
                if written:
                    print(f" ✓ Wrote data to tab '{tab_name}'")
                    csv_success += 1
                else:
                    print(f" ✗ Failed to write data to tab '{tab_name}'")
                    csv_failed += 1

            print(f" CSV processing summary: {csv_success} succeeded, {csv_failed} failed")
        else:
            print(f" ⚠️ No CSV files found in L0-Data folder for {entity_name}")

        # 5. Copy image files (an existing copy of the same name is replaced)
        print(f"Copying image files from L0-Data to L1-Data...")
        image_files = list_image_files_in_folder(drive_service, l0_folder_id)
        if image_files:
            print(f" Found {len(image_files)} image file(s)")
            image_success = image_failed = 0

            for file_id, file_name in image_files:
                print(f" Copying: {file_name}")
                new_file_id = copy_image_to_folder(drive_service, file_id, l1_folder_id, file_name)
                if new_file_id:
                    print(f" ✓ Copied image '{file_name}'")
                    image_success += 1
                else:
                    print(f" ✗ Failed to copy image '{file_name}'")
                    image_failed += 1

            print(f" Image copying summary: {image_success} succeeded, {image_failed} failed")
        else:
            print(f" ⚠️ No image files found in L0-Data folder for {entity_name}")

        print(f"\n✓ Successfully processed entity: {entity_name}")
        return True

    except Exception as e:
        print(f"\n✗ Error processing entity '{entity_name}': {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def generate_data(creds=None, layout: DriveLayout = None):
    """
    Build L1-Data from L0-Data for every entity returned by load_entities().

    Each entity is handed to process_entity() in turn; a truthy result files
    the entity under 'successful', anything else under 'failed'. A short pause
    separates consecutive entities, and a summary is printed at the end.

    Args:
        creds: Google credentials. When None, get_oauth_credentials() is
            called to obtain them.
        layout: DriveLayout describing the drive configuration. Required;
            its entities_csv_id is used to load the entity names.

    Returns:
        dict: {'successful': [...], 'failed': [...]} lists of entity names.
        Both lists are empty when the layout has no entities CSV ID or when
        no entities were loaded.

    Raises:
        ValueError: If layout is None.
    """
    if layout is None:
        raise ValueError("layout (DriveLayout) is required. Pass it as a parameter.")

    if creds is None:
        creds = get_oauth_credentials()

    # Guard: without an entities CSV there is nothing to process.
    if not layout.entities_csv_id:
        print("\n✗ No entities CSV ID found in layout.")
        return {'successful': [], 'failed': []}

    names = load_entities(layout.entities_csv_id, creds)
    total = len(names)
    print(f"✓ Loaded {total} entities with generate=Y from entities.csv")
    if not names:
        print("\n✗ No entities marked with generate=Y in entities.csv.")
        return {'successful': [], 'failed': []}

    print(f"\n✓ Processing {total} entities")
    print(f" Entities: {', '.join(names)}\n")

    # Outcome buckets, keyed exactly as in the returned dictionary.
    outcome = {'successful': [], 'failed': []}

    for position, name in enumerate(names, start=1):
        print(f"\n[{position}/{total}] Processing entity: {name}")
        bucket = 'successful' if process_entity(name, creds, layout) else 'failed'
        outcome[bucket].append(name)

        # Pause between entities (not after the last one) to avoid rate limits.
        if position != total:
            time.sleep(0.5)

    # Summary report
    rule = '=' * 80
    print(f"\n{rule}")
    print("PROCESSING SUMMARY")
    print(f"{rule}")
    print(f"Total entities: {total}")
    print(f"Successful: {len(outcome['successful'])}")
    print(f"Failed: {len(outcome['failed'])}")
    print()

    sections = (
        ("Successfully processed entities:", "✓", outcome['successful']),
        ("Failed entities:", "✗", outcome['failed']),
    )
    for heading, marker, members in sections:
        if not members:
            continue
        print(heading)
        for member in members:
            print(f" {marker} {member}")
        print()

    print(rule)

    return outcome
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
def main():
    """
    Main function to process entities (CLI entry point).

    Parses the command line, authenticates, resolves the drive layout and
    runs generate_data().

    Returns:
        int: Process exit status. 0 when every entity was processed
        successfully; 1 when authentication, layout resolution or processing
        raised an error, or when at least one entity is reported as failed.
    """
    parser = argparse.ArgumentParser(
        description='Generate L1-Data from L0-Data for entities with generate=Y in entities.csv'
    )
    parser.add_argument(
        '--shared-drive-url',
        required=True,
        help='Shared Drive root URL or ID that contains L0/L1 data and templates.',
    )
    parser.add_argument(
        '--service-account-credentials',
        default=None,
        help='Path to the service account JSON key file.',
    )
    args = parser.parse_args()

    print("Google Slide Automator")
    print("=" * 80)

    try:
        # Get credentials
        print("Authenticating...")
        creds = get_oauth_credentials(service_account_credentials=args.service_account_credentials)

        layout = resolve_layout(args.shared_drive_url, creds)

        # Call the main function
        result = generate_data(
            creds=creds,
            layout=layout
        )

    except ValueError as e:
        print(f"\nError: {e}")
        return 1
    except FileNotFoundError as e:
        print(f"\nError: {e}")
        if "credentials file" in str(e):
            print("\nTo set up service account credentials:")
            print("1. Go to Google Cloud Console (https://console.cloud.google.com/)")
            print("2. Create a new project or select an existing one")
            print("3. Enable Google Sheets API and Google Drive API")
            print("4. Go to 'Credentials' → 'Create Credentials' → 'Service account'")
            print("5. Create a service account and download the JSON key file")
            # Absolute import (matching the module's other package imports):
            # a relative import fails with ImportError when this file is run
            # directly as a script, which would mask the original error.
            from gslides_automator.auth import PROJECT_ROOT as AUTH_PROJECT_ROOT
            print(f"6. Save the JSON key file as 'service-account-credentials.json' in: {AUTH_PROJECT_ROOT}")
        return 1
    except Exception as e:
        # Top-level boundary: report the error with a traceback and signal
        # failure through the exit status instead of exiting with 0.
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
        return 1

    # A run in which any entity failed is reported as a failure to the shell.
    return 1 if result.get('failed') else 0


if __name__ == "__main__":
    # Propagate main()'s status so shells and CI can detect failures.
    sys.exit(main())
|
|
937
|
+
|