edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -280,6 +280,18 @@ class Scenario(Base, UserDict):
280
280
 
281
281
  target = self if inplace else Scenario()
282
282
 
283
+ # First check if this Scenario itself has a base64_string (e.g., from FileStore.to_dict())
284
+ if "base64_string" in self and isinstance(self.get("base64_string"), str):
285
+ # This is likely a Scenario created from FileStore.to_dict()
286
+ if inplace:
287
+ self["base64_string"] = "offloaded"
288
+ else:
289
+ # Copy all keys to target
290
+ for k, v in self.items():
291
+ target[k] = v
292
+ target["base64_string"] = "offloaded"
293
+ return target
294
+
283
295
  for key, value in self.items():
284
296
  if isinstance(value, FileStore):
285
297
  file_store_dict = value.to_dict()
@@ -297,6 +309,227 @@ class Scenario(Base, UserDict):
297
309
 
298
310
  return target
299
311
 
312
def save_to_gcs_bucket(self, signed_url_or_dict) -> dict:
    """
    Upload the file content held by this Scenario to a Google Cloud Storage bucket.

    Two layouts are handled:

    1. The Scenario has a top-level ``base64_string`` str (e.g. it was built
       from ``FileStore.to_dict()``): the decoded bytes are sent with a single
       HTTP PUT to the signed URL.
    2. Otherwise, every value that is a ``FileStore`` is uploaded through its
       own ``save_to_gcs_bucket`` method, one signed URL per scenario key.

    Args:
        signed_url_or_dict: Either:
            - str: Single signed URL (layout 1, or layout 2 with exactly one
              FileStore value)
            - dict: Mapping of scenario keys to signed URLs for multiple
              FileStore objects, e.g.
              {"video": "signed_url_1", "image": "signed_url_2"}.
              Its keys must match the FileStore keys exactly.

    Returns:
        dict: Summary of upload operations performed, with keys
        ``total_uploads``, ``successful_uploads``, ``failed_uploads`` and
        ``upload_details`` (one entry per attempted upload).

    Raises:
        ValueError: If the content is offloaded, cannot be base64-decoded, no
            FileStore values exist, or ``signed_url_or_dict`` has the wrong
            type or the wrong set of keys.
        requests.RequestException: Layout 1 only, when the PUT fails or the
            server answers with an error status. In layout 2 a failing upload
            is NOT raised; it is recorded in ``upload_details`` with
            ``"status": "error"``.
    """
    import base64

    upload_results = []

    # Case 1: This Scenario was created from a FileStore (has direct base64_string)
    if "base64_string" in self and isinstance(self.get("base64_string"), str):
        encoded_content = self["base64_string"]
        if encoded_content == "offloaded":
            raise ValueError("File content is offloaded. Cannot upload to GCS.")

        # For single FileStore scenario, expect string URL
        if isinstance(signed_url_or_dict, dict):
            raise ValueError(
                "For Scenario created from FileStore, provide a single signed URL string, not a dictionary."
            )
        # Reject None / numbers / lists here, as the FileStore-values branch
        # does, instead of letting the HTTP client fail with an unrelated error.
        if not isinstance(signed_url_or_dict, str):
            raise ValueError(
                "For Scenario created from FileStore, signed_url_or_dict must be a signed URL string."
            )

        signed_url = signed_url_or_dict

        # Get file info from Scenario keys
        mime_type = self.get("mime_type", "application/octet-stream")
        suffix = self.get("suffix", "")

        try:
            file_content = base64.b64decode(encoded_content)
        except (ValueError, TypeError) as e:
            # binascii.Error (bad padding etc.) is a ValueError subclass.
            # Chain the cause so the original decoding error stays visible.
            raise ValueError(f"Failed to decode base64 content: {e}") from e

        # Imported only once all validation has passed, so bad input fails
        # fast without needing the HTTP client.
        import requests

        headers = {
            "Content-Type": mime_type,
            "Content-Length": str(len(file_content)),
        }

        # NOTE(review): no timeout is passed, so this call can block for as
        # long as the server keeps the connection open.
        response = requests.put(signed_url, data=file_content, headers=headers)
        response.raise_for_status()

        upload_results.append(
            {
                "type": "scenario_filestore_content",
                "status": "success",
                "status_code": response.status_code,
                "file_size": len(file_content),
                "mime_type": mime_type,
                "file_extension": suffix,
            }
        )

    # Case 2: Find FileStore objects in Scenario values
    else:
        # Only this branch needs the FileStore type.
        from edsl.scenarios import FileStore

        filestores = {
            key: value for key, value in self.items() if isinstance(value, FileStore)
        }
        filestore_keys = list(filestores)

        if not filestore_keys:
            raise ValueError("No FileStore objects found in Scenario to upload.")

        if isinstance(signed_url_or_dict, str):
            # A single URL shared by several FileStore objects would make the
            # uploads overwrite each other, so refuse it.
            if len(filestore_keys) > 1:
                raise ValueError(
                    f"Multiple FileStore objects found ({filestore_keys}) but only one signed URL provided. "
                    f"Provide a dictionary mapping keys to URLs to avoid overwrites: "
                    f"{{'{filestore_keys[0]}': 'url1', '{filestore_keys[1]}': 'url2', ...}}"
                )

            # Single FileStore object, single URL is fine
            url_mapping = {filestore_keys[0]: signed_url_or_dict}

        elif isinstance(signed_url_or_dict, dict):
            # The mapping must cover exactly the FileStore keys.
            missing_keys = set(filestore_keys) - set(signed_url_or_dict.keys())
            if missing_keys:
                raise ValueError(
                    f"Missing signed URLs for FileStore keys: {list(missing_keys)}"
                )

            extra_keys = set(signed_url_or_dict.keys()) - set(filestore_keys)
            if extra_keys:
                raise ValueError(
                    f"Signed URLs provided for non-FileStore keys: {list(extra_keys)}"
                )

            url_mapping = signed_url_or_dict

        else:
            raise ValueError(
                "signed_url_or_dict must be either a string or a dictionary"
            )

        # Upload each FileStore object. This is best-effort: one failing file
        # must not prevent the others from being attempted, so the error is
        # recorded in the summary instead of being raised.
        for key, file_store in filestores.items():
            try:
                result = file_store.save_to_gcs_bucket(url_mapping[key])
                result["scenario_key"] = key
                result["type"] = "filestore_object"
                upload_results.append(result)
            except Exception as e:
                upload_results.append(
                    {
                        "scenario_key": key,
                        "type": "filestore_object",
                        "status": "error",
                        "error": str(e),
                    }
                )

    return {
        "total_uploads": len(upload_results),
        "successful_uploads": len(
            [r for r in upload_results if r.get("status") == "success"]
        ),
        "failed_uploads": len(
            [r for r in upload_results if r.get("status") == "error"]
        ),
        "upload_details": upload_results,
    }
453
+
454
def get_filestore_info(self) -> dict:
    """
    Returns information about FileStore content present in this Scenario.

    This method is useful for determining how many signed URLs need to be
    generated and what file extensions/types are present before calling
    save_to_gcs_bucket().

    A Scenario with a top-level ``base64_string`` str is reported as a single
    file under the synthetic key ``"filestore_content"``. Otherwise one entry
    is reported for each value that is a ``FileStore``.

    Returns:
        dict: Information about FileStore objects containing:
            - total_count: Total number of FileStore objects
            - filestore_keys: List of scenario keys that contain FileStore objects
            - file_extensions: Dictionary mapping keys to file extensions
            - file_types: Dictionary mapping keys to MIME types
            - is_filestore_scenario: Boolean indicating if this Scenario was
              created from a FileStore
            - detailed_info: Dictionary mapping keys to a dict with
              ``extension``, ``mime_type``, ``binary`` and ``path``
            - summary: Human-readable summary of files
    """
    # Check if this Scenario was created from a FileStore
    is_filestore_scenario = "base64_string" in self and isinstance(
        self.get("base64_string"), str
    )

    if is_filestore_scenario:
        suffix = self.get("suffix", "")
        mime_type = self.get("mime_type", "application/octet-stream")
        return {
            "total_count": 1,
            "filestore_keys": ["filestore_content"],
            "file_extensions": {"filestore_content": suffix},
            "file_types": {"filestore_content": mime_type},
            "is_filestore_scenario": True,
            # Same shape as the regular branch, so callers can read
            # "detailed_info" without checking which branch ran.
            # Presumably "binary" and "path" exist on a Scenario built from
            # FileStore.to_dict(); the defaults cover the case where they don't.
            "detailed_info": {
                "filestore_content": {
                    "extension": suffix,
                    "mime_type": mime_type,
                    "binary": self.get("binary", True),
                    "path": self.get("path", "unknown"),
                }
            },
            "summary": f"Single FileStore content with extension '{self.get('suffix', 'unknown')}'",
        }

    # Only the regular branch needs the FileStore type.
    from edsl.scenarios import FileStore

    # Regular Scenario with FileStore objects as values
    filestore_info = {}
    file_extensions = {}
    file_types = {}

    for key, value in self.items():
        if not isinstance(value, FileStore):
            continue
        extension = getattr(value, "suffix", "")
        mime_type = getattr(value, "mime_type", "application/octet-stream")
        filestore_info[key] = {
            "extension": extension,
            "mime_type": mime_type,
            "binary": getattr(value, "binary", True),
            "path": getattr(value, "path", "unknown"),
        }
        file_extensions[key] = extension
        file_types[key] = mime_type

    # Generate summary
    if filestore_info:
        ext_summary = [f"{key}({ext})" for key, ext in file_extensions.items()]
        summary = (
            f"{len(filestore_info)} FileStore objects: {', '.join(ext_summary)}"
        )
    else:
        summary = "No FileStore objects found"

    return {
        "total_count": len(filestore_info),
        "filestore_keys": list(filestore_info.keys()),
        "file_extensions": file_extensions,
        "file_types": file_types,
        "is_filestore_scenario": False,
        "detailed_info": filestore_info,
        "summary": summary,
    }
532
+
300
533
  def to_dict(
301
534
  self, add_edsl_version: bool = True, offload_base64: bool = False
302
535
  ) -> dict: