edsl 0.1.60__py3-none-any.whl → 0.1.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -512,6 +512,75 @@ class FileStore(Scenario):
512
512
  )
513
513
  return info
514
514
 
515
def offload(self, inplace=False) -> "FileStore":
    """
    Swap the base64 payload for the placeholder string 'offloaded'.

    The placeholder takes the place of 'base64_string' so the encoded file
    content no longer has to be carried around with the object.

    Args:
        inplace (bool): When True, overwrite this FileStore's own
            'base64_string'. When False, build a new object from this
            one's dictionary form instead.

    Returns:
        FileStore: ``self`` when ``inplace`` is True, otherwise the instance
        produced by ``from_dict``.
    """
    if not inplace:
        # Edit the serialized form rather than this object's attribute.
        payload = self.to_dict()
        if "base64_string" in payload:
            payload["base64_string"] = "offloaded"
        return self.__class__.from_dict(payload)

    if hasattr(self, "base64_string"):
        self.base64_string = "offloaded"
    return self
536
+
537
def save_to_gcs_bucket(self, signed_url: str, timeout: float = 300.0) -> dict:
    """
    Upload this FileStore's decoded content to a Google Cloud Storage bucket
    using a signed URL.

    Args:
        signed_url (str): The signed URL that accepts an HTTP PUT of the file bytes.
        timeout (float): Seconds handed to ``requests.put`` as its ``timeout``
            argument, so a stalled connection cannot block the caller forever.

    Returns:
        dict: ``status``, ``status_code``, ``file_size``, ``mime_type`` and
        ``file_extension`` describing the completed upload.

    Raises:
        ValueError: If base64_string is missing, offloaded, or cannot be decoded.
        requests.RequestException: If the upload fails or times out.
    """
    import base64

    # Check if content is available
    if not hasattr(self, "base64_string") or self.base64_string == "offloaded":
        raise ValueError(
            "File content is not available (offloaded or missing). Cannot upload to GCS."
        )

    # Decode base64 content to bytes. binascii.Error is a ValueError subclass,
    # so malformed input and wrong argument types are both covered here.
    try:
        file_content = base64.b64decode(self.base64_string)
    except (ValueError, TypeError) as e:
        # Chain the original error so the decoding traceback is preserved.
        raise ValueError(f"Failed to decode base64 content: {e}") from e

    # Prepare headers with proper content type
    headers = {
        "Content-Type": self.mime_type or "application/octet-stream",
        "Content-Length": str(len(file_content)),
    }

    # Imported only once there is something to send, so the validation above
    # does not depend on the HTTP library.
    import requests

    # Upload to GCS using the signed URL
    response = requests.put(
        signed_url, data=file_content, headers=headers, timeout=timeout
    )
    response.raise_for_status()

    return {
        "status": "success",
        "status_code": response.status_code,
        "file_size": len(file_content),
        "mime_type": self.mime_type,
        "file_extension": self.suffix,
    }
583
+
515
584
  @classmethod
516
585
  def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
517
586
  """
@@ -280,6 +280,18 @@ class Scenario(Base, UserDict):
280
280
 
281
281
  target = self if inplace else Scenario()
282
282
 
283
+ # First check if this Scenario itself has a base64_string (e.g., from FileStore.to_dict())
284
+ if "base64_string" in self and isinstance(self.get("base64_string"), str):
285
+ # This is likely a Scenario created from FileStore.to_dict()
286
+ if inplace:
287
+ self["base64_string"] = "offloaded"
288
+ else:
289
+ # Copy all keys to target
290
+ for k, v in self.items():
291
+ target[k] = v
292
+ target["base64_string"] = "offloaded"
293
+ return target
294
+
283
295
  for key, value in self.items():
284
296
  if isinstance(value, FileStore):
285
297
  file_store_dict = value.to_dict()
@@ -297,6 +309,227 @@ class Scenario(Base, UserDict):
297
309
 
298
310
  return target
299
311
 
312
def save_to_gcs_bucket(self, signed_url_or_dict, timeout: float = 300.0) -> dict:
    """
    Saves FileStore content held by this Scenario to a Google Cloud Storage bucket.

    Two layouts are handled:

    * The Scenario has a string 'base64_string' as a top-level key (e.g. it was
      built from FileStore.to_dict()). That content is decoded and uploaded
      directly with a single signed URL.
    * Otherwise every value that is a FileStore is uploaded through its own
      ``save_to_gcs_bucket`` method.

    Args:
        signed_url_or_dict: Either:
            - str: Single signed URL (for a single FileStore or a Scenario
              created from a FileStore)
            - dict: Mapping of scenario keys to signed URLs for multiple
              FileStore objects,
              e.g., {"video": "signed_url_1", "image": "signed_url_2"}
        timeout (float): Seconds passed to ``requests.put`` for the direct
            (top-level 'base64_string') upload so a stalled connection cannot
            block forever. FileStore values are uploaded by their own method
            and are not affected by this argument.

    Returns:
        dict: Summary with 'total_uploads', 'successful_uploads',
        'failed_uploads' and the per-upload 'upload_details' list.

    Raises:
        ValueError: If no uploadable content is found, the content is offloaded
            or cannot be decoded, or the URL argument does not match the
            Scenario's layout.
        requests.RequestException: If the direct upload fails. Failures of
            individual FileStore values are NOT raised; they are recorded in
            'upload_details' with status 'error'.
    """
    import base64

    upload_results = []

    # Case 1: This Scenario was created from a FileStore (has direct base64_string)
    if "base64_string" in self and isinstance(self.get("base64_string"), str):
        if self["base64_string"] == "offloaded":
            raise ValueError("File content is offloaded. Cannot upload to GCS.")

        # For single FileStore scenario, expect string URL
        if isinstance(signed_url_or_dict, dict):
            raise ValueError(
                "For Scenario created from FileStore, provide a single signed URL string, not a dictionary."
            )
        # Reject None/other types here instead of handing them to the HTTP call.
        if not isinstance(signed_url_or_dict, str):
            raise ValueError(
                "signed_url_or_dict must be either a string or a dictionary"
            )

        signed_url = signed_url_or_dict

        # Get file info from Scenario keys. `or` also covers a key that is
        # present but empty/None, which would otherwise become the header value.
        mime_type = self.get("mime_type") or "application/octet-stream"
        suffix = self.get("suffix", "")

        # Decode and upload
        try:
            file_content = base64.b64decode(self["base64_string"])
        except (ValueError, TypeError) as e:
            # Chain the original error so the decoding traceback is preserved.
            raise ValueError(f"Failed to decode base64 content: {e}") from e

        headers = {
            "Content-Type": mime_type,
            "Content-Length": str(len(file_content)),
        }

        # Imported only once there is something to send.
        import requests

        response = requests.put(
            signed_url, data=file_content, headers=headers, timeout=timeout
        )
        response.raise_for_status()

        upload_results.append(
            {
                "type": "scenario_filestore_content",
                "status": "success",
                "status_code": response.status_code,
                "file_size": len(file_content),
                "mime_type": mime_type,
                "file_extension": suffix,
            }
        )

    # Case 2: Find FileStore objects in Scenario values
    else:
        from edsl.scenarios import FileStore

        # Collect all FileStore entries once; reused for validation and upload.
        filestores = {
            key: value for key, value in self.items() if isinstance(value, FileStore)
        }
        filestore_keys = list(filestores)

        if not filestore_keys:
            raise ValueError("No FileStore objects found in Scenario to upload.")

        # Handle URL parameter
        if isinstance(signed_url_or_dict, str):
            # Single URL provided for multiple FileStore objects - this will cause overwrites
            if len(filestore_keys) > 1:
                raise ValueError(
                    f"Multiple FileStore objects found ({filestore_keys}) but only one signed URL provided. "
                    f"Provide a dictionary mapping keys to URLs to avoid overwrites: "
                    f"{{'{filestore_keys[0]}': 'url1', '{filestore_keys[1]}': 'url2', ...}}"
                )

            # Single FileStore object, single URL is fine
            url_mapping = {filestore_keys[0]: signed_url_or_dict}

        elif isinstance(signed_url_or_dict, dict):
            # Dictionary of URLs provided: keys must match the FileStore keys exactly
            missing_keys = set(filestore_keys) - set(signed_url_or_dict.keys())
            if missing_keys:
                raise ValueError(
                    f"Missing signed URLs for FileStore keys: {list(missing_keys)}"
                )

            extra_keys = set(signed_url_or_dict.keys()) - set(filestore_keys)
            if extra_keys:
                raise ValueError(
                    f"Signed URLs provided for non-FileStore keys: {list(extra_keys)}"
                )

            url_mapping = signed_url_or_dict

        else:
            raise ValueError(
                "signed_url_or_dict must be either a string or a dictionary"
            )

        # Upload each FileStore object. A failure for one key is recorded and
        # the remaining keys are still attempted (best-effort by design).
        for key, value in filestores.items():
            try:
                result = value.save_to_gcs_bucket(url_mapping[key])
                result["scenario_key"] = key
                result["type"] = "filestore_object"
                upload_results.append(result)
            except Exception as e:
                upload_results.append(
                    {
                        "scenario_key": key,
                        "type": "filestore_object",
                        "status": "error",
                        "error": str(e),
                    }
                )

    return {
        "total_uploads": len(upload_results),
        "successful_uploads": len(
            [r for r in upload_results if r.get("status") == "success"]
        ),
        "failed_uploads": len(
            [r for r in upload_results if r.get("status") == "error"]
        ),
        "upload_details": upload_results,
    }
453
+
454
def get_filestore_info(self) -> dict:
    """
    Describe the FileStore content carried by this Scenario.

    Handy for working out how many signed URLs are needed, and which file
    extensions/MIME types are involved, before calling save_to_gcs_bucket().

    Returns:
        dict: Always contains:
            - total_count: Number of FileStore entries
            - filestore_keys: Scenario keys holding FileStore objects
            - file_extensions: Mapping of key -> file extension
            - file_types: Mapping of key -> MIME type
            - is_filestore_scenario: True when the Scenario itself has a
              string 'base64_string' key
            - summary: Human-readable description
        When is_filestore_scenario is False, a 'detailed_info' mapping of
        key -> extension/mime_type/binary/path is included as well.
    """
    # A Scenario built from a FileStore carries the payload as a top-level key.
    if "base64_string" in self and isinstance(self.get("base64_string"), str):
        return {
            "total_count": 1,
            "filestore_keys": ["filestore_content"],
            "file_extensions": {"filestore_content": self.get("suffix", "")},
            "file_types": {
                "filestore_content": self.get("mime_type", "application/octet-stream")
            },
            "is_filestore_scenario": True,
            "summary": f"Single FileStore content with extension '{self.get('suffix', 'unknown')}'",
        }

    from edsl.scenarios import FileStore

    # Regular Scenario: gather details for every value that is a FileStore.
    detailed = {}
    for key, value in self.items():
        if not isinstance(value, FileStore):
            continue
        detailed[key] = {
            "extension": getattr(value, "suffix", ""),
            "mime_type": getattr(value, "mime_type", "application/octet-stream"),
            "binary": getattr(value, "binary", True),
            "path": getattr(value, "path", "unknown"),
        }

    file_extensions = {key: entry["extension"] for key, entry in detailed.items()}
    file_types = {key: entry["mime_type"] for key, entry in detailed.items()}

    if detailed:
        listing = ", ".join(f"{key}({ext})" for key, ext in file_extensions.items())
        summary = f"{len(detailed)} FileStore objects: {listing}"
    else:
        summary = "No FileStore objects found"

    return {
        "total_count": len(detailed),
        "filestore_keys": list(detailed.keys()),
        "file_extensions": file_extensions,
        "file_types": file_types,
        "is_filestore_scenario": False,
        "detailed_info": detailed,
        "summary": summary,
    }
532
+
300
533
  def to_dict(
301
534
  self, add_edsl_version: bool = True, offload_base64: bool = False
302
535
  ) -> dict:
@@ -159,7 +159,15 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
159
159
 
160
160
  # Required MutableSequence abstract methods
161
161
def __getitem__(self, index):
    """Return the item at ``index``; a slice yields a new list of the same class.

    Example:
        >>> from edsl.scenarios import Scenario, ScenarioList
        >>> sl = ScenarioList([Scenario({'a': 12})])
        >>> sl[0]['b'] = 100 # modify in-place
        >>> sl[0]['b']
        100
    """
    if not isinstance(index, slice):
        return self.data[index]

    # A slice is wrapped in a fresh instance that receives a copy of the codebook.
    selected = list(self.data[index])
    return self.__class__(selected, self.codebook.copy())
@@ -356,7 +364,29 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
356
364
  new_scenarios.append(Scenario(new_scenario))
357
365
 
358
366
  return new_scenarios
367
+
368
@classmethod
def from_prompt(
    cls,
    description: str,
    name: Optional[str] = "item",
    target_number: int = 10,
    verbose=False,
):
    """Generate scenarios by running a list question built from ``description``.

    A ``QuestionList`` named ``name`` is created whose text is ``description``
    followed by a request for ``target_number`` examples. The question is run,
    the ``name`` column is selected from the results, converted with
    ``to_scenario_list()`` and expanded on ``name``.

    Args:
        description: Prompt text describing the items to generate.
        name: Question name; also the field selected from the results and
            expanded.
        target_number: Number of examples requested in the prompt text. It is
            only a request embedded in the question; nothing here enforces it.
        verbose: Forwarded to the question's ``run`` call.

    Returns:
        The expanded scenario list produced from the question's results.
    """
    from ..questions.question_list import QuestionList

    q = QuestionList(
        question_name=name,
        question_text=description
        + f"\n Please try to return {target_number} examples.",
    )
    results = q.run(verbose=verbose)
    return results.select(name).to_scenario_list().expand(name)
359
375
 
376
+
377
def __add__(self, other):
    """Return a duplicate of this list extended with ``other``.

    ``other`` may be a single Scenario (appended) or a ScenarioList (each of
    its items appended in order). This list is not modified; the additions go
    onto the object returned by ``duplicate()``.

    Raises:
        ScenarioError: If ``other`` is neither a Scenario nor a ScenarioList.
    """
    if not isinstance(other, (Scenario, ScenarioList)):
        raise ScenarioError("Don't know how to combine!")

    combined = self.duplicate()
    if isinstance(other, Scenario):
        combined.append(other)
    else:
        for item in other:
            combined.append(item)
    return combined
389
+
360
390
  @classmethod
361
391
  def from_search_terms(cls, search_terms: List[str]) -> ScenarioList:
362
392
  """Create a ScenarioList from a list of search terms, using Wikipedia.