edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +66 -0
- edsl/__version__.py +1 -1
- edsl/base/base_class.py +53 -0
- edsl/cli.py +93 -27
- edsl/config/config_class.py +4 -0
- edsl/coop/coop.py +403 -28
- edsl/coop/coop_jobs_objects.py +2 -2
- edsl/coop/coop_regular_objects.py +3 -1
- edsl/dataset/dataset.py +47 -41
- edsl/dataset/dataset_operations_mixin.py +138 -15
- edsl/dataset/report_from_template.py +509 -0
- edsl/inference_services/services/azure_ai.py +8 -2
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/jobs/jobs.py +5 -4
- edsl/jobs/jobs_checks.py +11 -6
- edsl/jobs/remote_inference.py +17 -10
- edsl/prompts/prompt.py +7 -2
- edsl/questions/question_registry.py +4 -1
- edsl/results/result.py +93 -38
- edsl/results/results.py +24 -15
- edsl/scenarios/file_store.py +69 -0
- edsl/scenarios/scenario.py +233 -0
- edsl/scenarios/scenario_list.py +294 -130
- edsl/scenarios/scenario_source.py +1 -2
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/METADATA +1 -1
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/RECORD +29 -28
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/LICENSE +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/WHEEL +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/entry_points.txt +0 -0
edsl/scenarios/scenario.py
CHANGED
@@ -280,6 +280,18 @@ class Scenario(Base, UserDict):
 
         target = self if inplace else Scenario()
 
+        # First check if this Scenario itself has a base64_string (e.g., from FileStore.to_dict())
+        if "base64_string" in self and isinstance(self.get("base64_string"), str):
+            # This is likely a Scenario created from FileStore.to_dict()
+            if inplace:
+                self["base64_string"] = "offloaded"
+            else:
+                # Copy all keys to target
+                for k, v in self.items():
+                    target[k] = v
+                target["base64_string"] = "offloaded"
+            return target
+
         for key, value in self.items():
             if isinstance(value, FileStore):
                 file_store_dict = value.to_dict()
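In the hunk above, offloading now short-circuits when the Scenario itself carries a top-level "base64_string" key (i.e., it was built from FileStore.to_dict()): the payload is replaced with the sentinel string "offloaded" while every other key is preserved. A minimal sketch of the resulting behavior, assuming the enclosing method is Scenario.offload and that the file path shown exists locally:

from edsl.scenarios import FileStore, Scenario

fs = FileStore("report.pdf")             # hypothetical local file
scenario = Scenario(fs.to_dict())        # carries a top-level "base64_string" key

slim = scenario.offload(inplace=False)   # returns a copy with the payload stripped
assert slim["base64_string"] == "offloaded"
# metadata keys such as suffix and mime_type are preserved on the copy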
@@ -297,6 +309,227 @@ class Scenario(Base, UserDict):
 
         return target
 
+    def save_to_gcs_bucket(self, signed_url_or_dict) -> dict:
+        """
+        Saves FileStore objects contained within this Scenario to a Google Cloud Storage bucket.
+
+        This method finds all FileStore objects in the Scenario and uploads them to GCS using
+        the provided signed URL(s). If the Scenario itself was created from a FileStore (has
+        base64_string as a top-level key), it uploads that content directly.
+
+        Args:
+            signed_url_or_dict: Either:
+                - str: Single signed URL (for single FileStore or Scenario from FileStore)
+                - dict: Mapping of scenario keys to signed URLs for multiple FileStore objects
+                  e.g., {"video": "signed_url_1", "image": "signed_url_2"}
+
+        Returns:
+            dict: Summary of upload operations performed
+
+        Raises:
+            ValueError: If no uploadable content found or content is offloaded
+            requests.RequestException: If any upload fails
+        """
+        from edsl.scenarios import FileStore
+        import requests
+        import base64
+
+        upload_results = []
+
+        # Case 1: This Scenario was created from a FileStore (has direct base64_string)
+        if "base64_string" in self and isinstance(self.get("base64_string"), str):
+            if self["base64_string"] == "offloaded":
+                raise ValueError("File content is offloaded. Cannot upload to GCS.")
+
+            # For single FileStore scenario, expect string URL
+            if isinstance(signed_url_or_dict, dict):
+                raise ValueError(
+                    "For Scenario created from FileStore, provide a single signed URL string, not a dictionary."
+                )
+
+            signed_url = signed_url_or_dict
+
+            # Get file info from Scenario keys
+            mime_type = self.get("mime_type", "application/octet-stream")
+            suffix = self.get("suffix", "")
+
+            # Decode and upload
+            try:
+                file_content = base64.b64decode(self["base64_string"])
+            except Exception as e:
+                raise ValueError(f"Failed to decode base64 content: {e}")
+
+            headers = {
+                "Content-Type": mime_type,
+                "Content-Length": str(len(file_content)),
+            }
+
+            response = requests.put(signed_url, data=file_content, headers=headers)
+            response.raise_for_status()
+
+            upload_results.append(
+                {
+                    "type": "scenario_filestore_content",
+                    "status": "success",
+                    "status_code": response.status_code,
+                    "file_size": len(file_content),
+                    "mime_type": mime_type,
+                    "file_extension": suffix,
+                }
+            )
+
+        # Case 2: Find FileStore objects in Scenario values
+        else:
+            # Collect all FileStore keys first
+            filestore_keys = [
+                key for key, value in self.items() if isinstance(value, FileStore)
+            ]
+
+            if not filestore_keys:
+                raise ValueError("No FileStore objects found in Scenario to upload.")
+
+            # Handle URL parameter
+            if isinstance(signed_url_or_dict, str):
+                # Single URL provided for multiple FileStore objects - this will cause overwrites
+                if len(filestore_keys) > 1:
+                    raise ValueError(
+                        f"Multiple FileStore objects found ({filestore_keys}) but only one signed URL provided. "
+                        f"Provide a dictionary mapping keys to URLs to avoid overwrites: "
+                        f"{{'{filestore_keys[0]}': 'url1', '{filestore_keys[1]}': 'url2', ...}}"
+                    )
+
+                # Single FileStore object, single URL is fine
+                url_mapping = {filestore_keys[0]: signed_url_or_dict}
+
+            elif isinstance(signed_url_or_dict, dict):
+                # Dictionary of URLs provided
+                missing_keys = set(filestore_keys) - set(signed_url_or_dict.keys())
+                if missing_keys:
+                    raise ValueError(
+                        f"Missing signed URLs for FileStore keys: {list(missing_keys)}"
+                    )
+
+                extra_keys = set(signed_url_or_dict.keys()) - set(filestore_keys)
+                if extra_keys:
+                    raise ValueError(
+                        f"Signed URLs provided for non-FileStore keys: {list(extra_keys)}"
+                    )
+
+                url_mapping = signed_url_or_dict
+
+            else:
+                raise ValueError(
+                    "signed_url_or_dict must be either a string or a dictionary"
+                )
+
+            # Upload each FileStore object
+            for key, value in self.items():
+                if isinstance(value, FileStore):
+                    try:
+                        result = value.save_to_gcs_bucket(url_mapping[key])
+                        result["scenario_key"] = key
+                        result["type"] = "filestore_object"
+                        upload_results.append(result)
+                    except Exception as e:
+                        upload_results.append(
+                            {
+                                "scenario_key": key,
+                                "type": "filestore_object",
+                                "status": "error",
+                                "error": str(e),
+                            }
+                        )
+
+        return {
+            "total_uploads": len(upload_results),
+            "successful_uploads": len(
+                [r for r in upload_results if r.get("status") == "success"]
+            ),
+            "failed_uploads": len(
+                [r for r in upload_results if r.get("status") == "error"]
+            ),
+            "upload_details": upload_results,
+        }
+
+    def get_filestore_info(self) -> dict:
+        """
+        Returns information about FileStore objects present in this Scenario.
+
+        This method is useful for determining how many signed URLs need to be generated
+        and what file extensions/types are present before calling save_to_gcs_bucket().
+
+        Returns:
+            dict: Information about FileStore objects containing:
+                - total_count: Total number of FileStore objects
+                - filestore_keys: List of scenario keys that contain FileStore objects
+                - file_extensions: Dictionary mapping keys to file extensions
+                - file_types: Dictionary mapping keys to MIME types
+                - is_filestore_scenario: Boolean indicating if this Scenario was created from a FileStore
+                - summary: Human-readable summary of files
+        """
+        from edsl.scenarios import FileStore
+
+        # Check if this Scenario was created from a FileStore
+        is_filestore_scenario = "base64_string" in self and isinstance(
+            self.get("base64_string"), str
+        )
+
+        if is_filestore_scenario:
+            # Single FileStore scenario
+            return {
+                "total_count": 1,
+                "filestore_keys": ["filestore_content"],
+                "file_extensions": {"filestore_content": self.get("suffix", "")},
+                "file_types": {
+                    "filestore_content": self.get(
+                        "mime_type", "application/octet-stream"
+                    )
+                },
+                "is_filestore_scenario": True,
+                "summary": f"Single FileStore content with extension '{self.get('suffix', 'unknown')}'",
+            }
+
+        # Regular Scenario with FileStore objects as values
+        filestore_info = {}
+        file_extensions = {}
+        file_types = {}
+
+        for key, value in self.items():
+            if isinstance(value, FileStore):
+                filestore_info[key] = {
+                    "extension": getattr(value, "suffix", ""),
+                    "mime_type": getattr(
+                        value, "mime_type", "application/octet-stream"
+                    ),
+                    "binary": getattr(value, "binary", True),
+                    "path": getattr(value, "path", "unknown"),
+                }
+                file_extensions[key] = getattr(value, "suffix", "")
+                file_types[key] = getattr(
+                    value, "mime_type", "application/octet-stream"
+                )
+
+        # Generate summary
+        if filestore_info:
+            ext_summary = [f"{key}({ext})" for key, ext in file_extensions.items()]
+            summary = (
+                f"{len(filestore_info)} FileStore objects: {', '.join(ext_summary)}"
+            )
+        else:
+            summary = "No FileStore objects found"
+
+        return {
+            "total_count": len(filestore_info),
+            "filestore_keys": list(filestore_info.keys()),
+            "file_extensions": file_extensions,
+            "file_types": file_types,
+            "is_filestore_scenario": False,
+            "detailed_info": filestore_info,
+            "summary": summary,
+        }
+
     def to_dict(
         self, add_edsl_version: bool = True, offload_base64: bool = False
     ) -> dict:
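Taken together, the two new methods support a two-step upload flow: call get_filestore_info() to see which scenario keys hold FileStore objects (and therefore how many signed URLs to mint), then pass a key-to-URL mapping to save_to_gcs_bucket(). A minimal sketch of that flow; the file names are hypothetical and the signed PUT URLs are placeholders you would generate yourself (for example with a GCS client library):

from edsl.scenarios import FileStore, Scenario

scenario = Scenario({
    "video": FileStore("clip.mp4"),   # hypothetical local files
    "image": FileStore("frame.png"),
})

info = scenario.get_filestore_info()
print(info["summary"])                # e.g. "2 FileStore objects: video(mp4), image(png)"

# One signed PUT URL per FileStore key, keyed by the scenario key.
signed_urls = {
    "video": "https://storage.googleapis.com/...signed-put-url-for-video",
    "image": "https://storage.googleapis.com/...signed-put-url-for-image",
}
result = scenario.save_to_gcs_bucket(signed_urls)
print(f"{result['successful_uploads']}/{result['total_uploads']} uploads succeeded")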