edsl 0.1.60__py3-none-any.whl → 0.1.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +65 -17
- edsl/agents/agent_list.py +117 -33
- edsl/base/base_class.py +88 -11
- edsl/config/config_class.py +7 -2
- edsl/coop/coop.py +1552 -95
- edsl/coop/coop_jobs_objects.py +2 -2
- edsl/coop/coop_prolific_filters.py +171 -0
- edsl/coop/coop_regular_objects.py +3 -1
- edsl/dataset/display/table_display.py +40 -7
- edsl/db_list/sqlite_list.py +102 -3
- edsl/jobs/data_structures.py +46 -31
- edsl/jobs/jobs.py +73 -2
- edsl/jobs/remote_inference.py +47 -13
- edsl/prompts/prompt.py +7 -2
- edsl/questions/loop_processor.py +289 -10
- edsl/questions/question_registry.py +4 -1
- edsl/questions/templates/dict/answering_instructions.jinja +0 -1
- edsl/scenarios/file_store.py +69 -0
- edsl/scenarios/scenario.py +233 -0
- edsl/scenarios/scenario_list.py +31 -1
- edsl/scenarios/scenario_source.py +605 -498
- edsl/surveys/survey.py +198 -163
- {edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/METADATA +3 -3
- {edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/RECORD +28 -27
- {edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/LICENSE +0 -0
- {edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/WHEEL +0 -0
- {edsl-0.1.60.dist-info → edsl-0.1.62.dist-info}/entry_points.txt +0 -0
edsl/scenarios/file_store.py
CHANGED
@@ -512,6 +512,75 @@ class FileStore(Scenario):
|
|
512
512
|
)
|
513
513
|
return info
|
514
514
|
|
515
|
+
def offload(self, inplace=False) -> "FileStore":
    """
    Replace the base64-encoded payload with the placeholder string 'offloaded'.

    Args:
        inplace (bool): When True, overwrite ``base64_string`` on this object and
            return it. When False, leave this object untouched and build a new one
            from its dictionary form.

    Returns:
        FileStore: ``self`` when ``inplace`` is True, otherwise a new instance created
        through ``from_dict`` whose ``base64_string`` entry is 'offloaded'.
    """
    if not inplace:
        # Work on the serialized form so the current object keeps its content.
        payload = self.to_dict()
        if "base64_string" in payload:
            payload["base64_string"] = "offloaded"
        return self.__class__.from_dict(payload)

    # In-place: only touch the attribute when it actually exists.
    if hasattr(self, "base64_string"):
        self.base64_string = "offloaded"
    return self
|
536
|
+
|
537
|
+
def save_to_gcs_bucket(self, signed_url: str, timeout=None) -> dict:
    """
    Saves the FileStore's file content to a Google Cloud Storage bucket using a signed URL.

    The base64 payload is decoded to bytes and sent with an HTTP PUT to ``signed_url``.

    Args:
        signed_url (str): The signed URL for uploading to GCS bucket
        timeout: Optional timeout forwarded to ``requests.put`` (seconds, or a
            ``(connect, read)`` tuple). The default ``None`` waits indefinitely,
            which is the behaviour this method had before the parameter existed.

    Returns:
        dict: Summary of the upload with the keys ``status``, ``status_code``,
        ``file_size``, ``mime_type`` and ``file_extension``.

    Raises:
        ValueError: If base64_string is offloaded or missing, or cannot be decoded
        requests.RequestException: If the upload fails
    """
    import base64

    # Check if content is available
    if not hasattr(self, "base64_string") or self.base64_string == "offloaded":
        raise ValueError(
            "File content is not available (offloaded or missing). Cannot upload to GCS."
        )

    # Decode base64 content to bytes. b64decode signals bad input with
    # binascii.Error (a ValueError subclass), ValueError or TypeError.
    try:
        file_content = base64.b64decode(self.base64_string)
    except (ValueError, TypeError) as e:
        # Chain the original error so the traceback shows the real cause.
        raise ValueError(f"Failed to decode base64 content: {e}") from e

    # Imported only once there is something to send, so the validation errors
    # above do not depend on the HTTP library being importable.
    import requests

    # Prepare headers with proper content type
    headers = {
        "Content-Type": self.mime_type or "application/octet-stream",
        "Content-Length": str(len(file_content)),
    }

    # Upload to GCS using the signed URL
    response = requests.put(
        signed_url, data=file_content, headers=headers, timeout=timeout
    )
    response.raise_for_status()

    return {
        "status": "success",
        "status_code": response.status_code,
        "file_size": len(file_content),
        "mime_type": self.mime_type,
        "file_extension": self.suffix,
    }
|
583
|
+
|
515
584
|
@classmethod
|
516
585
|
def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
|
517
586
|
"""
|
edsl/scenarios/scenario.py
CHANGED
@@ -280,6 +280,18 @@ class Scenario(Base, UserDict):
|
|
280
280
|
|
281
281
|
target = self if inplace else Scenario()
|
282
282
|
|
283
|
+
# First check if this Scenario itself has a base64_string (e.g., from FileStore.to_dict())
|
284
|
+
if "base64_string" in self and isinstance(self.get("base64_string"), str):
|
285
|
+
# This is likely a Scenario created from FileStore.to_dict()
|
286
|
+
if inplace:
|
287
|
+
self["base64_string"] = "offloaded"
|
288
|
+
else:
|
289
|
+
# Copy all keys to target
|
290
|
+
for k, v in self.items():
|
291
|
+
target[k] = v
|
292
|
+
target["base64_string"] = "offloaded"
|
293
|
+
return target
|
294
|
+
|
283
295
|
for key, value in self.items():
|
284
296
|
if isinstance(value, FileStore):
|
285
297
|
file_store_dict = value.to_dict()
|
@@ -297,6 +309,227 @@ class Scenario(Base, UserDict):
|
|
297
309
|
|
298
310
|
return target
|
299
311
|
|
312
|
+
def save_to_gcs_bucket(self, signed_url_or_dict) -> dict:
    """
    Saves FileStore content held by this Scenario to a Google Cloud Storage bucket.

    Two layouts are handled:

    1. The Scenario itself has a string ``base64_string`` key (for example because it
       was built from ``FileStore.to_dict()``): that content is decoded and uploaded
       directly with an HTTP PUT.
    2. Otherwise, every value that is a FileStore is uploaded through its own
       ``save_to_gcs_bucket`` method.

    Args:
        signed_url_or_dict: Either:
            - str: Single signed URL (for single FileStore or Scenario from FileStore)
            - dict: Mapping of scenario keys to signed URLs for multiple FileStore objects
              e.g., {"video": "signed_url_1", "image": "signed_url_2"}

    Returns:
        dict: Summary of upload operations performed, with the keys
        ``total_uploads``, ``successful_uploads``, ``failed_uploads`` and
        ``upload_details`` (one entry per attempted upload).

    Raises:
        ValueError: If no uploadable content is found, the content is offloaded or
            cannot be decoded, or the URL argument does not match the Scenario layout.
        requests.RequestException: If the direct upload of layout 1 fails. Failures
            of individual FileStore uploads in layout 2 are NOT raised; each one is
            recorded in ``upload_details`` with ``"status": "error"``.
    """
    upload_results = []

    # Case 1: This Scenario was created from a FileStore (has direct base64_string)
    if "base64_string" in self and isinstance(self.get("base64_string"), str):
        import base64

        if self["base64_string"] == "offloaded":
            raise ValueError("File content is offloaded. Cannot upload to GCS.")

        # For single FileStore scenario, expect string URL
        if isinstance(signed_url_or_dict, dict):
            raise ValueError(
                "For Scenario created from FileStore, provide a single signed URL string, not a dictionary."
            )
        # Reject anything else up front instead of letting the HTTP call fail
        # with an unrelated error; same message as the FileStore-values path.
        if not isinstance(signed_url_or_dict, str):
            raise ValueError(
                "signed_url_or_dict must be either a string or a dictionary"
            )

        signed_url = signed_url_or_dict

        # Get file info from Scenario keys
        mime_type = self.get("mime_type", "application/octet-stream")
        suffix = self.get("suffix", "")

        # Decode and upload
        try:
            file_content = base64.b64decode(self["base64_string"])
        except (ValueError, TypeError) as e:
            # Chain the original error so the traceback shows the real cause.
            raise ValueError(f"Failed to decode base64 content: {e}") from e

        # Imported only when an HTTP request is actually about to be made.
        import requests

        headers = {
            "Content-Type": mime_type,
            "Content-Length": str(len(file_content)),
        }

        response = requests.put(signed_url, data=file_content, headers=headers)
        response.raise_for_status()

        upload_results.append(
            {
                "type": "scenario_filestore_content",
                "status": "success",
                "status_code": response.status_code,
                "file_size": len(file_content),
                "mime_type": mime_type,
                "file_extension": suffix,
            }
        )

    # Case 2: Find FileStore objects in Scenario values
    else:
        from edsl.scenarios import FileStore

        # Collect all FileStore keys first
        filestore_keys = [
            key for key, value in self.items() if isinstance(value, FileStore)
        ]

        if not filestore_keys:
            raise ValueError("No FileStore objects found in Scenario to upload.")

        # Handle URL parameter
        if isinstance(signed_url_or_dict, str):
            # Single URL provided for multiple FileStore objects - this will cause overwrites
            if len(filestore_keys) > 1:
                raise ValueError(
                    f"Multiple FileStore objects found ({filestore_keys}) but only one signed URL provided. "
                    f"Provide a dictionary mapping keys to URLs to avoid overwrites: "
                    f"{{'{filestore_keys[0]}': 'url1', '{filestore_keys[1]}': 'url2', ...}}"
                )

            # Single FileStore object, single URL is fine
            url_mapping = {filestore_keys[0]: signed_url_or_dict}

        elif isinstance(signed_url_or_dict, dict):
            # Dictionary of URLs provided: it must cover exactly the FileStore keys
            missing_keys = set(filestore_keys) - set(signed_url_or_dict.keys())
            if missing_keys:
                raise ValueError(
                    f"Missing signed URLs for FileStore keys: {list(missing_keys)}"
                )

            extra_keys = set(signed_url_or_dict.keys()) - set(filestore_keys)
            if extra_keys:
                raise ValueError(
                    f"Signed URLs provided for non-FileStore keys: {list(extra_keys)}"
                )

            url_mapping = signed_url_or_dict

        else:
            raise ValueError(
                "signed_url_or_dict must be either a string or a dictionary"
            )

        # Upload each FileStore object. This is deliberately best-effort: one
        # failing upload is recorded and the remaining files are still attempted.
        for key in filestore_keys:
            try:
                result = self[key].save_to_gcs_bucket(url_mapping[key])
                result["scenario_key"] = key
                result["type"] = "filestore_object"
                upload_results.append(result)
            except Exception as e:
                upload_results.append(
                    {
                        "scenario_key": key,
                        "type": "filestore_object",
                        "status": "error",
                        "error": str(e),
                    }
                )

    return {
        "total_uploads": len(upload_results),
        "successful_uploads": sum(
            1 for r in upload_results if r.get("status") == "success"
        ),
        "failed_uploads": sum(
            1 for r in upload_results if r.get("status") == "error"
        ),
        "upload_details": upload_results,
    }
|
453
|
+
|
454
|
+
def get_filestore_info(self) -> dict:
    """
    Returns information about FileStore objects present in this Scenario.

    This method is useful for determining how many signed URLs need to be generated
    and what file extensions/types are present before calling save_to_gcs_bucket().

    Returns:
        dict: Information about FileStore objects containing:
            - total_count: Total number of FileStore objects
            - filestore_keys: List of scenario keys that contain FileStore objects
              (the single placeholder key "filestore_content" when the Scenario
              itself carries the file content)
            - file_extensions: Dictionary mapping keys to file extensions
            - file_types: Dictionary mapping keys to MIME types
            - is_filestore_scenario: Boolean indicating if this Scenario has a string
              ``base64_string`` key of its own
            - detailed_info: Dictionary mapping keys to a dict with ``extension``,
              ``mime_type``, ``binary`` and ``path``
            - summary: Human-readable summary of files
    """
    # Check if this Scenario was created from a FileStore
    is_filestore_scenario = "base64_string" in self and isinstance(
        self.get("base64_string"), str
    )

    if is_filestore_scenario:
        # Single FileStore scenario: the file attributes are top-level keys.
        suffix = self.get("suffix", "")
        mime_type = self.get("mime_type", "application/octet-stream")
        return {
            "total_count": 1,
            "filestore_keys": ["filestore_content"],
            "file_extensions": {"filestore_content": suffix},
            "file_types": {"filestore_content": mime_type},
            "is_filestore_scenario": True,
            # Same shape and fallbacks as the per-FileStore entries built below,
            # so callers can read "detailed_info" regardless of the branch taken.
            "detailed_info": {
                "filestore_content": {
                    "extension": suffix,
                    "mime_type": mime_type,
                    "binary": self.get("binary", True),
                    "path": self.get("path", "unknown"),
                }
            },
            "summary": f"Single FileStore content with extension '{self.get('suffix', 'unknown')}'",
        }

    # Imported here, after the early return, because only this path needs it.
    from edsl.scenarios import FileStore

    # Regular Scenario with FileStore objects as values
    filestore_info = {
        key: {
            "extension": getattr(value, "suffix", ""),
            "mime_type": getattr(value, "mime_type", "application/octet-stream"),
            "binary": getattr(value, "binary", True),
            "path": getattr(value, "path", "unknown"),
        }
        for key, value in self.items()
        if isinstance(value, FileStore)
    }
    file_extensions = {key: info["extension"] for key, info in filestore_info.items()}
    file_types = {key: info["mime_type"] for key, info in filestore_info.items()}

    # Generate summary
    if filestore_info:
        ext_summary = [f"{key}({ext})" for key, ext in file_extensions.items()]
        summary = f"{len(filestore_info)} FileStore objects: {', '.join(ext_summary)}"
    else:
        summary = "No FileStore objects found"

    return {
        "total_count": len(filestore_info),
        "filestore_keys": list(filestore_info.keys()),
        "file_extensions": file_extensions,
        "file_types": file_types,
        "is_filestore_scenario": False,
        "detailed_info": filestore_info,
        "summary": summary,
    }
|
532
|
+
|
300
533
|
def to_dict(
|
301
534
|
self, add_edsl_version: bool = True, offload_base64: bool = False
|
302
535
|
) -> dict:
|
edsl/scenarios/scenario_list.py
CHANGED
@@ -159,7 +159,15 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
159
159
|
|
160
160
|
# Required MutableSequence abstract methods
|
161
161
|
def __getitem__(self, index):
    """Return the item at ``index``; a slice yields a new list object.

    For a slice, a new instance of the same class is built from the selected
    items together with a copy of the codebook. For any other index the stored
    item itself is returned, so changes made to it are visible through the list.

    Example:
        >>> from edsl.scenarios import Scenario, ScenarioList
        >>> sl = ScenarioList([Scenario({'a': 12})])
        >>> sl[0]['b'] = 100  # modify in-place
        >>> sl[0]['b']
        100
    """
    if not isinstance(index, slice):
        return self.data[index]
    selected = list(self.data[index])
    return self.__class__(selected, self.codebook.copy())
|
@@ -356,7 +364,29 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
356
364
|
new_scenarios.append(Scenario(new_scenario))
|
357
365
|
|
358
366
|
return new_scenarios
|
367
|
+
|
368
|
+
@classmethod
def from_prompt(
    cls,
    description: str,
    name: Optional[str] = "item",
    target_number: int = 10,
    verbose=False,
):
    """Create a scenario list from the answers to a list question built from a prompt.

    A ``QuestionList`` is created whose text is ``description`` followed by a request
    for ``target_number`` examples. The question is run, the answer column ``name`` is
    selected, converted to a scenario list and expanded on ``name``.

    Args:
        description: Text of the question to ask.
        name: Used both as the question name and as the field that is selected and
            expanded in the result.
        target_number: Number of examples requested in the question text. It is only
            a request embedded in the prompt; the code does not enforce the count.
        verbose: Forwarded to the question's ``run`` call.

    Returns:
        The result of ``to_scenario_list().expand(name)`` on the selected results
        (presumably a ScenarioList with one scenario per returned item - confirm
        against ``expand``).
    """
    from ..questions.question_list import QuestionList

    q = QuestionList(
        question_name=name,
        question_text=description
        + f"\n Please try to return {target_number} examples.",
    )
    results = q.run(verbose=verbose)
    return results.select(name).to_scenario_list().expand(name)
|
359
375
|
|
376
|
+
|
377
|
+
def __add__(self, other):
    """Return a duplicate of this list with ``other`` appended.

    ``other`` may be a single Scenario, which is appended as one item, or a
    ScenarioList, whose items are appended one by one. This list itself is not
    modified; the additions go onto the object returned by ``duplicate()``.

    Raises:
        ScenarioError: If ``other`` is neither a Scenario nor a ScenarioList.
    """
    if isinstance(other, Scenario):
        additions = [other]
    elif isinstance(other, ScenarioList):
        additions = other
    else:
        raise ScenarioError("Don't know how to combine!")

    combined = self.duplicate()
    for scenario in additions:
        combined.append(scenario)
    return combined
|
389
|
+
|
360
390
|
@classmethod
|
361
391
|
def from_search_terms(cls, search_terms: List[str]) -> ScenarioList:
|
362
392
|
"""Create a ScenarioList from a list of search terms, using Wikipedia.
|