edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,7 +39,7 @@ import json
39
39
  import pickle
40
40
 
41
41
 
42
- # Import for refactoring to Source classes
42
+ # Import for refactoring to Source classes
43
43
  from edsl.scenarios.scenario_source import deprecated_classmethod, TuplesSource
44
44
 
45
45
  from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
@@ -98,7 +98,6 @@ TableFormat: TypeAlias = Literal[
98
98
  ]
99
99
 
100
100
 
101
-
102
101
  class ScenarioSQLiteList(SQLiteList):
103
102
  """SQLite-backed list specifically for storing Scenario objects."""
104
103
 
@@ -112,12 +111,14 @@ class ScenarioSQLiteList(SQLiteList):
112
111
  return pickle.loads(data.encode())
113
112
  return pickle.loads(data)
114
113
 
115
-
116
- if use_sqlite := True:
114
+ from ..config import CONFIG
115
+
116
+ if use_sqlite := CONFIG.get("EDSL_USE_SQLITE_FOR_SCENARIO_LIST").lower() == "true":
117
117
  data_class = ScenarioSQLiteList
118
118
  else:
119
119
  data_class = list
120
120
 
121
+
121
122
  class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
122
123
  """
123
124
  A collection of Scenario objects with advanced operations for manipulation and analysis.
@@ -219,13 +220,13 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
219
220
  """
220
221
  seen_hashes = set()
221
222
  result = ScenarioList()
222
-
223
+
223
224
  for scenario in self.data:
224
225
  scenario_hash = hash(scenario)
225
226
  if scenario_hash not in seen_hashes:
226
227
  seen_hashes.add(scenario_hash)
227
228
  result.append(scenario)
228
-
229
+
229
230
  return result
230
231
 
231
232
  @property
@@ -306,8 +307,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
306
307
  ScenarioList([Scenario({'custom_name': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
307
308
  """
308
309
  codebook = existing_codebook.copy() if existing_codebook else {}
309
-
310
- new_scenarios = ScenarioList(data = [], codebook = codebook)
310
+
311
+ new_scenarios = ScenarioList(data=[], codebook=codebook)
311
312
 
312
313
  for scenario in self:
313
314
  new_scenario = {}
@@ -355,7 +356,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
355
356
  if value_vars is None:
356
357
  value_vars = [field for field in self[0].keys() if field not in id_vars]
357
358
 
358
- new_scenarios = ScenarioList(data = [], codebook = {})
359
+ new_scenarios = ScenarioList(data=[], codebook={})
359
360
  for scenario in self:
360
361
  for var in value_vars:
361
362
  new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
@@ -366,13 +367,22 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
366
367
  return new_scenarios
367
368
 
368
369
  @classmethod
369
- def from_prompt(self, description: str, name:Optional[str] = "item", target_number:int = 10, verbose = False):
370
+ def from_prompt(
371
+ self,
372
+ description: str,
373
+ name: Optional[str] = "item",
374
+ target_number: int = 10,
375
+ verbose=False,
376
+ ):
370
377
  from ..questions.question_list import QuestionList
371
- q = QuestionList(question_name = name,
372
- question_text = description + f"\n Please try to return {target_number} examples.")
373
- results = q.run(verbose = verbose)
378
+
379
+ q = QuestionList(
380
+ question_name=name,
381
+ question_text=description
382
+ + f"\n Please try to return {target_number} examples.",
383
+ )
384
+ results = q.run(verbose=verbose)
374
385
  return results.select(name).to_scenario_list().expand(name)
375
-
376
386
 
377
387
  def __add__(self, other):
378
388
  if isinstance(other, Scenario):
@@ -395,14 +405,20 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
395
405
  search_terms: A list of search terms.
396
406
  """
397
407
  from ..utilities.wikipedia import fetch_wikipedia_content
408
+
398
409
  results = fetch_wikipedia_content(search_terms)
399
410
  return cls([Scenario(result) for result in results])
400
-
401
- def augment_with_wikipedia(self, search_key:str, content_only: bool = True, key_name: str = "wikipedia_content") -> ScenarioList:
411
+
412
+ def augment_with_wikipedia(
413
+ self,
414
+ search_key: str,
415
+ content_only: bool = True,
416
+ key_name: str = "wikipedia_content",
417
+ ) -> ScenarioList:
402
418
  """Augment the ScenarioList with Wikipedia content."""
403
419
  search_terms = self.select(search_key).to_list()
404
420
  wikipedia_results = ScenarioList.from_search_terms(search_terms)
405
- new_sl = ScenarioList(data = [], codebook = self.codebook)
421
+ new_sl = ScenarioList(data=[], codebook=self.codebook)
406
422
  for scenario, wikipedia_result in zip(self, wikipedia_results):
407
423
  if content_only:
408
424
  scenario[key_name] = wikipedia_result["content"]
@@ -412,7 +428,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
412
428
  new_sl.append(scenario)
413
429
  return new_sl
414
430
 
415
-
416
431
  def pivot(
417
432
  self,
418
433
  id_vars: List[str] = None,
@@ -452,11 +467,11 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
452
467
  value = scenario[value_name]
453
468
  pivoted_dict[id_key][variable] = value
454
469
 
455
- new_sl = ScenarioList(data = [], codebook = self.codebook)
470
+ new_sl = ScenarioList(data=[], codebook=self.codebook)
456
471
  for id_key, values in pivoted_dict.items():
457
472
  new_sl.append(Scenario(dict(zip(id_vars, id_key), **values)))
458
473
  return new_sl
459
-
474
+
460
475
  def group_by(
461
476
  self, id_vars: List[str], variables: List[str], func: Callable
462
477
  ) -> ScenarioList:
@@ -497,7 +512,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
497
512
  grouped[key][var].append(scenario[var])
498
513
 
499
514
  # Apply the function to each group
500
- new_sl= ScenarioList(data = [], codebook = self.codebook)
515
+ new_sl = ScenarioList(data=[], codebook=self.codebook)
501
516
  for key, group in grouped.items():
502
517
  try:
503
518
  aggregated = func(*[group[var] for var in variables])
@@ -551,21 +566,22 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
551
566
  """
552
567
  # Start with a seed value
553
568
  running_hash = 0
554
-
569
+
555
570
  # Use a heap to maintain sorted order as we go
556
571
  import heapq
572
+
557
573
  heap = []
558
-
574
+
559
575
  # Process each scenario's hash and add to heap
560
576
  for scenario in self:
561
577
  heapq.heappush(heap, hash(scenario))
562
-
578
+
563
579
  # Combine hashes in sorted order
564
580
  while heap:
565
581
  h = heapq.heappop(heap)
566
582
  # Use a large prime number to mix the bits
567
583
  running_hash = (running_hash * 31) ^ h
568
-
584
+
569
585
  return running_hash
570
586
 
571
587
  def __eq__(self, other: Any) -> bool:
@@ -608,6 +624,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
608
624
  ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
609
625
  """
610
626
  import warnings
627
+
611
628
  warnings.warn("times is deprecated, use * instead", DeprecationWarning)
612
629
  return self.__mul__(other)
613
630
 
@@ -671,6 +688,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
671
688
  fields: List[str],
672
689
  output_type: str = "string",
673
690
  separator: str = ";",
691
+ prefix: str = "",
692
+ postfix: str = "",
674
693
  new_field_name: Optional[str] = None,
675
694
  ) -> ScenarioList:
676
695
  """Private method to handle concatenation logic for different output types.
@@ -678,6 +697,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
678
697
  :param fields: The fields to concatenate.
679
698
  :param output_type: The type of output ("string", "list", or "set").
680
699
  :param separator: The separator to use for string concatenation.
700
+ :param prefix: String to prepend to each value before concatenation.
701
+ :param postfix: String to append to each value before concatenation.
681
702
  :param new_field_name: Optional custom name for the concatenated field.
682
703
  If None, defaults to "concat_field1_field2_..."
683
704
 
@@ -706,14 +727,25 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
706
727
  )
707
728
 
708
729
  if output_type == "string":
709
- # Convert all values to strings and join with separator
710
- new_scenario[field_name] = separator.join(str(v) for v in values)
730
+ # Apply prefix and postfix to each value, then join with separator
731
+ formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
732
+ new_scenario[field_name] = separator.join(formatted_values)
711
733
  elif output_type == "list":
712
- # Keep as a list
713
- new_scenario[field_name] = values
734
+ # Apply prefix and postfix to each value if they are strings
735
+ if prefix or postfix:
736
+ formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
737
+ new_scenario[field_name] = formatted_values
738
+ else:
739
+ # Keep as original list if no prefix/postfix
740
+ new_scenario[field_name] = values
714
741
  elif output_type == "set":
715
- # Convert to a set (removes duplicates)
716
- new_scenario[field_name] = set(values)
742
+ # Apply prefix and postfix to each value if they are strings, then convert to set
743
+ if prefix or postfix:
744
+ formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
745
+ new_scenario[field_name] = set(formatted_values)
746
+ else:
747
+ # Convert to a set (removes duplicates)
748
+ new_scenario[field_name] = set(values)
717
749
  else:
718
750
  from .exceptions import ValueScenarioError
719
751
 
@@ -729,12 +761,16 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
729
761
  self,
730
762
  fields: List[str],
731
763
  separator: str = ";",
764
+ prefix: str = "",
765
+ postfix: str = "",
732
766
  new_field_name: Optional[str] = None,
733
767
  ) -> ScenarioList:
734
768
  """Concatenate specified fields into a single string field.
735
769
 
736
770
  :param fields: The fields to concatenate.
737
771
  :param separator: The separator to use.
772
+ :param prefix: String to prepend to each value before concatenation.
773
+ :param postfix: String to append to each value before concatenation.
738
774
  :param new_field_name: Optional custom name for the concatenated field.
739
775
 
740
776
  Returns:
@@ -746,20 +782,30 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
746
782
  ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
747
783
  >>> s.concatenate(['a', 'b', 'c'], new_field_name='combined')
748
784
  ScenarioList([Scenario({'combined': '1;2;3'}), Scenario({'combined': '4;5;6'})])
785
+ >>> s.concatenate(['a', 'b', 'c'], prefix='[', postfix=']')
786
+ ScenarioList([Scenario({'concat_a_b_c': '[1];[2];[3]'}), Scenario({'concat_a_b_c': '[4];[5];[6]'})])
749
787
  """
750
788
  return self._concatenate(
751
789
  fields,
752
790
  output_type="string",
753
791
  separator=separator,
792
+ prefix=prefix,
793
+ postfix=postfix,
754
794
  new_field_name=new_field_name,
755
795
  )
756
796
 
757
797
  def concatenate_to_list(
758
- self, fields: List[str], new_field_name: Optional[str] = None
798
+ self,
799
+ fields: List[str],
800
+ prefix: str = "",
801
+ postfix: str = "",
802
+ new_field_name: Optional[str] = None
759
803
  ) -> ScenarioList:
760
804
  """Concatenate specified fields into a single list field.
761
805
 
762
806
  :param fields: The fields to concatenate.
807
+ :param prefix: String to prepend to each value before concatenation.
808
+ :param postfix: String to append to each value before concatenation.
763
809
  :param new_field_name: Optional custom name for the concatenated field.
764
810
 
765
811
  Returns:
@@ -771,17 +817,29 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
771
817
  ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
772
818
  >>> s.concatenate_to_list(['a', 'b', 'c'], new_field_name='values')
773
819
  ScenarioList([Scenario({'values': [1, 2, 3]}), Scenario({'values': [4, 5, 6]})])
820
+ >>> s.concatenate_to_list(['a', 'b', 'c'], prefix='[', postfix=']')
821
+ ScenarioList([Scenario({'concat_a_b_c': ['[1]', '[2]', '[3]']}), Scenario({'concat_a_b_c': ['[4]', '[5]', '[6]']})])
774
822
  """
775
823
  return self._concatenate(
776
- fields, output_type="list", new_field_name=new_field_name
824
+ fields,
825
+ output_type="list",
826
+ prefix=prefix,
827
+ postfix=postfix,
828
+ new_field_name=new_field_name
777
829
  )
778
830
 
779
831
  def concatenate_to_set(
780
- self, fields: List[str], new_field_name: Optional[str] = None
832
+ self,
833
+ fields: List[str],
834
+ prefix: str = "",
835
+ postfix: str = "",
836
+ new_field_name: Optional[str] = None
781
837
  ) -> ScenarioList:
782
838
  """Concatenate specified fields into a single set field.
783
839
 
784
840
  :param fields: The fields to concatenate.
841
+ :param prefix: String to prepend to each value before concatenation.
842
+ :param postfix: String to append to each value before concatenation.
785
843
  :param new_field_name: Optional custom name for the concatenated field.
786
844
 
787
845
  Returns:
@@ -789,13 +847,26 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
789
847
 
790
848
  Example:
791
849
  >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
792
- >>> s.concatenate_to_set(['a', 'b', 'c'])
793
- ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
794
- >>> s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
795
- ScenarioList([Scenario({'unique_values': {1, 2, 3}}), Scenario({'unique_values': {4, 5, 6}})])
850
+ >>> result = s.concatenate_to_set(['a', 'b', 'c'])
851
+ >>> result[0]['concat_a_b_c'] == {1, 2, 3}
852
+ True
853
+ >>> result[1]['concat_a_b_c'] == {4, 5, 6}
854
+ True
855
+ >>> result = s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
856
+ >>> result[0]['unique_values'] == {1, 2, 3}
857
+ True
858
+ >>> result = s.concatenate_to_set(['a', 'b', 'c'], prefix='[', postfix=']')
859
+ >>> result[0]['concat_a_b_c'] == {'[1]', '[2]', '[3]'}
860
+ True
861
+ >>> result[1]['concat_a_b_c'] == {'[4]', '[5]', '[6]'}
862
+ True
796
863
  """
797
864
  return self._concatenate(
798
- fields, output_type="set", new_field_name=new_field_name
865
+ fields,
866
+ output_type="set",
867
+ prefix=prefix,
868
+ postfix=postfix,
869
+ new_field_name=new_field_name
799
870
  )
800
871
 
801
872
  def unpack_dict(
@@ -927,6 +998,27 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
927
998
  new_list.append(scenario.copy())
928
999
  return new_list
929
1000
 
1001
+ def offload(self, inplace: bool = False) -> "ScenarioList":
1002
+ """
1003
+ Offloads base64-encoded content from all scenarios in the list by replacing
1004
+ 'base64_string' fields with 'offloaded'. This reduces memory usage.
1005
+
1006
+ Args:
1007
+ inplace (bool): If True, modify the current scenario list. If False, return a new one.
1008
+
1009
+ Returns:
1010
+ ScenarioList: The modified scenario list (either self or a new instance).
1011
+ """
1012
+ if inplace:
1013
+ for i, scenario in enumerate(self.data):
1014
+ self.data[i] = scenario.offload(inplace=True)
1015
+ return self
1016
+ else:
1017
+ new_list = ScenarioList(codebook=self.codebook)
1018
+ for scenario in self.data:
1019
+ new_list.append(scenario.offload(inplace=False))
1020
+ return new_list
1021
+
930
1022
  def __iter__(self):
931
1023
  """Iterate over scenarios using streaming."""
932
1024
  return iter(self.data)
@@ -967,7 +1059,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
967
1059
  sample_size = min(len(self), 100) # Check at most 100 scenarios
968
1060
  base_keys = set(first_item.keys())
969
1061
  keys = set()
970
-
1062
+
971
1063
  # Use a counter to check only the sample_size
972
1064
  count = 0
973
1065
  for scenario in self:
@@ -975,9 +1067,10 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
975
1067
  count += 1
976
1068
  if count >= sample_size:
977
1069
  break
978
-
1070
+
979
1071
  if keys != base_keys:
980
1072
  import warnings
1073
+
981
1074
  warnings.warn(
982
1075
  "Ragged ScenarioList detected (different keys for different scenario entries). This may cause unexpected behavior."
983
1076
  )
@@ -999,10 +1092,10 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
999
1092
  # Create a copy and immediately append to the new list
1000
1093
  scenario_copy = scenario.copy()
1001
1094
  new_sl.append(scenario_copy)
1002
-
1095
+
1003
1096
  # Remove reference to allow for garbage collection
1004
1097
  del scenario_copy
1005
-
1098
+
1006
1099
  except NameNotDefined as e:
1007
1100
  # Get available fields for error message
1008
1101
  try:
@@ -1023,22 +1116,26 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1023
1116
 
1024
1117
  return new_sl
1025
1118
 
1026
-
1027
1119
  @classmethod
1028
- def from_urls(cls, urls: list[str], field_name: Optional[str] = "text") -> ScenarioList:
1120
+ def from_urls(
1121
+ cls, urls: list[str], field_name: Optional[str] = "text"
1122
+ ) -> ScenarioList:
1029
1123
  from .scenario_source import URLSource
1124
+
1030
1125
  return URLSource(urls, field_name).to_scenario_list()
1031
-
1126
+
1032
1127
  @classmethod
1033
- def from_list(cls, field_name: str, values: list, use_indexes: bool = False) -> ScenarioList:
1128
+ def from_list(
1129
+ cls, field_name: str, values: list, use_indexes: bool = False
1130
+ ) -> ScenarioList:
1034
1131
  """Create a ScenarioList from a list of values with a specified field name.
1035
-
1132
+
1036
1133
  >>> ScenarioList.from_list('text', ['a', 'b', 'c'])
1037
1134
  ScenarioList([Scenario({'text': 'a'}), Scenario({'text': 'b'}), Scenario({'text': 'c'})])
1038
1135
  """
1039
1136
  from .scenario_source import ListSource
1137
+
1040
1138
  return ListSource(field_name, values, use_indexes).to_scenario_list()
1041
-
1042
1139
 
1043
1140
  def select(self, *fields: str) -> ScenarioList:
1044
1141
  """
@@ -1131,23 +1228,24 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1131
1228
  sl = ScenarioList.from_directory(recursive=True, key_name="document")
1132
1229
  """
1133
1230
  import warnings
1231
+
1134
1232
  warnings.warn(
1135
1233
  "from_directory is deprecated. Use ScenarioSource.from_source('directory', ...) instead.",
1136
1234
  DeprecationWarning,
1137
- stacklevel=2
1235
+ stacklevel=2,
1138
1236
  )
1139
1237
  from .scenario_source import DirectorySource
1140
-
1238
+
1141
1239
  source = DirectorySource(
1142
1240
  directory=path or os.getcwd(),
1143
1241
  pattern="*",
1144
1242
  recursive=recursive,
1145
- metadata=True
1243
+ metadata=True,
1146
1244
  )
1147
-
1245
+
1148
1246
  # Get the ScenarioList with FileStore objects under "file" key
1149
1247
  sl = source.to_scenario_list()
1150
-
1248
+
1151
1249
  # If the requested key is different from the default "file" key used by DirectoryScanner.scan_directory,
1152
1250
  # rename the keys in all scenarios
1153
1251
  if key_name != "file":
@@ -1162,7 +1260,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1162
1260
  new_data[k] = v
1163
1261
  result.append(Scenario(new_data))
1164
1262
  return result
1165
-
1263
+
1166
1264
  return sl
1167
1265
 
1168
1266
  # @classmethod
@@ -1298,18 +1396,21 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1298
1396
 
1299
1397
  @classmethod
1300
1398
  @deprecated_classmethod("ScenarioSource.from_source('list_of_tuples', ...)")
1301
- def from_list_of_tuples(cls, field_names: list[str], values: list[tuple], use_indexes: bool = False) -> ScenarioList:
1399
+ def from_list_of_tuples(
1400
+ cls, field_names: list[str], values: list[tuple], use_indexes: bool = False
1401
+ ) -> ScenarioList:
1302
1402
  """Create a ScenarioList from a list of tuples with specified field names.
1303
-
1403
+
1304
1404
  Args:
1305
1405
  field_names: A list of field names for the tuples
1306
1406
  values: A list of tuples with values matching the field_names
1307
1407
  use_indexes: Whether to add an index field to each scenario
1308
-
1408
+
1309
1409
  Returns:
1310
1410
  A ScenarioList containing the data from the tuples
1311
1411
  """
1312
1412
  from .scenario_source import TuplesSource
1413
+
1313
1414
  source = TuplesSource(field_names, values, use_indexes)
1314
1415
  return source.to_scenario_list()
1315
1416
 
@@ -1322,7 +1423,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1322
1423
  >>> s.add_list('age', [30, 25])
1323
1424
  ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
1324
1425
  """
1325
- #sl = self.duplicate()
1426
+ # sl = self.duplicate()
1326
1427
  if len(values) != len(self.data):
1327
1428
  raise ScenarioError(
1328
1429
  f"Length of values ({len(values)}) does not match length of ScenarioList ({len(self)})"
@@ -1375,7 +1476,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1375
1476
  ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
1376
1477
 
1377
1478
  """
1378
- new_sl = ScenarioList(data = [], codebook=self.codebook)
1479
+ new_sl = ScenarioList(data=[], codebook=self.codebook)
1379
1480
  for scenario in self:
1380
1481
  new_scenario = scenario.rename(replacement_dict)
1381
1482
  new_sl.append(new_scenario)
@@ -1441,12 +1542,13 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1441
1542
  sqlite3.Error: If there is an error executing the database query
1442
1543
  """
1443
1544
  from .scenario_source import SQLiteSource
1444
-
1545
+
1445
1546
  # Handle the case where sql_query is provided instead of table
1446
1547
  if table is None and sql_query is None:
1447
1548
  from .exceptions import ValueScenarioError
1549
+
1448
1550
  raise ValueScenarioError("Either table or sql_query must be provided")
1449
-
1551
+
1450
1552
  if table is None:
1451
1553
  # We need to use the old implementation for SQL queries
1452
1554
  import sqlite3
@@ -1469,19 +1571,22 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1469
1571
 
1470
1572
  @classmethod
1471
1573
  @deprecated_classmethod("ScenarioSource.from_source('latex', ...)")
1472
- def from_latex(cls, tex_file_path: str, table_index: int = 0, has_header: bool = True):
1574
+ def from_latex(
1575
+ cls, tex_file_path: str, table_index: int = 0, has_header: bool = True
1576
+ ):
1473
1577
  """Create a ScenarioList from a LaTeX file.
1474
-
1578
+
1475
1579
  Args:
1476
1580
  tex_file_path: The path to the LaTeX file.
1477
1581
  table_index: The index of the table to extract (if multiple tables exist).
1478
1582
  Default is 0 (first table).
1479
1583
  has_header: Whether the table has a header row. Default is True.
1480
-
1584
+
1481
1585
  Returns:
1482
1586
  ScenarioList: A new ScenarioList containing the data from the LaTeX table.
1483
1587
  """
1484
1588
  from .scenario_source import LaTeXSource
1589
+
1485
1590
  source = LaTeXSource(tex_file_path, table_index, has_header)
1486
1591
  return source.to_scenario_list()
1487
1592
 
@@ -1501,6 +1606,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1501
1606
 
1502
1607
  """
1503
1608
  from .scenario_source import GoogleDocSource
1609
+
1504
1610
  source = GoogleDocSource(url)
1505
1611
  return source.to_scenario_list()
1506
1612
 
@@ -1517,6 +1623,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1517
1623
  ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
1518
1624
  """
1519
1625
  from .scenario_source import PandasSource
1626
+
1520
1627
  source = PandasSource(df)
1521
1628
  return source.to_scenario_list()
1522
1629
 
@@ -1534,6 +1641,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1534
1641
  ScenarioList: A ScenarioList containing the data from the Stata file
1535
1642
  """
1536
1643
  from .scenario_source import StataSource
1644
+
1537
1645
  source = StataSource(filepath, include_metadata)
1538
1646
  return source.to_scenario_list()
1539
1647
 
@@ -1550,12 +1658,13 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1550
1658
 
1551
1659
  Returns:
1552
1660
  ScenarioList: A ScenarioList containing data from the Wikipedia table.
1553
-
1661
+
1554
1662
  Example usage:
1555
1663
  url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
1556
1664
  scenarios = ScenarioList.from_wikipedia(url, 0)
1557
1665
  """
1558
1666
  from .scenario_source import WikipediaSource
1667
+
1559
1668
  source = WikipediaSource(url, table_index, header)
1560
1669
  return source.to_scenario_list()
1561
1670
 
@@ -1584,7 +1693,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1584
1693
  sheet_name: Optional[str] = None,
1585
1694
  skip_rows: Optional[List[int]] = None,
1586
1695
  use_codebook: bool = False,
1587
- **kwargs
1696
+ **kwargs,
1588
1697
  ) -> ScenarioList:
1589
1698
  """Create a ScenarioList from an Excel file.
1590
1699
 
@@ -1633,19 +1742,24 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1633
1742
  'Charlie'
1634
1743
  """
1635
1744
  from .scenario_source import ExcelSource
1745
+
1636
1746
  source = ExcelSource(
1637
- file_path=filename,
1638
- sheet_name=sheet_name,
1639
- skip_rows=skip_rows,
1747
+ file_path=filename,
1748
+ sheet_name=sheet_name,
1749
+ skip_rows=skip_rows,
1640
1750
  use_codebook=use_codebook,
1641
- **kwargs
1751
+ **kwargs,
1642
1752
  )
1643
1753
  return source.to_scenario_list()
1644
1754
 
1645
1755
  @classmethod
1646
1756
  @deprecated_classmethod("ScenarioSource.from_source('google_sheet', ...)")
1647
1757
  def from_google_sheet(
1648
- cls, url: str, sheet_name: str = None, column_names: Optional[List[str]] = None, **kwargs
1758
+ cls,
1759
+ url: str,
1760
+ sheet_name: str = None,
1761
+ column_names: Optional[List[str]] = None,
1762
+ **kwargs,
1649
1763
  ) -> ScenarioList:
1650
1764
  """Create a ScenarioList from a Google Sheet.
1651
1765
 
@@ -1665,102 +1779,112 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1665
1779
 
1666
1780
  """
1667
1781
  from .scenario_source import GoogleSheetSource
1668
- source = GoogleSheetSource(url, sheet_name=sheet_name, column_names=column_names, **kwargs)
1782
+
1783
+ source = GoogleSheetSource(
1784
+ url, sheet_name=sheet_name, column_names=column_names, **kwargs
1785
+ )
1669
1786
  return source.to_scenario_list()
1670
1787
 
1671
1788
  @classmethod
1672
1789
  @deprecated_classmethod("ScenarioSource.from_source('delimited_file', ...)")
1673
1790
  def from_delimited_file(
1674
- cls, source: Union[str, "ParseResult"], delimiter: str = ",", encoding: str = "utf-8", **kwargs
1791
+ cls,
1792
+ source: Union[str, "ParseResult"],
1793
+ delimiter: str = ",",
1794
+ encoding: str = "utf-8",
1795
+ **kwargs,
1675
1796
  ) -> ScenarioList:
1676
1797
  """Create a ScenarioList from a delimited file (CSV/TSV) or URL.
1677
-
1798
+
1678
1799
  Args:
1679
1800
  source: Path to a local file or URL to a remote file.
1680
1801
  delimiter: The delimiter character used in the file (default is ',').
1681
1802
  encoding: The file encoding to use (default is 'utf-8').
1682
1803
  **kwargs: Additional parameters for csv reader.
1683
-
1804
+
1684
1805
  Returns:
1685
1806
  ScenarioList: An instance of the ScenarioList class.
1686
1807
  """
1687
1808
  from .scenario_source import DelimitedFileSource
1688
1809
  from urllib.parse import ParseResult
1689
-
1810
+
1690
1811
  if isinstance(source, ParseResult):
1691
1812
  # Convert ParseResult to string URL
1692
1813
  file_or_url = source.geturl()
1693
1814
  else:
1694
1815
  file_or_url = source
1695
-
1816
+
1696
1817
  source = DelimitedFileSource(
1697
- file_or_url=file_or_url,
1698
- delimiter=delimiter,
1699
- encoding=encoding,
1700
- **kwargs
1818
+ file_or_url=file_or_url, delimiter=delimiter, encoding=encoding, **kwargs
1701
1819
  )
1702
1820
  return source.to_scenario_list()
1703
1821
 
1704
1822
  # Convenience methods for specific file types
1705
1823
  @classmethod
1706
1824
  @deprecated_classmethod("ScenarioSource.from_source('csv', ...)")
1707
- def from_csv(cls, source: Union[str, "ParseResult"], has_header: bool = True, encoding: str = "utf-8", **kwargs) -> ScenarioList:
1825
+ def from_csv(
1826
+ cls,
1827
+ source: Union[str, "ParseResult"],
1828
+ has_header: bool = True,
1829
+ encoding: str = "utf-8",
1830
+ **kwargs,
1831
+ ) -> ScenarioList:
1708
1832
  """Create a ScenarioList from a CSV file or URL.
1709
-
1833
+
1710
1834
  Args:
1711
1835
  source: Path to a local file or URL to a remote file.
1712
1836
  has_header: Whether the file has a header row (default is True).
1713
1837
  encoding: The file encoding to use (default is 'utf-8').
1714
1838
  **kwargs: Additional parameters for csv reader.
1715
-
1839
+
1716
1840
  Returns:
1717
1841
  ScenarioList: An instance of the ScenarioList class.
1718
1842
  """
1719
1843
  from .scenario_source import CSVSource
1720
1844
  from urllib.parse import ParseResult
1721
-
1845
+
1722
1846
  if isinstance(source, ParseResult):
1723
1847
  # Convert ParseResult to string URL
1724
1848
  file_or_url = source.geturl()
1725
1849
  else:
1726
1850
  file_or_url = source
1727
-
1851
+
1728
1852
  source = CSVSource(
1729
- file_or_url=file_or_url,
1730
- has_header=has_header,
1731
- encoding=encoding,
1732
- **kwargs
1853
+ file_or_url=file_or_url, has_header=has_header, encoding=encoding, **kwargs
1733
1854
  )
1734
1855
  return source.to_scenario_list()
1735
-
1856
+
1736
1857
  @classmethod
1737
1858
  @deprecated_classmethod("ScenarioSource.from_source('tsv', ...)")
1738
- def from_tsv(cls, source: Union[str, "ParseResult"], has_header: bool = True, encoding: str = "utf-8", **kwargs) -> ScenarioList:
1859
+ def from_tsv(
1860
+ cls,
1861
+ source: Union[str, "ParseResult"],
1862
+ has_header: bool = True,
1863
+ encoding: str = "utf-8",
1864
+ **kwargs,
1865
+ ) -> ScenarioList:
1739
1866
  """Create a ScenarioList from a TSV file or URL.
1740
-
1867
+
1741
1868
  Args:
1742
1869
  source: Path to a local file or URL to a remote file.
1743
1870
  has_header: Whether the file has a header row (default is True).
1744
1871
  encoding: The file encoding to use (default is 'utf-8').
1745
1872
  **kwargs: Additional parameters for csv reader.
1746
-
1873
+
1747
1874
  Returns:
1748
1875
  ScenarioList: An instance of the ScenarioList class.
1749
1876
  """
1750
1877
  from .scenario_source import TSVSource
1751
1878
  from urllib.parse import ParseResult
1752
-
1879
+
1753
1880
  if isinstance(source, ParseResult):
1754
1881
  # Convert ParseResult to string URL
1755
1882
  file_or_url = source.geturl()
1756
1883
  else:
1757
1884
  file_or_url = source
1758
-
1885
+
1759
1886
  source = TSVSource(
1760
- file_or_url=file_or_url,
1761
- has_header=has_header,
1762
- encoding=encoding,
1763
- **kwargs
1887
+ file_or_url=file_or_url, has_header=has_header, encoding=encoding, **kwargs
1764
1888
  )
1765
1889
  return source.to_scenario_list()
1766
1890
 
@@ -1786,7 +1910,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1786
1910
  def from_tsv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
1787
1911
  """Create a ScenarioList from a TSV file or URL."""
1788
1912
  from .scenario_source import ScenarioSource
1789
-
1913
+
1790
1914
  # Delegate to ScenarioSource implementation
1791
1915
  return ScenarioSource._from_tsv(source)
1792
1916
 
@@ -1807,12 +1931,12 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1807
1931
  data = sorted(self, key=lambda x: hash(x))
1808
1932
  else:
1809
1933
  data = self
1810
-
1934
+
1811
1935
  d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
1812
1936
 
1813
1937
  # Add codebook if it exists
1814
- if hasattr(self, 'codebook') and self.codebook:
1815
- d['codebook'] = self.codebook
1938
+ if hasattr(self, "codebook") and self.codebook:
1939
+ d["codebook"] = self.codebook
1816
1940
 
1817
1941
  if add_edsl_version:
1818
1942
  from .. import __version__
@@ -1821,6 +1945,37 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1821
1945
  d["edsl_class_name"] = self.__class__.__name__
1822
1946
  return d
1823
1947
 
1948
def clipboard_data(self) -> str:
    """Return a TSV rendering of this ScenarioList for clipboard operations.

    The list is exported through ``self.to_csv()``; the resulting CSV text
    is parsed and re-emitted with tab delimiters. The upstream documentation
    states that the base-class ``clipboard()`` method calls this hook to get
    the text it copies.

    Fields containing a tab, a newline or a double-quote character are
    quoted CSV-style (embedded quotes doubled), so they cannot be mistaken
    for a column or row boundary. Fields without those characters are
    written verbatim.

    Returns:
        str: Tab-separated text with rows separated by a line feed and no
        trailing line feed after the last row.
    """
    import csv
    import io

    # to_csv() is called without arguments, so its ``.text`` is ordinary
    # comma-separated content that still has to be converted.
    csv_content = self.to_csv().text

    # newline="" leaves line-ending handling entirely to the csv module.
    buffer = io.StringIO(newline="")
    writer = csv.writer(buffer, delimiter="\t", lineterminator="\n")

    # Parse with the csv reader (honours quoted fields) and let the writer
    # re-quote any field that would otherwise break the TSV layout.
    writer.writerows(csv.reader(io.StringIO(csv_content)))

    tsv_text = buffer.getvalue()
    # The writer terminates every row; drop only the final terminator so
    # the result has no trailing line feed, as before.
    return tsv_text[:-1] if tsv_text.endswith("\n") else tsv_text
+
1824
1979
  def to(self, survey: Union["Survey", "QuestionBase"]) -> "Jobs":
1825
1980
  """Create a Jobs object from a ScenarioList and a Survey object.
1826
1981
 
@@ -1868,10 +2023,12 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1868
2023
  from .scenario import Scenario
1869
2024
 
1870
2025
  # Extract codebook if it exists
1871
- codebook = data.get('codebook', None)
1872
-
2026
+ codebook = data.get("codebook", None)
2027
+
1873
2028
  # Create ScenarioList with scenarios and codebook
1874
- return cls([Scenario.from_dict(s) for s in data["scenarios"]], codebook=codebook)
2029
+ return cls(
2030
+ [Scenario.from_dict(s) for s in data["scenarios"]], codebook=codebook
2031
+ )
1875
2032
 
1876
2033
  @classmethod
1877
2034
  def from_nested_dict(cls, data: dict) -> ScenarioList:
@@ -1916,7 +2073,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1916
2073
  """
1917
2074
  return cls([Scenario.example(randomize), Scenario.example(randomize)])
1918
2075
 
1919
-
1920
2076
  def items(self):
1921
2077
  """Make this class compatible with dict.items() by accessing first scenario items.
1922
2078
 
@@ -1950,7 +2106,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1950
2106
  # Fallback to empty scenario
1951
2107
  return Scenario({})
1952
2108
 
1953
-
1954
2109
  def to_agent_list(self):
1955
2110
  """Convert the ScenarioList to an AgentList.
1956
2111
 
@@ -1989,6 +2144,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
1989
2144
  'Assistant'
1990
2145
  """
1991
2146
  from ..agents import AgentList
2147
+
1992
2148
  return AgentList.from_scenario_list(self)
1993
2149
 
1994
2150
  def chunk(
@@ -2020,7 +2176,12 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2020
2176
  return ScenarioList(new_scenarios)
2021
2177
 
2022
2178
  def collapse(
2023
- self, field: str, separator: Optional[str] = None, add_count: bool = False
2179
+ self,
2180
+ field: str,
2181
+ separator: Optional[str] = None,
2182
+ prefix: str = "",
2183
+ postfix: str = "",
2184
+ add_count: bool = False
2024
2185
  ) -> ScenarioList:
2025
2186
  """Collapse a ScenarioList by grouping on all fields except the specified one,
2026
2187
  collecting the values of the specified field into a list.
@@ -2028,6 +2189,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2028
2189
  Args:
2029
2190
  field: The field to collapse (whose values will be collected into lists)
2030
2191
  separator: Optional string to join the values with instead of keeping as a list
2192
+ prefix: String to prepend to each value before joining (only used with separator)
2193
+ postfix: String to append to each value before joining (only used with separator)
2031
2194
  add_count: If True, adds a field showing the number of collapsed rows
2032
2195
 
2033
2196
  Returns:
@@ -2041,6 +2204,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2041
2204
  ... ])
2042
2205
  >>> s.collapse('item', add_count=True)
2043
2206
  ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry'], 'num_collapsed_rows': 2}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach'], 'num_collapsed_rows': 1})])
2207
+ >>> s.collapse('item', separator='; ', prefix='<example>', postfix='</example>')
2208
+ ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': '<example>apple</example>; <example>cherry</example>'}), Scenario({'category': 'vegetable', 'color': 'green', 'item': '<example>spinach</example>'})])
2044
2209
  """
2045
2210
  if not self:
2046
2211
  return ScenarioList([])
@@ -2057,18 +2222,19 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2057
2222
  grouped[key].append(scenario[field])
2058
2223
 
2059
2224
  # Create a new ScenarioList with the collapsed field
2060
- new_sl = ScenarioList(data = [], codebook=self.codebook)
2225
+ new_sl = ScenarioList(data=[], codebook=self.codebook)
2061
2226
  for key, values in grouped.items():
2062
2227
  new_scenario = dict(zip(id_vars, key))
2063
2228
  if separator:
2064
- new_scenario[field] = separator.join([str(x) for x in values])
2229
+ # Apply prefix and postfix to each value, then join with separator
2230
+ formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
2231
+ new_scenario[field] = separator.join(formatted_values)
2065
2232
  else:
2066
2233
  new_scenario[field] = values
2067
2234
  if add_count:
2068
2235
  new_scenario["num_collapsed_rows"] = len(values)
2069
2236
  new_sl.append(Scenario(new_scenario))
2070
2237
 
2071
- #return ScenarioList(result)
2072
2238
  return new_sl
2073
2239
 
2074
2240
  def create_comparisons(
@@ -2163,7 +2329,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2163
2329
  result.append(Scenario(new_scenario))
2164
2330
 
2165
2331
  return ScenarioList(result)
2166
-
2167
2332
 
2168
2333
  @classmethod
2169
2334
  @deprecated_classmethod("ScenarioSource.from_source('parquet', ...)")
@@ -2177,6 +2342,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2177
2342
  ScenarioList: A new ScenarioList containing the scenarios from the Parquet file.
2178
2343
  """
2179
2344
  from .scenario_source import ParquetSource
2345
+
2180
2346
  source = ParquetSource(filepath)
2181
2347
  return source.to_scenario_list()
2182
2348
 
@@ -2212,17 +2378,17 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2212
2378
  new_scenario[key] = value
2213
2379
  new_sl.append(Scenario(new_scenario))
2214
2380
  return new_sl
2215
-
2381
+
2216
2382
  @classmethod
2217
2383
  @deprecated_classmethod("ScenarioSource.from_source('pdf', ...)")
2218
2384
  def from_pdf(cls, filename_or_url, collapse_pages=False):
2219
2385
  """Create a ScenarioList from a PDF file or URL."""
2220
2386
  from .scenario_source import PDFSource
2221
-
2387
+
2222
2388
  source = PDFSource(
2223
2389
  file_path=filename_or_url,
2224
2390
  chunk_type="page" if not collapse_pages else "text",
2225
- chunk_size=1
2391
+ chunk_size=1,
2226
2392
  )
2227
2393
  return source.to_scenario_list()
2228
2394
 
@@ -2231,32 +2397,28 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2231
2397
  def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
2232
2398
  """Create a ScenarioList with images extracted from a PDF file."""
2233
2399
  from .scenario_source import PDFImageSource
2234
-
2235
- source = PDFImageSource(
2236
- file_path=pdf_path,
2237
- base_width=2000,
2238
- include_text=True
2239
- )
2400
+
2401
+ source = PDFImageSource(file_path=pdf_path, base_width=2000, include_text=True)
2240
2402
  return source.to_scenario_list()
2241
-
2403
+
2242
2404
  @classmethod
2243
2405
  def from_source(cls, source_type: str, *args, **kwargs) -> "ScenarioList":
2244
2406
  """
2245
2407
  Create a ScenarioList from a specified source type.
2246
-
2408
+
2247
2409
  This method serves as the main entry point for creating ScenarioList objects,
2248
2410
  providing a unified interface for various data sources.
2249
-
2411
+
2250
2412
  Args:
2251
2413
  source_type: The type of source to create a ScenarioList from.
2252
2414
  Valid values include: 'urls', 'directory', 'csv', 'tsv',
2253
2415
  'excel', 'pdf', 'pdf_to_image', and others.
2254
2416
  *args: Positional arguments to pass to the source-specific method.
2255
2417
  **kwargs: Keyword arguments to pass to the source-specific method.
2256
-
2418
+
2257
2419
  Returns:
2258
2420
  A ScenarioList object created from the specified source.
2259
-
2421
+
2260
2422
  Examples:
2261
2423
  >>> # This is a simplified example for doctest
2262
2424
  >>> # In real usage, you would provide a path to your CSV file:
@@ -2266,9 +2428,11 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
2266
2428
  >>> # sl_dir = ScenarioList.from_source('directory', '/path/to/files')
2267
2429
  """
2268
2430
  from .scenario_source import ScenarioSource
2431
+
2269
2432
  return ScenarioSource.from_source(source_type, *args, **kwargs)
2270
2433
 
2271
2434
 
2272
2435
  if __name__ == "__main__":
2273
2436
  import doctest
2274
- doctest.testmod(optionflags=doctest.ELLIPSIS)
2437
+
2438
+ doctest.testmod(optionflags=doctest.ELLIPSIS)