edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +66 -0
- edsl/__version__.py +1 -1
- edsl/base/base_class.py +53 -0
- edsl/cli.py +93 -27
- edsl/config/config_class.py +4 -0
- edsl/coop/coop.py +403 -28
- edsl/coop/coop_jobs_objects.py +2 -2
- edsl/coop/coop_regular_objects.py +3 -1
- edsl/dataset/dataset.py +47 -41
- edsl/dataset/dataset_operations_mixin.py +138 -15
- edsl/dataset/report_from_template.py +509 -0
- edsl/inference_services/services/azure_ai.py +8 -2
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/jobs/jobs.py +5 -4
- edsl/jobs/jobs_checks.py +11 -6
- edsl/jobs/remote_inference.py +17 -10
- edsl/prompts/prompt.py +7 -2
- edsl/questions/question_registry.py +4 -1
- edsl/results/result.py +93 -38
- edsl/results/results.py +24 -15
- edsl/scenarios/file_store.py +69 -0
- edsl/scenarios/scenario.py +233 -0
- edsl/scenarios/scenario_list.py +294 -130
- edsl/scenarios/scenario_source.py +1 -2
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/METADATA +1 -1
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/RECORD +29 -28
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/LICENSE +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/WHEEL +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/entry_points.txt +0 -0
edsl/scenarios/scenario_list.py
CHANGED
@@ -39,7 +39,7 @@ import json
|
|
39
39
|
import pickle
|
40
40
|
|
41
41
|
|
42
|
-
# Import for refactoring to Source classes
|
42
|
+
# Import for refactoring to Source classes
|
43
43
|
from edsl.scenarios.scenario_source import deprecated_classmethod, TuplesSource
|
44
44
|
|
45
45
|
from simpleeval import EvalWithCompoundTypes, NameNotDefined # type: ignore
|
@@ -98,7 +98,6 @@ TableFormat: TypeAlias = Literal[
|
|
98
98
|
]
|
99
99
|
|
100
100
|
|
101
|
-
|
102
101
|
class ScenarioSQLiteList(SQLiteList):
|
103
102
|
"""SQLite-backed list specifically for storing Scenario objects."""
|
104
103
|
|
@@ -112,12 +111,14 @@ class ScenarioSQLiteList(SQLiteList):
|
|
112
111
|
return pickle.loads(data.encode())
|
113
112
|
return pickle.loads(data)
|
114
113
|
|
115
|
-
|
116
|
-
|
114
|
+
from ..config import CONFIG
|
115
|
+
|
116
|
+
if use_sqlite := CONFIG.get("EDSL_USE_SQLITE_FOR_SCENARIO_LIST").lower() == "true":
|
117
117
|
data_class = ScenarioSQLiteList
|
118
118
|
else:
|
119
119
|
data_class = list
|
120
120
|
|
121
|
+
|
121
122
|
class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
122
123
|
"""
|
123
124
|
A collection of Scenario objects with advanced operations for manipulation and analysis.
|
@@ -219,13 +220,13 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
219
220
|
"""
|
220
221
|
seen_hashes = set()
|
221
222
|
result = ScenarioList()
|
222
|
-
|
223
|
+
|
223
224
|
for scenario in self.data:
|
224
225
|
scenario_hash = hash(scenario)
|
225
226
|
if scenario_hash not in seen_hashes:
|
226
227
|
seen_hashes.add(scenario_hash)
|
227
228
|
result.append(scenario)
|
228
|
-
|
229
|
+
|
229
230
|
return result
|
230
231
|
|
231
232
|
@property
|
@@ -306,8 +307,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
306
307
|
ScenarioList([Scenario({'custom_name': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
|
307
308
|
"""
|
308
309
|
codebook = existing_codebook.copy() if existing_codebook else {}
|
309
|
-
|
310
|
-
new_scenarios = ScenarioList(data
|
310
|
+
|
311
|
+
new_scenarios = ScenarioList(data=[], codebook=codebook)
|
311
312
|
|
312
313
|
for scenario in self:
|
313
314
|
new_scenario = {}
|
@@ -355,7 +356,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
355
356
|
if value_vars is None:
|
356
357
|
value_vars = [field for field in self[0].keys() if field not in id_vars]
|
357
358
|
|
358
|
-
new_scenarios = ScenarioList(data
|
359
|
+
new_scenarios = ScenarioList(data=[], codebook={})
|
359
360
|
for scenario in self:
|
360
361
|
for var in value_vars:
|
361
362
|
new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
|
@@ -366,13 +367,22 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
366
367
|
return new_scenarios
|
367
368
|
|
368
369
|
@classmethod
|
369
|
-
def from_prompt(
|
370
|
+
def from_prompt(
|
371
|
+
self,
|
372
|
+
description: str,
|
373
|
+
name: Optional[str] = "item",
|
374
|
+
target_number: int = 10,
|
375
|
+
verbose=False,
|
376
|
+
):
|
370
377
|
from ..questions.question_list import QuestionList
|
371
|
-
|
372
|
-
|
373
|
-
|
378
|
+
|
379
|
+
q = QuestionList(
|
380
|
+
question_name=name,
|
381
|
+
question_text=description
|
382
|
+
+ f"\n Please try to return {target_number} examples.",
|
383
|
+
)
|
384
|
+
results = q.run(verbose=verbose)
|
374
385
|
return results.select(name).to_scenario_list().expand(name)
|
375
|
-
|
376
386
|
|
377
387
|
def __add__(self, other):
|
378
388
|
if isinstance(other, Scenario):
|
@@ -395,14 +405,20 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
395
405
|
search_terms: A list of search terms.
|
396
406
|
"""
|
397
407
|
from ..utilities.wikipedia import fetch_wikipedia_content
|
408
|
+
|
398
409
|
results = fetch_wikipedia_content(search_terms)
|
399
410
|
return cls([Scenario(result) for result in results])
|
400
|
-
|
401
|
-
def augment_with_wikipedia(
|
411
|
+
|
412
|
+
def augment_with_wikipedia(
|
413
|
+
self,
|
414
|
+
search_key: str,
|
415
|
+
content_only: bool = True,
|
416
|
+
key_name: str = "wikipedia_content",
|
417
|
+
) -> ScenarioList:
|
402
418
|
"""Augment the ScenarioList with Wikipedia content."""
|
403
419
|
search_terms = self.select(search_key).to_list()
|
404
420
|
wikipedia_results = ScenarioList.from_search_terms(search_terms)
|
405
|
-
new_sl = ScenarioList(data
|
421
|
+
new_sl = ScenarioList(data=[], codebook=self.codebook)
|
406
422
|
for scenario, wikipedia_result in zip(self, wikipedia_results):
|
407
423
|
if content_only:
|
408
424
|
scenario[key_name] = wikipedia_result["content"]
|
@@ -412,7 +428,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
412
428
|
new_sl.append(scenario)
|
413
429
|
return new_sl
|
414
430
|
|
415
|
-
|
416
431
|
def pivot(
|
417
432
|
self,
|
418
433
|
id_vars: List[str] = None,
|
@@ -452,11 +467,11 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
452
467
|
value = scenario[value_name]
|
453
468
|
pivoted_dict[id_key][variable] = value
|
454
469
|
|
455
|
-
new_sl = ScenarioList(data
|
470
|
+
new_sl = ScenarioList(data=[], codebook=self.codebook)
|
456
471
|
for id_key, values in pivoted_dict.items():
|
457
472
|
new_sl.append(Scenario(dict(zip(id_vars, id_key), **values)))
|
458
473
|
return new_sl
|
459
|
-
|
474
|
+
|
460
475
|
def group_by(
|
461
476
|
self, id_vars: List[str], variables: List[str], func: Callable
|
462
477
|
) -> ScenarioList:
|
@@ -497,7 +512,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
497
512
|
grouped[key][var].append(scenario[var])
|
498
513
|
|
499
514
|
# Apply the function to each group
|
500
|
-
new_sl= ScenarioList(data
|
515
|
+
new_sl = ScenarioList(data=[], codebook=self.codebook)
|
501
516
|
for key, group in grouped.items():
|
502
517
|
try:
|
503
518
|
aggregated = func(*[group[var] for var in variables])
|
@@ -551,21 +566,22 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
551
566
|
"""
|
552
567
|
# Start with a seed value
|
553
568
|
running_hash = 0
|
554
|
-
|
569
|
+
|
555
570
|
# Use a heap to maintain sorted order as we go
|
556
571
|
import heapq
|
572
|
+
|
557
573
|
heap = []
|
558
|
-
|
574
|
+
|
559
575
|
# Process each scenario's hash and add to heap
|
560
576
|
for scenario in self:
|
561
577
|
heapq.heappush(heap, hash(scenario))
|
562
|
-
|
578
|
+
|
563
579
|
# Combine hashes in sorted order
|
564
580
|
while heap:
|
565
581
|
h = heapq.heappop(heap)
|
566
582
|
# Use a large prime number to mix the bits
|
567
583
|
running_hash = (running_hash * 31) ^ h
|
568
|
-
|
584
|
+
|
569
585
|
return running_hash
|
570
586
|
|
571
587
|
def __eq__(self, other: Any) -> bool:
|
@@ -608,6 +624,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
608
624
|
ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
|
609
625
|
"""
|
610
626
|
import warnings
|
627
|
+
|
611
628
|
warnings.warn("times is deprecated, use * instead", DeprecationWarning)
|
612
629
|
return self.__mul__(other)
|
613
630
|
|
@@ -671,6 +688,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
671
688
|
fields: List[str],
|
672
689
|
output_type: str = "string",
|
673
690
|
separator: str = ";",
|
691
|
+
prefix: str = "",
|
692
|
+
postfix: str = "",
|
674
693
|
new_field_name: Optional[str] = None,
|
675
694
|
) -> ScenarioList:
|
676
695
|
"""Private method to handle concatenation logic for different output types.
|
@@ -678,6 +697,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
678
697
|
:param fields: The fields to concatenate.
|
679
698
|
:param output_type: The type of output ("string", "list", or "set").
|
680
699
|
:param separator: The separator to use for string concatenation.
|
700
|
+
:param prefix: String to prepend to each value before concatenation.
|
701
|
+
:param postfix: String to append to each value before concatenation.
|
681
702
|
:param new_field_name: Optional custom name for the concatenated field.
|
682
703
|
If None, defaults to "concat_field1_field2_..."
|
683
704
|
|
@@ -706,14 +727,25 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
706
727
|
)
|
707
728
|
|
708
729
|
if output_type == "string":
|
709
|
-
#
|
710
|
-
|
730
|
+
# Apply prefix and postfix to each value, then join with separator
|
731
|
+
formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
|
732
|
+
new_scenario[field_name] = separator.join(formatted_values)
|
711
733
|
elif output_type == "list":
|
712
|
-
#
|
713
|
-
|
734
|
+
# Apply prefix and postfix to each value if they are strings
|
735
|
+
if prefix or postfix:
|
736
|
+
formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
|
737
|
+
new_scenario[field_name] = formatted_values
|
738
|
+
else:
|
739
|
+
# Keep as original list if no prefix/postfix
|
740
|
+
new_scenario[field_name] = values
|
714
741
|
elif output_type == "set":
|
715
|
-
#
|
716
|
-
|
742
|
+
# Apply prefix and postfix to each value if they are strings, then convert to set
|
743
|
+
if prefix or postfix:
|
744
|
+
formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
|
745
|
+
new_scenario[field_name] = set(formatted_values)
|
746
|
+
else:
|
747
|
+
# Convert to a set (removes duplicates)
|
748
|
+
new_scenario[field_name] = set(values)
|
717
749
|
else:
|
718
750
|
from .exceptions import ValueScenarioError
|
719
751
|
|
@@ -729,12 +761,16 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
729
761
|
self,
|
730
762
|
fields: List[str],
|
731
763
|
separator: str = ";",
|
764
|
+
prefix: str = "",
|
765
|
+
postfix: str = "",
|
732
766
|
new_field_name: Optional[str] = None,
|
733
767
|
) -> ScenarioList:
|
734
768
|
"""Concatenate specified fields into a single string field.
|
735
769
|
|
736
770
|
:param fields: The fields to concatenate.
|
737
771
|
:param separator: The separator to use.
|
772
|
+
:param prefix: String to prepend to each value before concatenation.
|
773
|
+
:param postfix: String to append to each value before concatenation.
|
738
774
|
:param new_field_name: Optional custom name for the concatenated field.
|
739
775
|
|
740
776
|
Returns:
|
@@ -746,20 +782,30 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
746
782
|
ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
|
747
783
|
>>> s.concatenate(['a', 'b', 'c'], new_field_name='combined')
|
748
784
|
ScenarioList([Scenario({'combined': '1;2;3'}), Scenario({'combined': '4;5;6'})])
|
785
|
+
>>> s.concatenate(['a', 'b', 'c'], prefix='[', postfix=']')
|
786
|
+
ScenarioList([Scenario({'concat_a_b_c': '[1];[2];[3]'}), Scenario({'concat_a_b_c': '[4];[5];[6]'})])
|
749
787
|
"""
|
750
788
|
return self._concatenate(
|
751
789
|
fields,
|
752
790
|
output_type="string",
|
753
791
|
separator=separator,
|
792
|
+
prefix=prefix,
|
793
|
+
postfix=postfix,
|
754
794
|
new_field_name=new_field_name,
|
755
795
|
)
|
756
796
|
|
757
797
|
def concatenate_to_list(
|
758
|
-
self,
|
798
|
+
self,
|
799
|
+
fields: List[str],
|
800
|
+
prefix: str = "",
|
801
|
+
postfix: str = "",
|
802
|
+
new_field_name: Optional[str] = None
|
759
803
|
) -> ScenarioList:
|
760
804
|
"""Concatenate specified fields into a single list field.
|
761
805
|
|
762
806
|
:param fields: The fields to concatenate.
|
807
|
+
:param prefix: String to prepend to each value before concatenation.
|
808
|
+
:param postfix: String to append to each value before concatenation.
|
763
809
|
:param new_field_name: Optional custom name for the concatenated field.
|
764
810
|
|
765
811
|
Returns:
|
@@ -771,17 +817,29 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
771
817
|
ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
|
772
818
|
>>> s.concatenate_to_list(['a', 'b', 'c'], new_field_name='values')
|
773
819
|
ScenarioList([Scenario({'values': [1, 2, 3]}), Scenario({'values': [4, 5, 6]})])
|
820
|
+
>>> s.concatenate_to_list(['a', 'b', 'c'], prefix='[', postfix=']')
|
821
|
+
ScenarioList([Scenario({'concat_a_b_c': ['[1]', '[2]', '[3]']}), Scenario({'concat_a_b_c': ['[4]', '[5]', '[6]']})])
|
774
822
|
"""
|
775
823
|
return self._concatenate(
|
776
|
-
fields,
|
824
|
+
fields,
|
825
|
+
output_type="list",
|
826
|
+
prefix=prefix,
|
827
|
+
postfix=postfix,
|
828
|
+
new_field_name=new_field_name
|
777
829
|
)
|
778
830
|
|
779
831
|
def concatenate_to_set(
|
780
|
-
self,
|
832
|
+
self,
|
833
|
+
fields: List[str],
|
834
|
+
prefix: str = "",
|
835
|
+
postfix: str = "",
|
836
|
+
new_field_name: Optional[str] = None
|
781
837
|
) -> ScenarioList:
|
782
838
|
"""Concatenate specified fields into a single set field.
|
783
839
|
|
784
840
|
:param fields: The fields to concatenate.
|
841
|
+
:param prefix: String to prepend to each value before concatenation.
|
842
|
+
:param postfix: String to append to each value before concatenation.
|
785
843
|
:param new_field_name: Optional custom name for the concatenated field.
|
786
844
|
|
787
845
|
Returns:
|
@@ -789,13 +847,26 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
789
847
|
|
790
848
|
Example:
|
791
849
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
|
792
|
-
>>> s.concatenate_to_set(['a', 'b', 'c'])
|
793
|
-
|
794
|
-
|
795
|
-
|
850
|
+
>>> result = s.concatenate_to_set(['a', 'b', 'c'])
|
851
|
+
>>> result[0]['concat_a_b_c'] == {1, 2, 3}
|
852
|
+
True
|
853
|
+
>>> result[1]['concat_a_b_c'] == {4, 5, 6}
|
854
|
+
True
|
855
|
+
>>> result = s.concatenate_to_set(['a', 'b', 'c'], new_field_name='unique_values')
|
856
|
+
>>> result[0]['unique_values'] == {1, 2, 3}
|
857
|
+
True
|
858
|
+
>>> result = s.concatenate_to_set(['a', 'b', 'c'], prefix='[', postfix=']')
|
859
|
+
>>> result[0]['concat_a_b_c'] == {'[1]', '[2]', '[3]'}
|
860
|
+
True
|
861
|
+
>>> result[1]['concat_a_b_c'] == {'[4]', '[5]', '[6]'}
|
862
|
+
True
|
796
863
|
"""
|
797
864
|
return self._concatenate(
|
798
|
-
fields,
|
865
|
+
fields,
|
866
|
+
output_type="set",
|
867
|
+
prefix=prefix,
|
868
|
+
postfix=postfix,
|
869
|
+
new_field_name=new_field_name
|
799
870
|
)
|
800
871
|
|
801
872
|
def unpack_dict(
|
@@ -927,6 +998,27 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
927
998
|
new_list.append(scenario.copy())
|
928
999
|
return new_list
|
929
1000
|
|
1001
|
+
def offload(self, inplace: bool = False) -> "ScenarioList":
|
1002
|
+
"""
|
1003
|
+
Offloads base64-encoded content from all scenarios in the list by replacing
|
1004
|
+
'base64_string' fields with 'offloaded'. This reduces memory usage.
|
1005
|
+
|
1006
|
+
Args:
|
1007
|
+
inplace (bool): If True, modify the current scenario list. If False, return a new one.
|
1008
|
+
|
1009
|
+
Returns:
|
1010
|
+
ScenarioList: The modified scenario list (either self or a new instance).
|
1011
|
+
"""
|
1012
|
+
if inplace:
|
1013
|
+
for i, scenario in enumerate(self.data):
|
1014
|
+
self.data[i] = scenario.offload(inplace=True)
|
1015
|
+
return self
|
1016
|
+
else:
|
1017
|
+
new_list = ScenarioList(codebook=self.codebook)
|
1018
|
+
for scenario in self.data:
|
1019
|
+
new_list.append(scenario.offload(inplace=False))
|
1020
|
+
return new_list
|
1021
|
+
|
930
1022
|
def __iter__(self):
|
931
1023
|
"""Iterate over scenarios using streaming."""
|
932
1024
|
return iter(self.data)
|
@@ -967,7 +1059,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
967
1059
|
sample_size = min(len(self), 100) # Check at most 100 scenarios
|
968
1060
|
base_keys = set(first_item.keys())
|
969
1061
|
keys = set()
|
970
|
-
|
1062
|
+
|
971
1063
|
# Use a counter to check only the sample_size
|
972
1064
|
count = 0
|
973
1065
|
for scenario in self:
|
@@ -975,9 +1067,10 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
975
1067
|
count += 1
|
976
1068
|
if count >= sample_size:
|
977
1069
|
break
|
978
|
-
|
1070
|
+
|
979
1071
|
if keys != base_keys:
|
980
1072
|
import warnings
|
1073
|
+
|
981
1074
|
warnings.warn(
|
982
1075
|
"Ragged ScenarioList detected (different keys for different scenario entries). This may cause unexpected behavior."
|
983
1076
|
)
|
@@ -999,10 +1092,10 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
999
1092
|
# Create a copy and immediately append to the new list
|
1000
1093
|
scenario_copy = scenario.copy()
|
1001
1094
|
new_sl.append(scenario_copy)
|
1002
|
-
|
1095
|
+
|
1003
1096
|
# Remove reference to allow for garbage collection
|
1004
1097
|
del scenario_copy
|
1005
|
-
|
1098
|
+
|
1006
1099
|
except NameNotDefined as e:
|
1007
1100
|
# Get available fields for error message
|
1008
1101
|
try:
|
@@ -1023,22 +1116,26 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1023
1116
|
|
1024
1117
|
return new_sl
|
1025
1118
|
|
1026
|
-
|
1027
1119
|
@classmethod
|
1028
|
-
def from_urls(
|
1120
|
+
def from_urls(
|
1121
|
+
cls, urls: list[str], field_name: Optional[str] = "text"
|
1122
|
+
) -> ScenarioList:
|
1029
1123
|
from .scenario_source import URLSource
|
1124
|
+
|
1030
1125
|
return URLSource(urls, field_name).to_scenario_list()
|
1031
|
-
|
1126
|
+
|
1032
1127
|
@classmethod
|
1033
|
-
def from_list(
|
1128
|
+
def from_list(
|
1129
|
+
cls, field_name: str, values: list, use_indexes: bool = False
|
1130
|
+
) -> ScenarioList:
|
1034
1131
|
"""Create a ScenarioList from a list of values with a specified field name.
|
1035
|
-
|
1132
|
+
|
1036
1133
|
>>> ScenarioList.from_list('text', ['a', 'b', 'c'])
|
1037
1134
|
ScenarioList([Scenario({'text': 'a'}), Scenario({'text': 'b'}), Scenario({'text': 'c'})])
|
1038
1135
|
"""
|
1039
1136
|
from .scenario_source import ListSource
|
1137
|
+
|
1040
1138
|
return ListSource(field_name, values, use_indexes).to_scenario_list()
|
1041
|
-
|
1042
1139
|
|
1043
1140
|
def select(self, *fields: str) -> ScenarioList:
|
1044
1141
|
"""
|
@@ -1131,23 +1228,24 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1131
1228
|
sl = ScenarioList.from_directory(recursive=True, key_name="document")
|
1132
1229
|
"""
|
1133
1230
|
import warnings
|
1231
|
+
|
1134
1232
|
warnings.warn(
|
1135
1233
|
"from_directory is deprecated. Use ScenarioSource.from_source('directory', ...) instead.",
|
1136
1234
|
DeprecationWarning,
|
1137
|
-
stacklevel=2
|
1235
|
+
stacklevel=2,
|
1138
1236
|
)
|
1139
1237
|
from .scenario_source import DirectorySource
|
1140
|
-
|
1238
|
+
|
1141
1239
|
source = DirectorySource(
|
1142
1240
|
directory=path or os.getcwd(),
|
1143
1241
|
pattern="*",
|
1144
1242
|
recursive=recursive,
|
1145
|
-
metadata=True
|
1243
|
+
metadata=True,
|
1146
1244
|
)
|
1147
|
-
|
1245
|
+
|
1148
1246
|
# Get the ScenarioList with FileStore objects under "file" key
|
1149
1247
|
sl = source.to_scenario_list()
|
1150
|
-
|
1248
|
+
|
1151
1249
|
# If the requested key is different from the default "file" key used by DirectoryScanner.scan_directory,
|
1152
1250
|
# rename the keys in all scenarios
|
1153
1251
|
if key_name != "file":
|
@@ -1162,7 +1260,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1162
1260
|
new_data[k] = v
|
1163
1261
|
result.append(Scenario(new_data))
|
1164
1262
|
return result
|
1165
|
-
|
1263
|
+
|
1166
1264
|
return sl
|
1167
1265
|
|
1168
1266
|
# @classmethod
|
@@ -1298,18 +1396,21 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1298
1396
|
|
1299
1397
|
@classmethod
|
1300
1398
|
@deprecated_classmethod("ScenarioSource.from_source('list_of_tuples', ...)")
|
1301
|
-
def from_list_of_tuples(
|
1399
|
+
def from_list_of_tuples(
|
1400
|
+
cls, field_names: list[str], values: list[tuple], use_indexes: bool = False
|
1401
|
+
) -> ScenarioList:
|
1302
1402
|
"""Create a ScenarioList from a list of tuples with specified field names.
|
1303
|
-
|
1403
|
+
|
1304
1404
|
Args:
|
1305
1405
|
field_names: A list of field names for the tuples
|
1306
1406
|
values: A list of tuples with values matching the field_names
|
1307
1407
|
use_indexes: Whether to add an index field to each scenario
|
1308
|
-
|
1408
|
+
|
1309
1409
|
Returns:
|
1310
1410
|
A ScenarioList containing the data from the tuples
|
1311
1411
|
"""
|
1312
1412
|
from .scenario_source import TuplesSource
|
1413
|
+
|
1313
1414
|
source = TuplesSource(field_names, values, use_indexes)
|
1314
1415
|
return source.to_scenario_list()
|
1315
1416
|
|
@@ -1322,7 +1423,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1322
1423
|
>>> s.add_list('age', [30, 25])
|
1323
1424
|
ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
|
1324
1425
|
"""
|
1325
|
-
#sl = self.duplicate()
|
1426
|
+
# sl = self.duplicate()
|
1326
1427
|
if len(values) != len(self.data):
|
1327
1428
|
raise ScenarioError(
|
1328
1429
|
f"Length of values ({len(values)}) does not match length of ScenarioList ({len(self)})"
|
@@ -1375,7 +1476,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1375
1476
|
ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
|
1376
1477
|
|
1377
1478
|
"""
|
1378
|
-
new_sl = ScenarioList(data
|
1479
|
+
new_sl = ScenarioList(data=[], codebook=self.codebook)
|
1379
1480
|
for scenario in self:
|
1380
1481
|
new_scenario = scenario.rename(replacement_dict)
|
1381
1482
|
new_sl.append(new_scenario)
|
@@ -1441,12 +1542,13 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1441
1542
|
sqlite3.Error: If there is an error executing the database query
|
1442
1543
|
"""
|
1443
1544
|
from .scenario_source import SQLiteSource
|
1444
|
-
|
1545
|
+
|
1445
1546
|
# Handle the case where sql_query is provided instead of table
|
1446
1547
|
if table is None and sql_query is None:
|
1447
1548
|
from .exceptions import ValueScenarioError
|
1549
|
+
|
1448
1550
|
raise ValueScenarioError("Either table or sql_query must be provided")
|
1449
|
-
|
1551
|
+
|
1450
1552
|
if table is None:
|
1451
1553
|
# We need to use the old implementation for SQL queries
|
1452
1554
|
import sqlite3
|
@@ -1469,19 +1571,22 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1469
1571
|
|
1470
1572
|
@classmethod
|
1471
1573
|
@deprecated_classmethod("ScenarioSource.from_source('latex', ...)")
|
1472
|
-
def from_latex(
|
1574
|
+
def from_latex(
|
1575
|
+
cls, tex_file_path: str, table_index: int = 0, has_header: bool = True
|
1576
|
+
):
|
1473
1577
|
"""Create a ScenarioList from a LaTeX file.
|
1474
|
-
|
1578
|
+
|
1475
1579
|
Args:
|
1476
1580
|
tex_file_path: The path to the LaTeX file.
|
1477
1581
|
table_index: The index of the table to extract (if multiple tables exist).
|
1478
1582
|
Default is 0 (first table).
|
1479
1583
|
has_header: Whether the table has a header row. Default is True.
|
1480
|
-
|
1584
|
+
|
1481
1585
|
Returns:
|
1482
1586
|
ScenarioList: A new ScenarioList containing the data from the LaTeX table.
|
1483
1587
|
"""
|
1484
1588
|
from .scenario_source import LaTeXSource
|
1589
|
+
|
1485
1590
|
source = LaTeXSource(tex_file_path, table_index, has_header)
|
1486
1591
|
return source.to_scenario_list()
|
1487
1592
|
|
@@ -1501,6 +1606,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1501
1606
|
|
1502
1607
|
"""
|
1503
1608
|
from .scenario_source import GoogleDocSource
|
1609
|
+
|
1504
1610
|
source = GoogleDocSource(url)
|
1505
1611
|
return source.to_scenario_list()
|
1506
1612
|
|
@@ -1517,6 +1623,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1517
1623
|
ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
|
1518
1624
|
"""
|
1519
1625
|
from .scenario_source import PandasSource
|
1626
|
+
|
1520
1627
|
source = PandasSource(df)
|
1521
1628
|
return source.to_scenario_list()
|
1522
1629
|
|
@@ -1534,6 +1641,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1534
1641
|
ScenarioList: A ScenarioList containing the data from the Stata file
|
1535
1642
|
"""
|
1536
1643
|
from .scenario_source import StataSource
|
1644
|
+
|
1537
1645
|
source = StataSource(filepath, include_metadata)
|
1538
1646
|
return source.to_scenario_list()
|
1539
1647
|
|
@@ -1550,12 +1658,13 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1550
1658
|
|
1551
1659
|
Returns:
|
1552
1660
|
ScenarioList: A ScenarioList containing data from the Wikipedia table.
|
1553
|
-
|
1661
|
+
|
1554
1662
|
Example usage:
|
1555
1663
|
url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
|
1556
1664
|
scenarios = ScenarioList.from_wikipedia(url, 0)
|
1557
1665
|
"""
|
1558
1666
|
from .scenario_source import WikipediaSource
|
1667
|
+
|
1559
1668
|
source = WikipediaSource(url, table_index, header)
|
1560
1669
|
return source.to_scenario_list()
|
1561
1670
|
|
@@ -1584,7 +1693,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1584
1693
|
sheet_name: Optional[str] = None,
|
1585
1694
|
skip_rows: Optional[List[int]] = None,
|
1586
1695
|
use_codebook: bool = False,
|
1587
|
-
**kwargs
|
1696
|
+
**kwargs,
|
1588
1697
|
) -> ScenarioList:
|
1589
1698
|
"""Create a ScenarioList from an Excel file.
|
1590
1699
|
|
@@ -1633,19 +1742,24 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1633
1742
|
'Charlie'
|
1634
1743
|
"""
|
1635
1744
|
from .scenario_source import ExcelSource
|
1745
|
+
|
1636
1746
|
source = ExcelSource(
|
1637
|
-
file_path=filename,
|
1638
|
-
sheet_name=sheet_name,
|
1639
|
-
skip_rows=skip_rows,
|
1747
|
+
file_path=filename,
|
1748
|
+
sheet_name=sheet_name,
|
1749
|
+
skip_rows=skip_rows,
|
1640
1750
|
use_codebook=use_codebook,
|
1641
|
-
**kwargs
|
1751
|
+
**kwargs,
|
1642
1752
|
)
|
1643
1753
|
return source.to_scenario_list()
|
1644
1754
|
|
1645
1755
|
@classmethod
|
1646
1756
|
@deprecated_classmethod("ScenarioSource.from_source('google_sheet', ...)")
|
1647
1757
|
def from_google_sheet(
|
1648
|
-
cls,
|
1758
|
+
cls,
|
1759
|
+
url: str,
|
1760
|
+
sheet_name: str = None,
|
1761
|
+
column_names: Optional[List[str]] = None,
|
1762
|
+
**kwargs,
|
1649
1763
|
) -> ScenarioList:
|
1650
1764
|
"""Create a ScenarioList from a Google Sheet.
|
1651
1765
|
|
@@ -1665,102 +1779,112 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1665
1779
|
|
1666
1780
|
"""
|
1667
1781
|
from .scenario_source import GoogleSheetSource
|
1668
|
-
|
1782
|
+
|
1783
|
+
source = GoogleSheetSource(
|
1784
|
+
url, sheet_name=sheet_name, column_names=column_names, **kwargs
|
1785
|
+
)
|
1669
1786
|
return source.to_scenario_list()
|
1670
1787
|
|
1671
1788
|
@classmethod
|
1672
1789
|
@deprecated_classmethod("ScenarioSource.from_source('delimited_file', ...)")
|
1673
1790
|
def from_delimited_file(
|
1674
|
-
cls,
|
1791
|
+
cls,
|
1792
|
+
source: Union[str, "ParseResult"],
|
1793
|
+
delimiter: str = ",",
|
1794
|
+
encoding: str = "utf-8",
|
1795
|
+
**kwargs,
|
1675
1796
|
) -> ScenarioList:
|
1676
1797
|
"""Create a ScenarioList from a delimited file (CSV/TSV) or URL.
|
1677
|
-
|
1798
|
+
|
1678
1799
|
Args:
|
1679
1800
|
source: Path to a local file or URL to a remote file.
|
1680
1801
|
delimiter: The delimiter character used in the file (default is ',').
|
1681
1802
|
encoding: The file encoding to use (default is 'utf-8').
|
1682
1803
|
**kwargs: Additional parameters for csv reader.
|
1683
|
-
|
1804
|
+
|
1684
1805
|
Returns:
|
1685
1806
|
ScenarioList: An instance of the ScenarioList class.
|
1686
1807
|
"""
|
1687
1808
|
from .scenario_source import DelimitedFileSource
|
1688
1809
|
from urllib.parse import ParseResult
|
1689
|
-
|
1810
|
+
|
1690
1811
|
if isinstance(source, ParseResult):
|
1691
1812
|
# Convert ParseResult to string URL
|
1692
1813
|
file_or_url = source.geturl()
|
1693
1814
|
else:
|
1694
1815
|
file_or_url = source
|
1695
|
-
|
1816
|
+
|
1696
1817
|
source = DelimitedFileSource(
|
1697
|
-
file_or_url=file_or_url,
|
1698
|
-
delimiter=delimiter,
|
1699
|
-
encoding=encoding,
|
1700
|
-
**kwargs
|
1818
|
+
file_or_url=file_or_url, delimiter=delimiter, encoding=encoding, **kwargs
|
1701
1819
|
)
|
1702
1820
|
return source.to_scenario_list()
|
1703
1821
|
|
1704
1822
|
# Convenience methods for specific file types
|
1705
1823
|
@classmethod
|
1706
1824
|
@deprecated_classmethod("ScenarioSource.from_source('csv', ...)")
|
1707
|
-
def from_csv(
|
1825
|
+
def from_csv(
|
1826
|
+
cls,
|
1827
|
+
source: Union[str, "ParseResult"],
|
1828
|
+
has_header: bool = True,
|
1829
|
+
encoding: str = "utf-8",
|
1830
|
+
**kwargs,
|
1831
|
+
) -> ScenarioList:
|
1708
1832
|
"""Create a ScenarioList from a CSV file or URL.
|
1709
|
-
|
1833
|
+
|
1710
1834
|
Args:
|
1711
1835
|
source: Path to a local file or URL to a remote file.
|
1712
1836
|
has_header: Whether the file has a header row (default is True).
|
1713
1837
|
encoding: The file encoding to use (default is 'utf-8').
|
1714
1838
|
**kwargs: Additional parameters for csv reader.
|
1715
|
-
|
1839
|
+
|
1716
1840
|
Returns:
|
1717
1841
|
ScenarioList: An instance of the ScenarioList class.
|
1718
1842
|
"""
|
1719
1843
|
from .scenario_source import CSVSource
|
1720
1844
|
from urllib.parse import ParseResult
|
1721
|
-
|
1845
|
+
|
1722
1846
|
if isinstance(source, ParseResult):
|
1723
1847
|
# Convert ParseResult to string URL
|
1724
1848
|
file_or_url = source.geturl()
|
1725
1849
|
else:
|
1726
1850
|
file_or_url = source
|
1727
|
-
|
1851
|
+
|
1728
1852
|
source = CSVSource(
|
1729
|
-
file_or_url=file_or_url,
|
1730
|
-
has_header=has_header,
|
1731
|
-
encoding=encoding,
|
1732
|
-
**kwargs
|
1853
|
+
file_or_url=file_or_url, has_header=has_header, encoding=encoding, **kwargs
|
1733
1854
|
)
|
1734
1855
|
return source.to_scenario_list()
|
1735
|
-
|
1856
|
+
|
1736
1857
|
@classmethod
|
1737
1858
|
@deprecated_classmethod("ScenarioSource.from_source('tsv', ...)")
|
1738
|
-
def from_tsv(
|
1859
|
+
def from_tsv(
|
1860
|
+
cls,
|
1861
|
+
source: Union[str, "ParseResult"],
|
1862
|
+
has_header: bool = True,
|
1863
|
+
encoding: str = "utf-8",
|
1864
|
+
**kwargs,
|
1865
|
+
) -> ScenarioList:
|
1739
1866
|
"""Create a ScenarioList from a TSV file or URL.
|
1740
|
-
|
1867
|
+
|
1741
1868
|
Args:
|
1742
1869
|
source: Path to a local file or URL to a remote file.
|
1743
1870
|
has_header: Whether the file has a header row (default is True).
|
1744
1871
|
encoding: The file encoding to use (default is 'utf-8').
|
1745
1872
|
**kwargs: Additional parameters for csv reader.
|
1746
|
-
|
1873
|
+
|
1747
1874
|
Returns:
|
1748
1875
|
ScenarioList: An instance of the ScenarioList class.
|
1749
1876
|
"""
|
1750
1877
|
from .scenario_source import TSVSource
|
1751
1878
|
from urllib.parse import ParseResult
|
1752
|
-
|
1879
|
+
|
1753
1880
|
if isinstance(source, ParseResult):
|
1754
1881
|
# Convert ParseResult to string URL
|
1755
1882
|
file_or_url = source.geturl()
|
1756
1883
|
else:
|
1757
1884
|
file_or_url = source
|
1758
|
-
|
1885
|
+
|
1759
1886
|
source = TSVSource(
|
1760
|
-
file_or_url=file_or_url,
|
1761
|
-
has_header=has_header,
|
1762
|
-
encoding=encoding,
|
1763
|
-
**kwargs
|
1887
|
+
file_or_url=file_or_url, has_header=has_header, encoding=encoding, **kwargs
|
1764
1888
|
)
|
1765
1889
|
return source.to_scenario_list()
|
1766
1890
|
|
@@ -1786,7 +1910,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1786
1910
|
def from_tsv(cls, source: Union[str, "ParseResult"]) -> ScenarioList:
|
1787
1911
|
"""Create a ScenarioList from a TSV file or URL."""
|
1788
1912
|
from .scenario_source import ScenarioSource
|
1789
|
-
|
1913
|
+
|
1790
1914
|
# Delegate to ScenarioSource implementation
|
1791
1915
|
return ScenarioSource._from_tsv(source)
|
1792
1916
|
|
@@ -1807,12 +1931,12 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1807
1931
|
data = sorted(self, key=lambda x: hash(x))
|
1808
1932
|
else:
|
1809
1933
|
data = self
|
1810
|
-
|
1934
|
+
|
1811
1935
|
d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
|
1812
1936
|
|
1813
1937
|
# Add codebook if it exists
|
1814
|
-
if hasattr(self,
|
1815
|
-
d[
|
1938
|
+
if hasattr(self, "codebook") and self.codebook:
|
1939
|
+
d["codebook"] = self.codebook
|
1816
1940
|
|
1817
1941
|
if add_edsl_version:
|
1818
1942
|
from .. import __version__
|
@@ -1821,6 +1945,37 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1821
1945
|
d["edsl_class_name"] = self.__class__.__name__
|
1822
1946
|
return d
|
1823
1947
|
|
1948
|
+
def clipboard_data(self) -> str:
|
1949
|
+
"""Return TSV representation of this ScenarioList for clipboard operations.
|
1950
|
+
|
1951
|
+
This method is called by the clipboard() method in the base class to provide
|
1952
|
+
a custom format for copying ScenarioList objects to the system clipboard.
|
1953
|
+
|
1954
|
+
Returns:
|
1955
|
+
str: Tab-separated values representation of the ScenarioList
|
1956
|
+
"""
|
1957
|
+
# Use the to_csv method with tab separator to create TSV format
|
1958
|
+
csv_filestore = self.to_csv()
|
1959
|
+
|
1960
|
+
# Get the CSV content and convert it to TSV
|
1961
|
+
csv_content = csv_filestore.text
|
1962
|
+
|
1963
|
+
# Convert CSV to TSV by replacing commas with tabs
|
1964
|
+
# This is a simple approach, but we should handle quoted fields properly
|
1965
|
+
import csv
|
1966
|
+
import io
|
1967
|
+
|
1968
|
+
# Parse the CSV content
|
1969
|
+
csv_reader = csv.reader(io.StringIO(csv_content))
|
1970
|
+
rows = list(csv_reader)
|
1971
|
+
|
1972
|
+
# Convert to TSV format
|
1973
|
+
tsv_lines = []
|
1974
|
+
for row in rows:
|
1975
|
+
tsv_lines.append('\t'.join(row))
|
1976
|
+
|
1977
|
+
return '\n'.join(tsv_lines)
|
1978
|
+
|
1824
1979
|
def to(self, survey: Union["Survey", "QuestionBase"]) -> "Jobs":
|
1825
1980
|
"""Create a Jobs object from a ScenarioList and a Survey object.
|
1826
1981
|
|
@@ -1868,10 +2023,12 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1868
2023
|
from .scenario import Scenario
|
1869
2024
|
|
1870
2025
|
# Extract codebook if it exists
|
1871
|
-
codebook = data.get(
|
1872
|
-
|
2026
|
+
codebook = data.get("codebook", None)
|
2027
|
+
|
1873
2028
|
# Create ScenarioList with scenarios and codebook
|
1874
|
-
return cls(
|
2029
|
+
return cls(
|
2030
|
+
[Scenario.from_dict(s) for s in data["scenarios"]], codebook=codebook
|
2031
|
+
)
|
1875
2032
|
|
1876
2033
|
@classmethod
|
1877
2034
|
def from_nested_dict(cls, data: dict) -> ScenarioList:
|
@@ -1916,7 +2073,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1916
2073
|
"""
|
1917
2074
|
return cls([Scenario.example(randomize), Scenario.example(randomize)])
|
1918
2075
|
|
1919
|
-
|
1920
2076
|
def items(self):
|
1921
2077
|
"""Make this class compatible with dict.items() by accessing first scenario items.
|
1922
2078
|
|
@@ -1950,7 +2106,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1950
2106
|
# Fallback to empty scenario
|
1951
2107
|
return Scenario({})
|
1952
2108
|
|
1953
|
-
|
1954
2109
|
def to_agent_list(self):
|
1955
2110
|
"""Convert the ScenarioList to an AgentList.
|
1956
2111
|
|
@@ -1989,6 +2144,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
1989
2144
|
'Assistant'
|
1990
2145
|
"""
|
1991
2146
|
from ..agents import AgentList
|
2147
|
+
|
1992
2148
|
return AgentList.from_scenario_list(self)
|
1993
2149
|
|
1994
2150
|
def chunk(
|
@@ -2020,7 +2176,12 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2020
2176
|
return ScenarioList(new_scenarios)
|
2021
2177
|
|
2022
2178
|
def collapse(
|
2023
|
-
self,
|
2179
|
+
self,
|
2180
|
+
field: str,
|
2181
|
+
separator: Optional[str] = None,
|
2182
|
+
prefix: str = "",
|
2183
|
+
postfix: str = "",
|
2184
|
+
add_count: bool = False
|
2024
2185
|
) -> ScenarioList:
|
2025
2186
|
"""Collapse a ScenarioList by grouping on all fields except the specified one,
|
2026
2187
|
collecting the values of the specified field into a list.
|
@@ -2028,6 +2189,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2028
2189
|
Args:
|
2029
2190
|
field: The field to collapse (whose values will be collected into lists)
|
2030
2191
|
separator: Optional string to join the values with instead of keeping as a list
|
2192
|
+
prefix: String to prepend to each value before joining (only used with separator)
|
2193
|
+
postfix: String to append to each value before joining (only used with separator)
|
2031
2194
|
add_count: If True, adds a field showing the number of collapsed rows
|
2032
2195
|
|
2033
2196
|
Returns:
|
@@ -2041,6 +2204,8 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2041
2204
|
... ])
|
2042
2205
|
>>> s.collapse('item', add_count=True)
|
2043
2206
|
ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry'], 'num_collapsed_rows': 2}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach'], 'num_collapsed_rows': 1})])
|
2207
|
+
>>> s.collapse('item', separator='; ', prefix='<example>', postfix='</example>')
|
2208
|
+
ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': '<example>apple</example>; <example>cherry</example>'}), Scenario({'category': 'vegetable', 'color': 'green', 'item': '<example>spinach</example>'})])
|
2044
2209
|
"""
|
2045
2210
|
if not self:
|
2046
2211
|
return ScenarioList([])
|
@@ -2057,18 +2222,19 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2057
2222
|
grouped[key].append(scenario[field])
|
2058
2223
|
|
2059
2224
|
# Create a new ScenarioList with the collapsed field
|
2060
|
-
new_sl = ScenarioList(data
|
2225
|
+
new_sl = ScenarioList(data=[], codebook=self.codebook)
|
2061
2226
|
for key, values in grouped.items():
|
2062
2227
|
new_scenario = dict(zip(id_vars, key))
|
2063
2228
|
if separator:
|
2064
|
-
|
2229
|
+
# Apply prefix and postfix to each value, then join with separator
|
2230
|
+
formatted_values = [f"{prefix}{str(v)}{postfix}" for v in values]
|
2231
|
+
new_scenario[field] = separator.join(formatted_values)
|
2065
2232
|
else:
|
2066
2233
|
new_scenario[field] = values
|
2067
2234
|
if add_count:
|
2068
2235
|
new_scenario["num_collapsed_rows"] = len(values)
|
2069
2236
|
new_sl.append(Scenario(new_scenario))
|
2070
2237
|
|
2071
|
-
#return ScenarioList(result)
|
2072
2238
|
return new_sl
|
2073
2239
|
|
2074
2240
|
def create_comparisons(
|
@@ -2163,7 +2329,6 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2163
2329
|
result.append(Scenario(new_scenario))
|
2164
2330
|
|
2165
2331
|
return ScenarioList(result)
|
2166
|
-
|
2167
2332
|
|
2168
2333
|
@classmethod
|
2169
2334
|
@deprecated_classmethod("ScenarioSource.from_source('parquet', ...)")
|
@@ -2177,6 +2342,7 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2177
2342
|
ScenarioList: A new ScenarioList containing the scenarios from the Parquet file.
|
2178
2343
|
"""
|
2179
2344
|
from .scenario_source import ParquetSource
|
2345
|
+
|
2180
2346
|
source = ParquetSource(filepath)
|
2181
2347
|
return source.to_scenario_list()
|
2182
2348
|
|
@@ -2212,17 +2378,17 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2212
2378
|
new_scenario[key] = value
|
2213
2379
|
new_sl.append(Scenario(new_scenario))
|
2214
2380
|
return new_sl
|
2215
|
-
|
2381
|
+
|
2216
2382
|
@classmethod
|
2217
2383
|
@deprecated_classmethod("ScenarioSource.from_source('pdf', ...)")
|
2218
2384
|
def from_pdf(cls, filename_or_url, collapse_pages=False):
|
2219
2385
|
"""Create a ScenarioList from a PDF file or URL."""
|
2220
2386
|
from .scenario_source import PDFSource
|
2221
|
-
|
2387
|
+
|
2222
2388
|
source = PDFSource(
|
2223
2389
|
file_path=filename_or_url,
|
2224
2390
|
chunk_type="page" if not collapse_pages else "text",
|
2225
|
-
chunk_size=1
|
2391
|
+
chunk_size=1,
|
2226
2392
|
)
|
2227
2393
|
return source.to_scenario_list()
|
2228
2394
|
|
@@ -2231,32 +2397,28 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2231
2397
|
def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
|
2232
2398
|
"""Create a ScenarioList with images extracted from a PDF file."""
|
2233
2399
|
from .scenario_source import PDFImageSource
|
2234
|
-
|
2235
|
-
source = PDFImageSource(
|
2236
|
-
file_path=pdf_path,
|
2237
|
-
base_width=2000,
|
2238
|
-
include_text=True
|
2239
|
-
)
|
2400
|
+
|
2401
|
+
source = PDFImageSource(file_path=pdf_path, base_width=2000, include_text=True)
|
2240
2402
|
return source.to_scenario_list()
|
2241
|
-
|
2403
|
+
|
2242
2404
|
@classmethod
|
2243
2405
|
def from_source(cls, source_type: str, *args, **kwargs) -> "ScenarioList":
|
2244
2406
|
"""
|
2245
2407
|
Create a ScenarioList from a specified source type.
|
2246
|
-
|
2408
|
+
|
2247
2409
|
This method serves as the main entry point for creating ScenarioList objects,
|
2248
2410
|
providing a unified interface for various data sources.
|
2249
|
-
|
2411
|
+
|
2250
2412
|
Args:
|
2251
2413
|
source_type: The type of source to create a ScenarioList from.
|
2252
2414
|
Valid values include: 'urls', 'directory', 'csv', 'tsv',
|
2253
2415
|
'excel', 'pdf', 'pdf_to_image', and others.
|
2254
2416
|
*args: Positional arguments to pass to the source-specific method.
|
2255
2417
|
**kwargs: Keyword arguments to pass to the source-specific method.
|
2256
|
-
|
2418
|
+
|
2257
2419
|
Returns:
|
2258
2420
|
A ScenarioList object created from the specified source.
|
2259
|
-
|
2421
|
+
|
2260
2422
|
Examples:
|
2261
2423
|
>>> # This is a simplified example for doctest
|
2262
2424
|
>>> # In real usage, you would provide a path to your CSV file:
|
@@ -2266,9 +2428,11 @@ class ScenarioList(MutableSequence, Base, ScenarioListOperationsMixin):
|
|
2266
2428
|
>>> # sl_dir = ScenarioList.from_source('directory', '/path/to/files')
|
2267
2429
|
"""
|
2268
2430
|
from .scenario_source import ScenarioSource
|
2431
|
+
|
2269
2432
|
return ScenarioSource.from_source(source_type, *args, **kwargs)
|
2270
2433
|
|
2271
2434
|
|
2272
2435
|
if __name__ == "__main__":
|
2273
2436
|
import doctest
|
2274
|
-
|
2437
|
+
|
2438
|
+
doctest.testmod(optionflags=doctest.ELLIPSIS)
|