edsl 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +7 -3
- edsl/__version__.py +1 -1
- edsl/agents/InvigilatorBase.py +3 -1
- edsl/agents/PromptConstructor.py +66 -91
- edsl/agents/QuestionInstructionPromptBuilder.py +160 -79
- edsl/agents/QuestionTemplateReplacementsBuilder.py +80 -17
- edsl/agents/question_option_processor.py +15 -6
- edsl/coop/CoopFunctionsMixin.py +3 -4
- edsl/coop/coop.py +171 -96
- edsl/data/RemoteCacheSync.py +10 -9
- edsl/enums.py +3 -3
- edsl/inference_services/AnthropicService.py +11 -9
- edsl/inference_services/AvailableModelFetcher.py +2 -0
- edsl/inference_services/AwsBedrock.py +1 -2
- edsl/inference_services/AzureAI.py +12 -9
- edsl/inference_services/GoogleService.py +9 -4
- edsl/inference_services/InferenceServicesCollection.py +2 -2
- edsl/inference_services/MistralAIService.py +1 -2
- edsl/inference_services/OpenAIService.py +9 -4
- edsl/inference_services/PerplexityService.py +2 -1
- edsl/inference_services/{GrokService.py → XAIService.py} +2 -2
- edsl/inference_services/registry.py +2 -2
- edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
- edsl/jobs/Jobs.py +24 -17
- edsl/jobs/JobsChecks.py +10 -13
- edsl/jobs/JobsPrompts.py +49 -26
- edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
- edsl/jobs/async_interview_runner.py +3 -1
- edsl/jobs/check_survey_scenario_compatibility.py +5 -5
- edsl/jobs/data_structures.py +3 -0
- edsl/jobs/interviews/Interview.py +6 -3
- edsl/jobs/interviews/InterviewExceptionEntry.py +12 -0
- edsl/jobs/tasks/TaskHistory.py +1 -1
- edsl/language_models/LanguageModel.py +6 -3
- edsl/language_models/PriceManager.py +45 -5
- edsl/language_models/model.py +47 -26
- edsl/questions/QuestionBase.py +21 -0
- edsl/questions/QuestionBasePromptsMixin.py +103 -0
- edsl/questions/QuestionFreeText.py +22 -5
- edsl/questions/descriptors.py +4 -0
- edsl/questions/question_base_gen_mixin.py +96 -29
- edsl/results/Dataset.py +65 -0
- edsl/results/DatasetExportMixin.py +320 -32
- edsl/results/Result.py +27 -0
- edsl/results/Results.py +22 -2
- edsl/results/ResultsGGMixin.py +7 -3
- edsl/scenarios/DocumentChunker.py +2 -0
- edsl/scenarios/FileStore.py +10 -0
- edsl/scenarios/PdfExtractor.py +21 -1
- edsl/scenarios/Scenario.py +25 -9
- edsl/scenarios/ScenarioList.py +226 -24
- edsl/scenarios/handlers/__init__.py +1 -0
- edsl/scenarios/handlers/docx.py +5 -1
- edsl/scenarios/handlers/jpeg.py +39 -0
- edsl/surveys/Survey.py +5 -4
- edsl/surveys/SurveyFlowVisualization.py +91 -43
- edsl/templates/error_reporting/exceptions_table.html +7 -8
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/interviews.html +0 -1
- edsl/templates/error_reporting/overview.html +2 -7
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +1 -1
- edsl/utilities/PrettyList.py +14 -0
- edsl-0.1.46.dist-info/METADATA +246 -0
- {edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/RECORD +67 -66
- edsl-0.1.44.dist-info/METADATA +0 -110
- {edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/LICENSE +0 -0
- {edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/WHEEL +0 -0
edsl/results/Result.py
CHANGED
@@ -439,6 +439,33 @@ class Result(Base, UserDict):
|
|
439
439
|
from edsl.results.Results import Results
|
440
440
|
|
441
441
|
return Results.example()[0]
|
442
|
+
|
443
|
+
def score_with_answer_key(self, answer_key: dict) -> dict:
    """Score the result using an answer key.

    :param answer_key: A dictionary that maps question_names to answers. A value is
        either a single acceptable answer or a list/tuple/set of acceptable answers.
    :return: A dictionary with the number of 'correct', 'incorrect' and 'missing'
        answers. 'missing' counts the answered questions that have no entry in the
        answer key.

    >>> Result.example()['answer']
    {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}

    >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
    >>> Result.example().score_with_answer_key(answer_key)
    {'correct': 2, 'incorrect': 0, 'missing': 0}
    >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': ['Great', 'Good']}
    >>> Result.example().score_with_answer_key(answer_key)
    {'correct': 2, 'incorrect': 0, 'missing': 0}
    """
    final_scores = {'correct': 0, 'incorrect': 0, 'missing': 0}
    for question_name, answer in self.answer.items():
        if question_name not in answer_key:
            final_scores['missing'] += 1
            continue

        expected = answer_key[question_name]
        if answer == expected:
            is_correct = True
        elif isinstance(expected, (list, tuple, set, frozenset)):
            # Compare with == instead of `in` so an unhashable answer cannot
            # raise when the acceptable answers are given as a set.
            is_correct = any(answer == option for option in expected)
        else:
            # A scalar key (e.g. a string or an int) only matches by equality;
            # `answer in expected` would substring-match strings and raise a
            # TypeError for non-container keys.
            is_correct = False

        final_scores['correct' if is_correct else 'incorrect'] += 1

    return final_scores
|
442
469
|
|
443
470
|
def score(self, scoring_function: Callable) -> Union[int, float]:
|
444
471
|
"""Score the result using a passed-in scoring function.
|
edsl/results/Results.py
CHANGED
@@ -34,7 +34,7 @@ if TYPE_CHECKING:
|
|
34
34
|
from simpleeval import EvalWithCompoundTypes
|
35
35
|
|
36
36
|
from edsl.results.ResultsExportMixin import ResultsExportMixin
|
37
|
-
from edsl.results.ResultsGGMixin import
|
37
|
+
from edsl.results.ResultsGGMixin import GGPlotMethod
|
38
38
|
from edsl.results.results_fetch_mixin import ResultsFetchMixin
|
39
39
|
from edsl.utilities.remove_edsl_version import remove_edsl_version
|
40
40
|
|
@@ -100,7 +100,7 @@ class NotReadyObject:
|
|
100
100
|
class Mixins(
|
101
101
|
ResultsExportMixin,
|
102
102
|
ResultsFetchMixin,
|
103
|
-
ResultsGGMixin,
|
103
|
+
# ResultsGGMixin,
|
104
104
|
):
|
105
105
|
def long(self):
|
106
106
|
return self.table().long()
|
@@ -151,6 +151,19 @@ class Results(UserList, Mixins, Base):
|
|
151
151
|
"cache_keys",
|
152
152
|
]
|
153
153
|
|
154
|
+
def ggplot2(
    self,
    ggplot_code: str,
    shape="wide",
    sql: str = None,
    remove_prefix: bool = True,
    debug: bool = False,
    height=4,
    width=6,
    factor_orders: Optional[dict] = None,
):
    """Delegate a ggplot2 request for these results to ``GGPlotMethod``.

    A ``GGPlotMethod`` is built around this object and its ``ggplot2`` method is
    called with every argument forwarded positionally, in the order declared
    here. Whatever that call returns is returned unchanged.
    """
    plotter = GGPlotMethod(self)
    forwarded = (
        ggplot_code,
        shape,
        sql,
        remove_prefix,
        debug,
        height,
        width,
        factor_orders,
    )
    return plotter.ggplot2(*forwarded)
|
166
|
+
|
154
167
|
@classmethod
|
155
168
|
def from_job_info(cls, job_info: dict) -> Results:
|
156
169
|
"""
|
@@ -1277,6 +1290,13 @@ class Results(UserList, Mixins, Base):
|
|
1277
1290
|
"""
|
1278
1291
|
return [r.score(f) for r in self.data]
|
1279
1292
|
|
1293
|
+
def score_with_answer_key(self, answer_key: dict) -> list:
    """Score every result in this collection using an answer key.

    :param answer_key: A dictionary that maps question_names to answers. It is
        passed unchanged to each result's ``score_with_answer_key``.
    :return: A list with one score per result, in the order of ``self.data``.
    """
    return [r.score_with_answer_key(answer_key) for r in self.data]
|
1299
|
+
|
1280
1300
|
|
1281
1301
|
def fetch_remote(self, job_info: "RemoteJobInfo") -> None:
|
1282
1302
|
"""
|
edsl/results/ResultsGGMixin.py
CHANGED
@@ -75,7 +75,11 @@ class GGPlot:
|
|
75
75
|
|
76
76
|
return self._svg_data
|
77
77
|
|
78
|
-
class
|
78
|
+
class GGPlotMethod:
|
79
|
+
|
80
|
+
def __init__(self, results: 'Results'):
|
81
|
+
self.results = results
|
82
|
+
|
79
83
|
"""Mixin class for ggplot2 plotting."""
|
80
84
|
|
81
85
|
def ggplot2(
|
@@ -106,9 +110,9 @@ class ResultsGGMixin:
|
|
106
110
|
sql = "select * from self"
|
107
111
|
|
108
112
|
if shape == "long":
|
109
|
-
df = self.sql(sql, shape="long")
|
113
|
+
df = self.results.sql(sql, shape="long")
|
110
114
|
elif shape == "wide":
|
111
|
-
df = self.sql(sql, remove_prefix=remove_prefix)
|
115
|
+
df = self.results.sql(sql, remove_prefix=remove_prefix)
|
112
116
|
|
113
117
|
# Convert DataFrame to CSV format
|
114
118
|
csv_data = df.to_csv().text
|
@@ -85,6 +85,8 @@ class DocumentChunker:
|
|
85
85
|
new_scenario = copy.deepcopy(self.scenario)
|
86
86
|
new_scenario[field] = chunk
|
87
87
|
new_scenario[field + "_chunk"] = i
|
88
|
+
new_scenario[field + "_char_count"] = len(chunk)
|
89
|
+
new_scenario[field + "_word_count"] = len(chunk.split())
|
88
90
|
if include_original:
|
89
91
|
if hash_original:
|
90
92
|
new_scenario[field + "_original"] = hashlib.md5(
|
edsl/scenarios/FileStore.py
CHANGED
@@ -29,6 +29,12 @@ class FileStore(Scenario):
|
|
29
29
|
if path is None and "filename" in kwargs:
|
30
30
|
path = kwargs["filename"]
|
31
31
|
|
32
|
+
# Check if path is a URL and handle download
|
33
|
+
if path and (path.startswith('http://') or path.startswith('https://')):
|
34
|
+
temp_filestore = self.from_url(path, mime_type=mime_type)
|
35
|
+
path = temp_filestore._path
|
36
|
+
mime_type = temp_filestore.mime_type
|
37
|
+
|
32
38
|
self._path = path # Store the original path privately
|
33
39
|
self._temp_path = None # Track any generated temporary file
|
34
40
|
|
@@ -138,6 +144,10 @@ class FileStore(Scenario):
|
|
138
144
|
base64_encoded_data = base64.b64encode(binary_data)
|
139
145
|
self.binary = True
|
140
146
|
# Convert the base64 bytes to a string
|
147
|
+
except FileNotFoundError:
|
148
|
+
print(f"File not found: {file_path}")
|
149
|
+
print("Current working directory:", os.getcwd())
|
150
|
+
raise
|
141
151
|
base64_string = base64_encoded_data.decode("utf-8")
|
142
152
|
|
143
153
|
return base64_string
|
edsl/scenarios/PdfExtractor.py
CHANGED
@@ -4,10 +4,30 @@ import os
|
|
4
4
|
class PdfExtractor:
|
5
5
|
def __init__(self, pdf_path: str):
|
6
6
|
self.pdf_path = pdf_path
|
7
|
+
self._has_pymupdf = self._check_pymupdf()
|
7
8
|
#self.constructor = parent_object.__class__
|
8
9
|
|
10
|
+
def _check_pymupdf(self):
|
11
|
+
"""Check if PyMuPDF is installed."""
|
12
|
+
try:
|
13
|
+
import fitz
|
14
|
+
return True
|
15
|
+
except ImportError:
|
16
|
+
return False
|
17
|
+
|
9
18
|
def get_pdf_dict(self) -> dict:
|
10
|
-
#
|
19
|
+
# First check if the file exists
|
20
|
+
if not os.path.exists(self.pdf_path):
|
21
|
+
raise FileNotFoundError(f"The file {self.pdf_path} does not exist.")
|
22
|
+
|
23
|
+
# Then check if PyMuPDF is available
|
24
|
+
if not self._has_pymupdf:
|
25
|
+
raise ImportError(
|
26
|
+
"The 'fitz' module (PyMuPDF) is required for PDF extraction. "
|
27
|
+
"Please install it with: pip install pymupdf"
|
28
|
+
)
|
29
|
+
|
30
|
+
# If we get here, we can safely import and use fitz
|
11
31
|
import fitz
|
12
32
|
|
13
33
|
if not os.path.exists(self.pdf_path):
|
edsl/scenarios/Scenario.py
CHANGED
@@ -64,6 +64,15 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
64
64
|
self.data = data if data is not None else {}
|
65
65
|
self.name = name
|
66
66
|
|
67
|
+
def __mul__(self, scenario_list_or_scenario: Union["ScenarioList", "Scenario"]) -> "ScenarioList":
    """Combine this scenario with a ScenarioList or another Scenario via ``*``.

    A ScenarioList operand is multiplied by this scenario (the list stays on the
    left-hand side). A Scenario operand is multiplied onto a one-element
    ScenarioList that holds this scenario.

    :raises TypeError: if the operand is neither a ScenarioList nor a Scenario.
    """
    from edsl.scenarios.ScenarioList import ScenarioList

    other = scenario_list_or_scenario
    if isinstance(other, ScenarioList):
        return other * self
    if isinstance(other, Scenario):
        singleton = ScenarioList([self])
        return singleton * other
    raise TypeError(f"Cannot multiply Scenario with {type(scenario_list_or_scenario)}")
|
75
|
+
|
67
76
|
def replicate(self, n: int) -> "ScenarioList":
|
68
77
|
"""Replicate a scenario n times to return a ScenarioList.
|
69
78
|
|
@@ -356,11 +365,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
356
365
|
|
357
366
|
@classmethod
def from_pdf(cls, pdf_path: str):
    """Create a Scenario from a PDF file.

    :param pdf_path: Path of the PDF file to read.
    :return: A Scenario built from the dictionary returned by
        ``PdfExtractor(pdf_path).get_pdf_dict()``.
    :raises ImportError: if importing or running the extractor raises
        ImportError. The original error is attached as the cause.
    """
    try:
        from edsl.scenarios.PdfExtractor import PdfExtractor

        extractor = PdfExtractor(pdf_path)
        return Scenario(extractor.get_pdf_dict())
    except ImportError as e:
        # Chain explicitly so the traceback shows the underlying import failure
        # as the direct cause of this friendlier error.
        raise ImportError(
            f"Could not extract text from PDF: {str(e)}. "
            "PDF extraction requires the PyMuPDF library. "
            "Install it with: pip install pymupdf"
        ) from e
|
379
|
+
|
364
380
|
@classmethod
|
365
381
|
def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
|
366
382
|
"""
|
@@ -442,18 +458,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
442
458
|
|
443
459
|
>>> s = Scenario({"text": "This is a test.\\nThis is a test.\\n\\nThis is a test."})
|
444
460
|
>>> s.chunk("text", num_lines = 1)
|
445
|
-
ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 1}), Scenario({'text': '', 'text_chunk': 2}), Scenario({'text': 'This is a test.', 'text_chunk': 3})])
|
461
|
+
ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': 'This is a test.', 'text_chunk': 1, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': '', 'text_chunk': 2, 'text_char_count': 0, 'text_word_count': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 3, 'text_char_count': 15, 'text_word_count': 4})])
|
446
462
|
|
447
463
|
>>> s.chunk("text", num_words = 2)
|
448
|
-
ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0}), Scenario({'text': 'a test.', 'text_chunk': 1}), Scenario({'text': 'This is', 'text_chunk': 2}), Scenario({'text': 'a test.', 'text_chunk': 3}), Scenario({'text': 'This is', 'text_chunk': 4}), Scenario({'text': 'a test.', 'text_chunk': 5})])
|
464
|
+
ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 1, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 2, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 3, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 4, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 5, 'text_char_count': 7, 'text_word_count': 2})])
|
449
465
|
|
450
466
|
>>> s = Scenario({"text": "Hello World"})
|
451
467
|
>>> s.chunk("text", num_words = 1, include_original = True)
|
452
|
-
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'Hello World'})])
|
468
|
+
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'})])
|
453
469
|
|
454
470
|
>>> s = Scenario({"text": "Hello World"})
|
455
471
|
>>> s.chunk("text", num_words = 1, include_original = True, hash_original = True)
|
456
|
-
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
|
472
|
+
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
|
457
473
|
|
458
474
|
>>> s.chunk("text")
|
459
475
|
Traceback (most recent call last):
|
edsl/scenarios/ScenarioList.py
CHANGED
@@ -360,6 +360,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
360
360
|
ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
|
361
361
|
"""
|
362
362
|
from itertools import product
|
363
|
+
from edsl import Scenario
|
364
|
+
if isinstance(other, Scenario):
|
365
|
+
other = ScenarioList([other])
|
366
|
+
elif not isinstance(other, ScenarioList):
|
367
|
+
raise TypeError(f"Cannot multiply ScenarioList with {type(other)}")
|
363
368
|
|
364
369
|
new_sl = []
|
365
370
|
for s1, s2 in list(product(self, other)):
|
@@ -431,35 +436,98 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
431
436
|
new_scenarios.append(new_scenario)
|
432
437
|
return ScenarioList(new_scenarios)
|
433
438
|
|
434
|
-
def
|
435
|
-
"""
|
436
|
-
|
439
|
+
def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
    """Merge several fields of every scenario into one ``concat_...`` field.

    For each scenario a copy is made, the listed fields that are present are
    removed from the copy, and their values are stored under a new field named
    ``concat_`` followed by the field names joined with ``_``. Fields that a
    scenario does not have are skipped.

    :param fields: The fields to concatenate (a list of names, not a string).
    :param output_type: The type of output ("string", "list", or "set").
    :param separator: The separator to use for string concatenation.

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Raises:
        ScenarioError: If ``fields`` is a string.
        ValueError: If ``output_type`` is not one of the supported values.
    """
    if isinstance(fields, str):
        raise ScenarioError(
            f"The 'fields' parameter must be a list of field names, not a string. Got '{fields}'."
        )

    combined = []
    for scenario in self:
        updated = scenario.copy()

        # Collect the values in the order the fields were requested, removing
        # each collected field from the copy.
        collected = []
        for name in fields:
            if name not in updated:
                continue
            collected.append(updated[name])
            del updated[name]

        target = f"concat_{'_'.join(fields)}"

        if output_type == "string":
            updated[target] = separator.join(str(v) for v in collected)
        elif output_type == "list":
            updated[target] = collected
        elif output_type == "set":
            # A set drops duplicate values.
            updated[target] = set(collected)
        else:
            raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")

        combined.append(updated)

    return ScenarioList(combined)
|
462
481
|
|
482
|
+
def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
    """Join the values of the given fields into one string field.

    Delegates to ``_concatenate`` with ``output_type="string"``.

    :param fields: The fields to concatenate.
    :param separator: The separator to use.

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
    """
    joined = self._concatenate(fields, output_type="string", separator=separator)
    return joined
|
497
|
+
|
498
|
+
def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
    """Gather the values of the given fields into one list field.

    Delegates to ``_concatenate`` with ``output_type="list"``.

    :param fields: The fields to concatenate.

    Returns:
        ScenarioList: A new ScenarioList with fields concatenated into a list.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate_to_list(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
    """
    as_list = self._concatenate(fields, output_type="list")
    return as_list
|
512
|
+
|
513
|
+
def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
    """Gather the values of the given fields into one set field.

    Delegates to ``_concatenate`` with ``output_type="set"``, so duplicate
    values within a scenario are kept only once.

    :param fields: The fields to concatenate.

    Returns:
        ScenarioList: A new ScenarioList with fields concatenated into a set.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate_to_set(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 1, 'c': 3})])
        >>> s.concatenate_to_set(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
    """
    as_set = self._concatenate(fields, output_type="set")
    return as_set
|
530
|
+
|
463
531
|
def unpack_dict(
|
464
532
|
self, field: str, prefix: Optional[str] = None, drop_field: bool = False
|
465
533
|
) -> ScenarioList:
|
@@ -872,7 +940,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
872
940
|
for scenario in sl:
|
873
941
|
scenario[name] = value
|
874
942
|
return sl
|
875
|
-
|
943
|
+
|
876
944
|
def rename(self, replacement_dict: dict) -> ScenarioList:
|
877
945
|
"""Rename the fields in the scenarios.
|
878
946
|
|
@@ -885,13 +953,35 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
885
953
|
ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
|
886
954
|
|
887
955
|
"""
|
888
|
-
|
889
956
|
new_list = ScenarioList([])
|
890
957
|
for obj in self:
|
891
958
|
new_obj = obj.rename(replacement_dict)
|
892
959
|
new_list.append(new_obj)
|
893
960
|
return new_list
|
894
961
|
|
962
|
+
def replace_names(self, new_names: list) -> ScenarioList:
    """Replace the field names in the scenarios with a new list of names.

    The current names are taken from the first scenario and paired, in order,
    with ``new_names``. The resulting mapping is handed to ``rename``. An
    empty collection yields an empty ScenarioList.

    :param new_names: A list of new field names to use.

    Raises:
        ScenarioError: If the number of new names differs from the number of
            fields in the first scenario.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s.replace_names(['first_name', 'years'])
    ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
    """
    if not self:
        return ScenarioList([])

    current_names = list(self[0].keys())
    if len(new_names) != len(current_names):
        raise ScenarioError(
            f"Length of new names ({len(new_names)}) does not match number of fields ({len(current_names)})"
        )

    mapping = {old: new for old, new in zip(current_names, new_names)}
    return self.rename(mapping)
|
984
|
+
|
895
985
|
## NEEDS TO BE FIXED
|
896
986
|
# def new_column_names(self, new_names: List[str]) -> ScenarioList:
|
897
987
|
# """Rename the fields in the scenarios.
|
@@ -910,16 +1000,42 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
910
1000
|
# return new_list
|
911
1001
|
|
912
1002
|
@classmethod
def from_sqlite(cls, filepath: str, table: Optional[str] = None, sql_query: Optional[str] = None):
    """Create a ScenarioList from a SQLite database.

    Args:
        filepath (str): Path to the SQLite database file
        table (Optional[str]): Name of table to query. If None, sql_query must be provided.
            When given, it takes precedence over sql_query and all of its rows are selected.
        sql_query (Optional[str]): SQL query to execute. Used if table is None.

    Returns:
        ScenarioList: List of scenarios created from database rows

    Raises:
        ValueError: If both table and sql_query are None
        sqlite3.Error: If there is an error executing the database query
    """
    import re
    import sqlite3
    from contextlib import closing

    if table is None and sql_query is None:
        raise ValueError("Either table or sql_query must be provided")

    if table is not None:
        # Table names cannot be bound as SQL parameters, so quote them as
        # identifiers instead of pasting raw text into the statement. A plain
        # or schema-qualified name ("tbl", "main.tbl") is quoted part by part;
        # anything else is treated as one identifier.
        if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?", table):
            parts = table.split(".")
        else:
            parts = [table]
        quoted = ".".join('"' + part.replace('"', '""') + '"' for part in parts)
        statement = f"SELECT * FROM {quoted}"
    else:
        statement = sql_query

    try:
        # sqlite3's own context manager only commits or rolls back; closing()
        # is what actually releases the connection.
        with closing(sqlite3.connect(filepath)) as conn:
            cursor = conn.cursor()
            cursor.execute(statement)
            # description is None for statements that produce no result set.
            columns = [description[0] for description in (cursor.description or ())]
            data = cursor.fetchall()
    except sqlite3.Error as e:
        raise sqlite3.Error(f"Database error occurred: {str(e)}") from e

    return cls([Scenario(dict(zip(columns, row))) for row in data])
|
923
1039
|
|
924
1040
|
@classmethod
|
925
1041
|
def from_latex(cls, tex_file_path: str):
|
@@ -935,6 +1051,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
935
1051
|
entry = {
|
936
1052
|
"line_no": line_no + 1, # Using 1-based index for line numbers
|
937
1053
|
"text": text,
|
1054
|
+
"num_words": len(text.split()),
|
1055
|
+
"num_chars": len(text),
|
938
1056
|
"line_before": non_blank_lines[index - 1][1] if index > 0 else None,
|
939
1057
|
"line_after": (
|
940
1058
|
non_blank_lines[index + 1][1]
|
@@ -995,8 +1113,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
995
1113
|
ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
|
996
1114
|
"""
|
997
1115
|
return cls([Scenario(row) for row in df.to_dict(orient="records")])
|
1116
|
+
|
998
1117
|
|
999
1118
|
@classmethod
def from_dta(cls, filepath: str, include_metadata: bool = True) -> ScenarioList:
    """Create a ScenarioList from a Stata file.

    Args:
        filepath (str): Path to the Stata (.dta) file
        include_metadata (bool): If True, extract and preserve variable labels and value labels
            as additional metadata in the ScenarioList

    Returns:
        ScenarioList: A ScenarioList containing the data from the Stata file. When
        metadata is requested and any labels are found, they are stored on the
        result's ``codebook`` attribute under 'variable_labels' and 'value_labels'.
    """
    import pandas as pd

    if not include_metadata:
        return cls.from_pandas(pd.read_stata(filepath))

    # The DataFrame returned by read_stata carries no label attributes; the
    # labels are only available from the StataReader, so read through it.
    with pd.read_stata(filepath, iterator=True) as reader:
        df = reader.read()
        variable_labels = reader.variable_labels()
        value_labels = reader.value_labels()

    scenario_list = cls.from_pandas(df)

    # Unlabeled variables appear to be reported with an empty label; drop
    # those so only real labels are kept (TODO confirm against pandas docs).
    variable_labels = {name: label for name, label in variable_labels.items() if label}

    # Store the metadata in the ScenarioList's codebook
    if variable_labels or value_labels:
        scenario_list.codebook = {
            'variable_labels': variable_labels,
            'value_labels': value_labels
        }

    return scenario_list

# NOTE(review): the ``@classmethod`` line above this method previously decorated
# ``from_wikipedia``, which follows directly and still takes ``cls``. It now
# appears to be left without a decorator; confirm and restore it.
|
1158
|
+
|
1000
1159
|
def from_wikipedia(cls, url: str, table_index: int = 0):
|
1001
1160
|
"""
|
1002
1161
|
Extracts a table from a Wikipedia page.
|
@@ -1456,7 +1615,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1456
1615
|
|
1457
1616
|
>>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
|
1458
1617
|
>>> s.chunk('text', num_words=3)
|
1459
|
-
ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
|
1618
|
+
ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 3}), Scenario({'text': 'fox jumps over', 'text_chunk': 1, 'text_char_count': 14, 'text_word_count': 3}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2, 'text_char_count': 13, 'text_word_count': 3})])
|
1460
1619
|
"""
|
1461
1620
|
new_scenarios = []
|
1462
1621
|
for scenario in self:
|
@@ -1470,6 +1629,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1470
1629
|
new_scenarios.extend(replacement_scenarios)
|
1471
1630
|
return ScenarioList(new_scenarios)
|
1472
1631
|
|
1632
|
+
def collapse(self, field: str) -> ScenarioList:
    """Group scenarios on every field except ``field`` and gather its values.

    The grouping fields are taken from the first scenario. Scenarios that agree
    on all of them are merged into one scenario whose ``field`` holds the list
    of the collected values, in encounter order.

    Args:
        field: The field to collapse (whose values will be collected into lists)

    Returns:
        ScenarioList: A new ScenarioList with the specified field collapsed into lists

    Example:
        >>> s = ScenarioList([
        ...     Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
        ...     Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
        ...     Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
        ...     Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
        ... ])
        >>> s.collapse('item')
        ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']}), Scenario({'category': 'fruit', 'color': 'yellow', 'item': ['banana']}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach']})])
    """
    if not self:
        return ScenarioList([])

    # Every field of the first scenario except the collapsed one identifies a group.
    group_fields = [name for name in self[0].keys() if name != field]

    # Plain dicts keep insertion order, so groups come out in first-seen order.
    buckets = {}
    for scenario in self:
        group_key = tuple(scenario[name] for name in group_fields)
        buckets.setdefault(group_key, []).append(scenario[field])

    collapsed = []
    for group_key, collected in buckets.items():
        merged = dict(zip(group_fields, group_key))
        merged[field] = collected
        collapsed.append(Scenario(merged))

    return ScenarioList(collapsed)
|
1674
|
+
|
1473
1675
|
|
1474
1676
|
if __name__ == "__main__":
|
1475
1677
|
import doctest
|
edsl/scenarios/handlers/docx.py
CHANGED
@@ -37,7 +37,11 @@ class DocxMethods(FileMethods):
|
|
37
37
|
print("DOCX file was not found.")
|
38
38
|
|
39
39
|
def view_notebook(self):
|
40
|
-
|
40
|
+
try:
|
41
|
+
import mammoth
|
42
|
+
except ImportError:
|
43
|
+
print("mammoth is not installed. Please install it using 'pip install mammoth'.")
|
44
|
+
return
|
41
45
|
from IPython.display import HTML, display
|
42
46
|
|
43
47
|
with open(self.path, "rb") as docx_file:
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import tempfile
|
2
|
+
from edsl.scenarios.file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class JpegMethods(FileMethods):
    """Viewing helpers and an example generator for JPEG files."""

    suffix = "jpeg"

    def view_system(self):
        """Open the file at ``self.path`` with the platform's default viewer.

        Prints a message instead of raising when the file does not exist or
        the viewer cannot be launched.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("JPEG file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening JPEG: {e}")

    def view_notebook(self):
        """Display the image inline using IPython's display machinery."""
        from IPython.display import Image, display

        display(Image(filename=self.path))

    def example(self):
        """Write a sine-curve plot to a temporary ``.jpeg`` file and return its path.

        The temporary file is created with ``delete=False``, so it is left on
        disk for the caller.
        """
        import matplotlib.pyplot as plt
        import numpy as np

        x = np.linspace(0, 10, 100)
        y = np.sin(x)

        # Reserve the name first and close the handle before saving; writing
        # to a file that is still open can fail on some platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpeg") as f:
            target = f.name

        # Draw on a private figure so the caller's current pyplot figure is
        # untouched, and close it so it does not accumulate.
        fig, ax = plt.subplots()
        try:
            ax.plot(x, y)
            fig.savefig(target)
        finally:
            plt.close(fig)
        return target
|
edsl/surveys/Survey.py
CHANGED
@@ -1248,14 +1248,15 @@ class Survey(SurveyExportMixin, Base):
|
|
1248
1248
|
###################
|
1249
1249
|
def humanize(
|
1250
1250
|
self,
|
1251
|
-
project_name: str,
|
1251
|
+
project_name: str = "Project",
|
1252
1252
|
survey_description: Optional[str] = None,
|
1253
1253
|
survey_alias: Optional[str] = None,
|
1254
1254
|
survey_visibility: Optional["VisibilityType"] = "unlisted",
|
1255
|
-
):
|
1255
|
+
) -> dict:
|
1256
1256
|
"""
|
1257
|
-
|
1258
|
-
|
1257
|
+
Send the survey to Coop.
|
1258
|
+
|
1259
|
+
Then, create a project on Coop so you can share the survey with human respondents.
|
1259
1260
|
"""
|
1260
1261
|
from edsl.coop import Coop
|
1261
1262
|
|