edsl 0.1.43__py3-none-any.whl → 0.1.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +15 -6
- edsl/__version__.py +1 -1
- edsl/agents/InvigilatorBase.py +3 -1
- edsl/agents/PromptConstructor.py +62 -34
- edsl/agents/QuestionInstructionPromptBuilder.py +111 -68
- edsl/agents/QuestionTemplateReplacementsBuilder.py +69 -16
- edsl/agents/question_option_processor.py +15 -6
- edsl/coop/CoopFunctionsMixin.py +3 -4
- edsl/coop/coop.py +56 -10
- edsl/enums.py +4 -1
- edsl/inference_services/AnthropicService.py +12 -8
- edsl/inference_services/AvailableModelFetcher.py +2 -0
- edsl/inference_services/AwsBedrock.py +1 -2
- edsl/inference_services/AzureAI.py +12 -9
- edsl/inference_services/GoogleService.py +10 -3
- edsl/inference_services/InferenceServiceABC.py +1 -0
- edsl/inference_services/InferenceServicesCollection.py +2 -2
- edsl/inference_services/MistralAIService.py +1 -2
- edsl/inference_services/OpenAIService.py +10 -4
- edsl/inference_services/PerplexityService.py +2 -1
- edsl/inference_services/TestService.py +1 -0
- edsl/inference_services/XAIService.py +11 -0
- edsl/inference_services/registry.py +2 -0
- edsl/jobs/Jobs.py +9 -0
- edsl/jobs/JobsChecks.py +11 -14
- edsl/jobs/JobsPrompts.py +3 -3
- edsl/jobs/async_interview_runner.py +3 -1
- edsl/jobs/check_survey_scenario_compatibility.py +5 -5
- edsl/jobs/interviews/InterviewExceptionEntry.py +12 -0
- edsl/jobs/tasks/TaskHistory.py +1 -1
- edsl/language_models/LanguageModel.py +3 -3
- edsl/language_models/PriceManager.py +45 -5
- edsl/language_models/model.py +89 -36
- edsl/questions/QuestionBase.py +21 -0
- edsl/questions/QuestionBasePromptsMixin.py +103 -0
- edsl/questions/QuestionFreeText.py +22 -5
- edsl/questions/descriptors.py +4 -0
- edsl/questions/question_base_gen_mixin.py +94 -29
- edsl/results/Dataset.py +65 -0
- edsl/results/DatasetExportMixin.py +299 -32
- edsl/results/Result.py +27 -0
- edsl/results/Results.py +24 -3
- edsl/results/ResultsGGMixin.py +7 -3
- edsl/scenarios/DocumentChunker.py +2 -0
- edsl/scenarios/FileStore.py +29 -8
- edsl/scenarios/PdfExtractor.py +21 -1
- edsl/scenarios/Scenario.py +25 -9
- edsl/scenarios/ScenarioList.py +73 -3
- edsl/scenarios/handlers/__init__.py +1 -0
- edsl/scenarios/handlers/docx.py +5 -1
- edsl/scenarios/handlers/jpeg.py +39 -0
- edsl/surveys/Survey.py +28 -6
- edsl/surveys/SurveyFlowVisualization.py +91 -43
- edsl/templates/error_reporting/exceptions_table.html +7 -8
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/interviews.html +0 -1
- edsl/templates/error_reporting/overview.html +2 -7
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +1 -1
- edsl/utilities/PrettyList.py +14 -0
- edsl-0.1.45.dist-info/METADATA +246 -0
- {edsl-0.1.43.dist-info → edsl-0.1.45.dist-info}/RECORD +64 -62
- edsl-0.1.43.dist-info/METADATA +0 -110
- {edsl-0.1.43.dist-info → edsl-0.1.45.dist-info}/LICENSE +0 -0
- {edsl-0.1.43.dist-info → edsl-0.1.45.dist-info}/WHEEL +0 -0
edsl/scenarios/PdfExtractor.py
CHANGED
@@ -4,10 +4,30 @@ import os
|
|
4
4
|
class PdfExtractor:
|
5
5
|
def __init__(self, pdf_path: str):
|
6
6
|
self.pdf_path = pdf_path
|
7
|
+
self._has_pymupdf = self._check_pymupdf()
|
7
8
|
#self.constructor = parent_object.__class__
|
8
9
|
|
10
|
+
def _check_pymupdf(self):
|
11
|
+
"""Check if PyMuPDF is installed."""
|
12
|
+
try:
|
13
|
+
import fitz
|
14
|
+
return True
|
15
|
+
except ImportError:
|
16
|
+
return False
|
17
|
+
|
9
18
|
def get_pdf_dict(self) -> dict:
|
10
|
-
#
|
19
|
+
# First check if the file exists
|
20
|
+
if not os.path.exists(self.pdf_path):
|
21
|
+
raise FileNotFoundError(f"The file {self.pdf_path} does not exist.")
|
22
|
+
|
23
|
+
# Then check if PyMuPDF is available
|
24
|
+
if not self._has_pymupdf:
|
25
|
+
raise ImportError(
|
26
|
+
"The 'fitz' module (PyMuPDF) is required for PDF extraction. "
|
27
|
+
"Please install it with: pip install pymupdf"
|
28
|
+
)
|
29
|
+
|
30
|
+
# If we get here, we can safely import and use fitz
|
11
31
|
import fitz
|
12
32
|
|
13
33
|
if not os.path.exists(self.pdf_path):
|
edsl/scenarios/Scenario.py
CHANGED
@@ -64,6 +64,15 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
64
64
|
self.data = data if data is not None else {}
|
65
65
|
self.name = name
|
66
66
|
|
67
|
+
def __mul__(self, scenario_list_or_scenario: Union["ScenarioList", "Scenario"]) -> "ScenarioList":
|
68
|
+
from edsl.scenarios.ScenarioList import ScenarioList
|
69
|
+
if isinstance(scenario_list_or_scenario, ScenarioList):
|
70
|
+
return scenario_list_or_scenario * self
|
71
|
+
elif isinstance(scenario_list_or_scenario, Scenario):
|
72
|
+
return ScenarioList([self]) * scenario_list_or_scenario
|
73
|
+
else:
|
74
|
+
raise TypeError(f"Cannot multiply Scenario with {type(scenario_list_or_scenario)}")
|
75
|
+
|
67
76
|
def replicate(self, n: int) -> "ScenarioList":
|
68
77
|
"""Replicate a scenario n times to return a ScenarioList.
|
69
78
|
|
@@ -356,11 +365,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
356
365
|
|
357
366
|
@classmethod
|
358
367
|
def from_pdf(cls, pdf_path: str):
|
359
|
-
from
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
368
|
+
"""Create a Scenario from a PDF file."""
|
369
|
+
try:
|
370
|
+
from edsl.scenarios.PdfExtractor import PdfExtractor
|
371
|
+
extractor = PdfExtractor(pdf_path)
|
372
|
+
return Scenario(extractor.get_pdf_dict())
|
373
|
+
except ImportError as e:
|
374
|
+
raise ImportError(
|
375
|
+
f"Could not extract text from PDF: {str(e)}. "
|
376
|
+
"PDF extraction requires the PyMuPDF library. "
|
377
|
+
"Install it with: pip install pymupdf"
|
378
|
+
)
|
379
|
+
|
364
380
|
@classmethod
|
365
381
|
def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
|
366
382
|
"""
|
@@ -442,18 +458,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
|
|
442
458
|
|
443
459
|
>>> s = Scenario({"text": "This is a test.\\nThis is a test.\\n\\nThis is a test."})
|
444
460
|
>>> s.chunk("text", num_lines = 1)
|
445
|
-
ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 1}), Scenario({'text': '', 'text_chunk': 2}), Scenario({'text': 'This is a test.', 'text_chunk': 3})])
|
461
|
+
ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': 'This is a test.', 'text_chunk': 1, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': '', 'text_chunk': 2, 'text_char_count': 0, 'text_word_count': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 3, 'text_char_count': 15, 'text_word_count': 4})])
|
446
462
|
|
447
463
|
>>> s.chunk("text", num_words = 2)
|
448
|
-
ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0}), Scenario({'text': 'a test.', 'text_chunk': 1}), Scenario({'text': 'This is', 'text_chunk': 2}), Scenario({'text': 'a test.', 'text_chunk': 3}), Scenario({'text': 'This is', 'text_chunk': 4}), Scenario({'text': 'a test.', 'text_chunk': 5})])
|
464
|
+
ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 1, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 2, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 3, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 4, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 5, 'text_char_count': 7, 'text_word_count': 2})])
|
449
465
|
|
450
466
|
>>> s = Scenario({"text": "Hello World"})
|
451
467
|
>>> s.chunk("text", num_words = 1, include_original = True)
|
452
|
-
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'Hello World'})])
|
468
|
+
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'})])
|
453
469
|
|
454
470
|
>>> s = Scenario({"text": "Hello World"})
|
455
471
|
>>> s.chunk("text", num_words = 1, include_original = True, hash_original = True)
|
456
|
-
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
|
472
|
+
ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
|
457
473
|
|
458
474
|
>>> s.chunk("text")
|
459
475
|
Traceback (most recent call last):
|
edsl/scenarios/ScenarioList.py
CHANGED
@@ -360,6 +360,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
360
360
|
ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
|
361
361
|
"""
|
362
362
|
from itertools import product
|
363
|
+
from edsl import Scenario
|
364
|
+
if isinstance(other, Scenario):
|
365
|
+
other = ScenarioList([other])
|
366
|
+
elif not isinstance(other, ScenarioList):
|
367
|
+
raise TypeError(f"Cannot multiply ScenarioList with {type(other)}")
|
363
368
|
|
364
369
|
new_sl = []
|
365
370
|
for s1, s2 in list(product(self, other)):
|
@@ -872,7 +877,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
872
877
|
for scenario in sl:
|
873
878
|
scenario[name] = value
|
874
879
|
return sl
|
875
|
-
|
880
|
+
|
876
881
|
def rename(self, replacement_dict: dict) -> ScenarioList:
|
877
882
|
"""Rename the fields in the scenarios.
|
878
883
|
|
@@ -885,13 +890,35 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
885
890
|
ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
|
886
891
|
|
887
892
|
"""
|
888
|
-
|
889
893
|
new_list = ScenarioList([])
|
890
894
|
for obj in self:
|
891
895
|
new_obj = obj.rename(replacement_dict)
|
892
896
|
new_list.append(new_obj)
|
893
897
|
return new_list
|
894
898
|
|
899
|
+
def replace_names(self, new_names: list) -> ScenarioList:
|
900
|
+
"""Replace the field names in the scenarios with a new list of names.
|
901
|
+
|
902
|
+
:param new_names: A list of new field names to use.
|
903
|
+
|
904
|
+
Example:
|
905
|
+
|
906
|
+
>>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
|
907
|
+
>>> s.replace_names(['first_name', 'years'])
|
908
|
+
ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
|
909
|
+
"""
|
910
|
+
if not self:
|
911
|
+
return ScenarioList([])
|
912
|
+
|
913
|
+
if len(new_names) != len(self[0].keys()):
|
914
|
+
raise ScenarioError(
|
915
|
+
f"Length of new names ({len(new_names)}) does not match number of fields ({len(self[0].keys())})"
|
916
|
+
)
|
917
|
+
|
918
|
+
old_names = list(self[0].keys())
|
919
|
+
replacement_dict = dict(zip(old_names, new_names))
|
920
|
+
return self.rename(replacement_dict)
|
921
|
+
|
895
922
|
## NEEDS TO BE FIXED
|
896
923
|
# def new_column_names(self, new_names: List[str]) -> ScenarioList:
|
897
924
|
# """Rename the fields in the scenarios.
|
@@ -935,6 +962,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
935
962
|
entry = {
|
936
963
|
"line_no": line_no + 1, # Using 1-based index for line numbers
|
937
964
|
"text": text,
|
965
|
+
"num_words": len(text.split()),
|
966
|
+
"num_chars": len(text),
|
938
967
|
"line_before": non_blank_lines[index - 1][1] if index > 0 else None,
|
939
968
|
"line_after": (
|
940
969
|
non_blank_lines[index + 1][1]
|
@@ -995,8 +1024,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
995
1024
|
ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
|
996
1025
|
"""
|
997
1026
|
return cls([Scenario(row) for row in df.to_dict(orient="records")])
|
1027
|
+
|
998
1028
|
|
999
1029
|
@classmethod
|
1030
|
+
def from_dta(cls, filepath: str, include_metadata: bool = True) -> ScenarioList:
|
1031
|
+
"""Create a ScenarioList from a Stata file.
|
1032
|
+
|
1033
|
+
Args:
|
1034
|
+
filepath (str): Path to the Stata (.dta) file
|
1035
|
+
include_metadata (bool): If True, extract and preserve variable labels and value labels
|
1036
|
+
as additional metadata in the ScenarioList
|
1037
|
+
|
1038
|
+
Returns:
|
1039
|
+
ScenarioList: A ScenarioList containing the data from the Stata file
|
1040
|
+
"""
|
1041
|
+
import pandas as pd
|
1042
|
+
|
1043
|
+
# Read the Stata file with pandas
|
1044
|
+
df = pd.read_stata(filepath)
|
1045
|
+
|
1046
|
+
# Create the basic ScenarioList
|
1047
|
+
scenario_list = cls.from_pandas(df)
|
1048
|
+
|
1049
|
+
# Extract and preserve metadata if requested
|
1050
|
+
if include_metadata:
|
1051
|
+
# Get variable labels (if any)
|
1052
|
+
variable_labels = {}
|
1053
|
+
if hasattr(df, 'variable_labels') and df.variable_labels:
|
1054
|
+
variable_labels = df.variable_labels
|
1055
|
+
|
1056
|
+
# Get value labels (if any)
|
1057
|
+
value_labels = {}
|
1058
|
+
if hasattr(df, 'value_labels') and df.value_labels:
|
1059
|
+
value_labels = df.value_labels
|
1060
|
+
|
1061
|
+
# Store the metadata in the ScenarioList's codebook
|
1062
|
+
if variable_labels or value_labels:
|
1063
|
+
scenario_list.codebook = {
|
1064
|
+
'variable_labels': variable_labels,
|
1065
|
+
'value_labels': value_labels
|
1066
|
+
}
|
1067
|
+
|
1068
|
+
return scenario_list
|
1069
|
+
|
1000
1070
|
def from_wikipedia(cls, url: str, table_index: int = 0):
|
1001
1071
|
"""
|
1002
1072
|
Extracts a table from a Wikipedia page.
|
@@ -1456,7 +1526,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
1456
1526
|
|
1457
1527
|
>>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
|
1458
1528
|
>>> s.chunk('text', num_words=3)
|
1459
|
-
ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
|
1529
|
+
ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 3}), Scenario({'text': 'fox jumps over', 'text_chunk': 1, 'text_char_count': 14, 'text_word_count': 3}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2, 'text_char_count': 13, 'text_word_count': 3})])
|
1460
1530
|
"""
|
1461
1531
|
new_scenarios = []
|
1462
1532
|
for scenario in self:
|
edsl/scenarios/handlers/docx.py
CHANGED
@@ -37,7 +37,11 @@ class DocxMethods(FileMethods):
|
|
37
37
|
print("DOCX file was not found.")
|
38
38
|
|
39
39
|
def view_notebook(self):
|
40
|
-
|
40
|
+
try:
|
41
|
+
import mammoth
|
42
|
+
except ImportError:
|
43
|
+
print("mammoth is not installed. Please install it using 'pip install mammoth'.")
|
44
|
+
return
|
41
45
|
from IPython.display import HTML, display
|
42
46
|
|
43
47
|
with open(self.path, "rb") as docx_file:
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import tempfile
|
2
|
+
from edsl.scenarios.file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class JpegMethods(FileMethods):
|
6
|
+
suffix = "jpeg"
|
7
|
+
|
8
|
+
def view_system(self):
|
9
|
+
import os
|
10
|
+
import subprocess
|
11
|
+
|
12
|
+
if os.path.exists(self.path):
|
13
|
+
try:
|
14
|
+
if (os_name := os.name) == "posix":
|
15
|
+
subprocess.run(["open", self.path], check=True) # macOS
|
16
|
+
elif os_name == "nt":
|
17
|
+
os.startfile(self.path) # Windows
|
18
|
+
else:
|
19
|
+
subprocess.run(["xdg-open", self.path], check=True) # Linux
|
20
|
+
except Exception as e:
|
21
|
+
print(f"Error opening JPEG: {e}")
|
22
|
+
else:
|
23
|
+
print("JPEG file was not found.")
|
24
|
+
|
25
|
+
def view_notebook(self):
|
26
|
+
from IPython.display import Image, display
|
27
|
+
|
28
|
+
display(Image(filename=self.path))
|
29
|
+
|
30
|
+
def example(self):
|
31
|
+
import matplotlib.pyplot as plt
|
32
|
+
import numpy as np
|
33
|
+
|
34
|
+
x = np.linspace(0, 10, 100)
|
35
|
+
y = np.sin(x)
|
36
|
+
plt.plot(x, y)
|
37
|
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpeg") as f:
|
38
|
+
plt.savefig(f.name)
|
39
|
+
return f.name
|
edsl/surveys/Survey.py
CHANGED
@@ -942,12 +942,11 @@ class Survey(SurveyExportMixin, Base):
|
|
942
942
|
# TODO: temp fix by creating a cache
|
943
943
|
if cache is None:
|
944
944
|
from edsl.data import Cache
|
945
|
+
|
945
946
|
c = Cache()
|
946
947
|
else:
|
947
948
|
c = cache
|
948
949
|
|
949
|
-
|
950
|
-
|
951
950
|
jobs: "Jobs" = self.get_job(model=model, agent=agent, **kwargs).using(c)
|
952
951
|
return await jobs.run_async(
|
953
952
|
disable_remote_inference=disable_remote_inference,
|
@@ -1244,6 +1243,29 @@ class Survey(SurveyExportMixin, Base):
|
|
1244
1243
|
|
1245
1244
|
return self.by(s).by(agent).by(model)
|
1246
1245
|
|
1246
|
+
###################
|
1247
|
+
# COOP METHODS
|
1248
|
+
###################
|
1249
|
+
def humanize(
|
1250
|
+
self,
|
1251
|
+
project_name: str = "Project",
|
1252
|
+
survey_description: Optional[str] = None,
|
1253
|
+
survey_alias: Optional[str] = None,
|
1254
|
+
survey_visibility: Optional["VisibilityType"] = "unlisted",
|
1255
|
+
) -> dict:
|
1256
|
+
"""
|
1257
|
+
Send the survey to Coop.
|
1258
|
+
|
1259
|
+
Then, create a project on Coop so you can share the survey with human respondents.
|
1260
|
+
"""
|
1261
|
+
from edsl.coop import Coop
|
1262
|
+
|
1263
|
+
c = Coop()
|
1264
|
+
project_details = c.create_project(
|
1265
|
+
self, project_name, survey_description, survey_alias, survey_visibility
|
1266
|
+
)
|
1267
|
+
return project_details
|
1268
|
+
|
1247
1269
|
|
1248
1270
|
def main():
|
1249
1271
|
"""Run the example survey."""
|
@@ -1255,16 +1277,16 @@ def main():
|
|
1255
1277
|
q0 = QuestionMultipleChoice(
|
1256
1278
|
question_name="q0",
|
1257
1279
|
question_text="What is the capital of France?",
|
1258
|
-
question_options=["London", "Paris", "Rome", "Boston", "I don't know"]
|
1280
|
+
question_options=["London", "Paris", "Rome", "Boston", "I don't know"],
|
1259
1281
|
)
|
1260
1282
|
q1 = QuestionList(
|
1261
1283
|
question_name="q1",
|
1262
1284
|
question_text="Name some cities in France.",
|
1263
|
-
max_list_items
|
1285
|
+
max_list_items=5,
|
1264
1286
|
)
|
1265
1287
|
q2 = QuestionNumerical(
|
1266
1288
|
question_name="q2",
|
1267
|
-
question_text="What is the population of {{ q0.answer }}?"
|
1289
|
+
question_text="What is the population of {{ q0.answer }}?",
|
1268
1290
|
)
|
1269
1291
|
s = Survey(questions=[q0, q1, q2])
|
1270
1292
|
s = s.add_rule(q0, "q0 == 'Paris'", q2)
|
@@ -1281,4 +1303,4 @@ if __name__ == "__main__":
|
|
1281
1303
|
import doctest
|
1282
1304
|
|
1283
1305
|
# doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.SKIP)
|
1284
|
-
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
1306
|
+
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
@@ -8,51 +8,113 @@ import tempfile
|
|
8
8
|
class SurveyFlowVisualization:
|
9
9
|
"""A mixin for visualizing the flow of a survey with parameter visualization."""
|
10
10
|
|
11
|
-
def __init__(self, survey: "Survey"):
|
11
|
+
def __init__(self, survey: "Survey", scenario: Optional["Scenario"] = None, agent: Optional["Agent"] = None):
|
12
12
|
self.survey = survey
|
13
|
+
self.scenario = scenario or {}
|
14
|
+
self.agent = agent
|
15
|
+
#from edsl import Scenario
|
16
|
+
#self.scenario = Scenario({'hello': 'world'})
|
13
17
|
|
14
18
|
def show_flow(self, filename: Optional[str] = None):
|
15
19
|
"""Create an image showing the flow of users through the survey and question parameters."""
|
16
20
|
# Create a graph object
|
17
21
|
import pydot
|
18
22
|
|
19
|
-
|
23
|
+
FONT_SIZE = "10"
|
20
24
|
|
21
|
-
|
25
|
+
graph = pydot.Dot(graph_type="digraph", fontsize=FONT_SIZE)
|
26
|
+
|
27
|
+
# First collect all unique parameters and different types of references
|
22
28
|
params_and_refs = set()
|
23
29
|
param_to_questions = {} # Keep track of which questions use each parameter
|
24
|
-
|
30
|
+
reference_types = {} # Dictionary to store different types of references
|
31
|
+
reference_colors = {
|
32
|
+
'answer': 'purple',
|
33
|
+
'question_text': 'red',
|
34
|
+
'question_options': 'orange',
|
35
|
+
'comment': 'blue',
|
36
|
+
'default': "grey"
|
37
|
+
}
|
25
38
|
|
26
39
|
# First pass: collect parameters and their question associations
|
27
40
|
for index, question in enumerate(self.survey.questions):
|
28
|
-
# Add the main question node
|
29
41
|
question_node = pydot.Node(
|
30
|
-
f"Q{index}", label=f"{question.question_name}", shape="ellipse"
|
42
|
+
f"Q{index}", label=f"{question.question_name}", shape="ellipse", fontsize=FONT_SIZE
|
31
43
|
)
|
32
44
|
graph.add_node(question_node)
|
33
45
|
|
34
|
-
if hasattr(question, "
|
35
|
-
for param in question.
|
36
|
-
|
37
|
-
|
38
|
-
|
46
|
+
if hasattr(question, "detailed_parameters"):
|
47
|
+
for param in question.detailed_parameters:
|
48
|
+
if "agent." in param:
|
49
|
+
# Handle agent trait references
|
50
|
+
trait_name = param.replace("agent.", "")
|
51
|
+
params_and_refs.add(param)
|
52
|
+
if param not in param_to_questions:
|
53
|
+
param_to_questions[param] = []
|
54
|
+
param_to_questions[param].append(index)
|
55
|
+
elif "." in param:
|
56
|
+
source_q, ref_type = param.split(".", 1)
|
57
|
+
if ref_type not in reference_types:
|
58
|
+
reference_types[ref_type] = set()
|
59
|
+
reference_types[ref_type].add((source_q, index))
|
39
60
|
else:
|
40
61
|
params_and_refs.add(param)
|
41
62
|
if param not in param_to_questions:
|
42
63
|
param_to_questions[param] = []
|
43
64
|
param_to_questions[param].append(index)
|
44
65
|
|
66
|
+
# Add edges for all reference types
|
67
|
+
for ref_type, references in reference_types.items():
|
68
|
+
color = reference_colors.get(ref_type, reference_colors['default'])
|
69
|
+
for source_q_name, target_q_index in references:
|
70
|
+
# Find the source question index by name
|
71
|
+
try:
|
72
|
+
source_q_index = next(
|
73
|
+
i
|
74
|
+
for i, q in enumerate(self.survey.questions)
|
75
|
+
if q.question_name == source_q_name
|
76
|
+
)
|
77
|
+
except StopIteration:
|
78
|
+
print(f"Source question {source_q_name} not found in survey.")
|
79
|
+
continue
|
80
|
+
|
81
|
+
ref_edge = pydot.Edge(
|
82
|
+
f"Q{source_q_index}",
|
83
|
+
f"Q{target_q_index}",
|
84
|
+
style="dashed",
|
85
|
+
color=color,
|
86
|
+
label=f".{ref_type}",
|
87
|
+
fontcolor=color,
|
88
|
+
fontname="Courier",
|
89
|
+
fontsize=FONT_SIZE
|
90
|
+
)
|
91
|
+
graph.add_edge(ref_edge)
|
92
|
+
|
45
93
|
# Create parameter nodes and connect them to questions
|
46
94
|
for param in params_and_refs:
|
47
95
|
param_node_name = f"param_{param}"
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
96
|
+
node_attrs = {
|
97
|
+
"label": f"{{{{ {param} }}}}",
|
98
|
+
"shape": "box",
|
99
|
+
"style": "filled",
|
100
|
+
"fillcolor": "lightgrey",
|
101
|
+
"fontsize": FONT_SIZE,
|
102
|
+
}
|
103
|
+
|
104
|
+
# Special handling for agent traits
|
105
|
+
if param.startswith("agent."):
|
106
|
+
node_attrs.update({
|
107
|
+
"fillcolor": "lightpink",
|
108
|
+
"label": f"Agent Trait\n{{{{ {param} }}}}"
|
109
|
+
})
|
110
|
+
# Check if parameter exists in scenario
|
111
|
+
elif self.scenario and param in self.scenario:
|
112
|
+
node_attrs.update({
|
113
|
+
"fillcolor": "lightgreen",
|
114
|
+
"label": f"Scenario\n{{{{ {param} }}}}"
|
115
|
+
})
|
116
|
+
|
117
|
+
param_node = pydot.Node(param_node_name, **node_attrs)
|
56
118
|
graph.add_node(param_node)
|
57
119
|
|
58
120
|
# Connect this parameter to all questions that use it
|
@@ -61,39 +123,22 @@ class SurveyFlowVisualization:
|
|
61
123
|
param_node_name,
|
62
124
|
f"Q{q_index}",
|
63
125
|
style="dotted",
|
64
|
-
color="grey",
|
65
126
|
arrowsize="0.5",
|
127
|
+
fontsize=FONT_SIZE,
|
66
128
|
)
|
67
129
|
graph.add_edge(param_edge)
|
68
130
|
|
69
|
-
# Add edges for answer references
|
70
|
-
for source_q_name, target_q_index in answer_refs:
|
71
|
-
# Find the source question index by name
|
72
|
-
source_q_index = next(
|
73
|
-
i
|
74
|
-
for i, q in enumerate(self.survey.questions)
|
75
|
-
if q.question_name == source_q_name
|
76
|
-
)
|
77
|
-
ref_edge = pydot.Edge(
|
78
|
-
f"Q{source_q_index}",
|
79
|
-
f"Q{target_q_index}",
|
80
|
-
style="dashed",
|
81
|
-
color="purple",
|
82
|
-
label="answer reference",
|
83
|
-
)
|
84
|
-
graph.add_edge(ref_edge)
|
85
|
-
|
86
131
|
# Add an "EndOfSurvey" node
|
87
132
|
graph.add_node(
|
88
|
-
pydot.Node("EndOfSurvey", label="End of Survey", shape="rectangle")
|
133
|
+
pydot.Node("EndOfSurvey", label="End of Survey", shape="rectangle", fontsize=FONT_SIZE, style="filled", fillcolor="lightgrey")
|
89
134
|
)
|
90
135
|
|
91
136
|
# Add edges for normal flow through the survey
|
92
137
|
num_questions = len(self.survey.questions)
|
93
138
|
for index in range(num_questions - 1):
|
94
|
-
graph.add_edge(pydot.Edge(f"Q{index}", f"Q{index+1}"))
|
139
|
+
graph.add_edge(pydot.Edge(f"Q{index}", f"Q{index+1}", fontsize=FONT_SIZE))
|
95
140
|
|
96
|
-
graph.add_edge(pydot.Edge(f"Q{num_questions-1}", "EndOfSurvey"))
|
141
|
+
graph.add_edge(pydot.Edge(f"Q{num_questions-1}", "EndOfSurvey", fontsize=FONT_SIZE))
|
97
142
|
|
98
143
|
relevant_rules = [
|
99
144
|
rule
|
@@ -109,7 +154,7 @@ class SurveyFlowVisualization:
|
|
109
154
|
"purple",
|
110
155
|
"brown",
|
111
156
|
"cyan",
|
112
|
-
"
|
157
|
+
"darkgreen",
|
113
158
|
]
|
114
159
|
rule_colors = {
|
115
160
|
rule: colors[i % len(colors)] for i, rule in enumerate(relevant_rules)
|
@@ -133,6 +178,8 @@ class SurveyFlowVisualization:
|
|
133
178
|
fontcolor=color,
|
134
179
|
tailport="n",
|
135
180
|
headport="n",
|
181
|
+
fontname="Courier",
|
182
|
+
fontsize=FONT_SIZE,
|
136
183
|
)
|
137
184
|
else:
|
138
185
|
edge = pydot.Edge(
|
@@ -141,6 +188,8 @@ class SurveyFlowVisualization:
|
|
141
188
|
label=edge_label,
|
142
189
|
color=color,
|
143
190
|
fontcolor=color,
|
191
|
+
fontname="Courier",
|
192
|
+
fontsize=FONT_SIZE,
|
144
193
|
)
|
145
194
|
|
146
195
|
graph.add_edge(edge)
|
@@ -156,9 +205,8 @@ class SurveyFlowVisualization:
|
|
156
205
|
except FileNotFoundError:
|
157
206
|
print(
|
158
207
|
"""File not found. Most likely it's because you don't have graphviz installed. Please install it and try again.
|
159
|
-
|
208
|
+
On Ubuntu, you can install it by running:
|
160
209
|
$ sudo apt-get install graphviz
|
161
|
-
on Ubuntu.
|
162
210
|
"""
|
163
211
|
)
|
164
212
|
from edsl.utilities.is_notebook import is_notebook
|
@@ -7,11 +7,11 @@
|
|
7
7
|
<table border="1">
|
8
8
|
<thead>
|
9
9
|
<tr>
|
10
|
-
<th>Exception Type</th>
|
11
|
-
<th>Service</th>
|
12
|
-
<th>Model</th>
|
13
|
-
<th>Question Name</th>
|
14
|
-
<th>Total</th>
|
10
|
+
<th style="text-align: left">Exception Type</th>
|
11
|
+
<th style="text-align: left">Service</th>
|
12
|
+
<th style="text-align: left">Model</th>
|
13
|
+
<th style="text-align: left">Question Name</th>
|
14
|
+
<th style="text-align: left">Total</th>
|
15
15
|
</tr>
|
16
16
|
</thead>
|
17
17
|
<tbody>
|
@@ -27,9 +27,8 @@
|
|
27
27
|
</tbody>
|
28
28
|
</table>
|
29
29
|
<p>
|
30
|
-
|
31
|
-
You can modify the maximum number of attempts for failed API calls in `edsl/config.py`.
|
30
|
+
Note: You may encounter repeated exceptions where retries were attempted.
|
32
31
|
</p>
|
33
32
|
<p>
|
34
|
-
|
33
|
+
See details about each exception, including code for reproducing it (click to expand).
|
35
34
|
</p>
|
@@ -4,16 +4,11 @@
|
|
4
4
|
}
|
5
5
|
</style>
|
6
6
|
|
7
|
-
<
|
7
|
+
<h3>Exceptions Report</h3>
|
8
8
|
<p>
|
9
9
|
This report summarizes exceptions encountered in the job that was run.
|
10
10
|
</p>
|
11
|
-
<p>
|
12
|
-
For advice on dealing with exceptions, please see the EDSL <a href="https://docs.expectedparrot.com/en/latest/exceptions.html">documentation</a> page. <br>
|
13
|
-
You can also post a question at the Expected Parrot <a href="https://discord.com/invite/mxAYkjfy9m">Discord channel</a>, open an issue on <a href="https://github.com/expectedparrot/edsl">GitHub</a>, or send an email to <a href="mailto:info@expectedparrot.com">info@expectedparrot.com</a>.
|
14
|
-
</p>
|
15
11
|
|
16
|
-
<h2>Overview</h2>
|
17
12
|
<table border="1">
|
18
13
|
<tbody>
|
19
14
|
<tr>
|
@@ -27,5 +22,5 @@
|
|
27
22
|
</tbody>
|
28
23
|
</table>
|
29
24
|
<p>
|
30
|
-
An
|
25
|
+
An interview is the result of one survey, taken by one agent, with one model and one scenario (if any).
|
31
26
|
</p>
|
@@ -1,2 +1,2 @@
|
|
1
|
-
<h2>Performance Plot</h2>
|
1
|
+
<!-- <h2>Performance Plot</h2> -->
|
2
2
|
{{ performance_plot_html }}
|