edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. edsl/Base.py +60 -31
  2. edsl/__version__.py +1 -1
  3. edsl/agents/Agent.py +18 -9
  4. edsl/agents/AgentList.py +59 -8
  5. edsl/agents/Invigilator.py +18 -7
  6. edsl/agents/InvigilatorBase.py +0 -19
  7. edsl/agents/PromptConstructor.py +5 -4
  8. edsl/config.py +8 -0
  9. edsl/coop/coop.py +74 -7
  10. edsl/data/Cache.py +27 -2
  11. edsl/data/CacheEntry.py +8 -3
  12. edsl/data/RemoteCacheSync.py +0 -19
  13. edsl/enums.py +2 -0
  14. edsl/inference_services/GoogleService.py +7 -15
  15. edsl/inference_services/PerplexityService.py +163 -0
  16. edsl/inference_services/registry.py +2 -0
  17. edsl/jobs/Jobs.py +88 -548
  18. edsl/jobs/JobsChecks.py +147 -0
  19. edsl/jobs/JobsPrompts.py +268 -0
  20. edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
  21. edsl/jobs/interviews/Interview.py +11 -11
  22. edsl/jobs/runners/JobsRunnerAsyncio.py +140 -35
  23. edsl/jobs/runners/JobsRunnerStatus.py +0 -2
  24. edsl/jobs/tasks/TaskHistory.py +15 -16
  25. edsl/language_models/LanguageModel.py +44 -84
  26. edsl/language_models/ModelList.py +47 -1
  27. edsl/language_models/registry.py +57 -4
  28. edsl/prompts/Prompt.py +8 -3
  29. edsl/questions/QuestionBase.py +20 -16
  30. edsl/questions/QuestionExtract.py +3 -4
  31. edsl/questions/question_registry.py +36 -6
  32. edsl/results/CSSParameterizer.py +108 -0
  33. edsl/results/Dataset.py +146 -15
  34. edsl/results/DatasetExportMixin.py +231 -217
  35. edsl/results/DatasetTree.py +134 -4
  36. edsl/results/Result.py +18 -9
  37. edsl/results/Results.py +145 -51
  38. edsl/results/TableDisplay.py +198 -0
  39. edsl/results/table_display.css +78 -0
  40. edsl/scenarios/FileStore.py +187 -13
  41. edsl/scenarios/Scenario.py +61 -4
  42. edsl/scenarios/ScenarioJoin.py +127 -0
  43. edsl/scenarios/ScenarioList.py +237 -62
  44. edsl/surveys/Survey.py +16 -2
  45. edsl/surveys/SurveyFlowVisualizationMixin.py +67 -9
  46. edsl/surveys/instructions/Instruction.py +12 -0
  47. edsl/templates/error_reporting/interview_details.html +3 -3
  48. edsl/templates/error_reporting/interviews.html +18 -9
  49. edsl/utilities/utilities.py +15 -0
  50. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/METADATA +2 -1
  51. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/RECORD +53 -45
  52. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/LICENSE +0 -0
  53. {edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev4.dist-info}/WHEEL +0 -0
@@ -1,10 +1,18 @@
1
- from typing import Dict, List, Any, Optional
1
+ from typing import Dict, List, Any, Optional, List
2
2
  from docx import Document
3
3
  from docx.shared import Inches, Pt
4
4
  from docx.enum.text import WD_ALIGN_PARAGRAPH
5
5
  from docx.enum.style import WD_STYLE_TYPE
6
6
 
7
7
 
8
+ def is_hashable(v):
9
+ try:
10
+ hash(v)
11
+ return True
12
+ except TypeError:
13
+ return False
14
+
15
+
8
16
  class TreeNode:
9
17
  def __init__(self, key=None, value=None):
10
18
  self.key = key
@@ -13,16 +21,21 @@ class TreeNode:
13
21
 
14
22
 
15
23
  class Tree:
16
- def __init__(self, data: "Dataset"):
24
+ def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
17
25
  d = {}
18
26
  for entry in data:
19
27
  d.update(entry)
20
28
  self.data = d
21
29
  self.root = None
22
30
 
31
+ self.node_order = node_order
32
+
33
+ self.construct_tree(node_order)
34
+
23
35
  def unique_values_by_keys(self) -> dict:
24
36
  unique_values = {}
25
- for key, values in self.data.items():
37
+ for key, raw_values in self.data.items():
38
+ values = [v if is_hashable(v) else str(v) for v in raw_values]
26
39
  unique_values[key] = list(set(values))
27
40
  return unique_values
28
41
 
@@ -45,15 +58,25 @@ class Tree:
45
58
  current = self.root
46
59
  for level in node_order[:-1]:
47
60
  value = self.data[level][i]
61
+ if not is_hashable(value):
62
+ value = str(value)
48
63
  if value not in current.children:
49
64
  current.children[value] = TreeNode(key=level, value=value)
50
65
  current = current.children[value]
51
66
 
52
67
  leaf_key = node_order[-1]
53
68
  leaf_value = self.data[leaf_key][i]
69
+ if not is_hashable(leaf_value):
70
+ leaf_value = str(leaf_value)
54
71
  if leaf_value not in current.children:
55
72
  current.children[leaf_value] = TreeNode(key=leaf_key, value=leaf_value)
56
73
 
74
+ def __repr__(self):
75
+ if self.node_order is not None:
76
+ return f"Tree(Dataset({self.data}), node_order={self.node_order})"
77
+ else:
78
+ return f"Tree(Dataset({self.data}))"
79
+
57
80
  def print_tree(
58
81
  self, node: Optional[TreeNode] = None, level: int = 0, print_keys: bool = False
59
82
  ):
@@ -71,7 +94,10 @@ class Tree:
71
94
  for child in node.children.values():
72
95
  self.print_tree(child, level + 1, print_keys)
73
96
 
74
- def to_docx(self, filename: str):
97
+ def to_docx(self, filename: Optional[str] = None):
98
+ if filename is None:
99
+ filename = "tree_structure.docx"
100
+
75
101
  doc = Document()
76
102
 
77
103
  # Create styles for headings
@@ -90,6 +116,110 @@ class Tree:
90
116
 
91
117
  self._add_to_docx(doc, self.root, 0)
92
118
  doc.save(filename)
119
+ from edsl.utilities.utilities import file_notice
120
+
121
+ file_notice(filename)
122
+
123
+ def _repr_html_(self):
124
+ """Returns an interactive HTML representation of the tree with collapsible sections."""
125
+
126
+ # Generate a unique ID for this tree instance
127
+ import uuid
128
+
129
+ tree_id = f"tree_{uuid.uuid4().hex[:8]}"
130
+
131
+ styles = f"""
132
+ <div class="{tree_id}">
133
+ <style>
134
+ .{tree_id} details {{
135
+ margin-left: 20px;
136
+ }}
137
+ .{tree_id} summary {{
138
+ cursor: pointer;
139
+ margin: 2px 0;
140
+ }}
141
+ .{tree_id} .value {{
142
+ font-family: monospace;
143
+ background: #f5f5f5;
144
+ padding: 2px 6px;
145
+ border-radius: 3px;
146
+ margin: 1px 0;
147
+ }}
148
+ .{tree_id} .key {{
149
+ color: #666;
150
+ font-style: italic;
151
+ }}
152
+ </style>
153
+ """
154
+
155
+ def node_to_html(node, level=0, print_keys=True):
156
+ if node is None:
157
+ return "Tree has not been constructed yet."
158
+
159
+ html = []
160
+
161
+ if node.value is not None:
162
+ # Create the node content
163
+ content = []
164
+ if print_keys and node.key is not None:
165
+ content.append(f'<span class="key">{node.key}: </span>')
166
+ content.append(f'<span class="value">{node.value}</span>')
167
+ content_html = "".join(content)
168
+
169
+ if node.children:
170
+ # Node with children
171
+ html.append(f'<details {"open" if level < 1 else ""}>')
172
+ html.append(f"<summary>{content_html}</summary>")
173
+ for child in node.children.values():
174
+ html.append(node_to_html(child, level + 1, print_keys))
175
+ html.append("</details>")
176
+ else:
177
+ # Leaf node
178
+ html.append(f"<div>{content_html}</div>")
179
+ else:
180
+ # Root node with no value
181
+ if node.children:
182
+ for child in node.children.values():
183
+ html.append(node_to_html(child, level, print_keys))
184
+
185
+ return "\n".join(html)
186
+
187
+ tree_html = node_to_html(self.root)
188
+ return f"{styles}{tree_html}</div>"
189
+
190
+ # def _repr_html_(self):
191
+ # """Returns an HTML representation of the tree, following the same logic as print_tree."""
192
+ # styles = """
193
+ # <style>
194
+ # .tree-container {
195
+ # font-family: monospace;
196
+ # white-space: pre;
197
+ # margin: 10px;
198
+ # }
199
+ # </style>
200
+ # """
201
+
202
+ # def node_to_html(node, level=0, print_keys=False):
203
+ # if node is None:
204
+ # node = self.root
205
+ # if node is None:
206
+ # return "Tree has not been constructed yet."
207
+
208
+ # html = []
209
+ # if node.value is not None:
210
+ # indent = "&nbsp;" * 2 * level # Using &nbsp; for HTML spaces
211
+ # if print_keys and node.key is not None:
212
+ # html.append(f"{indent}{node.key}: {node.value}<br>")
213
+ # else:
214
+ # html.append(f"{indent}{node.value}<br>")
215
+
216
+ # for child in node.children.values():
217
+ # html.append(node_to_html(child, level + 1, print_keys))
218
+
219
+ # return "".join(html)
220
+
221
+ # tree_html = node_to_html(self.root)
222
+ # return f'<div class="tree-container">{tree_html}</div>{styles}'
93
223
 
94
224
  def _add_to_docx(self, doc, node: TreeNode, level: int):
95
225
  if node.value is not None:
edsl/results/Result.py CHANGED
@@ -137,6 +137,15 @@ class Result(Base, UserDict):
137
137
  self._combined_dict = None
138
138
  self._problem_keys = None
139
139
 
140
+ def _repr_html_(self):
141
+ # d = self.to_dict(add_edsl_version=False)
142
+ d = self.to_dict(add_edsl_version=False)
143
+ data = [[k, v] for k, v in d.items()]
144
+ from tabulate import tabulate
145
+
146
+ table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
147
+ return f"<pre>{table}</pre>"
148
+
140
149
  ###############
141
150
  # Used in Results
142
151
  ###############
@@ -156,15 +165,15 @@ class Result(Base, UserDict):
156
165
  if key in self.question_to_attributes:
157
166
  # You might be tempted to just use the naked key
158
167
  # but this is a bad idea because it pollutes the namespace
159
- question_text_dict[key + "_question_text"] = (
160
- self.question_to_attributes[key]["question_text"]
161
- )
162
- question_options_dict[key + "_question_options"] = (
163
- self.question_to_attributes[key]["question_options"]
164
- )
165
- question_type_dict[key + "_question_type"] = (
166
- self.question_to_attributes[key]["question_type"]
167
- )
168
+ question_text_dict[
169
+ key + "_question_text"
170
+ ] = self.question_to_attributes[key]["question_text"]
171
+ question_options_dict[
172
+ key + "_question_options"
173
+ ] = self.question_to_attributes[key]["question_options"]
174
+ question_type_dict[
175
+ key + "_question_type"
176
+ ] = self.question_to_attributes[key]["question_type"]
168
177
 
169
178
  return {
170
179
  "agent": self.agent.traits
edsl/results/Results.py CHANGED
@@ -32,7 +32,7 @@ from edsl.results.ResultsDBMixin import ResultsDBMixin
32
32
  from edsl.results.ResultsGGMixin import ResultsGGMixin
33
33
  from edsl.results.ResultsFetchMixin import ResultsFetchMixin
34
34
 
35
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
35
+ from edsl.utilities.decorators import remove_edsl_version
36
36
  from edsl.utilities.utilities import dict_hash
37
37
 
38
38
 
@@ -46,6 +46,9 @@ class Mixins(
46
46
  ResultsGGMixin,
47
47
  ResultsToolsMixin,
48
48
  ):
49
+ def long(self):
50
+ return self.table().long()
51
+
49
52
  def print_long(self, max_rows: int = None) -> None:
50
53
  """Print the results in long format.
51
54
 
@@ -73,6 +76,8 @@ class Results(UserList, Mixins, Base):
73
76
  It also has a list of created_columns, which are columns that have been created with `mutate` and are not part of the original data.
74
77
  """
75
78
 
79
+ __documentation__ = "https://docs.expectedparrot.com/en/latest/results.html"
80
+
76
81
  known_data_types = [
77
82
  "answer",
78
83
  "scenario",
@@ -121,13 +126,55 @@ class Results(UserList, Mixins, Base):
121
126
  if hasattr(self, "_add_output_functions"):
122
127
  self._add_output_functions()
123
128
 
129
+ def _summary(self) -> dict:
130
+ import reprlib
131
+
132
+ # import yaml
133
+
134
+ d = {
135
+ "EDSL Class": "Results",
136
+ # "docs_url": self.__documentation__,
137
+ "# of agents": len(set(self.agents)),
138
+ "# of distinct models": len(set(self.models)),
139
+ "# of observations": len(self),
140
+ "# Scenarios": len(set(self.scenarios)),
141
+ "Survey Length (# questions)": len(self.survey),
142
+ "Survey question names": reprlib.repr(self.survey.question_names),
143
+ "Object hash": hash(self),
144
+ }
145
+ return d
146
+
147
+ def compute_job_cost(self, include_cached_responses_in_cost=False) -> float:
148
+ """
149
+ Computes the cost of a completed job in USD.
150
+ """
151
+ total_cost = 0
152
+ for result in self:
153
+ for key in result.raw_model_response:
154
+ if key.endswith("_cost"):
155
+ result_cost = result.raw_model_response[key]
156
+
157
+ question_name = key.removesuffix("_cost")
158
+ cache_used = result.cache_used_dict[question_name]
159
+
160
+ if isinstance(result_cost, (int, float)):
161
+ if include_cached_responses_in_cost:
162
+ total_cost += result_cost
163
+ elif not include_cached_responses_in_cost and not cache_used:
164
+ total_cost += result_cost
165
+
166
+ return total_cost
167
+
124
168
  def leaves(self):
125
169
  leaves = []
126
170
  for result in self:
127
171
  leaves.extend(result.leaves())
128
172
  return leaves
129
173
 
130
- def tree(
174
+ def tree(self, node_list: Optional[List[str]] = None):
175
+ return self.to_scenario_list().tree(node_list)
176
+
177
+ def interactive_tree(
131
178
  self,
132
179
  fold_attributes: Optional[List[str]] = None,
133
180
  drop: Optional[List[str]] = None,
@@ -260,13 +307,67 @@ class Results(UserList, Mixins, Base):
260
307
 
261
308
  return f"Results(data = {reprlib.repr(self.data)}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
262
309
 
310
+ def table(
311
+ self,
312
+ # selector_string: Optional[str] = "*.*",
313
+ *fields,
314
+ tablefmt: Optional[str] = None,
315
+ pretty_labels: Optional[dict] = None,
316
+ print_parameters: Optional[dict] = None,
317
+ ):
318
+ new_fields = []
319
+ for field in fields:
320
+ if "." in field:
321
+ data_type, key = field.split(".")
322
+ if data_type not in self.known_data_types:
323
+ raise ResultsInvalidNameError(
324
+ f"{data_type} is not a valid data type. Must be in {self.known_data_types}"
325
+ )
326
+ if key == "*":
327
+ for k in self._data_type_to_keys[data_type]:
328
+ new_fields.append(k)
329
+ else:
330
+ if key not in self._key_to_data_type:
331
+ raise ResultsColumnNotFoundError(
332
+ f"{key} is not a valid key. Must be in {self._key_to_data_type}"
333
+ )
334
+ new_fields.append(key)
335
+ else:
336
+ new_fields.append(field)
337
+
338
+ return (
339
+ self.to_scenario_list()
340
+ .to_dataset()
341
+ .table(
342
+ *new_fields,
343
+ tablefmt=tablefmt,
344
+ pretty_labels=pretty_labels,
345
+ print_parameters=print_parameters,
346
+ )
347
+ )
348
+ # return (
349
+ # self.select(f"{selector_string}")
350
+ # .to_scenario_list()
351
+ # .table(*fields, tablefmt=tablefmt)
352
+ # )
353
+
263
354
  def _repr_html_(self) -> str:
264
- # from IPython.display import HTML
355
+ d = self._summary()
356
+ from edsl import Scenario
265
357
 
266
- json_str = json.dumps(self.to_dict(add_edsl_version=False)["data"], indent=4)
267
- return f"<pre>{json_str}</pre>"
358
+ footer = f"<a href={self.__documentation__}>(docs)</a>"
268
359
 
269
- def to_dict(self, sort=False, add_edsl_version=False) -> dict[str, Any]:
360
+ s = Scenario(d)
361
+ td = s.to_dataset().table(tablefmt="html")
362
+ return td._repr_html_() + footer
363
+
364
+ def to_dict(
365
+ self,
366
+ sort=False,
367
+ add_edsl_version=False,
368
+ include_cache=False,
369
+ include_task_history=False,
370
+ ) -> dict[str, Any]:
270
371
  from edsl.data.Cache import Cache
271
372
 
272
373
  if sort:
@@ -280,13 +381,21 @@ class Results(UserList, Mixins, Base):
280
381
  ],
281
382
  "survey": self.survey.to_dict(add_edsl_version=add_edsl_version),
282
383
  "created_columns": self.created_columns,
283
- "cache": (
284
- Cache()
285
- if not hasattr(self, "cache")
286
- else self.cache.to_dict(add_edsl_version=add_edsl_version)
287
- ),
288
- "task_history": self.task_history.to_dict(),
289
384
  }
385
+ if include_cache:
386
+ d.update(
387
+ {
388
+ "cache": (
389
+ Cache()
390
+ if not hasattr(self, "cache")
391
+ else self.cache.to_dict(add_edsl_version=add_edsl_version)
392
+ )
393
+ }
394
+ )
395
+
396
+ if self.task_history.has_unfixed_exceptions or include_task_history:
397
+ d.update({"task_history": self.task_history.to_dict()})
398
+
290
399
  if add_edsl_version:
291
400
  from edsl import __version__
292
401
 
@@ -375,7 +484,11 @@ class Results(UserList, Mixins, Base):
375
484
  cache=(
376
485
  Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
377
486
  ),
378
- task_history=TaskHistory.from_dict(data.get("task_history")),
487
+ task_history=(
488
+ TaskHistory.from_dict(data.get("task_history"))
489
+ if "task_history" in data
490
+ else TaskHistory(interviews=[])
491
+ ),
379
492
  )
380
493
  except Exception as e:
381
494
  raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
@@ -875,31 +988,19 @@ class Results(UserList, Mixins, Base):
875
988
 
876
989
  >>> r = Results.example()
877
990
  >>> r.sort_by('how_feeling', reverse=False).select('how_feeling').print()
878
- ┏━━━━━━━━━━━━━━┓
879
- ┃ answer ┃
880
- ┃ .how_feeling ┃
881
- ┡━━━━━━━━━━━━━━┩
882
- │ Great │
883
- ├──────────────┤
884
- │ OK │
885
- ├──────────────┤
886
- │ OK │
887
- ├──────────────┤
888
- │ Terrible │
889
- └──────────────┘
991
+ answer.how_feeling
992
+ --------------------
993
+ Great
994
+ OK
995
+ OK
996
+ Terrible
890
997
  >>> r.sort_by('how_feeling', reverse=True).select('how_feeling').print()
891
- ┏━━━━━━━━━━━━━━┓
892
- ┃ answer ┃
893
- ┃ .how_feeling ┃
894
- ┡━━━━━━━━━━━━━━┩
895
- │ Terrible │
896
- ├──────────────┤
897
- │ OK │
898
- ├──────────────┤
899
- │ OK │
900
- ├──────────────┤
901
- │ Great │
902
- └──────────────┘
998
+ answer.how_feeling
999
+ --------------------
1000
+ Terrible
1001
+ OK
1002
+ OK
1003
+ Great
903
1004
  """
904
1005
 
905
1006
  def to_numeric_if_possible(v):
@@ -932,12 +1033,9 @@ class Results(UserList, Mixins, Base):
932
1033
 
933
1034
  >>> r = Results.example()
934
1035
  >>> r.filter("how_feeling == 'Great'").select('how_feeling').print()
935
- ┏━━━━━━━━━━━━━━┓
936
- ┃ answer ┃
937
- ┃ .how_feeling ┃
938
- ┡━━━━━━━━━━━━━━┩
939
- │ Great │
940
- └──────────────┘
1036
+ answer.how_feeling
1037
+ --------------------
1038
+ Great
941
1039
 
942
1040
  Example usage: Using an OR operator in the filter expression.
943
1041
 
@@ -948,14 +1046,10 @@ class Results(UserList, Mixins, Base):
948
1046
  ...
949
1047
 
950
1048
  >>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling').print()
951
- ┏━━━━━━━━━━━━━━┓
952
- ┃ answer ┃
953
- ┃ .how_feeling ┃
954
- ┡━━━━━━━━━━━━━━┩
955
- │ Great │
956
- ├──────────────┤
957
- │ Terrible │
958
- └──────────────┘
1049
+ answer.how_feeling
1050
+ --------------------
1051
+ Great
1052
+ Terrible
959
1053
  """
960
1054
 
961
1055
  def has_single_equals(string):