edsl 0.1.38__py3-none-any.whl → 0.1.38.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. edsl/Base.py +31 -60
  2. edsl/__version__.py +1 -1
  3. edsl/agents/Agent.py +9 -18
  4. edsl/agents/AgentList.py +8 -59
  5. edsl/agents/Invigilator.py +7 -18
  6. edsl/agents/InvigilatorBase.py +19 -0
  7. edsl/agents/PromptConstructor.py +4 -5
  8. edsl/config.py +0 -8
  9. edsl/coop/coop.py +7 -74
  10. edsl/data/Cache.py +2 -27
  11. edsl/data/CacheEntry.py +3 -8
  12. edsl/data/RemoteCacheSync.py +19 -0
  13. edsl/enums.py +0 -2
  14. edsl/inference_services/GoogleService.py +15 -7
  15. edsl/inference_services/registry.py +0 -2
  16. edsl/jobs/Jobs.py +548 -88
  17. edsl/jobs/interviews/Interview.py +11 -11
  18. edsl/jobs/runners/JobsRunnerAsyncio.py +35 -140
  19. edsl/jobs/runners/JobsRunnerStatus.py +2 -0
  20. edsl/jobs/tasks/TaskHistory.py +16 -15
  21. edsl/language_models/LanguageModel.py +84 -44
  22. edsl/language_models/ModelList.py +1 -47
  23. edsl/language_models/registry.py +4 -57
  24. edsl/prompts/Prompt.py +3 -8
  25. edsl/questions/QuestionBase.py +16 -20
  26. edsl/questions/QuestionExtract.py +4 -3
  27. edsl/questions/question_registry.py +6 -36
  28. edsl/results/Dataset.py +15 -146
  29. edsl/results/DatasetExportMixin.py +217 -231
  30. edsl/results/DatasetTree.py +4 -134
  31. edsl/results/Result.py +9 -18
  32. edsl/results/Results.py +51 -145
  33. edsl/scenarios/FileStore.py +13 -187
  34. edsl/scenarios/Scenario.py +4 -61
  35. edsl/scenarios/ScenarioList.py +62 -237
  36. edsl/surveys/Survey.py +2 -16
  37. edsl/surveys/SurveyFlowVisualizationMixin.py +9 -67
  38. edsl/surveys/instructions/Instruction.py +0 -12
  39. edsl/templates/error_reporting/interview_details.html +3 -3
  40. edsl/templates/error_reporting/interviews.html +9 -18
  41. edsl/utilities/utilities.py +0 -15
  42. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/METADATA +1 -2
  43. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/RECORD +45 -53
  44. edsl/inference_services/PerplexityService.py +0 -163
  45. edsl/jobs/JobsChecks.py +0 -147
  46. edsl/jobs/JobsPrompts.py +0 -268
  47. edsl/jobs/JobsRemoteInferenceHandler.py +0 -239
  48. edsl/results/CSSParameterizer.py +0 -108
  49. edsl/results/TableDisplay.py +0 -198
  50. edsl/results/table_display.css +0 -78
  51. edsl/scenarios/ScenarioJoin.py +0 -127
  52. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/LICENSE +0 -0
  53. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/WHEEL +0 -0
@@ -1,18 +1,10 @@
1
- from typing import Dict, List, Any, Optional, List
1
+ from typing import Dict, List, Any, Optional
2
2
  from docx import Document
3
3
  from docx.shared import Inches, Pt
4
4
  from docx.enum.text import WD_ALIGN_PARAGRAPH
5
5
  from docx.enum.style import WD_STYLE_TYPE
6
6
 
7
7
 
8
- def is_hashable(v):
9
- try:
10
- hash(v)
11
- return True
12
- except TypeError:
13
- return False
14
-
15
-
16
8
  class TreeNode:
17
9
  def __init__(self, key=None, value=None):
18
10
  self.key = key
@@ -21,21 +13,16 @@ class TreeNode:
21
13
 
22
14
 
23
15
  class Tree:
24
- def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
16
+ def __init__(self, data: "Dataset"):
25
17
  d = {}
26
18
  for entry in data:
27
19
  d.update(entry)
28
20
  self.data = d
29
21
  self.root = None
30
22
 
31
- self.node_order = node_order
32
-
33
- self.construct_tree(node_order)
34
-
35
23
  def unique_values_by_keys(self) -> dict:
36
24
  unique_values = {}
37
- for key, raw_values in self.data.items():
38
- values = [v if is_hashable(v) else str(v) for v in raw_values]
25
+ for key, values in self.data.items():
39
26
  unique_values[key] = list(set(values))
40
27
  return unique_values
41
28
 
@@ -58,25 +45,15 @@ class Tree:
58
45
  current = self.root
59
46
  for level in node_order[:-1]:
60
47
  value = self.data[level][i]
61
- if not is_hashable(value):
62
- value = str(value)
63
48
  if value not in current.children:
64
49
  current.children[value] = TreeNode(key=level, value=value)
65
50
  current = current.children[value]
66
51
 
67
52
  leaf_key = node_order[-1]
68
53
  leaf_value = self.data[leaf_key][i]
69
- if not is_hashable(leaf_value):
70
- leaf_value = str(leaf_value)
71
54
  if leaf_value not in current.children:
72
55
  current.children[leaf_value] = TreeNode(key=leaf_key, value=leaf_value)
73
56
 
74
- def __repr__(self):
75
- if self.node_order is not None:
76
- return f"Tree(Dataset({self.data}), node_order={self.node_order})"
77
- else:
78
- return f"Tree(Dataset({self.data}))"
79
-
80
57
  def print_tree(
81
58
  self, node: Optional[TreeNode] = None, level: int = 0, print_keys: bool = False
82
59
  ):
@@ -94,10 +71,7 @@ class Tree:
94
71
  for child in node.children.values():
95
72
  self.print_tree(child, level + 1, print_keys)
96
73
 
97
- def to_docx(self, filename: Optional[str] = None):
98
- if filename is None:
99
- filename = "tree_structure.docx"
100
-
74
+ def to_docx(self, filename: str):
101
75
  doc = Document()
102
76
 
103
77
  # Create styles for headings
@@ -116,110 +90,6 @@ class Tree:
116
90
 
117
91
  self._add_to_docx(doc, self.root, 0)
118
92
  doc.save(filename)
119
- from edsl.utilities.utilities import file_notice
120
-
121
- file_notice(filename)
122
-
123
- def _repr_html_(self):
124
- """Returns an interactive HTML representation of the tree with collapsible sections."""
125
-
126
- # Generate a unique ID for this tree instance
127
- import uuid
128
-
129
- tree_id = f"tree_{uuid.uuid4().hex[:8]}"
130
-
131
- styles = f"""
132
- <div class="{tree_id}">
133
- <style>
134
- .{tree_id} details {{
135
- margin-left: 20px;
136
- }}
137
- .{tree_id} summary {{
138
- cursor: pointer;
139
- margin: 2px 0;
140
- }}
141
- .{tree_id} .value {{
142
- font-family: monospace;
143
- background: #f5f5f5;
144
- padding: 2px 6px;
145
- border-radius: 3px;
146
- margin: 1px 0;
147
- }}
148
- .{tree_id} .key {{
149
- color: #666;
150
- font-style: italic;
151
- }}
152
- </style>
153
- """
154
-
155
- def node_to_html(node, level=0, print_keys=True):
156
- if node is None:
157
- return "Tree has not been constructed yet."
158
-
159
- html = []
160
-
161
- if node.value is not None:
162
- # Create the node content
163
- content = []
164
- if print_keys and node.key is not None:
165
- content.append(f'<span class="key">{node.key}: </span>')
166
- content.append(f'<span class="value">{node.value}</span>')
167
- content_html = "".join(content)
168
-
169
- if node.children:
170
- # Node with children
171
- html.append(f'<details {"open" if level < 1 else ""}>')
172
- html.append(f"<summary>{content_html}</summary>")
173
- for child in node.children.values():
174
- html.append(node_to_html(child, level + 1, print_keys))
175
- html.append("</details>")
176
- else:
177
- # Leaf node
178
- html.append(f"<div>{content_html}</div>")
179
- else:
180
- # Root node with no value
181
- if node.children:
182
- for child in node.children.values():
183
- html.append(node_to_html(child, level, print_keys))
184
-
185
- return "\n".join(html)
186
-
187
- tree_html = node_to_html(self.root)
188
- return f"{styles}{tree_html}</div>"
189
-
190
- # def _repr_html_(self):
191
- # """Returns an HTML representation of the tree, following the same logic as print_tree."""
192
- # styles = """
193
- # <style>
194
- # .tree-container {
195
- # font-family: monospace;
196
- # white-space: pre;
197
- # margin: 10px;
198
- # }
199
- # </style>
200
- # """
201
-
202
- # def node_to_html(node, level=0, print_keys=False):
203
- # if node is None:
204
- # node = self.root
205
- # if node is None:
206
- # return "Tree has not been constructed yet."
207
-
208
- # html = []
209
- # if node.value is not None:
210
- # indent = "&nbsp;" * 2 * level # Using &nbsp; for HTML spaces
211
- # if print_keys and node.key is not None:
212
- # html.append(f"{indent}{node.key}: {node.value}<br>")
213
- # else:
214
- # html.append(f"{indent}{node.value}<br>")
215
-
216
- # for child in node.children.values():
217
- # html.append(node_to_html(child, level + 1, print_keys))
218
-
219
- # return "".join(html)
220
-
221
- # tree_html = node_to_html(self.root)
222
- # return f'<div class="tree-container">{tree_html}</div>{styles}'
223
93
 
224
94
  def _add_to_docx(self, doc, node: TreeNode, level: int):
225
95
  if node.value is not None:
edsl/results/Result.py CHANGED
@@ -137,15 +137,6 @@ class Result(Base, UserDict):
137
137
  self._combined_dict = None
138
138
  self._problem_keys = None
139
139
 
140
- def _repr_html_(self):
141
- # d = self.to_dict(add_edsl_version=False)
142
- d = self.to_dict(add_edsl_version=False)
143
- data = [[k, v] for k, v in d.items()]
144
- from tabulate import tabulate
145
-
146
- table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
147
- return f"<pre>{table}</pre>"
148
-
149
140
  ###############
150
141
  # Used in Results
151
142
  ###############
@@ -165,15 +156,15 @@ class Result(Base, UserDict):
165
156
  if key in self.question_to_attributes:
166
157
  # You might be tempted to just use the naked key
167
158
  # but this is a bad idea because it pollutes the namespace
168
- question_text_dict[
169
- key + "_question_text"
170
- ] = self.question_to_attributes[key]["question_text"]
171
- question_options_dict[
172
- key + "_question_options"
173
- ] = self.question_to_attributes[key]["question_options"]
174
- question_type_dict[
175
- key + "_question_type"
176
- ] = self.question_to_attributes[key]["question_type"]
159
+ question_text_dict[key + "_question_text"] = (
160
+ self.question_to_attributes[key]["question_text"]
161
+ )
162
+ question_options_dict[key + "_question_options"] = (
163
+ self.question_to_attributes[key]["question_options"]
164
+ )
165
+ question_type_dict[key + "_question_type"] = (
166
+ self.question_to_attributes[key]["question_type"]
167
+ )
177
168
 
178
169
  return {
179
170
  "agent": self.agent.traits
edsl/results/Results.py CHANGED
@@ -32,7 +32,7 @@ from edsl.results.ResultsDBMixin import ResultsDBMixin
32
32
  from edsl.results.ResultsGGMixin import ResultsGGMixin
33
33
  from edsl.results.ResultsFetchMixin import ResultsFetchMixin
34
34
 
35
- from edsl.utilities.decorators import remove_edsl_version
35
+ from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
36
36
  from edsl.utilities.utilities import dict_hash
37
37
 
38
38
 
@@ -46,9 +46,6 @@ class Mixins(
46
46
  ResultsGGMixin,
47
47
  ResultsToolsMixin,
48
48
  ):
49
- def long(self):
50
- return self.table().long()
51
-
52
49
  def print_long(self, max_rows: int = None) -> None:
53
50
  """Print the results in long format.
54
51
 
@@ -76,8 +73,6 @@ class Results(UserList, Mixins, Base):
76
73
  It also has a list of created_columns, which are columns that have been created with `mutate` and are not part of the original data.
77
74
  """
78
75
 
79
- __documentation__ = "https://docs.expectedparrot.com/en/latest/results.html"
80
-
81
76
  known_data_types = [
82
77
  "answer",
83
78
  "scenario",
@@ -126,55 +121,13 @@ class Results(UserList, Mixins, Base):
126
121
  if hasattr(self, "_add_output_functions"):
127
122
  self._add_output_functions()
128
123
 
129
- def _summary(self) -> dict:
130
- import reprlib
131
-
132
- # import yaml
133
-
134
- d = {
135
- "EDSL Class": "Results",
136
- # "docs_url": self.__documentation__,
137
- "# of agents": len(set(self.agents)),
138
- "# of distinct models": len(set(self.models)),
139
- "# of observations": len(self),
140
- "# Scenarios": len(set(self.scenarios)),
141
- "Survey Length (# questions)": len(self.survey),
142
- "Survey question names": reprlib.repr(self.survey.question_names),
143
- "Object hash": hash(self),
144
- }
145
- return d
146
-
147
- def compute_job_cost(self, include_cached_responses_in_cost=False) -> float:
148
- """
149
- Computes the cost of a completed job in USD.
150
- """
151
- total_cost = 0
152
- for result in self:
153
- for key in result.raw_model_response:
154
- if key.endswith("_cost"):
155
- result_cost = result.raw_model_response[key]
156
-
157
- question_name = key.removesuffix("_cost")
158
- cache_used = result.cache_used_dict[question_name]
159
-
160
- if isinstance(result_cost, (int, float)):
161
- if include_cached_responses_in_cost:
162
- total_cost += result_cost
163
- elif not include_cached_responses_in_cost and not cache_used:
164
- total_cost += result_cost
165
-
166
- return total_cost
167
-
168
124
  def leaves(self):
169
125
  leaves = []
170
126
  for result in self:
171
127
  leaves.extend(result.leaves())
172
128
  return leaves
173
129
 
174
- def tree(self, node_list: Optional[List[str]] = None):
175
- return self.to_scenario_list().tree(node_list)
176
-
177
- def interactive_tree(
130
+ def tree(
178
131
  self,
179
132
  fold_attributes: Optional[List[str]] = None,
180
133
  drop: Optional[List[str]] = None,
@@ -307,67 +260,13 @@ class Results(UserList, Mixins, Base):
307
260
 
308
261
  return f"Results(data = {reprlib.repr(self.data)}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
309
262
 
310
- def table(
311
- self,
312
- # selector_string: Optional[str] = "*.*",
313
- *fields,
314
- tablefmt: Optional[str] = None,
315
- pretty_labels: Optional[dict] = None,
316
- print_parameters: Optional[dict] = None,
317
- ):
318
- new_fields = []
319
- for field in fields:
320
- if "." in field:
321
- data_type, key = field.split(".")
322
- if data_type not in self.known_data_types:
323
- raise ResultsInvalidNameError(
324
- f"{data_type} is not a valid data type. Must be in {self.known_data_types}"
325
- )
326
- if key == "*":
327
- for k in self._data_type_to_keys[data_type]:
328
- new_fields.append(k)
329
- else:
330
- if key not in self._key_to_data_type:
331
- raise ResultsColumnNotFoundError(
332
- f"{key} is not a valid key. Must be in {self._key_to_data_type}"
333
- )
334
- new_fields.append(key)
335
- else:
336
- new_fields.append(field)
337
-
338
- return (
339
- self.to_scenario_list()
340
- .to_dataset()
341
- .table(
342
- *new_fields,
343
- tablefmt=tablefmt,
344
- pretty_labels=pretty_labels,
345
- print_parameters=print_parameters,
346
- )
347
- )
348
- # return (
349
- # self.select(f"{selector_string}")
350
- # .to_scenario_list()
351
- # .table(*fields, tablefmt=tablefmt)
352
- # )
353
-
354
263
  def _repr_html_(self) -> str:
355
- d = self._summary()
356
- from edsl import Scenario
264
+ # from IPython.display import HTML
357
265
 
358
- footer = f"<a href={self.__documentation__}>(docs)</a>"
266
+ json_str = json.dumps(self.to_dict(add_edsl_version=False)["data"], indent=4)
267
+ return f"<pre>{json_str}</pre>"
359
268
 
360
- s = Scenario(d)
361
- td = s.to_dataset().table(tablefmt="html")
362
- return td._repr_html_() + footer
363
-
364
- def to_dict(
365
- self,
366
- sort=False,
367
- add_edsl_version=False,
368
- include_cache=False,
369
- include_task_history=False,
370
- ) -> dict[str, Any]:
269
+ def to_dict(self, sort=False, add_edsl_version=False) -> dict[str, Any]:
371
270
  from edsl.data.Cache import Cache
372
271
 
373
272
  if sort:
@@ -381,21 +280,13 @@ class Results(UserList, Mixins, Base):
381
280
  ],
382
281
  "survey": self.survey.to_dict(add_edsl_version=add_edsl_version),
383
282
  "created_columns": self.created_columns,
283
+ "cache": (
284
+ Cache()
285
+ if not hasattr(self, "cache")
286
+ else self.cache.to_dict(add_edsl_version=add_edsl_version)
287
+ ),
288
+ "task_history": self.task_history.to_dict(),
384
289
  }
385
- if include_cache:
386
- d.update(
387
- {
388
- "cache": (
389
- Cache()
390
- if not hasattr(self, "cache")
391
- else self.cache.to_dict(add_edsl_version=add_edsl_version)
392
- )
393
- }
394
- )
395
-
396
- if self.task_history.has_unfixed_exceptions or include_task_history:
397
- d.update({"task_history": self.task_history.to_dict()})
398
-
399
290
  if add_edsl_version:
400
291
  from edsl import __version__
401
292
 
@@ -484,11 +375,7 @@ class Results(UserList, Mixins, Base):
484
375
  cache=(
485
376
  Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
486
377
  ),
487
- task_history=(
488
- TaskHistory.from_dict(data.get("task_history"))
489
- if "task_history" in data
490
- else TaskHistory(interviews=[])
491
- ),
378
+ task_history=TaskHistory.from_dict(data.get("task_history")),
492
379
  )
493
380
  except Exception as e:
494
381
  raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
@@ -988,19 +875,31 @@ class Results(UserList, Mixins, Base):
988
875
 
989
876
  >>> r = Results.example()
990
877
  >>> r.sort_by('how_feeling', reverse=False).select('how_feeling').print()
991
- answer.how_feeling
992
- --------------------
993
- Great
994
- OK
995
- OK
996
- Terrible
878
+ ┏━━━━━━━━━━━━━━┓
879
+ ┃ answer ┃
880
+ ┃ .how_feeling ┃
881
+ ┡━━━━━━━━━━━━━━┩
882
+ │ Great │
883
+ ├──────────────┤
884
+ │ OK │
885
+ ├──────────────┤
886
+ │ OK │
887
+ ├──────────────┤
888
+ │ Terrible │
889
+ └──────────────┘
997
890
  >>> r.sort_by('how_feeling', reverse=True).select('how_feeling').print()
998
- answer.how_feeling
999
- --------------------
1000
- Terrible
1001
- OK
1002
- OK
1003
- Great
891
+ ┏━━━━━━━━━━━━━━┓
892
+ ┃ answer ┃
893
+ ┃ .how_feeling ┃
894
+ ┡━━━━━━━━━━━━━━┩
895
+ │ Terrible │
896
+ ├──────────────┤
897
+ │ OK │
898
+ ├──────────────┤
899
+ │ OK │
900
+ ├──────────────┤
901
+ │ Great │
902
+ └──────────────┘
1004
903
  """
1005
904
 
1006
905
  def to_numeric_if_possible(v):
@@ -1033,9 +932,12 @@ class Results(UserList, Mixins, Base):
1033
932
 
1034
933
  >>> r = Results.example()
1035
934
  >>> r.filter("how_feeling == 'Great'").select('how_feeling').print()
1036
- answer.how_feeling
1037
- --------------------
1038
- Great
935
+ ┏━━━━━━━━━━━━━━┓
936
+ ┃ answer ┃
937
+ ┃ .how_feeling ┃
938
+ ┡━━━━━━━━━━━━━━┩
939
+ │ Great │
940
+ └──────────────┘
1039
941
 
1040
942
  Example usage: Using an OR operator in the filter expression.
1041
943
 
@@ -1046,10 +948,14 @@ class Results(UserList, Mixins, Base):
1046
948
  ...
1047
949
 
1048
950
  >>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling').print()
1049
- answer.how_feeling
1050
- --------------------
1051
- Great
1052
- Terrible
951
+ ┏━━━━━━━━━━━━━━┓
952
+ ┃ answer ┃
953
+ ┃ .how_feeling ┃
954
+ ┡━━━━━━━━━━━━━━┩
955
+ │ Great │
956
+ ├──────────────┤
957
+ │ Terrible │
958
+ └──────────────┘
1053
959
  """
1054
960
 
1055
961
  def has_single_equals(string):