edsl 0.1.38__py3-none-any.whl → 0.1.38.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. edsl/Base.py +31 -60
  2. edsl/__version__.py +1 -1
  3. edsl/agents/Agent.py +9 -18
  4. edsl/agents/AgentList.py +8 -59
  5. edsl/agents/Invigilator.py +7 -18
  6. edsl/agents/InvigilatorBase.py +19 -0
  7. edsl/agents/PromptConstructor.py +4 -5
  8. edsl/config.py +0 -8
  9. edsl/coop/coop.py +7 -74
  10. edsl/data/Cache.py +2 -27
  11. edsl/data/CacheEntry.py +3 -8
  12. edsl/data/RemoteCacheSync.py +19 -0
  13. edsl/enums.py +0 -2
  14. edsl/inference_services/GoogleService.py +15 -7
  15. edsl/inference_services/registry.py +0 -2
  16. edsl/jobs/Jobs.py +548 -88
  17. edsl/jobs/interviews/Interview.py +11 -11
  18. edsl/jobs/runners/JobsRunnerAsyncio.py +35 -140
  19. edsl/jobs/runners/JobsRunnerStatus.py +2 -0
  20. edsl/jobs/tasks/TaskHistory.py +16 -15
  21. edsl/language_models/LanguageModel.py +84 -44
  22. edsl/language_models/ModelList.py +1 -47
  23. edsl/language_models/registry.py +4 -57
  24. edsl/prompts/Prompt.py +3 -8
  25. edsl/questions/QuestionBase.py +16 -20
  26. edsl/questions/QuestionExtract.py +4 -3
  27. edsl/questions/question_registry.py +6 -36
  28. edsl/results/Dataset.py +15 -146
  29. edsl/results/DatasetExportMixin.py +217 -231
  30. edsl/results/DatasetTree.py +4 -134
  31. edsl/results/Result.py +9 -18
  32. edsl/results/Results.py +51 -145
  33. edsl/scenarios/FileStore.py +13 -187
  34. edsl/scenarios/Scenario.py +4 -61
  35. edsl/scenarios/ScenarioList.py +62 -237
  36. edsl/surveys/Survey.py +2 -16
  37. edsl/surveys/SurveyFlowVisualizationMixin.py +9 -67
  38. edsl/surveys/instructions/Instruction.py +0 -12
  39. edsl/templates/error_reporting/interview_details.html +3 -3
  40. edsl/templates/error_reporting/interviews.html +9 -18
  41. edsl/utilities/utilities.py +0 -15
  42. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/METADATA +1 -2
  43. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/RECORD +45 -53
  44. edsl/inference_services/PerplexityService.py +0 -163
  45. edsl/jobs/JobsChecks.py +0 -147
  46. edsl/jobs/JobsPrompts.py +0 -268
  47. edsl/jobs/JobsRemoteInferenceHandler.py +0 -239
  48. edsl/results/CSSParameterizer.py +0 -108
  49. edsl/results/TableDisplay.py +0 -198
  50. edsl/results/table_display.css +0 -78
  51. edsl/scenarios/ScenarioJoin.py +0 -127
  52. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/LICENSE +0 -0
  53. {edsl-0.1.38.dist-info → edsl-0.1.38.dev2.dist-info}/WHEEL +0 -0
edsl/results/Dataset.py CHANGED
@@ -5,23 +5,19 @@ import random
5
5
  import json
6
6
  from collections import UserList
7
7
  from typing import Any, Union, Optional
8
- import sys
8
+
9
9
  import numpy as np
10
10
 
11
11
  from edsl.results.ResultsExportMixin import ResultsExportMixin
12
12
  from edsl.results.DatasetTree import Tree
13
- from edsl.results.TableDisplay import TableDisplay
14
13
 
15
14
 
16
15
  class Dataset(UserList, ResultsExportMixin):
17
16
  """A class to represent a dataset of observations."""
18
17
 
19
- def __init__(
20
- self, data: list[dict[str, Any]] = None, print_parameters: Optional[dict] = None
21
- ):
18
+ def __init__(self, data: list[dict[str, Any]] = None):
22
19
  """Initialize the dataset with the given data."""
23
20
  super().__init__(data)
24
- self.print_parameters = print_parameters
25
21
 
26
22
  def __len__(self) -> int:
27
23
  """Return the number of observations in the dataset.
@@ -36,7 +32,7 @@ class Dataset(UserList, ResultsExportMixin):
36
32
  _, values = list(self.data[0].items())[0]
37
33
  return len(values)
38
34
 
39
- def keys(self) -> list[str]:
35
+ def keys(self):
40
36
  """Return the keys of the first observation in the dataset.
41
37
 
42
38
  >>> d = Dataset([{'a.b':[1,2,3,4]}])
@@ -45,45 +41,10 @@ class Dataset(UserList, ResultsExportMixin):
45
41
  """
46
42
  return [list(o.keys())[0] for o in self]
47
43
 
48
- def filter(self, expression):
49
- return self.to_scenario_list().filter(expression).to_dataset()
50
-
51
44
  def __repr__(self) -> str:
52
45
  """Return a string representation of the dataset."""
53
46
  return f"Dataset({self.data})"
54
47
 
55
- def write(self, filename: str, tablefmt: Optional[str] = None) -> None:
56
- return self.table(tablefmt=tablefmt).write(filename)
57
-
58
- def _repr_html_(self):
59
- # headers, data = self._tabular()
60
- return self.table(print_parameters=self.print_parameters)._repr_html_()
61
- # return TableDisplay(headers=headers, data=data, raw_data_set=self)
62
-
63
- def _tabular(self) -> tuple[list[str], list[list[Any]]]:
64
- # Extract headers
65
- headers = []
66
- for entry in self.data:
67
- headers.extend(entry.keys())
68
- headers = list(dict.fromkeys(headers)) # Ensure unique headers
69
-
70
- # Extract data
71
- max_len = max(len(values) for entry in self.data for values in entry.values())
72
- rows = []
73
- for i in range(max_len):
74
- row = []
75
- for header in headers:
76
- for entry in self.data:
77
- if header in entry:
78
- values = entry[header]
79
- row.append(values[i] if i < len(values) else None)
80
- break
81
- else:
82
- row.append(None) # Default to None if header is missing
83
- rows.append(row)
84
-
85
- return headers, rows
86
-
87
48
  def _key_to_value(self, key: str) -> Any:
88
49
  """Retrieve the value associated with the given key from the dataset.
89
50
 
@@ -128,25 +89,7 @@ class Dataset(UserList, ResultsExportMixin):
128
89
 
129
90
  return get_values(self.data[0])[0]
130
91
 
131
- def print(self, pretty_labels=None, **kwargs):
132
- if "format" in kwargs:
133
- if kwargs["format"] not in ["html", "markdown", "rich", "latex"]:
134
- raise ValueError(f"Format '{kwargs['format']}' not supported.")
135
- if pretty_labels is None:
136
- pretty_labels = {}
137
- else:
138
- return self.rename(pretty_labels).print(**kwargs)
139
- return self.table()
140
-
141
- def rename(self, rename_dic) -> Dataset:
142
- new_data = []
143
- for observation in self.data:
144
- key, values = list(observation.items())[0]
145
- new_key = rename_dic.get(key, key)
146
- new_data.append({new_key: values})
147
- return Dataset(new_data)
148
-
149
- def select(self, *keys) -> Dataset:
92
+ def select(self, *keys):
150
93
  """Return a new dataset with only the selected keys.
151
94
 
152
95
  :param keys: The keys to select.
@@ -179,6 +122,12 @@ class Dataset(UserList, ResultsExportMixin):
179
122
  json.dumps(self.data)
180
123
  ) # janky but I want to make sure it's serializable & deserializable
181
124
 
125
+ def _repr_html_(self) -> str:
126
+ """Return an HTML representation of the dataset."""
127
+ from edsl.utilities.utilities import data_to_html
128
+
129
+ return data_to_html(self.data)
130
+
182
131
  def shuffle(self, seed=None) -> Dataset:
183
132
  """Return a new dataset with the observations shuffled.
184
133
 
@@ -200,9 +149,6 @@ class Dataset(UserList, ResultsExportMixin):
200
149
 
201
150
  return self
202
151
 
203
- def expand(self, field):
204
- return self.to_scenario_list().expand(field).to_dataset()
205
-
206
152
  def sample(
207
153
  self,
208
154
  n: int = None,
@@ -321,92 +267,15 @@ class Dataset(UserList, ResultsExportMixin):
321
267
 
322
268
  return Dataset(new_data)
323
269
 
324
- def tree(self, node_order: Optional[list[str]] = None) -> Tree:
270
+ @property
271
+ def tree(self):
325
272
  """Return a tree representation of the dataset.
326
273
 
327
274
  >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
328
- >>> d.tree()
329
- Tree(Dataset({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]}))
275
+ >>> d.tree.print_tree()
276
+ Tree has not been constructed yet.
330
277
  """
331
- return Tree(self, node_order=node_order)
332
-
333
- def table(
334
- self,
335
- *fields,
336
- tablefmt: Optional[str] = None,
337
- max_rows: Optional[int] = None,
338
- pretty_labels=None,
339
- print_parameters: Optional[dict] = None,
340
- ):
341
- if pretty_labels is not None:
342
- new_fields = []
343
- for field in fields:
344
- new_fields.append(pretty_labels.get(field, field))
345
- return self.rename(pretty_labels).table(
346
- *new_fields, tablefmt=tablefmt, max_rows=max_rows
347
- )
348
-
349
- self.print_parameters = print_parameters
350
-
351
- headers, data = self._tabular()
352
-
353
- if tablefmt is not None:
354
- from tabulate import tabulate_formats
355
-
356
- if tablefmt not in tabulate_formats:
357
- print(
358
- f"Error: The following table format is not supported: {tablefmt}",
359
- file=sys.stderr,
360
- )
361
- print(f"\nAvailable formats are: {tabulate_formats}", file=sys.stderr)
362
- return None
363
-
364
- if max_rows:
365
- if len(data) < max_rows:
366
- max_rows = None
367
-
368
- if fields:
369
- full_data = data
370
- data = []
371
- indices = []
372
- for field in fields:
373
- if field not in headers:
374
- print(
375
- f"Error: The following field was not found: {field}",
376
- file=sys.stderr,
377
- )
378
- print(f"\nAvailable fields are: {headers}", file=sys.stderr)
379
-
380
- # Optional: Suggest similar fields using difflib
381
- import difflib
382
-
383
- matches = difflib.get_close_matches(field, headers)
384
- if matches:
385
- print(f"\nDid you mean: {matches[0]} ?", file=sys.stderr)
386
- return None
387
- indices.append(headers.index(field))
388
- headers = fields
389
- for row in full_data:
390
- data.append([row[i] for i in indices])
391
-
392
- if max_rows is not None:
393
- if max_rows > len(data):
394
- raise ValueError(
395
- "max_rows cannot be greater than the number of rows in the dataset."
396
- )
397
- last_line = data[-1]
398
- spaces = len(data[max_rows])
399
- filler_line = ["." for i in range(spaces)]
400
- data = data[:max_rows]
401
- data.append(filler_line)
402
- data.append(last_line)
403
-
404
- return TableDisplay(
405
- data=data, headers=headers, tablefmt=tablefmt, raw_data_set=self
406
- )
407
-
408
- def summary(self):
409
- return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
278
+ return Tree(self)
410
279
 
411
280
  @classmethod
412
281
  def example(self):