edsl 0.1.28__py3-none-any.whl → 0.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. edsl/Base.py +18 -18
  2. edsl/__init__.py +24 -24
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +77 -41
  5. edsl/agents/AgentList.py +35 -6
  6. edsl/agents/Invigilator.py +19 -1
  7. edsl/agents/InvigilatorBase.py +15 -10
  8. edsl/agents/PromptConstructionMixin.py +342 -100
  9. edsl/agents/descriptors.py +2 -1
  10. edsl/base/Base.py +289 -0
  11. edsl/config.py +2 -1
  12. edsl/conjure/InputData.py +39 -8
  13. edsl/coop/coop.py +188 -151
  14. edsl/coop/utils.py +43 -75
  15. edsl/data/Cache.py +19 -5
  16. edsl/data/SQLiteDict.py +11 -3
  17. edsl/jobs/Answers.py +15 -1
  18. edsl/jobs/Jobs.py +92 -47
  19. edsl/jobs/buckets/ModelBuckets.py +4 -2
  20. edsl/jobs/buckets/TokenBucket.py +1 -2
  21. edsl/jobs/interviews/Interview.py +3 -9
  22. edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
  23. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +15 -10
  24. edsl/jobs/runners/JobsRunnerAsyncio.py +21 -25
  25. edsl/jobs/tasks/TaskHistory.py +4 -3
  26. edsl/language_models/LanguageModel.py +5 -11
  27. edsl/language_models/ModelList.py +3 -3
  28. edsl/language_models/repair.py +8 -7
  29. edsl/notebooks/Notebook.py +40 -3
  30. edsl/prompts/Prompt.py +31 -19
  31. edsl/questions/QuestionBase.py +38 -13
  32. edsl/questions/QuestionBudget.py +5 -6
  33. edsl/questions/QuestionCheckBox.py +7 -3
  34. edsl/questions/QuestionExtract.py +5 -3
  35. edsl/questions/QuestionFreeText.py +3 -3
  36. edsl/questions/QuestionFunctional.py +0 -3
  37. edsl/questions/QuestionList.py +3 -4
  38. edsl/questions/QuestionMultipleChoice.py +16 -8
  39. edsl/questions/QuestionNumerical.py +4 -3
  40. edsl/questions/QuestionRank.py +5 -3
  41. edsl/questions/__init__.py +4 -3
  42. edsl/questions/descriptors.py +4 -2
  43. edsl/questions/question_registry.py +20 -31
  44. edsl/questions/settings.py +1 -1
  45. edsl/results/Dataset.py +31 -0
  46. edsl/results/DatasetExportMixin.py +493 -0
  47. edsl/results/Result.py +22 -74
  48. edsl/results/Results.py +105 -67
  49. edsl/results/ResultsDBMixin.py +7 -3
  50. edsl/results/ResultsExportMixin.py +22 -537
  51. edsl/results/ResultsGGMixin.py +3 -3
  52. edsl/results/ResultsToolsMixin.py +5 -5
  53. edsl/scenarios/FileStore.py +140 -0
  54. edsl/scenarios/Scenario.py +5 -6
  55. edsl/scenarios/ScenarioList.py +44 -15
  56. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  57. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  58. edsl/scenarios/__init__.py +1 -0
  59. edsl/study/ObjectEntry.py +89 -13
  60. edsl/study/ProofOfWork.py +5 -2
  61. edsl/study/SnapShot.py +4 -8
  62. edsl/study/Study.py +21 -14
  63. edsl/study/__init__.py +2 -0
  64. edsl/surveys/MemoryPlan.py +11 -4
  65. edsl/surveys/Survey.py +46 -7
  66. edsl/surveys/SurveyExportMixin.py +4 -2
  67. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  68. edsl/tools/plotting.py +4 -2
  69. edsl/utilities/__init__.py +21 -21
  70. edsl/utilities/interface.py +66 -45
  71. edsl/utilities/utilities.py +11 -13
  72. {edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/METADATA +11 -10
  73. {edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/RECORD +75 -72
  74. edsl-0.1.28.dist-info/entry_points.txt +0 -3
  75. {edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
  76. {edsl-0.1.28.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/results/Results.py CHANGED
@@ -5,16 +5,10 @@ It is not typically instantiated directly, but is returned by the run method of
5
5
 
6
6
  from __future__ import annotations
7
7
  import json
8
- import hashlib
9
8
  import random
10
9
  from collections import UserList, defaultdict
11
10
  from typing import Optional, Callable, Any, Type, Union, List
12
11
 
13
- from pygments import highlight
14
- from pygments.lexers import JsonLexer
15
- from pygments.formatters import HtmlFormatter
16
- from IPython.display import HTML
17
-
18
12
  from simpleeval import EvalWithCompoundTypes
19
13
 
20
14
  from edsl.exceptions.results import (
@@ -24,29 +18,17 @@ from edsl.exceptions.results import (
24
18
  ResultsMutateError,
25
19
  ResultsFilterError,
26
20
  )
27
- from edsl.agents import Agent, AgentList
28
- from edsl.language_models.LanguageModel import LanguageModel
29
- from edsl.results.Dataset import Dataset
30
- from edsl.results.Result import Result
21
+
31
22
  from edsl.results.ResultsExportMixin import ResultsExportMixin
32
- from edsl.scenarios import Scenario
33
-
34
- # from edsl.scenarios.ScenarioList import ScenarioList
35
- from edsl.surveys import Survey
36
- from edsl.data.Cache import Cache
37
- from edsl.utilities import (
38
- is_valid_variable_name,
39
- shorten_string,
40
- )
41
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
42
- from edsl.utilities.utilities import dict_hash
43
23
  from edsl.results.ResultsToolsMixin import ResultsToolsMixin
44
-
45
24
  from edsl.results.ResultsDBMixin import ResultsDBMixin
46
25
  from edsl.results.ResultsGGMixin import ResultsGGMixin
26
+ from edsl.results.ResultsFetchMixin import ResultsFetchMixin
27
+
28
+ from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
29
+ from edsl.utilities.utilities import dict_hash
47
30
 
48
31
  from edsl.Base import Base
49
- from edsl.results.ResultsFetchMixin import ResultsFetchMixin
50
32
 
51
33
 
52
34
  class Mixins(
@@ -56,7 +38,22 @@ class Mixins(
56
38
  ResultsGGMixin,
57
39
  ResultsToolsMixin,
58
40
  ):
59
- pass
41
+ def print_long(self, max_rows=None) -> None:
42
+ """Print the results in long format.
43
+
44
+ >>> from edsl.results import Results
45
+ >>> r = Results.example()
46
+ >>> r.select('how_feeling').print_long(max_rows = 2)
47
+ ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
48
+ ┃ Result index ┃ Key ┃ Value ┃
49
+ ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
50
+ │ 0 │ how_feeling │ OK │
51
+ │ 1 │ how_feeling │ Great │
52
+ └──────────────┴─────────────┴───────┘
53
+ """
54
+ from edsl.utilities.interface import print_results_long
55
+
56
+ print_results_long(self, max_rows=max_rows)
60
57
 
61
58
 
62
59
  class Results(UserList, Mixins, Base):
@@ -84,10 +81,10 @@ class Results(UserList, Mixins, Base):
84
81
 
85
82
  def __init__(
86
83
  self,
87
- survey: Optional[Survey] = None,
88
- data: Optional[list[Result]] = None,
84
+ survey: Optional["Survey"] = None,
85
+ data: Optional[list["Result"]] = None,
89
86
  created_columns: Optional[list[str]] = None,
90
- cache: Optional[Cache] = None,
87
+ cache: Optional["Cache"] = None,
91
88
  job_uuid: Optional[str] = None,
92
89
  total_results: Optional[int] = None,
93
90
  ):
@@ -100,6 +97,8 @@ class Results(UserList, Mixins, Base):
100
97
  :param total_results: An integer representing the total number of results.
101
98
  """
102
99
  super().__init__(data)
100
+ from edsl.data.Cache import Cache
101
+
103
102
  self.survey = survey
104
103
  self.created_columns = created_columns or []
105
104
  self._job_uuid = job_uuid
@@ -125,6 +124,10 @@ class Results(UserList, Mixins, Base):
125
124
  raise TypeError("Invalid argument type")
126
125
 
127
126
  def _update_results(self) -> None:
127
+ from edsl import Agent, Scenario
128
+ from edsl.language_models import LanguageModel
129
+ from edsl.results import Result
130
+
128
131
  if self._job_uuid and len(self.data) < self._total_results:
129
132
  results = [
130
133
  Result(
@@ -165,16 +168,16 @@ class Results(UserList, Mixins, Base):
165
168
  )
166
169
 
167
170
  def __repr__(self) -> str:
168
- # return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
169
- return f"""Results object
170
- Size: {len(self.data)}.
171
- Survey questions: {[q.question_name for q in self.survey.questions]}.
172
- Created columns: {self.created_columns}
173
- Hash: {hash(self)}
174
- """
171
+ return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
175
172
 
176
173
  def _repr_html_(self) -> str:
174
+ from IPython.display import HTML
175
+
177
176
  json_str = json.dumps(self.to_dict()["data"], indent=4)
177
+ from pygments import highlight
178
+ from pygments.lexers import JsonLexer
179
+ from pygments.formatters import HtmlFormatter
180
+
178
181
  formatted_json = highlight(
179
182
  json_str,
180
183
  JsonLexer(),
@@ -183,6 +186,8 @@ class Results(UserList, Mixins, Base):
183
186
  return HTML(formatted_json).data
184
187
 
185
188
  def _to_dict(self, sort=False):
189
+ from edsl.data.Cache import Cache
190
+
186
191
  if sort:
187
192
  data = sorted([result for result in self.data], key=lambda x: hash(x))
188
193
  else:
@@ -232,6 +237,31 @@ class Results(UserList, Mixins, Base):
232
237
  def hashes(self) -> set:
233
238
  return set(hash(result) for result in self.data)
234
239
 
240
+ def sample(self, n: int) -> "Results":
241
+ """Return a random sample of the results.
242
+
243
+ :param n: The number of samples to return.
244
+
245
+ >>> from edsl.results import Results
246
+ >>> r = Results.example()
247
+ >>> len(r.sample(2))
248
+ 2
249
+ """
250
+ indices = None
251
+
252
+ for entry in self:
253
+ key, values = list(entry.items())[0]
254
+ if indices is None: # gets the indices for the first time
255
+ indices = list(range(len(values)))
256
+ sampled_indices = random.sample(indices, n)
257
+ if n > len(indices):
258
+ raise ValueError(
259
+ f"Cannot sample {n} items from a list of length {len(indices)}."
260
+ )
261
+ entry[key] = [values[i] for i in sampled_indices]
262
+
263
+ return self
264
+
235
265
  @classmethod
236
266
  @remove_edsl_version
237
267
  def from_dict(cls, data: dict[str, Any]) -> Results:
@@ -247,12 +277,20 @@ class Results(UserList, Mixins, Base):
247
277
  >>> r == r2
248
278
  True
249
279
  """
250
- results = cls(
251
- survey=Survey.from_dict(data["survey"]),
252
- data=[Result.from_dict(r) for r in data["data"]],
253
- created_columns=data.get("created_columns", None),
254
- cache=Cache.from_dict(data.get("cache")) if "cache" in data else Cache(),
255
- )
280
+ from edsl import Survey, Cache
281
+ from edsl.results.Result import Result
282
+
283
+ try:
284
+ results = cls(
285
+ survey=Survey.from_dict(data["survey"]),
286
+ data=[Result.from_dict(r) for r in data["data"]],
287
+ created_columns=data.get("created_columns", None),
288
+ cache=(
289
+ Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
290
+ ),
291
+ )
292
+ except Exception as e:
293
+ breakpoint()
256
294
  return results
257
295
 
258
296
  ######################
@@ -319,6 +357,8 @@ class Results(UserList, Mixins, Base):
319
357
  >>> r.answer_keys
320
358
  {'how_feeling': 'How are you this {{ period }}?', 'how_feeling_yesterday': 'How were you feeling yesterday {{ period }}?'}
321
359
  """
360
+ from edsl.utilities.utilities import shorten_string
361
+
322
362
  if not self.survey:
323
363
  raise Exception("Survey is not defined so no answer keys are available.")
324
364
 
@@ -333,7 +373,7 @@ class Results(UserList, Mixins, Base):
333
373
  return sorted_dict
334
374
 
335
375
  @property
336
- def agents(self) -> AgentList:
376
+ def agents(self) -> "AgentList":
337
377
  """Return a list of all of the agents in the Results.
338
378
 
339
379
  Example:
@@ -342,10 +382,12 @@ class Results(UserList, Mixins, Base):
342
382
  >>> r.agents
343
383
  AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'}), Agent(traits = {'status': 'Sad'})])
344
384
  """
385
+ from edsl import AgentList
386
+
345
387
  return AgentList([r.agent for r in self.data])
346
388
 
347
389
  @property
348
- def models(self) -> list[Type[LanguageModel]]:
390
+ def models(self) -> list[Type["LanguageModel"]]:
349
391
  """Return a list of all of the models in the Results.
350
392
 
351
393
  Example:
@@ -467,7 +509,7 @@ class Results(UserList, Mixins, Base):
467
509
  )
468
510
  return data_type, key
469
511
 
470
- def first(self) -> Result:
512
+ def first(self) -> "Result":
471
513
  """Return the first observation in the results.
472
514
 
473
515
  Example:
@@ -585,6 +627,8 @@ class Results(UserList, Mixins, Base):
585
627
  )
586
628
  raw_var_name, expression = new_var_string.split("=", 1)
587
629
  var_name = raw_var_name.strip()
630
+ from edsl.utilities.utilities import is_valid_variable_name
631
+
588
632
  if not is_valid_variable_name(var_name):
589
633
  raise ResultsInvalidNameError(f"{var_name} is not a valid variable name.")
590
634
 
@@ -596,7 +640,7 @@ class Results(UserList, Mixins, Base):
596
640
  names=result.combined_dict, functions=functions_dict
597
641
  )
598
642
 
599
- def new_result(old_result: Result, var_name: str) -> Result:
643
+ def new_result(old_result: "Result", var_name: str) -> "Result":
600
644
  evaluator = create_evaluator(old_result)
601
645
  value = evaluator.eval(expression)
602
646
  new_result = old_result.copy()
@@ -686,7 +730,7 @@ class Results(UserList, Mixins, Base):
686
730
 
687
731
  return Results(survey=self.survey, data=new_data, created_columns=None)
688
732
 
689
- def select(self, *columns: Union[str, list[str]]) -> Dataset:
733
+ def select(self, *columns: Union[str, list[str]]) -> "Dataset":
690
734
  """
691
735
  Select data from the results and format it.
692
736
 
@@ -698,6 +742,7 @@ class Results(UserList, Mixins, Base):
698
742
  >>> results.select('how_feeling')
699
743
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
700
744
  """
745
+
701
746
  if len(self) == 0:
702
747
  raise Exception("No data to select from---the Results object is empty.")
703
748
 
@@ -754,13 +799,22 @@ class Results(UserList, Mixins, Base):
754
799
  return items_in_order.index(single_key)
755
800
 
756
801
  sorted(new_data, key=sort_by_key_order)
802
+ from edsl.results.Dataset import Dataset
757
803
 
758
804
  return Dataset(new_data)
759
805
 
760
- def sort_by(self, columns, reverse: bool = False) -> Results:
806
+ def sort_by(self, *columns: str, reverse: bool = False) -> Results:
807
+ import warnings
808
+
809
+ warnings.warn(
810
+ "sort_by is deprecated. Use order_by instead.", DeprecationWarning
811
+ )
812
+ return self.order_by(*columns, reverse=reverse)
813
+
814
+ def order_by(self, *columns: str, reverse: bool = False) -> Results:
761
815
  """Sort the results by one or more columns.
762
816
 
763
- :param columns: A string or a list of strings that are column names.
817
+ :param columns: One or more column names as strings.
764
818
  :param reverse: A boolean that determines whether to sort in reverse order.
765
819
 
766
820
  Each column name can be a single key, e.g. "how_feeling", or a dot-separated string, e.g. "answer.how_feeling".
@@ -768,7 +822,7 @@ class Results(UserList, Mixins, Base):
768
822
  Example:
769
823
 
770
824
  >>> r = Results.example()
771
- >>> r.sort_by(['how_feeling'], reverse=False).select('how_feeling').print()
825
+ >>> r.sort_by('how_feeling', reverse=False).select('how_feeling').print()
772
826
  ┏━━━━━━━━━━━━━━┓
773
827
  ┃ answer ┃
774
828
  ┃ .how_feeling ┃
@@ -781,7 +835,7 @@ class Results(UserList, Mixins, Base):
781
835
  ├──────────────┤
782
836
  │ Terrible │
783
837
  └──────────────┘
784
- >>> r.sort_by(['how_feeling'], reverse=True).select('how_feeling').print()
838
+ >>> r.sort_by('how_feeling', reverse=True).select('how_feeling').print()
785
839
  ┏━━━━━━━━━━━━━━┓
786
840
  ┃ answer ┃
787
841
  ┃ .how_feeling ┃
@@ -795,8 +849,6 @@ class Results(UserList, Mixins, Base):
795
849
  │ Great │
796
850
  └──────────────┘
797
851
  """
798
- if isinstance(columns, str):
799
- columns = [columns]
800
852
 
801
853
  def to_numeric_if_possible(v):
802
854
  try:
@@ -805,28 +857,14 @@ class Results(UserList, Mixins, Base):
805
857
  return v
806
858
 
807
859
  def sort_key(item):
808
- # Create an empty list to store the key components for sorting
809
860
  key_components = []
810
-
811
- # Loop through each column specified in the sort
812
861
  for col in columns:
813
- # Parse the column into its data type and key
814
862
  data_type, key = self._parse_column(col)
815
-
816
- # Retrieve the value from the item based on the parsed data type and key
817
863
  value = item.get_value(data_type, key)
818
-
819
- # Convert the value to numeric if possible, and append it to the key components
820
864
  key_components.append(to_numeric_if_possible(value))
821
-
822
- # Convert the list of key components into a tuple to serve as the sorting key
823
865
  return tuple(key_components)
824
866
 
825
- new_data = sorted(
826
- self.data,
827
- key=sort_key,
828
- reverse=reverse,
829
- )
867
+ new_data = sorted(self.data, key=sort_key, reverse=reverse)
830
868
  return Results(survey=self.survey, data=new_data, created_columns=None)
831
869
 
832
870
  def filter(self, expression: str) -> Results:
@@ -920,7 +958,7 @@ class Results(UserList, Mixins, Base):
920
958
 
921
959
  :param debug: if False, uses actual API calls
922
960
  """
923
- from edsl.jobs import Jobs
961
+ from edsl.jobs.Jobs import Jobs
924
962
  from edsl.data.Cache import Cache
925
963
 
926
964
  c = Cache()
edsl/results/ResultsDBMixin.py CHANGED
@@ -1,8 +1,6 @@
1
1
  """Mixin for working with SQLite respresentation of a 'Results' object."""
2
2
 
3
- import pandas as pd
4
3
  import sqlite3
5
- from sqlalchemy import create_engine
6
4
  from enum import Enum
7
5
  from typing import Literal, Union, Optional
8
6
 
@@ -92,6 +90,8 @@ class ResultsDBMixin:
92
90
  conn.commit()
93
91
  return conn
94
92
  elif shape == SQLDataShape.WIDE:
93
+ from sqlalchemy import create_engine
94
+
95
95
  engine = create_engine("sqlite:///:memory:")
96
96
  df = self.to_pandas(remove_prefix=remove_prefix)
97
97
  df.to_sql("self", engine, index=False, if_exists="replace")
@@ -121,7 +121,7 @@ class ResultsDBMixin:
121
121
  to_list=False,
122
122
  to_latex=False,
123
123
  filename: Optional[str] = None,
124
- ) -> Union[pd.DataFrame, str]:
124
+ ) -> Union["pd.DataFrame", str]:
125
125
  """Execute a SQL query and return the results as a DataFrame.
126
126
 
127
127
  :param query: The SQL query to execute
@@ -151,6 +151,8 @@ class ResultsDBMixin:
151
151
  2 Terrible
152
152
  3 OK
153
153
  """
154
+ import pandas as pd
155
+
154
156
  shape_enum = self._get_shape_enum(shape)
155
157
 
156
158
  conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
@@ -205,6 +207,8 @@ class ResultsDBMixin:
205
207
  ...
206
208
  <BLANKLINE>
207
209
  """
210
+ import pandas as pd
211
+
208
212
  shape_enum = self._get_shape_enum(shape)
209
213
  conn = self._db(shape=shape_enum, remove_prefix=remove_prefix)
210
214