edsl 0.1.29.dev6__py3-none-any.whl → 0.1.30.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. edsl/Base.py +6 -3
  2. edsl/__init__.py +23 -23
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +35 -34
  5. edsl/agents/AgentList.py +16 -5
  6. edsl/agents/Invigilator.py +19 -1
  7. edsl/agents/descriptors.py +2 -1
  8. edsl/base/Base.py +289 -0
  9. edsl/config.py +2 -1
  10. edsl/coop/utils.py +28 -1
  11. edsl/data/Cache.py +19 -5
  12. edsl/data/SQLiteDict.py +11 -3
  13. edsl/jobs/Answers.py +15 -1
  14. edsl/jobs/Jobs.py +69 -31
  15. edsl/jobs/buckets/ModelBuckets.py +4 -2
  16. edsl/jobs/buckets/TokenBucket.py +1 -2
  17. edsl/jobs/interviews/Interview.py +0 -6
  18. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +9 -5
  19. edsl/jobs/runners/JobsRunnerAsyncio.py +12 -16
  20. edsl/jobs/tasks/TaskHistory.py +4 -3
  21. edsl/language_models/LanguageModel.py +5 -11
  22. edsl/language_models/ModelList.py +1 -1
  23. edsl/language_models/repair.py +8 -7
  24. edsl/notebooks/Notebook.py +9 -3
  25. edsl/questions/QuestionBase.py +6 -2
  26. edsl/questions/QuestionBudget.py +5 -6
  27. edsl/questions/QuestionCheckBox.py +7 -3
  28. edsl/questions/QuestionExtract.py +5 -3
  29. edsl/questions/QuestionFreeText.py +3 -3
  30. edsl/questions/QuestionFunctional.py +0 -3
  31. edsl/questions/QuestionList.py +3 -4
  32. edsl/questions/QuestionMultipleChoice.py +12 -5
  33. edsl/questions/QuestionNumerical.py +4 -3
  34. edsl/questions/QuestionRank.py +5 -3
  35. edsl/questions/__init__.py +4 -3
  36. edsl/questions/descriptors.py +4 -2
  37. edsl/results/DatasetExportMixin.py +491 -0
  38. edsl/results/Result.py +13 -65
  39. edsl/results/Results.py +91 -39
  40. edsl/results/ResultsDBMixin.py +7 -3
  41. edsl/results/ResultsExportMixin.py +22 -537
  42. edsl/results/ResultsGGMixin.py +3 -3
  43. edsl/results/ResultsToolsMixin.py +1 -4
  44. edsl/scenarios/FileStore.py +140 -0
  45. edsl/scenarios/Scenario.py +5 -6
  46. edsl/scenarios/ScenarioList.py +17 -8
  47. edsl/scenarios/ScenarioListExportMixin.py +32 -0
  48. edsl/scenarios/ScenarioListPdfMixin.py +2 -1
  49. edsl/scenarios/__init__.py +1 -0
  50. edsl/surveys/MemoryPlan.py +11 -4
  51. edsl/surveys/Survey.py +9 -4
  52. edsl/surveys/SurveyExportMixin.py +4 -2
  53. edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
  54. edsl/utilities/__init__.py +21 -21
  55. edsl/utilities/interface.py +66 -45
  56. edsl/utilities/utilities.py +11 -13
  57. {edsl-0.1.29.dev6.dist-info → edsl-0.1.30.dev1.dist-info}/METADATA +1 -1
  58. {edsl-0.1.29.dev6.dist-info → edsl-0.1.30.dev1.dist-info}/RECORD +60 -56
  59. {edsl-0.1.29.dev6.dist-info → edsl-0.1.30.dev1.dist-info}/LICENSE +0 -0
  60. {edsl-0.1.29.dev6.dist-info → edsl-0.1.30.dev1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,140 @@
1
+ from edsl import Scenario
2
+ import base64
3
+ import io
4
+ import tempfile
5
+ from typing import Optional
6
+
7
+
8
class FileStore(Scenario):
    """A Scenario that carries the contents of a file as a base64 string.

    The underlying scenario dictionary has four keys: ``filename``,
    ``base64_string``, ``binary`` and ``suffix``. The same values are also
    kept as instance attributes.
    """

    def __init__(
        self,
        filename: str,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Build the store, reading ``filename`` unless ``base64_string`` is given.

        :param filename: Path of the file to store.
        :param binary: Whether the content is binary. When falsy, the file is
            first tried as UTF-8 text and flagged binary if that fails.
        :param suffix: File suffix (including the dot). Defaults to the
            extension of ``filename``, or ``""`` when it has none.
        :param base64_string: Already-encoded content. When provided (and
            non-empty) the file on disk is not read.
        """
        # Local import: the module's import block does not bring in ``os``.
        import os

        self.filename = filename
        # splitext only inspects the last path component and yields "" when
        # there is no extension, unlike splitting the whole path on ".".
        self.suffix = suffix or os.path.splitext(filename)[1]
        # Must be set before encoding: the encoder reads and may update it.
        self.binary = binary or False
        self.base64_string = base64_string or self.encode_file_to_base64_string(
            filename
        )
        super().__init__(
            {
                "filename": self.filename,
                "base64_string": self.base64_string,
                "binary": self.binary,
                "suffix": self.suffix,
            }
        )

    @classmethod
    def from_dict(cls, d):
        """Rebuild a store from a dict with the four scenario keys.

        Extra keys in ``d`` are ignored. The arguments are passed positionally
        in the order ``filename, binary, suffix, base64_string``.
        """
        return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])

    def encode_file_to_base64_string(self, file_path):
        """Read ``file_path`` and return its content as a base64 string.

        Unless the instance is already flagged binary, the file is read as
        UTF-8 text. If that decoding fails, the raw bytes are used instead
        and ``self.binary`` is set to True.

        :param file_path: Path of the file to read.
        :return: Base64 text of the file content.
        """
        payload = None
        if not getattr(self, "binary", False):
            try:
                # Decode explicitly as UTF-8: every reader in this class
                # decodes with UTF-8, so the locale default must not be used.
                with open(file_path, "r", encoding="utf-8") as text_file:
                    payload = text_file.read().encode("utf-8")
            except UnicodeDecodeError:
                self.binary = True
        if payload is None:
            # Binary content (declared or detected): keep the bytes verbatim.
            with open(file_path, "rb") as binary_file:
                payload = binary_file.read()
        return base64.b64encode(payload).decode("utf-8")

    def open(self):
        """Return an in-memory file object over the stored content.

        :return: ``io.BytesIO`` when ``self.binary`` is true, otherwise
            ``io.StringIO``.
        """
        if self.binary:
            return self.base64_to_file(self["base64_string"], is_binary=True)
        return self.base64_to_text_file(self["base64_string"])

    @staticmethod
    def base64_to_text_file(base64_string):
        """Decode ``base64_string`` as UTF-8 text and return it as ``io.StringIO``."""
        decoded_text = base64.b64decode(base64_string).decode("utf-8")
        return io.StringIO(decoded_text)

    @staticmethod
    def base64_to_file(base64_string, is_binary=True):
        """Decode ``base64_string`` into an in-memory file object.

        :param base64_string: Base64 text to decode.
        :param is_binary: When true return ``io.BytesIO`` over the raw bytes;
            otherwise decode the bytes as UTF-8 and return ``io.StringIO``.
        """
        file_data = base64.b64decode(base64_string)
        if is_binary:
            return io.BytesIO(file_data)
        return io.StringIO(file_data.decode("utf-8"))

    def to_tempfile(self, suffix=None):
        """Write the stored content to a new temporary file and return its path.

        The file is created with ``delete=False``, so it stays on disk after
        this call.

        :param suffix: Suffix for the temporary file; defaults to ``self.suffix``.
        :return: Path of the temporary file.
        """
        if suffix is None:
            suffix = self.suffix
        # NamedTemporaryFile opens in binary mode by default, so always write
        # the decoded bytes; writing text read from a StringIO would fail.
        payload = base64.b64decode(self["base64_string"])
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(payload)
        return temp_file.name

    def push(self, description=None):
        """Push the content as a plain ``Scenario`` and return the push result.

        :param description: Description passed to ``Scenario.push``; defaults
            to ``"File: "`` followed by the stored filename.
        """
        scenario_version = Scenario.from_dict(self.to_dict())
        if description is None:
            description = "File: " + self["filename"]
        info = scenario_version.push(description=description)
        return info

    @classmethod
    def pull(cls, uuid):
        """Pull the ``Scenario`` identified by ``uuid`` and rebuild a store from it."""
        scenario_version = Scenario.pull(uuid)
        return cls.from_dict(scenario_version.to_dict())
117
+
118
+
119
class CSVFileStore(FileStore):
    """FileStore whose suffix defaults to ``.csv``."""

    def __init__(self, filename, binary=None, suffix=None, base64_string=None):
        """Create the store for ``filename``.

        The optional parameters mirror ``FileStore.__init__``. They are needed
        because the inherited ``from_dict`` calls
        ``cls(filename, binary, suffix, base64_string)``; with a one-argument
        constructor that call fails for this subclass.

        :param filename: Path of the CSV file.
        :param binary: Forwarded to ``FileStore.__init__``.
        :param suffix: Stored suffix; ``.csv`` when omitted or empty.
        :param base64_string: Forwarded to ``FileStore.__init__``.
        """
        super().__init__(
            filename,
            binary=binary,
            suffix=suffix or ".csv",
            base64_string=base64_string,
        )
122
+
123
+
124
class PDFFileStore(FileStore):
    """FileStore whose suffix defaults to ``.pdf``."""

    def __init__(self, filename, binary=None, suffix=None, base64_string=None):
        """Create the store for ``filename``.

        The optional parameters mirror ``FileStore.__init__``. They are needed
        because the inherited ``from_dict`` calls
        ``cls(filename, binary, suffix, base64_string)``; with a one-argument
        constructor that call fails for this subclass.

        :param filename: Path of the PDF file.
        :param binary: Forwarded to ``FileStore.__init__``.
        :param suffix: Stored suffix; ``.pdf`` when omitted or empty.
        :param base64_string: Forwarded to ``FileStore.__init__``.
        """
        super().__init__(
            filename,
            binary=binary,
            suffix=suffix or ".pdf",
            base64_string=base64_string,
        )
127
+
128
+
129
if __name__ == "__main__":
    # Manual smoke test: wrap "paper.pdf" from the working directory in a
    # PDFFileStore and push it.
    pdf_store = PDFFileStore("paper.pdf")
    pdf_store.push()
@@ -1,17 +1,12 @@
1
1
  """A Scenario is a dictionary with a key/value to parameterize a question."""
2
2
 
3
+ import time
3
4
  import copy
4
5
  from collections import UserDict
5
6
  from typing import Union, List, Optional, Generator
6
7
  import base64
7
8
  import hashlib
8
- import json
9
-
10
- import fitz # PyMuPDF
11
9
  import os
12
- import subprocess
13
-
14
- from rich.table import Table
15
10
 
16
11
  from edsl.Base import Base
17
12
  from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
@@ -217,6 +212,8 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
217
212
 
218
213
  @classmethod
219
214
  def from_pdf(cls, pdf_path):
215
+ import fitz # PyMuPDF
216
+
220
217
  # Ensure the file exists
221
218
  if not os.path.exists(pdf_path):
222
219
  raise FileNotFoundError(f"The file {pdf_path} does not exist.")
@@ -404,6 +401,8 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
404
401
 
405
402
  def rich_print(self) -> "Table":
406
403
  """Display an object as a rich table."""
404
+ from rich.table import Table
405
+
407
406
  table_data, column_names = self._table()
408
407
  table = Table(title=f"{self.__class__.__name__} Attributes")
409
408
  for column in column_names:
@@ -5,25 +5,22 @@ import csv
5
5
  import random
6
6
  from collections import UserList, Counter
7
7
  from collections.abc import Iterable
8
-
9
8
  from typing import Any, Optional, Union, List
10
9
 
11
- from rich.table import Table
12
10
  from simpleeval import EvalWithCompoundTypes
13
11
 
14
- from edsl.scenarios.Scenario import Scenario
15
12
  from edsl.Base import Base
16
13
  from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
14
+ from edsl.scenarios.Scenario import Scenario
17
15
  from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
16
+ from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
18
17
 
19
- from edsl.utilities.interface import print_scenario_list
20
18
 
21
- from edsl.utilities import is_valid_variable_name
19
class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
    """Combine the PDF and export mixins into the single mixin base of ScenarioList."""
22
21
 
23
- from edsl.results.ResultsExportMixin import ResultsExportMixin
24
22
 
25
-
26
- class ScenarioList(Base, UserList, ScenarioListPdfMixin, ResultsExportMixin):
23
+ class ScenarioList(Base, UserList, ScenarioListMixin):
27
24
  """Class for creating a list of scenarios to be used in a survey."""
28
25
 
29
26
  def __init__(self, data: Optional[list] = None):
@@ -157,6 +154,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin, ResultsExportMixin):
157
154
  )
158
155
  raw_var_name, expression = new_var_string.split("=", 1)
159
156
  var_name = raw_var_name.strip()
157
+ from edsl.utilities.utilities import is_valid_variable_name
158
+
160
159
  if not is_valid_variable_name(var_name):
161
160
  raise Exception(f"{var_name} is not a valid variable name.")
162
161
 
@@ -376,6 +375,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin, ResultsExportMixin):
376
375
  >>> scenario_list[1]['age']
377
376
  '25'
378
377
  """
378
+ from edsl.scenarios.Scenario import Scenario
379
+
379
380
  observations = []
380
381
  with open(filename, "r") as f:
381
382
  reader = csv.reader(f)
@@ -413,12 +414,16 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin, ResultsExportMixin):
413
414
  ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
414
415
 
415
416
  """
417
+ from edsl.scenarios.Scenario import Scenario
418
+
416
419
  return cls([Scenario(s) for s in scenario_dicts_list])
417
420
 
418
421
  @classmethod
419
422
  @remove_edsl_version
420
423
  def from_dict(cls, data) -> ScenarioList:
421
424
  """Create a `ScenarioList` from a dictionary."""
425
+ from edsl.scenarios.Scenario import Scenario
426
+
422
427
  return cls([Scenario.from_dict(s) for s in data["scenarios"]])
423
428
 
424
429
  def code(self) -> str:
@@ -443,6 +448,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin, ResultsExportMixin):
443
448
 
444
449
  def rich_print(self) -> None:
445
450
  """Display an object as a table."""
451
+ from rich.table import Table
452
+
446
453
  table = Table(title="ScenarioList")
447
454
  table.add_column("Index", style="bold")
448
455
  table.add_column("Scenario")
@@ -457,6 +464,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin, ResultsExportMixin):
457
464
  pretty_labels: Optional[dict] = None,
458
465
  filename: str = None,
459
466
  ):
467
+ from edsl.utilities.interface import print_scenario_list
468
+
460
469
  print_scenario_list(self[:max_rows])
461
470
 
462
471
  def __getitem__(self, key: Union[int, slice]) -> Any:
@@ -0,0 +1,32 @@
1
+ """Mixin class for exporting results."""
2
+
3
+ from functools import wraps
4
+ from edsl.results.DatasetExportMixin import DatasetExportMixin
5
+
6
+
7
def to_dataset(func):
    """Decorate ``func`` so it receives ``self.to_dataset()`` instead of ``self``.

    The wrapped callable accepts a ``ScenarioList`` (or a subclass of it) as
    its first argument, converts it with its ``to_dataset()`` method, and
    forwards the remaining arguments unchanged.

    :param func: Callable whose first positional argument is the dataset.
    :return: The wrapping function.
    :raises Exception: From the wrapper, when the first argument is not a
        ``ScenarioList``.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        """Convert ``self`` to a dataset and call the wrapped function with it."""
        # Match by class name across the MRO so subclasses are accepted too.
        # The name comparison avoids importing ScenarioList here, since the
        # ScenarioList module imports this mixin module.
        is_scenario_list = any(
            klass.__name__ == "ScenarioList" for klass in type(self).__mro__
        )
        if not is_scenario_list:
            raise Exception(
                f"Class {self.__class__.__name__} not recognized as a ScenarioList object."
            )
        return func(self.to_dataset(), *args, **kwargs)

    return wrapper
21
+
22
+
23
def decorate_all_methods(cls):
    """Class decorator that wraps callables defined on ``cls`` with ``to_dataset``.

    Only attributes found in ``cls.__dict__`` are examined, so attributes
    inherited from base classes are left as they are.

    NOTE(review): ScenarioListExportMixin, as written in this module, defines
    no methods of its own, so this loop appears to wrap nothing for it.
    Confirm whether inherited export methods were meant to be wrapped.

    :param cls: The class to modify in place.
    :return: The same class object.
    """
    # Take a snapshot so the class is not modified while its namespace is
    # being iterated.
    own_attributes = list(vars(cls).items())
    for attr_name, attr_value in own_attributes:
        if not callable(attr_value):
            continue
        setattr(cls, attr_name, to_dataset(attr_value))
    return cls
28
+
29
+
30
@decorate_all_methods
class ScenarioListExportMixin(DatasetExportMixin):
    """Mixin that exposes the DatasetExportMixin export methods on ScenarioList."""
@@ -2,7 +2,7 @@ import fitz # PyMuPDF
2
2
  import os
3
3
  import subprocess
4
4
 
5
- from edsl import Scenario
5
+ # from edsl import Scenario
6
6
 
7
7
 
8
8
  class ScenarioListPdfMixin:
@@ -22,6 +22,7 @@ class ScenarioListPdfMixin:
22
22
  """
23
23
  import tempfile
24
24
  from pdf2image import convert_from_path
25
+ from edsl.scenarios import Scenario
25
26
 
26
27
  with tempfile.TemporaryDirectory() as output_folder:
27
28
  # Convert PDF to images
@@ -1 +1,2 @@
1
1
  from edsl.scenarios.Scenario import Scenario
2
+ from edsl.scenarios.ScenarioList import ScenarioList
@@ -3,9 +3,9 @@
3
3
  from collections import UserDict, defaultdict
4
4
  from typing import Optional
5
5
 
6
- from edsl.surveys.Memory import Memory
7
- from edsl.prompts.Prompt import Prompt
8
- from edsl.surveys.DAG import DAG
6
+ # from edsl.surveys.Memory import Memory
7
+ # from edsl.prompts.Prompt import Prompt
8
+ # from edsl.surveys.DAG import DAG
9
9
 
10
10
 
11
11
  class MemoryPlan(UserDict):
@@ -61,6 +61,8 @@ class MemoryPlan(UserDict):
61
61
  :param answers: A dictionary of question names to answers.
62
62
 
63
63
  """
64
+ from edsl.prompts.Prompt import Prompt
65
+
64
66
  self._check_valid_question_name(focal_question)
65
67
 
66
68
  if focal_question not in self:
@@ -121,6 +123,7 @@ class MemoryPlan(UserDict):
121
123
  self._check_valid_question_name(focal_question)
122
124
  self._check_valid_question_name(prior_question)
123
125
  self._check_order(focal_question, prior_question)
126
+ from edsl.surveys.Memory import Memory
124
127
 
125
128
  if focal_question not in self:
126
129
  memory = Memory()
@@ -160,6 +163,8 @@ class MemoryPlan(UserDict):
160
163
  @classmethod
161
164
  def from_dict(cls, data) -> "MemoryPlan":
162
165
  """Deserialize a memory plan from a dictionary."""
166
+ from edsl.surveys.Memory import Memory
167
+
163
168
  newdata = {}
164
169
  for question_name, memory in data["data"].items():
165
170
  newdata[question_name] = Memory.from_dict(memory)
@@ -182,13 +187,15 @@ class MemoryPlan(UserDict):
182
187
  return new_d
183
188
 
184
189
  @property
185
- def dag(self) -> DAG:
190
+ def dag(self) -> "DAG":
186
191
  """Return a directed acyclic graph of the memory plan.
187
192
 
188
193
  >>> mp = MemoryPlan.example()
189
194
  >>> mp.dag
190
195
  {1: {0}}
191
196
  """
197
+ from edsl.surveys.DAG import DAG
198
+
192
199
  d = defaultdict(set)
193
200
  for focal_question, memory in self.items():
194
201
  for prior_question in memory:
edsl/surveys/Survey.py CHANGED
@@ -5,9 +5,6 @@ import re
5
5
 
6
6
  from typing import Any, Generator, Optional, Union, List, Literal, Callable
7
7
 
8
- from rich import print
9
- from rich.table import Table
10
-
11
8
  from edsl.exceptions import SurveyCreationError, SurveyHasNoRulesError
12
9
  from edsl.questions.QuestionBase import QuestionBase
13
10
  from edsl.surveys.base import RulePriority, EndOfSurvey
@@ -18,8 +15,8 @@ from edsl.Base import Base
18
15
  from edsl.surveys.SurveyExportMixin import SurveyExportMixin
19
16
  from edsl.surveys.descriptors import QuestionsDescriptor
20
17
  from edsl.surveys.MemoryPlan import MemoryPlan
18
+
21
19
  from edsl.surveys.DAG import DAG
22
- from edsl.utilities import is_notebook
23
20
  from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
24
21
  from edsl.surveys.SurveyFlowVisualizationMixin import SurveyFlowVisualizationMixin
25
22
 
@@ -563,6 +560,12 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
563
560
  job = Jobs(survey=self)
564
561
  return job.by(*args)
565
562
 
563
def to_jobs(self) -> "Jobs":
    """Return a new ``Jobs`` object built from this survey."""
    # Imported inside the method rather than at module level.
    from edsl.jobs.Jobs import Jobs

    survey_jobs = Jobs(survey=self)
    return survey_jobs
568
+
566
569
  def run(self, *args, **kwargs) -> "Results":
567
570
  """Turn the survey into a Job and runs it.
568
571
 
@@ -957,6 +960,8 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
957
960
  │ └───────────────┴─────────────────┴───────────────┴──────────────────────────────────────────────┘ │
958
961
  └────────────────────────────────────────────────────────────────────────────────────────────────────┘
959
962
  """
963
+ from rich.table import Table
964
+
960
965
  table = Table(show_header=True, header_style="bold magenta")
961
966
  table.add_column("Questions", style="dim")
962
967
 
@@ -1,8 +1,6 @@
1
1
  """A mixin class for exporting surveys to different formats."""
2
2
 
3
- from docx import Document
4
3
  from typing import Union, Optional
5
- import black
6
4
 
7
5
 
8
6
  class SurveyExportMixin:
@@ -29,6 +27,8 @@ class SurveyExportMixin:
29
27
 
30
28
  def docx(self, filename=None) -> Union["Document", None]:
31
29
  """Generate a docx document for the survey."""
30
+ from docx import Document
31
+
32
32
  doc = Document()
33
33
  doc.add_heading("EDSL Survey")
34
34
  doc.add_paragraph(f"\n")
@@ -83,6 +83,8 @@ class SurveyExportMixin:
83
83
  survey = Survey(questions=[q0, q1, q2])
84
84
  ...
85
85
  """
86
+ import black
87
+
86
88
  header_lines = ["from edsl.surveys.Survey import Survey"]
87
89
  header_lines.append("from edsl import Question")
88
90
  lines = ["\n".join(header_lines)]
@@ -1,10 +1,7 @@
1
1
  """A mixin for visualizing the flow of a survey."""
2
2
 
3
- import pydot
4
- import tempfile
5
- from IPython.display import Image
6
- from edsl.utilities import is_notebook
7
3
  from edsl.surveys.base import RulePriority, EndOfSurvey
4
+ import tempfile
8
5
 
9
6
 
10
7
  class SurveyFlowVisualizationMixin:
@@ -13,6 +10,8 @@ class SurveyFlowVisualizationMixin:
13
10
  def show_flow(self, filename: str = None):
14
11
  """Create an image showing the flow of users through the survey."""
15
12
  # Create a graph object
13
+ import pydot
14
+
16
15
  graph = pydot.Dot(graph_type="digraph")
17
16
 
18
17
  # Add nodes for each question
@@ -101,8 +100,11 @@ class SurveyFlowVisualizationMixin:
101
100
  on Ubuntu.
102
101
  """
103
102
  )
103
+ from edsl.utilities.utilities import is_notebook
104
104
 
105
105
  if is_notebook():
106
+ from IPython.display import Image
107
+
106
108
  display(Image(tmp_file.name))
107
109
  else:
108
110
  import os
@@ -1,22 +1,22 @@
1
- from edsl.utilities.interface import (
2
- print_dict_as_html_table,
3
- print_dict_with_rich,
4
- print_list_of_dicts_as_html_table,
5
- print_table_with_rich,
6
- print_public_methods_with_doc,
7
- print_list_of_dicts_as_markdown_table,
8
- )
1
+ # from edsl.utilities.interface import (
2
+ # print_dict_as_html_table,
3
+ # print_dict_with_rich,
4
+ # print_list_of_dicts_as_html_table,
5
+ # print_table_with_rich,
6
+ # print_public_methods_with_doc,
7
+ # print_list_of_dicts_as_markdown_table,
8
+ # )
9
9
 
10
- from edsl.utilities.utilities import (
11
- create_valid_var_name,
12
- dict_to_html,
13
- hash_value,
14
- HTMLSnippet,
15
- is_notebook,
16
- is_gzipped,
17
- is_valid_variable_name,
18
- random_string,
19
- repair_json,
20
- shorten_string,
21
- time_all_functions,
22
- )
10
+ # from edsl.utilities.utilities import (
11
+ # create_valid_var_name,
12
+ # dict_to_html,
13
+ # hash_value,
14
+ # HTMLSnippet,
15
+ # is_notebook,
16
+ # is_gzipped,
17
+ # is_valid_variable_name,
18
+ # random_string,
19
+ # repair_json,
20
+ # shorten_string,
21
+ # time_all_functions,
22
+ # )