edsl 0.1.38__py3-none-any.whl → 0.1.38.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. edsl/Base.py +34 -63
  2. edsl/BaseDiff.py +7 -7
  3. edsl/__init__.py +1 -2
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +11 -23
  6. edsl/agents/AgentList.py +23 -86
  7. edsl/agents/Invigilator.py +7 -18
  8. edsl/agents/InvigilatorBase.py +19 -0
  9. edsl/agents/PromptConstructor.py +4 -5
  10. edsl/auto/SurveyCreatorPipeline.py +1 -1
  11. edsl/auto/utilities.py +1 -1
  12. edsl/base/Base.py +13 -3
  13. edsl/config.py +0 -8
  14. edsl/conjure/AgentConstructionMixin.py +160 -0
  15. edsl/conjure/Conjure.py +62 -0
  16. edsl/conjure/InputData.py +659 -0
  17. edsl/conjure/InputDataCSV.py +48 -0
  18. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  19. edsl/conjure/InputDataPyRead.py +91 -0
  20. edsl/conjure/InputDataSPSS.py +8 -0
  21. edsl/conjure/InputDataStata.py +8 -0
  22. edsl/conjure/QuestionOptionMixin.py +76 -0
  23. edsl/conjure/QuestionTypeMixin.py +23 -0
  24. edsl/conjure/RawQuestion.py +65 -0
  25. edsl/conjure/SurveyResponses.py +7 -0
  26. edsl/conjure/__init__.py +9 -0
  27. edsl/conjure/examples/placeholder.txt +0 -0
  28. edsl/{utilities → conjure}/naming_utilities.py +1 -1
  29. edsl/conjure/utilities.py +201 -0
  30. edsl/coop/coop.py +7 -77
  31. edsl/data/Cache.py +17 -45
  32. edsl/data/CacheEntry.py +3 -8
  33. edsl/data/RemoteCacheSync.py +19 -0
  34. edsl/enums.py +0 -2
  35. edsl/exceptions/agents.py +0 -4
  36. edsl/inference_services/GoogleService.py +15 -7
  37. edsl/inference_services/registry.py +0 -2
  38. edsl/jobs/Jobs.py +559 -110
  39. edsl/jobs/buckets/TokenBucket.py +0 -3
  40. edsl/jobs/interviews/Interview.py +7 -7
  41. edsl/jobs/runners/JobsRunnerAsyncio.py +28 -156
  42. edsl/jobs/runners/JobsRunnerStatus.py +196 -194
  43. edsl/jobs/tasks/TaskHistory.py +19 -27
  44. edsl/language_models/LanguageModel.py +90 -52
  45. edsl/language_models/ModelList.py +14 -67
  46. edsl/language_models/registry.py +4 -57
  47. edsl/notebooks/Notebook.py +8 -7
  48. edsl/prompts/Prompt.py +3 -8
  49. edsl/questions/QuestionBase.py +30 -38
  50. edsl/questions/QuestionBaseGenMixin.py +1 -1
  51. edsl/questions/QuestionBasePromptsMixin.py +17 -0
  52. edsl/questions/QuestionExtract.py +4 -3
  53. edsl/questions/QuestionFunctional.py +3 -10
  54. edsl/questions/derived/QuestionTopK.py +0 -2
  55. edsl/questions/question_registry.py +6 -36
  56. edsl/results/Dataset.py +15 -146
  57. edsl/results/DatasetExportMixin.py +217 -231
  58. edsl/results/DatasetTree.py +4 -134
  59. edsl/results/Result.py +16 -31
  60. edsl/results/Results.py +65 -159
  61. edsl/scenarios/FileStore.py +13 -187
  62. edsl/scenarios/Scenario.py +18 -73
  63. edsl/scenarios/ScenarioList.py +76 -251
  64. edsl/surveys/MemoryPlan.py +1 -1
  65. edsl/surveys/Rule.py +5 -1
  66. edsl/surveys/RuleCollection.py +1 -1
  67. edsl/surveys/Survey.py +19 -25
  68. edsl/surveys/SurveyFlowVisualizationMixin.py +9 -67
  69. edsl/surveys/instructions/ChangeInstruction.py +7 -9
  70. edsl/surveys/instructions/Instruction.py +7 -21
  71. edsl/templates/error_reporting/interview_details.html +3 -3
  72. edsl/templates/error_reporting/interviews.html +9 -18
  73. edsl/utilities/utilities.py +0 -15
  74. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/METADATA +1 -2
  75. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/RECORD +77 -71
  76. edsl/exceptions/cache.py +0 -5
  77. edsl/inference_services/PerplexityService.py +0 -163
  78. edsl/jobs/JobsChecks.py +0 -147
  79. edsl/jobs/JobsPrompts.py +0 -268
  80. edsl/jobs/JobsRemoteInferenceHandler.py +0 -239
  81. edsl/results/CSSParameterizer.py +0 -108
  82. edsl/results/TableDisplay.py +0 -198
  83. edsl/results/table_display.css +0 -78
  84. edsl/scenarios/ScenarioJoin.py +0 -127
  85. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/LICENSE +0 -0
  86. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/WHEEL +0 -0
@@ -138,7 +138,7 @@ class QuestionBaseGenMixin:
138
138
  if exclude_components is None:
139
139
  exclude_components = ["question_name", "question_type"]
140
140
 
141
- d = copy.deepcopy(self.to_dict(add_edsl_version=False))
141
+ d = copy.deepcopy(self._to_dict())
142
142
  for key, value in d.items():
143
143
  if key in exclude_components:
144
144
  continue
@@ -126,6 +126,7 @@ class QuestionBasePromptsMixin:
126
126
 
127
127
  @classmethod
128
128
  def default_question_presentation(cls):
129
+ # template_text = cls._read_template("question_presentation.jinja")
129
130
  template_text = template_manager.get_template(
130
131
  cls.question_type, "question_presentation.jinja"
131
132
  )
@@ -141,6 +142,22 @@ class QuestionBasePromptsMixin:
141
142
  def answering_instructions(self, value) -> None:
142
143
  self._answering_instructions = value
143
144
 
145
+ # @classmethod
146
+ # def default_answering_instructions(cls) -> str:
147
+ # with resources.open_text(
148
+ # f"edsl.questions.templates.{cls.question_type}",
149
+ # "answering_instructions.jinja",
150
+ # ) as file:
151
+ # return Prompt(text=file.read())
152
+
153
+ # @classmethod
154
+ # def default_question_presentation(cls):
155
+ # with resources.open_text(
156
+ # f"edsl.questions.templates.{cls.question_type}",
157
+ # "question_presentation.jinja",
158
+ # ) as file:
159
+ # return Prompt(text=file.read())
160
+
144
161
  @property
145
162
  def question_presentation(self):
146
163
  if self._question_presentation is None:
@@ -1,7 +1,4 @@
1
1
  from __future__ import annotations
2
- import json
3
- import re
4
-
5
2
  from typing import Any, Optional, Dict
6
3
  from edsl.questions.QuestionBase import QuestionBase
7
4
  from edsl.questions.descriptors import AnswerTemplateDescriptor
@@ -14,6 +11,9 @@ from edsl.questions.decorators import inject_exception
14
11
  from typing import Dict, Any
15
12
  from pydantic import create_model, Field
16
13
 
14
+ import json
15
+ import re
16
+
17
17
 
18
18
  def extract_json(text, expected_keys, verbose=False):
19
19
  # Escape special regex characters in keys
@@ -112,6 +112,7 @@ class QuestionExtract(QuestionBase):
112
112
 
113
113
  :param question_name: The name of the question.
114
114
  :param question_text: The text of the question.
115
+ :param question_options: The options the respondent should select from.
115
116
  :param answer_template: The template for the answer.
116
117
  """
117
118
  self.question_name = question_name
@@ -108,22 +108,15 @@ class QuestionFunctional(QuestionBase):
108
108
  def question_html_content(self) -> str:
109
109
  return "NA for QuestionFunctional"
110
110
 
111
- # @add_edsl_version
112
- def to_dict(self, add_edsl_version=True):
113
- d = {
111
+ @add_edsl_version
112
+ def to_dict(self):
113
+ return {
114
114
  "question_name": self.question_name,
115
115
  "function_source_code": self.function_source_code,
116
116
  "question_type": "functional",
117
117
  "requires_loop": self.requires_loop,
118
118
  "function_name": self.function_name,
119
119
  }
120
- if add_edsl_version:
121
- from edsl import __version__
122
-
123
- d["edsl_version"] = __version__
124
- d["edsl_class_name"] = self.__class__.__name__
125
-
126
- return d
127
120
 
128
121
  @classmethod
129
122
  def example(cls):
@@ -21,7 +21,6 @@ class QuestionTopK(QuestionCheckBox):
21
21
  question_presentation: Optional[str] = None,
22
22
  answering_instructions: Optional[str] = None,
23
23
  include_comment: Optional[bool] = True,
24
- use_code: Optional[bool] = True,
25
24
  ):
26
25
  """Initialize the question.
27
26
 
@@ -40,7 +39,6 @@ class QuestionTopK(QuestionCheckBox):
40
39
  question_presentation=question_presentation,
41
40
  answering_instructions=answering_instructions,
42
41
  include_comment=include_comment,
43
- use_code=use_code,
44
42
  )
45
43
  if min_selections != max_selections:
46
44
  raise QuestionCreationValidationError(
@@ -90,22 +90,6 @@ class Question(metaclass=Meta):
90
90
  coop = Coop()
91
91
  return coop.patch(uuid, url, description, value, visibility)
92
92
 
93
- @classmethod
94
- def list_question_types(cls):
95
- """Return a list of available question types.
96
-
97
- >>> from edsl import Question
98
- >>> Question.list_question_types()
99
- ['checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
100
- """
101
- return [
102
- q
103
- for q in sorted(
104
- list(RegisterQuestionsMeta.question_types_to_classes().keys())
105
- )
106
- if q not in ["budget"]
107
- ]
108
-
109
93
  @classmethod
110
94
  def available(cls, show_class_names: bool = False) -> Union[list, dict]:
111
95
  """Return a list of available question types.
@@ -114,32 +98,18 @@ class Question(metaclass=Meta):
114
98
 
115
99
  Example usage:
116
100
 
101
+ >>> from edsl import Question
102
+ >>> Question.available()
103
+ ['checkbox', 'extract', 'free_text', 'functional', 'likert_five', 'linear_scale', 'list', 'multiple_choice', 'numerical', 'rank', 'top_k', 'yes_no']
117
104
  """
118
- from edsl.results.Dataset import Dataset
119
-
120
105
  exclude = ["budget"]
121
106
  if show_class_names:
122
107
  return RegisterQuestionsMeta.question_types_to_classes()
123
108
  else:
124
- question_list = [
125
- q
126
- for q in sorted(
127
- set(RegisterQuestionsMeta.question_types_to_classes().keys())
128
- )
129
- if q not in exclude
130
- ]
131
- d = RegisterQuestionsMeta.question_types_to_classes()
132
- question_classes = [d[q] for q in question_list]
133
- example_questions = [repr(q.example()) for q in question_classes]
134
-
135
- return Dataset(
136
- [
137
- {"question_type": [q for q in question_list]},
138
- {"question_class": [q.__name__ for q in question_classes]},
139
- {"example_question": example_questions},
140
- ],
141
- print_parameters={"containerHeight": "auto"},
109
+ question_list = sorted(
110
+ set(RegisterQuestionsMeta.question_types_to_classes().keys())
142
111
  )
112
+ return [q for q in question_list if q not in exclude]
143
113
 
144
114
 
145
115
  def get_question_class(question_type):
edsl/results/Dataset.py CHANGED
@@ -5,23 +5,19 @@ import random
5
5
  import json
6
6
  from collections import UserList
7
7
  from typing import Any, Union, Optional
8
- import sys
8
+
9
9
  import numpy as np
10
10
 
11
11
  from edsl.results.ResultsExportMixin import ResultsExportMixin
12
12
  from edsl.results.DatasetTree import Tree
13
- from edsl.results.TableDisplay import TableDisplay
14
13
 
15
14
 
16
15
  class Dataset(UserList, ResultsExportMixin):
17
16
  """A class to represent a dataset of observations."""
18
17
 
19
- def __init__(
20
- self, data: list[dict[str, Any]] = None, print_parameters: Optional[dict] = None
21
- ):
18
+ def __init__(self, data: list[dict[str, Any]] = None):
22
19
  """Initialize the dataset with the given data."""
23
20
  super().__init__(data)
24
- self.print_parameters = print_parameters
25
21
 
26
22
  def __len__(self) -> int:
27
23
  """Return the number of observations in the dataset.
@@ -36,7 +32,7 @@ class Dataset(UserList, ResultsExportMixin):
36
32
  _, values = list(self.data[0].items())[0]
37
33
  return len(values)
38
34
 
39
- def keys(self) -> list[str]:
35
+ def keys(self):
40
36
  """Return the keys of the first observation in the dataset.
41
37
 
42
38
  >>> d = Dataset([{'a.b':[1,2,3,4]}])
@@ -45,45 +41,10 @@ class Dataset(UserList, ResultsExportMixin):
45
41
  """
46
42
  return [list(o.keys())[0] for o in self]
47
43
 
48
- def filter(self, expression):
49
- return self.to_scenario_list().filter(expression).to_dataset()
50
-
51
44
  def __repr__(self) -> str:
52
45
  """Return a string representation of the dataset."""
53
46
  return f"Dataset({self.data})"
54
47
 
55
- def write(self, filename: str, tablefmt: Optional[str] = None) -> None:
56
- return self.table(tablefmt=tablefmt).write(filename)
57
-
58
- def _repr_html_(self):
59
- # headers, data = self._tabular()
60
- return self.table(print_parameters=self.print_parameters)._repr_html_()
61
- # return TableDisplay(headers=headers, data=data, raw_data_set=self)
62
-
63
- def _tabular(self) -> tuple[list[str], list[list[Any]]]:
64
- # Extract headers
65
- headers = []
66
- for entry in self.data:
67
- headers.extend(entry.keys())
68
- headers = list(dict.fromkeys(headers)) # Ensure unique headers
69
-
70
- # Extract data
71
- max_len = max(len(values) for entry in self.data for values in entry.values())
72
- rows = []
73
- for i in range(max_len):
74
- row = []
75
- for header in headers:
76
- for entry in self.data:
77
- if header in entry:
78
- values = entry[header]
79
- row.append(values[i] if i < len(values) else None)
80
- break
81
- else:
82
- row.append(None) # Default to None if header is missing
83
- rows.append(row)
84
-
85
- return headers, rows
86
-
87
48
  def _key_to_value(self, key: str) -> Any:
88
49
  """Retrieve the value associated with the given key from the dataset.
89
50
 
@@ -128,25 +89,7 @@ class Dataset(UserList, ResultsExportMixin):
128
89
 
129
90
  return get_values(self.data[0])[0]
130
91
 
131
- def print(self, pretty_labels=None, **kwargs):
132
- if "format" in kwargs:
133
- if kwargs["format"] not in ["html", "markdown", "rich", "latex"]:
134
- raise ValueError(f"Format '{kwargs['format']}' not supported.")
135
- if pretty_labels is None:
136
- pretty_labels = {}
137
- else:
138
- return self.rename(pretty_labels).print(**kwargs)
139
- return self.table()
140
-
141
- def rename(self, rename_dic) -> Dataset:
142
- new_data = []
143
- for observation in self.data:
144
- key, values = list(observation.items())[0]
145
- new_key = rename_dic.get(key, key)
146
- new_data.append({new_key: values})
147
- return Dataset(new_data)
148
-
149
- def select(self, *keys) -> Dataset:
92
+ def select(self, *keys):
150
93
  """Return a new dataset with only the selected keys.
151
94
 
152
95
  :param keys: The keys to select.
@@ -179,6 +122,12 @@ class Dataset(UserList, ResultsExportMixin):
179
122
  json.dumps(self.data)
180
123
  ) # janky but I want to make sure it's serializable & deserializable
181
124
 
125
+ def _repr_html_(self) -> str:
126
+ """Return an HTML representation of the dataset."""
127
+ from edsl.utilities.utilities import data_to_html
128
+
129
+ return data_to_html(self.data)
130
+
182
131
  def shuffle(self, seed=None) -> Dataset:
183
132
  """Return a new dataset with the observations shuffled.
184
133
 
@@ -200,9 +149,6 @@ class Dataset(UserList, ResultsExportMixin):
200
149
 
201
150
  return self
202
151
 
203
- def expand(self, field):
204
- return self.to_scenario_list().expand(field).to_dataset()
205
-
206
152
  def sample(
207
153
  self,
208
154
  n: int = None,
@@ -321,92 +267,15 @@ class Dataset(UserList, ResultsExportMixin):
321
267
 
322
268
  return Dataset(new_data)
323
269
 
324
- def tree(self, node_order: Optional[list[str]] = None) -> Tree:
270
+ @property
271
+ def tree(self):
325
272
  """Return a tree representation of the dataset.
326
273
 
327
274
  >>> d = Dataset([{'a':[1,2,3,4]}, {'b':[4,3,2,1]}])
328
- >>> d.tree()
329
- Tree(Dataset({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]}))
275
+ >>> d.tree.print_tree()
276
+ Tree has not been constructed yet.
330
277
  """
331
- return Tree(self, node_order=node_order)
332
-
333
- def table(
334
- self,
335
- *fields,
336
- tablefmt: Optional[str] = None,
337
- max_rows: Optional[int] = None,
338
- pretty_labels=None,
339
- print_parameters: Optional[dict] = None,
340
- ):
341
- if pretty_labels is not None:
342
- new_fields = []
343
- for field in fields:
344
- new_fields.append(pretty_labels.get(field, field))
345
- return self.rename(pretty_labels).table(
346
- *new_fields, tablefmt=tablefmt, max_rows=max_rows
347
- )
348
-
349
- self.print_parameters = print_parameters
350
-
351
- headers, data = self._tabular()
352
-
353
- if tablefmt is not None:
354
- from tabulate import tabulate_formats
355
-
356
- if tablefmt not in tabulate_formats:
357
- print(
358
- f"Error: The following table format is not supported: {tablefmt}",
359
- file=sys.stderr,
360
- )
361
- print(f"\nAvailable formats are: {tabulate_formats}", file=sys.stderr)
362
- return None
363
-
364
- if max_rows:
365
- if len(data) < max_rows:
366
- max_rows = None
367
-
368
- if fields:
369
- full_data = data
370
- data = []
371
- indices = []
372
- for field in fields:
373
- if field not in headers:
374
- print(
375
- f"Error: The following field was not found: {field}",
376
- file=sys.stderr,
377
- )
378
- print(f"\nAvailable fields are: {headers}", file=sys.stderr)
379
-
380
- # Optional: Suggest similar fields using difflib
381
- import difflib
382
-
383
- matches = difflib.get_close_matches(field, headers)
384
- if matches:
385
- print(f"\nDid you mean: {matches[0]} ?", file=sys.stderr)
386
- return None
387
- indices.append(headers.index(field))
388
- headers = fields
389
- for row in full_data:
390
- data.append([row[i] for i in indices])
391
-
392
- if max_rows is not None:
393
- if max_rows > len(data):
394
- raise ValueError(
395
- "max_rows cannot be greater than the number of rows in the dataset."
396
- )
397
- last_line = data[-1]
398
- spaces = len(data[max_rows])
399
- filler_line = ["." for i in range(spaces)]
400
- data = data[:max_rows]
401
- data.append(filler_line)
402
- data.append(last_line)
403
-
404
- return TableDisplay(
405
- data=data, headers=headers, tablefmt=tablefmt, raw_data_set=self
406
- )
407
-
408
- def summary(self):
409
- return Dataset([{"num_observations": [len(self)], "keys": [self.keys()]}])
278
+ return Tree(self)
410
279
 
411
280
  @classmethod
412
281
  def example(self):