edsl 0.1.45__py3-none-any.whl → 0.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. edsl/Base.py +87 -16
  2. edsl/__version__.py +1 -1
  3. edsl/agents/PromptConstructor.py +26 -79
  4. edsl/agents/QuestionInstructionPromptBuilder.py +70 -32
  5. edsl/agents/QuestionTemplateReplacementsBuilder.py +12 -2
  6. edsl/coop/coop.py +289 -147
  7. edsl/data/Cache.py +2 -0
  8. edsl/data/CacheEntry.py +10 -2
  9. edsl/data/RemoteCacheSync.py +10 -9
  10. edsl/inference_services/AvailableModelFetcher.py +1 -1
  11. edsl/inference_services/PerplexityService.py +9 -5
  12. edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
  13. edsl/jobs/Jobs.py +35 -17
  14. edsl/jobs/JobsComponentConstructor.py +2 -1
  15. edsl/jobs/JobsPrompts.py +49 -26
  16. edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
  17. edsl/jobs/data_structures.py +3 -0
  18. edsl/jobs/interviews/Interview.py +6 -3
  19. edsl/language_models/LanguageModel.py +7 -1
  20. edsl/questions/QuestionBase.py +5 -0
  21. edsl/questions/question_base_gen_mixin.py +2 -0
  22. edsl/questions/question_registry.py +6 -7
  23. edsl/results/DatasetExportMixin.py +124 -6
  24. edsl/results/Results.py +59 -0
  25. edsl/scenarios/FileStore.py +112 -7
  26. edsl/scenarios/ScenarioList.py +283 -21
  27. edsl/study/Study.py +2 -2
  28. edsl/surveys/Survey.py +15 -20
  29. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/METADATA +4 -3
  30. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/RECORD +32 -44
  31. edsl/auto/AutoStudy.py +0 -130
  32. edsl/auto/StageBase.py +0 -243
  33. edsl/auto/StageGenerateSurvey.py +0 -178
  34. edsl/auto/StageLabelQuestions.py +0 -125
  35. edsl/auto/StagePersona.py +0 -61
  36. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  37. edsl/auto/StagePersonaDimensionValues.py +0 -74
  38. edsl/auto/StagePersonaDimensions.py +0 -69
  39. edsl/auto/StageQuestions.py +0 -74
  40. edsl/auto/SurveyCreatorPipeline.py +0 -21
  41. edsl/auto/utilities.py +0 -218
  42. edsl/base/Base.py +0 -279
  43. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/LICENSE +0 -0
  44. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/WHEEL +0 -0
@@ -60,26 +60,25 @@ class Question(metaclass=Meta):
60
60
  return q.example()
61
61
 
62
62
  @classmethod
63
- def pull(cls, uuid: Optional[Union[str, UUID]] = None, url: Optional[str] = None):
63
+ def pull(cls, url_or_uuid: Union[str, UUID]):
64
64
  """Pull the object from coop."""
65
65
  from edsl.coop import Coop
66
66
 
67
67
  coop = Coop()
68
- return coop.get(uuid, url, "question")
68
+ return coop.get(url_or_uuid, "question")
69
69
 
70
70
  @classmethod
71
- def delete(cls, uuid: Optional[Union[str, UUID]] = None, url: Optional[str] = None):
71
+ def delete(cls, url_or_uuid: Union[str, UUID]):
72
72
  """Delete the object from coop."""
73
73
  from edsl.coop import Coop
74
74
 
75
75
  coop = Coop()
76
- return coop.delete(uuid, url)
76
+ return coop.delete(url_or_uuid)
77
77
 
78
78
  @classmethod
79
79
  def patch(
80
80
  cls,
81
- uuid: Optional[Union[str, UUID]] = None,
82
- url: Optional[str] = None,
81
+ url_or_uuid: Union[str, UUID],
83
82
  description: Optional[str] = None,
84
83
  value: Optional[Any] = None,
85
84
  visibility: Optional[str] = None,
@@ -88,7 +87,7 @@ class Question(metaclass=Meta):
88
87
  from edsl.coop import Coop
89
88
 
90
89
  coop = Coop()
91
- return coop.patch(uuid, url, description, value, visibility)
90
+ return coop.patch(url_or_uuid, description, value, visibility)
92
91
 
93
92
  @classmethod
94
93
  def list_question_types(cls):
@@ -505,8 +505,9 @@ class DatasetExportMixin:
505
505
 
506
506
  from edsl.utilities.PrettyList import PrettyList
507
507
 
508
- return PrettyList(list_to_return)
509
-
508
+ #return PrettyList(list_to_return)
509
+ return list_to_return
510
+
510
511
  def html(
511
512
  self,
512
513
  filename: Optional[str] = None,
@@ -735,11 +736,14 @@ class DatasetExportMixin:
735
736
  """
736
737
  Flatten a field containing a list of dictionaries into separate fields.
737
738
 
738
- For example, if a dataset contains:
739
- [{'data': [{'a': 1}, {'b': 2}], 'other': ['x', 'y']}]
739
+ >>> from edsl.results.Dataset import Dataset
740
+ >>> Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('a')
741
+ Dataset([{'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])
742
+
743
+
744
+ >>> Dataset([{'answer.example': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('answer.example')
745
+ Dataset([{'c': [5]}, {'answer.example.a': [1]}, {'answer.example.b': [2]}])
740
746
 
741
- After d.flatten('data'), it should become:
742
- [{'other': ['x', 'y'], 'data.a': [1, None], 'data.b': [None, 2]}]
743
747
 
744
748
  Args:
745
749
  field: The field to flatten
@@ -753,6 +757,24 @@ class DatasetExportMixin:
753
757
  # Ensure the dataset isn't empty
754
758
  if not self.data:
755
759
  return self.copy()
760
+
761
+ # Find all columns that contain the field
762
+ matching_entries = []
763
+ for entry in self.data:
764
+ col_name = next(iter(entry.keys()))
765
+ if field == col_name or (
766
+ '.' in col_name and
767
+ (col_name.endswith('.' + field) or col_name.startswith(field + '.'))
768
+ ):
769
+ matching_entries.append(entry)
770
+
771
+ # Check if the field is ambiguous
772
+ if len(matching_entries) > 1:
773
+ matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
774
+ raise ValueError(
775
+ f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
776
+ f"Please specify the full column name to flatten."
777
+ )
756
778
 
757
779
  # Get the number of observations
758
780
  num_observations = self.num_observations()
@@ -882,6 +904,102 @@ class DatasetExportMixin:
882
904
  result.data.pop(field_index)
883
905
 
884
906
  return result
907
+
908
+ def drop(self, field_name):
909
+ """
910
+ Returns a new Dataset with the specified field removed.
911
+
912
+ Args:
913
+ field_name (str): The name of the field to remove.
914
+
915
+ Returns:
916
+ Dataset: A new Dataset instance without the specified field.
917
+
918
+ Raises:
919
+ KeyError: If the field_name doesn't exist in the dataset.
920
+
921
+ Examples:
922
+ >>> from edsl.results.Dataset import Dataset
923
+ >>> d = Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
924
+ >>> d.drop('a')
925
+ Dataset([{'b': [4, 5, 6]}])
926
+
927
+ >>> d.drop('c')
928
+ Traceback (most recent call last):
929
+ ...
930
+ KeyError: "Field 'c' not found in dataset"
931
+ """
932
+ from edsl.results.Dataset import Dataset
933
+
934
+ # Check if field exists in the dataset
935
+ if field_name not in self.relevant_columns():
936
+ raise KeyError(f"Field '{field_name}' not found in dataset")
937
+
938
+ # Create a new dataset without the specified field
939
+ new_data = [entry for entry in self.data if field_name not in entry]
940
+ return Dataset(new_data)
941
+
942
+ def remove_prefix(self):
943
+ """Returns a new Dataset with the prefix removed from all column names.
944
+
945
+ The prefix is defined as everything before the first dot (.) in the column name.
946
+ If removing prefixes would result in duplicate column names, an exception is raised.
947
+
948
+ Returns:
949
+ Dataset: A new Dataset with prefixes removed from column names
950
+
951
+ Raises:
952
+ ValueError: If removing prefixes would result in duplicate column names
953
+
954
+ Examples:
955
+ >>> from edsl.results import Results
956
+ >>> r = Results.example()
957
+ >>> r.select('how_feeling', 'how_feeling_yesterday').relevant_columns()
958
+ ['answer.how_feeling', 'answer.how_feeling_yesterday']
959
+ >>> r.select('how_feeling', 'how_feeling_yesterday').remove_prefix().relevant_columns()
960
+ ['how_feeling', 'how_feeling_yesterday']
961
+
962
+ >>> from edsl.results.Dataset import Dataset
963
+ >>> d = Dataset([{'a.x': [1, 2, 3]}, {'b.x': [4, 5, 6]}])
964
+ >>> d.remove_prefix()
965
+ Traceback (most recent call last):
966
+ ...
967
+ ValueError: Removing prefixes would result in duplicate column names: ['x']
968
+ """
969
+ from edsl.results.Dataset import Dataset
970
+
971
+ # Get all column names
972
+ columns = self.relevant_columns()
973
+
974
+ # Extract the unprefixed names
975
+ unprefixed = {}
976
+ duplicates = set()
977
+
978
+ for col in columns:
979
+ if '.' in col:
980
+ unprefixed_name = col.split('.', 1)[1]
981
+ if unprefixed_name in unprefixed:
982
+ duplicates.add(unprefixed_name)
983
+ unprefixed[unprefixed_name] = col
984
+ else:
985
+ # For columns without a prefix, keep them as is
986
+ unprefixed[col] = col
987
+
988
+ # Check for duplicates
989
+ if duplicates:
990
+ raise ValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
991
+
992
+ # Create a new dataset with unprefixed column names
993
+ new_data = []
994
+ for entry in self.data:
995
+ key, values = list(entry.items())[0]
996
+ if '.' in key:
997
+ new_key = key.split('.', 1)[1]
998
+ else:
999
+ new_key = key
1000
+ new_data.append({new_key: values})
1001
+
1002
+ return Dataset(new_data)
885
1003
 
886
1004
 
887
1005
  if __name__ == "__main__":
edsl/results/Results.py CHANGED
@@ -1379,6 +1379,65 @@ class Results(UserList, Mixins, Base):
1379
1379
  raise ResultsError(f"Failed to fetch remote results: {str(e)}")
1380
1380
 
1381
1381
 
1382
+ def spot_issues(self, models: Optional[ModelList] = None) -> Results:
1383
+ """Run a survey to spot issues and suggest improvements for prompts that had no model response, returning a new Results object.
1384
+ Future version: Allow user to optionally pass a list of questions to review, regardless of whether they had a null model response.
1385
+ """
1386
+ from edsl.questions import QuestionFreeText, QuestionDict
1387
+ from edsl.surveys import Survey
1388
+ from edsl.scenarios import Scenario, ScenarioList
1389
+ from edsl.language_models import Model, ModelList
1390
+ import pandas as pd
1391
+
1392
+ df = self.select("agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*").to_pandas()
1393
+ scenario_list = []
1394
+
1395
+ for _, row in df.iterrows():
1396
+ for col in df.columns:
1397
+ if col.endswith("_raw_model_response") and pd.isna(row[col]):
1398
+ q = col.split("_raw_model_response")[0].replace("raw_model_response.", "")
1399
+
1400
+ s = Scenario({
1401
+ "original_question": q,
1402
+ "original_agent_index": row["agent.agent_index"],
1403
+ "original_scenario_index": row["scenario.scenario_index"],
1404
+ "original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}"
1405
+ })
1406
+
1407
+ scenario_list.append(s)
1408
+
1409
+ sl = ScenarioList(set(scenario_list))
1410
+
1411
+ q1 = QuestionFreeText(
1412
+ question_name = "issues",
1413
+ question_text = """
1414
+ The following prompts generated a bad or null response: '{{ original_prompts }}'
1415
+ What do you think was the likely issue(s)?
1416
+ """
1417
+ )
1418
+
1419
+ q2 = QuestionDict(
1420
+ question_name = "revised",
1421
+ question_text = """
1422
+ The following prompts generated a bad or null response: '{{ original_prompts }}'
1423
+ You identified the issue(s) as '{{ issues.answer }}'.
1424
+ Please revise the prompts to address the issue(s).
1425
+ """,
1426
+ answer_keys = ["revised_user_prompt", "revised_system_prompt"]
1427
+ )
1428
+
1429
+ survey = Survey(questions = [q1, q2])
1430
+
1431
+ if models is not None:
1432
+ if not isinstance(models, ModelList):
1433
+ raise ResultsError("models must be a ModelList")
1434
+ results = survey.by(sl).by(models).run()
1435
+ else:
1436
+ results = survey.by(sl).run() # use the default model
1437
+
1438
+ return results
1439
+
1440
+
1382
1441
  def main(): # pragma: no cover
1383
1442
  """Call the OpenAI API credits."""
1384
1443
  from edsl.results.Results import Results
@@ -11,6 +11,10 @@ from edsl.utilities.remove_edsl_version import remove_edsl_version
11
11
  from edsl.scenarios.file_methods import FileMethods
12
12
  from typing import Union
13
13
  from uuid import UUID
14
+ import time
15
+ from typing import Dict, Any, IO, Optional, List, Union, Literal
16
+
17
+
14
18
 
15
19
  class FileStore(Scenario):
16
20
  __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
@@ -30,7 +34,7 @@ class FileStore(Scenario):
30
34
  path = kwargs["filename"]
31
35
 
32
36
  # Check if path is a URL and handle download
33
- if path and (path.startswith('http://') or path.startswith('https://')):
37
+ if path and (path.startswith("http://") or path.startswith("https://")):
34
38
  temp_filestore = self.from_url(path, mime_type=mime_type)
35
39
  path = temp_filestore._path
36
40
  mime_type = temp_filestore.mime_type
@@ -91,6 +95,102 @@ class FileStore(Scenario):
91
95
  else:
92
96
  print(f"Example for {example_type} is not supported.")
93
97
 
98
+ @classmethod
99
+ async def _async_screenshot(
100
+ cls,
101
+ url: str,
102
+ full_page: bool = True,
103
+ wait_until: Literal[
104
+ "load", "domcontentloaded", "networkidle", "commit"
105
+ ] = "networkidle",
106
+ download_path: Optional[str] = None,
107
+ ) -> "FileStore":
108
+ """Async version of screenshot functionality"""
109
+ try:
110
+ from playwright.async_api import async_playwright
111
+ except ImportError:
112
+ raise ImportError(
113
+ "Screenshot functionality requires additional dependencies.\n"
114
+ "Install them with: pip install 'edsl[screenshot]'"
115
+ )
116
+
117
+ if download_path is None:
118
+ download_path = os.path.join(
119
+ os.getcwd(), f"screenshot_{int(time.time())}.png"
120
+ )
121
+
122
+ async with async_playwright() as p:
123
+ browser = await p.chromium.launch()
124
+ page = await browser.new_page()
125
+ await page.goto(url, wait_until=wait_until)
126
+ await page.screenshot(path=download_path, full_page=full_page)
127
+ await browser.close()
128
+
129
+ return cls(download_path, mime_type="image/png")
130
+
131
+ @classmethod
132
+ def from_url_screenshot(cls, url: str, **kwargs) -> "FileStore":
133
+ """Synchronous wrapper for screenshot functionality"""
134
+ import asyncio
135
+
136
+ try:
137
+ # Try using get_event_loop first (works in regular Python)
138
+ loop = asyncio.get_event_loop()
139
+ except RuntimeError:
140
+ # If we're in IPython/Jupyter, create a new loop
141
+ loop = asyncio.new_event_loop()
142
+ asyncio.set_event_loop(loop)
143
+
144
+ try:
145
+ return loop.run_until_complete(cls._async_screenshot(url, **kwargs))
146
+ finally:
147
+ if not loop.is_running():
148
+ loop.close()
149
+
150
+ @classmethod
151
+ def batch_screenshots(cls, urls: List[str], **kwargs) -> "ScenarioList":
152
+ """
153
+ Take screenshots of multiple URLs concurrently.
154
+ Args:
155
+ urls: List of URLs to screenshot
156
+ **kwargs: Additional arguments passed to screenshot function (full_page, wait_until, etc.)
157
+ Returns:
158
+ ScenarioList containing FileStore objects with their corresponding URLs
159
+ """
160
+ from edsl import ScenarioList
161
+
162
+ try:
163
+ # Try using get_event_loop first (works in regular Python)
164
+ loop = asyncio.get_event_loop()
165
+ except RuntimeError:
166
+ # If we're in IPython/Jupyter, create a new loop
167
+ loop = asyncio.new_event_loop()
168
+ asyncio.set_event_loop(loop)
169
+
170
+ # Create tasks for all screenshots
171
+ tasks = [cls._async_screenshot(url, **kwargs) for url in urls]
172
+
173
+ try:
174
+ # Run all screenshots concurrently
175
+ results = loop.run_until_complete(
176
+ asyncio.gather(*tasks, return_exceptions=True)
177
+ )
178
+
179
+ # Filter out any errors and log them
180
+ successful_results = []
181
+ for url, result in zip(urls, results):
182
+ if isinstance(result, Exception):
183
+ print(f"Failed to screenshot {url}: {result}")
184
+ else:
185
+ successful_results.append(
186
+ Scenario({"url": url, "screenshot": result})
187
+ )
188
+
189
+ return ScenarioList(successful_results)
190
+ finally:
191
+ if not loop.is_running():
192
+ loop.close()
193
+
94
194
  @property
95
195
  def size(self) -> int:
96
196
  if self.base64_string != None:
@@ -273,12 +373,11 @@ class FileStore(Scenario):
273
373
  # raise TypeError("No text method found for this file type.")
274
374
 
275
375
  def push(
276
- self,
277
- description: Optional[str] = None,
376
+ self,
377
+ description: Optional[str] = None,
278
378
  alias: Optional[str] = None,
279
379
  visibility: Optional[str] = "unlisted",
280
380
  expected_parrot_url: Optional[str] = None,
281
-
282
381
  ) -> dict:
283
382
  """
284
383
  Push the object to Coop.
@@ -286,20 +385,26 @@ class FileStore(Scenario):
286
385
  :param visibility: The visibility of the object to push.
287
386
  """
288
387
  scenario_version = Scenario.from_dict(self.to_dict())
388
+
289
389
  if description is None:
290
390
  description = "File: " + self.path
291
- info = scenario_version.push(description=description, visibility=visibility, expected_parrot_url=expected_parrot_url, alias=alias)
391
+ info = scenario_version.push(
392
+ description=description,
393
+ visibility=visibility,
394
+ expected_parrot_url=expected_parrot_url,
395
+ alias=alias,
396
+ )
292
397
  return info
293
398
 
294
399
  @classmethod
295
400
  def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
296
401
  """
297
402
  Pull a FileStore object from Coop.
298
-
403
+
299
404
  Args:
300
405
  url_or_uuid: Either a UUID string or a URL pointing to the object
301
406
  expected_parrot_url: Optional URL for the Parrot server
302
-
407
+
303
408
  Returns:
304
409
  FileStore: The pulled FileStore object
305
410
  """