edsl 0.1.30.dev5__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. edsl/__version__.py +1 -1
  2. edsl/agents/Invigilator.py +7 -2
  3. edsl/agents/PromptConstructionMixin.py +18 -1
  4. edsl/config.py +4 -0
  5. edsl/conjure/Conjure.py +6 -0
  6. edsl/coop/coop.py +4 -0
  7. edsl/coop/utils.py +9 -1
  8. edsl/data/CacheHandler.py +3 -4
  9. edsl/enums.py +2 -0
  10. edsl/inference_services/DeepInfraService.py +6 -91
  11. edsl/inference_services/GroqService.py +18 -0
  12. edsl/inference_services/InferenceServicesCollection.py +13 -5
  13. edsl/inference_services/OpenAIService.py +64 -21
  14. edsl/inference_services/registry.py +2 -1
  15. edsl/jobs/Jobs.py +80 -33
  16. edsl/jobs/buckets/TokenBucket.py +15 -7
  17. edsl/jobs/interviews/Interview.py +41 -19
  18. edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
  19. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +58 -40
  20. edsl/jobs/interviews/interview_exception_tracking.py +68 -10
  21. edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
  22. edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
  23. edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
  24. edsl/jobs/tasks/QuestionTaskCreator.py +2 -3
  25. edsl/jobs/tasks/TaskCreators.py +8 -2
  26. edsl/jobs/tasks/TaskHistory.py +145 -1
  27. edsl/language_models/LanguageModel.py +133 -75
  28. edsl/language_models/ModelList.py +8 -2
  29. edsl/language_models/registry.py +16 -0
  30. edsl/questions/QuestionFunctional.py +8 -7
  31. edsl/questions/QuestionMultipleChoice.py +15 -12
  32. edsl/questions/QuestionNumerical.py +0 -1
  33. edsl/questions/descriptors.py +6 -4
  34. edsl/results/DatasetExportMixin.py +185 -78
  35. edsl/results/Result.py +13 -11
  36. edsl/results/Results.py +19 -16
  37. edsl/results/ResultsToolsMixin.py +1 -1
  38. edsl/scenarios/Scenario.py +14 -0
  39. edsl/scenarios/ScenarioList.py +59 -21
  40. edsl/scenarios/ScenarioListExportMixin.py +16 -5
  41. edsl/scenarios/ScenarioListPdfMixin.py +3 -0
  42. edsl/surveys/Survey.py +11 -8
  43. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/METADATA +4 -2
  44. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/RECORD +46 -44
  45. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/LICENSE +0 -0
  46. {edsl-0.1.30.dev5.dist-info → edsl-0.1.31.dist-info}/WHEEL +0 -0
edsl/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.30.dev5"
1
+ __version__ = "0.1.31"
@@ -18,7 +18,12 @@ class InvigilatorAI(PromptConstructorMixin, InvigilatorBase):
18
18
  """An invigilator that uses an AI model to answer questions."""
19
19
 
20
20
  async def async_answer_question(self) -> AgentResponseDict:
21
- """Answer a question using the AI model."""
21
+ """Answer a question using the AI model.
22
+
23
+ >>> i = InvigilatorAI.example()
24
+ >>> i.answer_question()
25
+ {'message': '{"answer": "SPAM!"}'}
26
+ """
22
27
  params = self.get_prompts() | {"iteration": self.iteration}
23
28
  raw_response = await self.async_get_response(**params)
24
29
  data = {
@@ -29,6 +34,7 @@ class InvigilatorAI(PromptConstructorMixin, InvigilatorBase):
29
34
  "raw_model_response": raw_response["raw_model_response"],
30
35
  }
31
36
  response = self._format_raw_response(**data)
37
+ # breakpoint()
32
38
  return AgentResponseDict(**response)
33
39
 
34
40
  async def async_get_response(
@@ -97,7 +103,6 @@ class InvigilatorAI(PromptConstructorMixin, InvigilatorBase):
97
103
  answer = question._translate_answer_code_to_answer(
98
104
  response["answer"], combined_dict
99
105
  )
100
- # breakpoint()
101
106
  data = {
102
107
  "answer": answer,
103
108
  "comment": response.get(
@@ -275,8 +275,25 @@ class PromptConstructorMixin:
275
275
  if (new_question := question.split("_comment")[0]) in d:
276
276
  d[new_question].comment = answer
277
277
 
278
+ question_data = self.question.data.copy()
279
+
280
+ # check to see if question_options is actually a string
281
+ if "question_options" in question_data:
282
+ if isinstance(self.question.data["question_options"], str):
283
+ from jinja2 import Environment, meta
284
+
285
+ env = Environment()
286
+ parsed_content = env.parse(self.question.data["question_options"])
287
+ question_option_key = list(
288
+ meta.find_undeclared_variables(parsed_content)
289
+ )[0]
290
+ question_data["question_options"] = self.scenario.get(
291
+ question_option_key
292
+ )
293
+
294
+ # breakpoint()
278
295
  rendered_instructions = question_prompt.render(
279
- self.question.data | self.scenario | d | {"agent": self.agent}
296
+ question_data | self.scenario | d | {"agent": self.agent}
280
297
  )
281
298
 
282
299
  undefined_template_variables = (
edsl/config.py CHANGED
@@ -65,6 +65,10 @@ CONFIG_MAP = {
65
65
  # "default": None,
66
66
  # "info": "This env var holds your Anthropic API key (https://www.anthropic.com/).",
67
67
  # },
68
+ # "GROQ_API_KEY": {
69
+ # "default": None,
70
+ # "info": "This env var holds your GROQ API key (https://console.groq.com/login).",
71
+ # },
68
72
  }
69
73
 
70
74
 
edsl/conjure/Conjure.py CHANGED
@@ -35,6 +35,12 @@ class Conjure:
35
35
  # The __init__ method in Conjure won't be called because __new__ returns a different class instance.
36
36
  pass
37
37
 
38
+ @classmethod
39
+ def example(cls):
40
+ from edsl.conjure.InputData import InputDataABC
41
+
42
+ return InputDataABC.example()
43
+
38
44
 
39
45
  if __name__ == "__main__":
40
46
  pass
edsl/coop/coop.py CHANGED
@@ -465,6 +465,7 @@ class Coop:
465
465
  description: Optional[str] = None,
466
466
  status: RemoteJobStatus = "queued",
467
467
  visibility: Optional[VisibilityType] = "unlisted",
468
+ iterations: Optional[int] = 1,
468
469
  ) -> dict:
469
470
  """
470
471
  Send a remote inference job to the server.
@@ -473,6 +474,7 @@ class Coop:
473
474
  :param optional description: A description for this entry in the remote cache.
474
475
  :param status: The status of the job. Should be 'queued', unless you are debugging.
475
476
  :param visibility: The visibility of the cache entry.
477
+ :param iterations: The number of times to run each interview.
476
478
 
477
479
  >>> job = Jobs.example()
478
480
  >>> coop.remote_inference_create(job=job, description="My job")
@@ -488,6 +490,7 @@ class Coop:
488
490
  ),
489
491
  "description": description,
490
492
  "status": status,
493
+ "iterations": iterations,
491
494
  "visibility": visibility,
492
495
  "version": self._edsl_version,
493
496
  },
@@ -498,6 +501,7 @@ class Coop:
498
501
  "uuid": response_json.get("jobs_uuid"),
499
502
  "description": response_json.get("description"),
500
503
  "status": response_json.get("status"),
504
+ "iterations": response_json.get("iterations"),
501
505
  "visibility": response_json.get("visibility"),
502
506
  "version": self._edsl_version,
503
507
  }
edsl/coop/utils.py CHANGED
@@ -2,6 +2,7 @@ from edsl import (
2
2
  Agent,
3
3
  AgentList,
4
4
  Cache,
5
+ ModelList,
5
6
  Notebook,
6
7
  Results,
7
8
  Scenario,
@@ -9,6 +10,7 @@ from edsl import (
9
10
  Survey,
10
11
  Study,
11
12
  )
13
+ from edsl.language_models import LanguageModel
12
14
  from edsl.questions import QuestionBase
13
15
  from typing import Literal, Optional, Type, Union
14
16
 
@@ -16,6 +18,8 @@ EDSLObject = Union[
16
18
  Agent,
17
19
  AgentList,
18
20
  Cache,
21
+ LanguageModel,
22
+ ModelList,
19
23
  Notebook,
20
24
  Type[QuestionBase],
21
25
  Results,
@@ -29,6 +33,8 @@ ObjectType = Literal[
29
33
  "agent",
30
34
  "agent_list",
31
35
  "cache",
36
+ "model",
37
+ "model_list",
32
38
  "notebook",
33
39
  "question",
34
40
  "results",
@@ -62,8 +68,10 @@ class ObjectRegistry:
62
68
  {"object_type": "agent", "edsl_class": Agent},
63
69
  {"object_type": "agent_list", "edsl_class": AgentList},
64
70
  {"object_type": "cache", "edsl_class": Cache},
65
- {"object_type": "question", "edsl_class": QuestionBase},
71
+ {"object_type": "model", "edsl_class": LanguageModel},
72
+ {"object_type": "model_list", "edsl_class": ModelList},
66
73
  {"object_type": "notebook", "edsl_class": Notebook},
74
+ {"object_type": "question", "edsl_class": QuestionBase},
67
75
  {"object_type": "results", "edsl_class": Results},
68
76
  {"object_type": "scenario", "edsl_class": Scenario},
69
77
  {"object_type": "scenario_list", "edsl_class": ScenarioList},
edsl/data/CacheHandler.py CHANGED
@@ -41,7 +41,7 @@ class CacheHandler:
41
41
  old_data = self.from_old_sqlite_cache()
42
42
  self.cache.add_from_dict(old_data)
43
43
 
44
- def create_cache_directory(self) -> None:
44
+ def create_cache_directory(self, notify = False) -> None:
45
45
  """
46
46
  Create the cache directory if one is required and it does not exist.
47
47
  """
@@ -49,9 +49,8 @@ class CacheHandler:
49
49
  dir_path = os.path.dirname(path)
50
50
  if dir_path and not os.path.exists(dir_path):
51
51
  os.makedirs(dir_path)
52
- import warnings
53
-
54
- warnings.warn(f"Created cache directory: {dir_path}")
52
+ if notify:
53
+ print(f"Created cache directory: {dir_path}")
55
54
 
56
55
  def gen_cache(self) -> Cache:
57
56
  """
edsl/enums.py CHANGED
@@ -59,6 +59,7 @@ class InferenceServiceType(EnumWithChecks):
59
59
  GOOGLE = "google"
60
60
  TEST = "test"
61
61
  ANTHROPIC = "anthropic"
62
+ GROQ = "groq"
62
63
 
63
64
 
64
65
  service_to_api_keyname = {
@@ -69,6 +70,7 @@ service_to_api_keyname = {
69
70
  InferenceServiceType.GOOGLE.value: "GOOGLE_API_KEY",
70
71
  InferenceServiceType.TEST.value: "TBD",
71
72
  InferenceServiceType.ANTHROPIC.value: "ANTHROPIC_API_KEY",
73
+ InferenceServiceType.GROQ.value: "GROQ_API_KEY",
72
74
  }
73
75
 
74
76
 
@@ -2,102 +2,17 @@ import aiohttp
2
2
  import json
3
3
  import requests
4
4
  from typing import Any, List
5
- from edsl.inference_services.InferenceServiceABC import InferenceServiceABC
5
+
6
+ # from edsl.inference_services.InferenceServiceABC import InferenceServiceABC
6
7
  from edsl.language_models import LanguageModel
7
8
 
9
+ from edsl.inference_services.OpenAIService import OpenAIService
10
+
8
11
 
9
- class DeepInfraService(InferenceServiceABC):
12
+ class DeepInfraService(OpenAIService):
10
13
  """DeepInfra service class."""
11
14
 
12
15
  _inference_service_ = "deep_infra"
13
16
  _env_key_name_ = "DEEP_INFRA_API_KEY"
14
-
17
+ _base_url_ = "https://api.deepinfra.com/v1/openai"
15
18
  _models_list_cache: List[str] = []
16
-
17
- @classmethod
18
- def available(cls):
19
- text_models = cls.full_details_available()
20
- return [m["model_name"] for m in text_models]
21
-
22
- @classmethod
23
- def full_details_available(cls, verbose=False):
24
- if not cls._models_list_cache:
25
- url = "https://api.deepinfra.com/models/list"
26
- response = requests.get(url)
27
- if response.status_code == 200:
28
- text_generation_models = [
29
- r for r in response.json() if r["type"] == "text-generation"
30
- ]
31
- cls._models_list_cache = text_generation_models
32
-
33
- from rich import print_json
34
- import json
35
-
36
- if verbose:
37
- print_json(json.dumps(text_generation_models))
38
- return text_generation_models
39
- else:
40
- return f"Failed to fetch data: Status code {response.status_code}"
41
- else:
42
- return cls._models_list_cache
43
-
44
- @classmethod
45
- def create_model(cls, model_name: str, model_class_name=None) -> LanguageModel:
46
- base_url = "https://api.deepinfra.com/v1/inference/"
47
- if model_class_name is None:
48
- model_class_name = cls.to_class_name(model_name)
49
- url = f"{base_url}{model_name}"
50
-
51
- class LLM(LanguageModel):
52
- _inference_service_ = cls._inference_service_
53
- _model_ = model_name
54
- _parameters_ = {
55
- "temperature": 0.7,
56
- "top_p": 0.2,
57
- "top_k": 0.1,
58
- "max_new_tokens": 512,
59
- "stopSequences": [],
60
- }
61
-
62
- async def async_execute_model_call(
63
- self, user_prompt: str, system_prompt: str = ""
64
- ) -> dict[str, Any]:
65
- self.url = url
66
- headers = {
67
- "Content-Type": "application/json",
68
- "Authorization": f"bearer {self.api_token}",
69
- }
70
- # don't mess w/ the newlines
71
- data = {
72
- "input": f"""
73
- [INST]<<SYS>>
74
- {system_prompt}
75
- <<SYS>>{user_prompt}[/INST]
76
- """,
77
- "stream": False,
78
- "temperature": self.temperature,
79
- "top_p": self.top_p,
80
- "top_k": self.top_k,
81
- "max_new_tokens": self.max_new_tokens,
82
- }
83
- async with aiohttp.ClientSession() as session:
84
- async with session.post(
85
- self.url, headers=headers, data=json.dumps(data)
86
- ) as response:
87
- raw_response_text = await response.text()
88
- return json.loads(raw_response_text)
89
-
90
- def parse_response(self, raw_response: dict[str, Any]) -> str:
91
- if "results" not in raw_response:
92
- raise Exception(
93
- f"Deep Infra response does not contain 'results' key: {raw_response}"
94
- )
95
- if "generated_text" not in raw_response["results"][0]:
96
- raise Exception(
97
- f"Deep Infra response does not contain 'generate_text' key: {raw_response['results'][0]}"
98
- )
99
- return raw_response["results"][0]["generated_text"]
100
-
101
- LLM.__name__ = model_class_name
102
-
103
- return LLM
@@ -0,0 +1,18 @@
1
+ from typing import Any, List
2
+ from edsl.inference_services.OpenAIService import OpenAIService
3
+
4
+ import groq
5
+
6
+
7
+ class GroqService(OpenAIService):
8
+ """Groq service class."""
9
+
10
+ _inference_service_ = "groq"
11
+ _env_key_name_ = "GROQ_API_KEY"
12
+
13
+ _sync_client_ = groq.Groq
14
+ _async_client_ = groq.AsyncGroq
15
+
16
+ # _base_url_ = "https://api.deepinfra.com/v1/openai"
17
+ _base_url_ = None
18
+ _models_list_cache: List[str] = []
@@ -15,15 +15,19 @@ class InferenceServicesCollection:
15
15
  cls.added_models[service_name].append(model_name)
16
16
 
17
17
  @staticmethod
18
- def _get_service_available(service) -> list[str]:
18
+ def _get_service_available(service, warn: bool = False) -> list[str]:
19
19
  from_api = True
20
20
  try:
21
21
  service_models = service.available()
22
22
  except Exception as e:
23
- warnings.warn(
24
- f"Error getting models for {service._inference_service_}. Relying on cache.",
25
- UserWarning,
26
- )
23
+ if warn:
24
+ warnings.warn(
25
+ f"""Error getting models for {service._inference_service_}.
26
+ Check that you have properly stored your Expected Parrot API key and activated remote inference, or stored your own API keys for the language models that you want to use.
27
+ See https://docs.expectedparrot.com/en/latest/api_keys.html for instructions on storing API keys.
28
+ Relying on cache.""",
29
+ UserWarning,
30
+ )
27
31
  from edsl.inference_services.models_available_cache import models_available
28
32
 
29
33
  service_models = models_available.get(service._inference_service_, [])
@@ -57,4 +61,8 @@ class InferenceServicesCollection:
57
61
  if service_name is None or service_name == service._inference_service_:
58
62
  return service.create_model(model_name)
59
63
 
64
+ # if model_name == "test":
65
+ # from edsl.language_models import LanguageModel
66
+ # return LanguageModel(test = True)
67
+
60
68
  raise Exception(f"Model {model_name} not found in any of the services")
@@ -1,6 +1,9 @@
1
1
  from typing import Any, List
2
2
  import re
3
- from openai import AsyncOpenAI
3
+ import os
4
+
5
+ # from openai import AsyncOpenAI
6
+ import openai
4
7
 
5
8
  from edsl.inference_services.InferenceServiceABC import InferenceServiceABC
6
9
  from edsl.language_models import LanguageModel
@@ -12,6 +15,22 @@ class OpenAIService(InferenceServiceABC):
12
15
 
13
16
  _inference_service_ = "openai"
14
17
  _env_key_name_ = "OPENAI_API_KEY"
18
+ _base_url_ = None
19
+
20
+ _sync_client_ = openai.OpenAI
21
+ _async_client_ = openai.AsyncOpenAI
22
+
23
+ @classmethod
24
+ def sync_client(cls):
25
+ return cls._sync_client_(
26
+ api_key=os.getenv(cls._env_key_name_), base_url=cls._base_url_
27
+ )
28
+
29
+ @classmethod
30
+ def async_client(cls):
31
+ return cls._async_client_(
32
+ api_key=os.getenv(cls._env_key_name_), base_url=cls._base_url_
33
+ )
15
34
 
16
35
  # TODO: Make this a coop call
17
36
  model_exclude_list = [
@@ -31,16 +50,24 @@ class OpenAIService(InferenceServiceABC):
31
50
  ]
32
51
  _models_list_cache: List[str] = []
33
52
 
53
+ @classmethod
54
+ def get_model_list(cls):
55
+ raw_list = cls.sync_client().models.list()
56
+ if hasattr(raw_list, "data"):
57
+ return raw_list.data
58
+ else:
59
+ return raw_list
60
+
34
61
  @classmethod
35
62
  def available(cls) -> List[str]:
36
- from openai import OpenAI
63
+ # from openai import OpenAI
37
64
 
38
65
  if not cls._models_list_cache:
39
66
  try:
40
- client = OpenAI()
67
+ # client = OpenAI(api_key = os.getenv(cls._env_key_name_), base_url = cls._base_url_)
41
68
  cls._models_list_cache = [
42
69
  m.id
43
- for m in client.models.list()
70
+ for m in cls.get_model_list()
44
71
  if m.id not in cls.model_exclude_list
45
72
  ]
46
73
  except Exception as e:
@@ -78,15 +105,24 @@ class OpenAIService(InferenceServiceABC):
78
105
  "top_logprobs": 3,
79
106
  }
80
107
 
108
+ def sync_client(self):
109
+ return cls.sync_client()
110
+
111
+ def async_client(self):
112
+ return cls.async_client()
113
+
81
114
  @classmethod
82
115
  def available(cls) -> list[str]:
83
- client = openai.OpenAI()
84
- return client.models.list()
116
+ # import openai
117
+ # client = openai.OpenAI(api_key = os.getenv(cls._env_key_name_), base_url = cls._base_url_)
118
+ # return client.models.list()
119
+ return cls.sync_client().models.list()
85
120
 
86
121
  def get_headers(self) -> dict[str, Any]:
87
- from openai import OpenAI
122
+ # from openai import OpenAI
88
123
 
89
- client = OpenAI()
124
+ # client = OpenAI(api_key = os.getenv(cls._env_key_name_), base_url = cls._base_url_)
125
+ client = self.sync_client()
90
126
  response = client.chat.completions.with_raw_response.create(
91
127
  messages=[
92
128
  {
@@ -124,8 +160,8 @@ class OpenAIService(InferenceServiceABC):
124
160
  encoded_image=None,
125
161
  ) -> dict[str, Any]:
126
162
  """Calls the OpenAI API and returns the API response."""
127
- content = [{"type": "text", "text": user_prompt}]
128
163
  if encoded_image:
164
+ content = [{"type": "text", "text": user_prompt}]
129
165
  content.append(
130
166
  {
131
167
  "type": "image_url",
@@ -134,21 +170,28 @@ class OpenAIService(InferenceServiceABC):
134
170
  },
135
171
  }
136
172
  )
137
- self.client = AsyncOpenAI()
138
- response = await self.client.chat.completions.create(
139
- model=self.model,
140
- messages=[
173
+ else:
174
+ content = user_prompt
175
+ # self.client = AsyncOpenAI(
176
+ # api_key = os.getenv(cls._env_key_name_),
177
+ # base_url = cls._base_url_
178
+ # )
179
+ client = self.async_client()
180
+ params = {
181
+ "model": self.model,
182
+ "messages": [
141
183
  {"role": "system", "content": system_prompt},
142
184
  {"role": "user", "content": content},
143
185
  ],
144
- temperature=self.temperature,
145
- max_tokens=self.max_tokens,
146
- top_p=self.top_p,
147
- frequency_penalty=self.frequency_penalty,
148
- presence_penalty=self.presence_penalty,
149
- logprobs=self.logprobs,
150
- top_logprobs=self.top_logprobs if self.logprobs else None,
151
- )
186
+ "temperature": self.temperature,
187
+ "max_tokens": self.max_tokens,
188
+ "top_p": self.top_p,
189
+ "frequency_penalty": self.frequency_penalty,
190
+ "presence_penalty": self.presence_penalty,
191
+ "logprobs": self.logprobs,
192
+ "top_logprobs": self.top_logprobs if self.logprobs else None,
193
+ }
194
+ response = await client.chat.completions.create(**params)
152
195
  return response.model_dump()
153
196
 
154
197
  @staticmethod
@@ -6,7 +6,8 @@ from edsl.inference_services.OpenAIService import OpenAIService
6
6
  from edsl.inference_services.AnthropicService import AnthropicService
7
7
  from edsl.inference_services.DeepInfraService import DeepInfraService
8
8
  from edsl.inference_services.GoogleService import GoogleService
9
+ from edsl.inference_services.GroqService import GroqService
9
10
 
10
11
  default = InferenceServicesCollection(
11
- [OpenAIService, AnthropicService, DeepInfraService, GoogleService]
12
+ [OpenAIService, AnthropicService, DeepInfraService, GoogleService, GroqService]
12
13
  )