edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. edsl/Base.py +169 -116
  2. edsl/__init__.py +14 -6
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +358 -146
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +88 -36
  7. edsl/agents/InvigilatorBase.py +59 -70
  8. edsl/agents/PromptConstructor.py +117 -219
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionOptionProcessor.py +172 -0
  11. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  12. edsl/agents/__init__.py +0 -1
  13. edsl/agents/prompt_helpers.py +3 -3
  14. edsl/config.py +22 -2
  15. edsl/conversation/car_buying.py +2 -1
  16. edsl/coop/CoopFunctionsMixin.py +15 -0
  17. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  18. edsl/coop/PriceFetcher.py +1 -1
  19. edsl/coop/coop.py +104 -42
  20. edsl/coop/utils.py +14 -14
  21. edsl/data/Cache.py +21 -14
  22. edsl/data/CacheEntry.py +12 -15
  23. edsl/data/CacheHandler.py +33 -12
  24. edsl/data/__init__.py +4 -3
  25. edsl/data_transfer_models.py +2 -1
  26. edsl/enums.py +20 -0
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +12 -0
  29. edsl/exceptions/inference_services.py +5 -0
  30. edsl/exceptions/questions.py +24 -6
  31. edsl/exceptions/scenarios.py +7 -0
  32. edsl/inference_services/AnthropicService.py +0 -3
  33. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  34. edsl/inference_services/AvailableModelFetcher.py +209 -0
  35. edsl/inference_services/AwsBedrock.py +0 -2
  36. edsl/inference_services/AzureAI.py +0 -2
  37. edsl/inference_services/GoogleService.py +2 -11
  38. edsl/inference_services/InferenceServiceABC.py +18 -85
  39. edsl/inference_services/InferenceServicesCollection.py +105 -80
  40. edsl/inference_services/MistralAIService.py +0 -3
  41. edsl/inference_services/OpenAIService.py +1 -4
  42. edsl/inference_services/PerplexityService.py +0 -3
  43. edsl/inference_services/ServiceAvailability.py +135 -0
  44. edsl/inference_services/TestService.py +11 -8
  45. edsl/inference_services/data_structures.py +62 -0
  46. edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
  47. edsl/jobs/Answers.py +1 -14
  48. edsl/jobs/FetchInvigilator.py +40 -0
  49. edsl/jobs/InterviewTaskManager.py +98 -0
  50. edsl/jobs/InterviewsConstructor.py +48 -0
  51. edsl/jobs/Jobs.py +102 -243
  52. edsl/jobs/JobsChecks.py +35 -10
  53. edsl/jobs/JobsComponentConstructor.py +189 -0
  54. edsl/jobs/JobsPrompts.py +5 -3
  55. edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
  56. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  57. edsl/jobs/RequestTokenEstimator.py +30 -0
  58. edsl/jobs/buckets/BucketCollection.py +44 -3
  59. edsl/jobs/buckets/TokenBucket.py +53 -21
  60. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  61. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  62. edsl/jobs/decorators.py +35 -0
  63. edsl/jobs/interviews/Interview.py +77 -380
  64. edsl/jobs/jobs_status_enums.py +9 -0
  65. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  66. edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
  67. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  68. edsl/jobs/tasks/TaskHistory.py +14 -15
  69. edsl/jobs/tasks/task_status_enum.py +0 -2
  70. edsl/language_models/ComputeCost.py +63 -0
  71. edsl/language_models/LanguageModel.py +137 -234
  72. edsl/language_models/ModelList.py +11 -13
  73. edsl/language_models/PriceManager.py +127 -0
  74. edsl/language_models/RawResponseHandler.py +106 -0
  75. edsl/language_models/ServiceDataSources.py +0 -0
  76. edsl/language_models/__init__.py +0 -1
  77. edsl/language_models/key_management/KeyLookup.py +63 -0
  78. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  79. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  80. edsl/language_models/key_management/__init__.py +0 -0
  81. edsl/language_models/key_management/models.py +131 -0
  82. edsl/language_models/registry.py +49 -59
  83. edsl/language_models/repair.py +2 -2
  84. edsl/language_models/utilities.py +5 -4
  85. edsl/notebooks/Notebook.py +19 -14
  86. edsl/notebooks/NotebookToLaTeX.py +142 -0
  87. edsl/prompts/Prompt.py +29 -39
  88. edsl/questions/AnswerValidatorMixin.py +47 -2
  89. edsl/questions/ExceptionExplainer.py +77 -0
  90. edsl/questions/HTMLQuestion.py +103 -0
  91. edsl/questions/LoopProcessor.py +149 -0
  92. edsl/questions/QuestionBase.py +37 -192
  93. edsl/questions/QuestionBaseGenMixin.py +52 -48
  94. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  95. edsl/questions/QuestionCheckBox.py +1 -1
  96. edsl/questions/QuestionExtract.py +1 -1
  97. edsl/questions/QuestionFreeText.py +1 -2
  98. edsl/questions/QuestionList.py +3 -5
  99. edsl/questions/QuestionMatrix.py +265 -0
  100. edsl/questions/QuestionMultipleChoice.py +66 -22
  101. edsl/questions/QuestionNumerical.py +1 -3
  102. edsl/questions/QuestionRank.py +6 -16
  103. edsl/questions/ResponseValidatorABC.py +37 -11
  104. edsl/questions/ResponseValidatorFactory.py +28 -0
  105. edsl/questions/SimpleAskMixin.py +4 -3
  106. edsl/questions/__init__.py +1 -0
  107. edsl/questions/derived/QuestionLinearScale.py +6 -3
  108. edsl/questions/derived/QuestionTopK.py +1 -1
  109. edsl/questions/descriptors.py +17 -3
  110. edsl/questions/question_registry.py +1 -1
  111. edsl/questions/templates/matrix/__init__.py +1 -0
  112. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  113. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  114. edsl/results/CSSParameterizer.py +1 -1
  115. edsl/results/Dataset.py +170 -7
  116. edsl/results/DatasetExportMixin.py +224 -302
  117. edsl/results/DatasetTree.py +28 -8
  118. edsl/results/MarkdownToDocx.py +122 -0
  119. edsl/results/MarkdownToPDF.py +111 -0
  120. edsl/results/Result.py +192 -206
  121. edsl/results/Results.py +120 -113
  122. edsl/results/ResultsExportMixin.py +2 -0
  123. edsl/results/Selector.py +23 -13
  124. edsl/results/TableDisplay.py +98 -171
  125. edsl/results/TextEditor.py +50 -0
  126. edsl/results/__init__.py +1 -1
  127. edsl/results/smart_objects.py +96 -0
  128. edsl/results/table_data_class.py +12 -0
  129. edsl/results/table_renderers.py +118 -0
  130. edsl/scenarios/ConstructDownloadLink.py +109 -0
  131. edsl/scenarios/DirectoryScanner.py +96 -0
  132. edsl/scenarios/DocumentChunker.py +102 -0
  133. edsl/scenarios/DocxScenario.py +16 -0
  134. edsl/scenarios/FileStore.py +118 -239
  135. edsl/scenarios/PdfExtractor.py +40 -0
  136. edsl/scenarios/Scenario.py +90 -193
  137. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  138. edsl/scenarios/ScenarioJoin.py +10 -6
  139. edsl/scenarios/ScenarioList.py +383 -240
  140. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  141. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  142. edsl/scenarios/ScenarioSelector.py +156 -0
  143. edsl/scenarios/__init__.py +1 -2
  144. edsl/scenarios/file_methods.py +85 -0
  145. edsl/scenarios/handlers/__init__.py +13 -0
  146. edsl/scenarios/handlers/csv.py +38 -0
  147. edsl/scenarios/handlers/docx.py +76 -0
  148. edsl/scenarios/handlers/html.py +37 -0
  149. edsl/scenarios/handlers/json.py +111 -0
  150. edsl/scenarios/handlers/latex.py +5 -0
  151. edsl/scenarios/handlers/md.py +51 -0
  152. edsl/scenarios/handlers/pdf.py +68 -0
  153. edsl/scenarios/handlers/png.py +39 -0
  154. edsl/scenarios/handlers/pptx.py +105 -0
  155. edsl/scenarios/handlers/py.py +294 -0
  156. edsl/scenarios/handlers/sql.py +313 -0
  157. edsl/scenarios/handlers/sqlite.py +149 -0
  158. edsl/scenarios/handlers/txt.py +33 -0
  159. edsl/study/ObjectEntry.py +1 -1
  160. edsl/study/SnapShot.py +1 -1
  161. edsl/study/Study.py +5 -12
  162. edsl/surveys/ConstructDAG.py +92 -0
  163. edsl/surveys/EditSurvey.py +221 -0
  164. edsl/surveys/InstructionHandler.py +100 -0
  165. edsl/surveys/MemoryManagement.py +72 -0
  166. edsl/surveys/Rule.py +5 -4
  167. edsl/surveys/RuleCollection.py +25 -27
  168. edsl/surveys/RuleManager.py +172 -0
  169. edsl/surveys/Simulator.py +75 -0
  170. edsl/surveys/Survey.py +199 -771
  171. edsl/surveys/SurveyCSS.py +20 -8
  172. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  173. edsl/surveys/SurveyToApp.py +141 -0
  174. edsl/surveys/__init__.py +4 -2
  175. edsl/surveys/descriptors.py +6 -2
  176. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  177. edsl/surveys/instructions/Instruction.py +4 -13
  178. edsl/surveys/instructions/InstructionCollection.py +11 -6
  179. edsl/templates/error_reporting/interview_details.html +1 -1
  180. edsl/templates/error_reporting/report.html +1 -1
  181. edsl/tools/plotting.py +1 -1
  182. edsl/utilities/PrettyList.py +56 -0
  183. edsl/utilities/is_notebook.py +18 -0
  184. edsl/utilities/is_valid_variable_name.py +11 -0
  185. edsl/utilities/remove_edsl_version.py +24 -0
  186. edsl/utilities/utilities.py +35 -23
  187. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
  188. edsl-0.1.39.dev2.dist-info/RECORD +352 -0
  189. edsl/language_models/KeyLookup.py +0 -30
  190. edsl/language_models/unused/ReplicateBase.py +0 -83
  191. edsl/results/ResultsDBMixin.py +0 -238
  192. edsl-0.1.39.dev1.dist-info/RECORD +0 -277
  193. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
  194. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,127 @@
1
+ from typing import Dict, Tuple, Optional, Union
2
+
3
+
4
class PriceManager:
    """Singleton holding model prices and converting token usage into cost.

    Prices are stored in a dictionary keyed by ``(inference_service, model)``.
    ``calculate_cost`` reads the ``"input"`` and ``"output"`` entries of a
    price record and divides the token counts by their ``"one_usd_buys"``
    values; the string ``"infinity"`` marks a side that costs nothing.
    """

    _instance = None
    _price_lookup: Dict[Tuple[str, str], Dict] = {}
    _is_initialized = False

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = super(PriceManager, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        """Load prices the first time the singleton is constructed."""
        # __init__ runs on every PriceManager() call; only the first one
        # should trigger a price fetch.
        if self._is_initialized:
            return
        self._is_initialized = True
        self.refresh_prices()

    def refresh_prices(self) -> None:
        """
        Fetch fresh prices from the Coop service and update the internal price lookup.

        A failure while fetching is reported on stdout and the previously
        stored prices are kept.
        """
        from edsl.coop import Coop

        client = Coop()
        try:
            fetched = client.fetch_prices()
        except Exception as e:
            print(f"Error fetching prices: {e}")
        else:
            self._price_lookup = fetched

    def get_price(self, inference_service: str, model: str) -> Optional[Dict]:
        """
        Get the price information for a specific service and model combination.

        Args:
            inference_service (str): The name of the inference service
            model (str): The model identifier

        Returns:
            Optional[Dict]: Price information if found, None otherwise
        """
        return self._price_lookup.get((inference_service, model))

    def get_all_prices(self) -> Dict[Tuple[str, str], Dict]:
        """
        Get the complete price lookup dictionary.

        Returns:
            Dict[Tuple[str, str], Dict]: A shallow copy of the price lookup
        """
        return self._price_lookup.copy()

    @staticmethod
    def _side_cost(token_count: int, one_usd_buys) -> Union[int, float]:
        """Return the cost of ``token_count`` tokens at the given inverse price."""
        if one_usd_buys == "infinity":
            return 0
        return token_count / float(one_usd_buys)

    def calculate_cost(
        self,
        inference_service: str,
        model: str,
        usage: Dict[str, Union[str, int]],
        input_token_name: str,
        output_token_name: str,
    ) -> Union[float, str]:
        """
        Calculate the total cost for a model usage based on input and output tokens.

        Problems are reported through the return value rather than raised.

        Args:
            inference_service (str): The inference service identifier
            model (str): The model identifier
            usage (Dict[str, Union[str, int]]): Dictionary containing token usage information
            input_token_name (str): Key name for input tokens in the usage dict
            output_token_name (str): Key name for output tokens in the usage dict

        Returns:
            Union[float, str]: Total cost if calculation successful, error message string if not
        """
        relevant_prices = self.get_price(inference_service, model)
        if relevant_prices is None:
            return f"Could not find price for model {model} in the price lookup."

        # Token counts may arrive as strings, hence the int() conversion.
        try:
            input_tokens = int(usage[input_token_name])
            output_tokens = int(usage[output_token_name])
        except Exception as e:
            return f"Could not fetch tokens from model response: {e}"

        try:
            inverse_output_price = relevant_prices["output"]["one_usd_buys"]
            inverse_input_price = relevant_prices["input"]["one_usd_buys"]
        except Exception as e:
            base_message = f"Could not fetch prices from {relevant_prices} - {e}"
            for side in ("output", "input"):
                if side not in relevant_prices:
                    return f"{base_message}; Missing '{side}' key."
            return base_message

        try:
            input_cost = self._side_cost(input_tokens, inverse_input_price)
        except Exception as e:
            return f"Could not compute input price - {e}."

        try:
            output_cost = self._side_cost(output_tokens, inverse_output_price)
        except Exception as e:
            return f"Could not compute output price - {e}"

        return input_cost + output_cost

    @property
    def is_initialized(self) -> bool:
        """
        Check if the PriceManager has been initialized.

        Returns:
            bool: True if initialized, False otherwise
        """
        return self._is_initialized
@@ -0,0 +1,106 @@
1
+ import json
2
+ from typing import Optional, Any, List
3
+ from edsl.exceptions.language_models import LanguageModelBadResponseError
4
+
5
+ from json_repair import repair_json
6
+
7
+
8
+ def _extract_item_from_raw_response(data, sequence):
9
+ if isinstance(data, str):
10
+ try:
11
+ data = json.loads(data)
12
+ except json.JSONDecodeError as e:
13
+ return data
14
+ current_data = data
15
+ for i, key in enumerate(sequence):
16
+ try:
17
+ if isinstance(current_data, (list, tuple)):
18
+ if not isinstance(key, int):
19
+ raise TypeError(
20
+ f"Expected integer index for sequence at position {i}, got {type(key).__name__}"
21
+ )
22
+ if key < 0 or key >= len(current_data):
23
+ raise IndexError(
24
+ f"Index {key} out of range for sequence of length {len(current_data)} at position {i}"
25
+ )
26
+ elif isinstance(current_data, dict):
27
+ if key not in current_data:
28
+ raise KeyError(
29
+ f"Key '{key}' not found in dictionary at position {i}"
30
+ )
31
+ else:
32
+ raise TypeError(
33
+ f"Cannot index into {type(current_data).__name__} at position {i}. Full response is: {data} of type {type(data)}. Key sequence is: {sequence}"
34
+ )
35
+
36
+ current_data = current_data[key]
37
+ except Exception as e:
38
+ path = " -> ".join(map(str, sequence[: i + 1]))
39
+ if "error" in data:
40
+ msg = data["error"]
41
+ else:
42
+ msg = f"Error accessing path: {path}. {str(e)}. Full response is: '{data}'"
43
+ raise LanguageModelBadResponseError(message=msg, response_json=data)
44
+ if isinstance(current_data, str):
45
+ return current_data.strip()
46
+ else:
47
+ return current_data
48
+
49
+
50
class RawResponseHandler:
    """Class to handle raw responses from language models.

    ``key_sequence`` is the path to the generated text inside a raw response
    and ``usage_sequence`` (optional) is the path to the usage information.
    """

    def __init__(self, key_sequence: list, usage_sequence: Optional[list] = None):
        self.key_sequence = key_sequence
        self.usage_sequence = usage_sequence

    def get_generated_token_string(self, raw_response):
        """Return the item found at ``key_sequence`` in ``raw_response``."""
        return _extract_item_from_raw_response(raw_response, self.key_sequence)

    def get_usage_dict(self, raw_response):
        """Return the item at ``usage_sequence``, or ``{}`` when no path is configured."""
        if self.usage_sequence is None:
            return {}
        return _extract_item_from_raw_response(raw_response, self.usage_sequence)

    def parse_response(self, raw_response: dict[str, Any]) -> "EDSLOutput":
        """Parses the API response and returns the response text.

        The generated text is split at its last newline: everything before it
        is converted into the answer and everything after it becomes the
        comment. Text without a newline is treated as an answer with no
        comment.
        """
        from edsl.data_transfer_models import EDSLOutput

        generated_token_string = self.get_generated_token_string(raw_response)
        # The extracted item is only guaranteed to be a string when the
        # response held text at that path; coerce anything else (None, number,
        # dict, ...) so the string handling below cannot fail with
        # AttributeError.
        if not isinstance(generated_token_string, str):
            generated_token_string = str(generated_token_string)

        last_newline = generated_token_string.rfind("\n")

        if last_newline == -1:
            # There is no comment
            edsl_dict = {
                "answer": self.convert_answer(generated_token_string),
                "generated_tokens": generated_token_string,
                "comment": None,
            }
        else:
            edsl_dict = {
                "answer": self.convert_answer(generated_token_string[:last_newline]),
                "comment": generated_token_string[last_newline + 1 :].strip(),
                "generated_tokens": generated_token_string,
            }
        return EDSLOutput(**edsl_dict)

    @staticmethod
    def convert_answer(response_part):
        """Convert the answer portion of the generated text into a Python value.

        Returns ``None`` for the literal text ``"None"``; otherwise the text
        is passed through ``repair_json`` and decoded as JSON. The stripped
        text itself is returned when the repair yields an empty JSON string or
        the repaired text still fails to decode.
        """
        response_part = response_part.strip()

        if response_part == "None":
            return None

        repaired = repair_json(response_part)
        if repaired == '""':
            # it was a literal string
            return response_part

        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            # last resort
            return response_part
File without changes
@@ -1,3 +1,2 @@
1
1
  from edsl.language_models.LanguageModel import LanguageModel
2
2
  from edsl.language_models.registry import Model
3
- from edsl.language_models.KeyLookup import KeyLookup
@@ -0,0 +1,63 @@
1
+ from collections import UserDict
2
+ from dataclasses import asdict
3
+ from edsl.enums import service_to_api_keyname
4
+
5
+ from edsl.language_models.key_management.models import LanguageModelInput
6
+
7
+
8
class KeyLookup(UserDict):
    """A class for looking up API keys and related configuration.

    Maps a service name to a ``LanguageModelInput``.

    >>> from edsl.language_models.key_management.models import LanguageModelInput
    >>> lookup = KeyLookup()
    >>> lm_input = LanguageModelInput.example()
    >>> lookup['test'] = lm_input
    >>> lookup.to_dict()['test']['api_token']
    'sk-abcd123'
    >>> restored = KeyLookup.from_dict(lookup.to_dict())
    >>> restored['test'].api_token
    'sk-abcd123'
    """

    def to_dict(self):
        """Return a plain-dict version, with each entry converted via ``asdict``.

        >>> kl = KeyLookup.example()
        >>> kl2 = KeyLookup.from_dict(kl.to_dict())
        >>> kl2 == kl
        True
        >>> kl2 is kl
        False
        """
        return {k: asdict(v) for k, v in self.data.items()}

    @classmethod
    def from_dict(cls, d):
        """Rebuild a ``KeyLookup`` from the output of ``to_dict``."""
        return cls({k: LanguageModelInput(**v) for k, v in d.items()})

    @classmethod
    def example(cls):
        """Return a small example lookup with ``test`` and ``openai`` entries."""
        return cls(
            {
                "test": LanguageModelInput.example(),
                "openai": LanguageModelInput.example(),
            }
        )

    def to_dot_env(self):
        """Return a string representation of the key lookup collection for a .env file.

        The ``test`` service is skipped. For every other service the RPM and
        TPM limits, the API token and (when present) the API id are written,
        one ``NAME=value`` pair per line.
        """
        lines = []
        for service, lm_input in self.items():
            if service == "test":
                continue
            service_upper = service.upper()
            lines.append(f"EDSL_SERVICE_RPM_{service_upper}={lm_input.rpm}")
            lines.append(f"EDSL_SERVICE_TPM_{service_upper}={lm_input.tpm}")
            key_name = service_to_api_keyname.get(service, service)
            if isinstance(key_name, (list, tuple)):
                # A service may be registered with several variable names;
                # calling .upper() on the list itself would fail.
                # NOTE(review): the first name is used here - confirm that is
                # the preferred one for every multi-name service.
                key_name = key_name[0] if key_name else service
            lines.append(f"{key_name.upper()}={lm_input.api_token}")
            if lm_input.api_id is not None:
                lines.append(f"{service_upper}_API_ID={lm_input.api_id}")
        return "\n".join(lines)


if __name__ == "__main__":
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -0,0 +1,273 @@
1
+ from typing import Optional, List
2
+ from collections import UserDict
3
+ import os
4
+ from functools import lru_cache
5
+ from dataclasses import dataclass, asdict
6
+
7
+ from edsl.enums import service_to_api_keyname
8
+ from edsl.exceptions.general import MissingAPIKeyError
9
+
10
+ from edsl.language_models.key_management.KeyLookup import KeyLookup
11
+
12
+ from edsl.language_models.key_management.models import (
13
+ APIKeyEntry,
14
+ LimitEntry,
15
+ APIIDEntry,
16
+ LanguageModelInput,
17
+ )
18
+
19
# For bedrock the secret access key is registered as the API key name and the
# access key id as the API id name.
service_to_api_keyname["bedrock"] = "AWS_SECRET_ACCESS_KEY"
service_to_api_id = {"bedrock": "AWS_ACCESS_KEY_ID"}

# Reverse index: API key variable name -> service. A service may be registered
# with either a single name or a list of names.
api_keyname_to_service = {}

for service, key in service_to_api_keyname.items():
    for k in key if isinstance(key, list) else [key]:
        api_keyname_to_service[k] = service

# Reverse index: API id variable name -> service.
api_id_to_service = {"AWS_ACCESS_KEY_ID": "bedrock"}
32
+
33
+
34
class KeyLookupBuilder:
    """Builds KeyLookup options.

    Key/value pairs are collected from the sources named in ``fetch_order``
    (``"env"``, ``"coop"`` or ``"config"``), sorted into rate limits, API keys
    and API ids, and finally assembled into a ``KeyLookup`` by ``build``.
    When the same variable name appears in several sources, the source listed
    later in ``fetch_order`` wins.

    >>> builder = KeyLookupBuilder(fetch_order=("config", "env"))
    >>> builder.DEFAULT_RPM
    10
    >>> builder.DEFAULT_TPM
    2000000
    >>> builder.fetch_order
    ('config', 'env')

    Test invalid fetch_order:
    >>> try:
    ...     KeyLookupBuilder(fetch_order=["config", "env"])  # Should be tuple
    ... except ValueError as e:
    ...     str(e)
    'fetch_order must be a tuple'

    Test service extraction:
    >>> builder.extract_service("EDSL_SERVICE_RPM_OPENAI")
    ('openai', 'rpm')
    """

    DEFAULT_RPM = 10
    DEFAULT_TPM = 2000000

    def __init__(self, fetch_order: Optional[tuple[str, ...]] = None):
        if fetch_order is None:
            self.fetch_order = ("config", "env")
        else:
            self.fetch_order = fetch_order

        if not isinstance(self.fetch_order, tuple):
            raise ValueError("fetch_order must be a tuple")

        self.limit_data = {}
        self.key_data = {}
        self.id_data = {}
        # Result of the first build() call; see build().
        self._cached_lookup = None
        self.process_key_value_pairs()

    @property
    def known_services(self):
        """Get the set of known services.

        >>> builder = KeyLookupBuilder()
        >>> isinstance(builder.known_services, set)
        True
        """
        return set(self.key_data.keys()) | set(self.limit_data.keys())

    def build(self) -> "KeyLookup":
        """Build a KeyLookup instance.

        The lookup is built once per builder and the same object is returned
        on later calls. The cache lives on the instance rather than in a
        module-wide ``lru_cache``, so it does not keep the builder alive.

        >>> builder = KeyLookupBuilder()
        >>> lookup = builder.build()
        >>> isinstance(lookup, KeyLookup)
        True
        >>> lookup['test'].api_token  # Test service should always exist
        'test'
        """
        cached = getattr(self, "_cached_lookup", None)
        if cached is not None:
            return cached

        d = {}
        for service in self.known_services:
            try:
                d[service] = self.get_language_model_input(service)
            except MissingAPIKeyError:
                # Services that only have limits configured are left out.
                pass

        d["test"] = LanguageModelInput(api_token="test", rpm=10, tpm=2000000)
        self._cached_lookup = KeyLookup(d)
        return self._cached_lookup

    def get_language_model_input(self, service: str) -> "LanguageModelInput":
        """Get the language model input for a given service.

        Raises:
            MissingAPIKeyError: If no API key is recorded for ``service``.

        >>> builder = KeyLookupBuilder()
        >>> try:
        ...     builder.get_language_model_input("nonexistent_service")
        ... except MissingAPIKeyError as e:
        ...     str(e)
        "No key found for service 'nonexistent_service'"
        """
        key_entries = self.key_data.get(service)
        if not key_entries:
            raise MissingAPIKeyError(f"No key found for service '{service}'")

        # Several variable names can map to one service, in which case more
        # than one entry is recorded. Use the most recently registered one
        # instead of leaving the variable unbound.
        # NOTE(review): confirm which entry should win when there are several.
        api_key_entry = key_entries[-1]

        id_entry = self.id_data.get(service)
        id_source = id_entry.source if id_entry is not None else None
        api_id = id_entry.value if id_entry is not None else None

        if (limit_entry := self.limit_data.get(service)) is None:
            limit_entry = LimitEntry(
                service=service,
                rpm=self.DEFAULT_RPM,
                tpm=self.DEFAULT_TPM,
                source="default",
            )

        # A stored entry may have only one of the two limits set.
        if limit_entry.rpm is None:
            limit_entry.rpm = self.DEFAULT_RPM
        if limit_entry.tpm is None:
            limit_entry.tpm = self.DEFAULT_TPM

        return LanguageModelInput(
            api_token=api_key_entry.value,
            rpm=int(limit_entry.rpm),
            tpm=int(limit_entry.tpm),
            api_id=api_id,
            token_source=api_key_entry.source,
            limit_source=limit_entry.source,
            id_source=id_source,
        )

    def __repr__(self):
        # NOTE(review): this includes the stored key entries, which may expose
        # API key values when the builder is printed or logged.
        return f"{type(self).__name__}(key_data={self.key_data}, limit_data={self.limit_data}, id_data={self.id_data})"

    def _os_env_key_value_pairs(self):
        """Return a snapshot of the process environment as a dict."""
        return dict(os.environ)

    def _coop_key_value_pairs(self):
        """Return the rate-limit config variables fetched through Coop."""
        from edsl.coop import Coop

        c = Coop()
        return dict(c.fetch_rate_limit_config_vars().items())

    def _config_key_value_pairs(self):
        """Return the items of the EDSL ``CONFIG`` object as a dict."""
        from edsl.config import CONFIG

        return dict(CONFIG.items())

    @staticmethod
    def extract_service(key: str) -> tuple[str, str]:
        """Extract the service and limit type from the key.

        The key has the form ``EDSL_SERVICE_<LIMIT>_<SERVICE>``. Only the
        first underscore after the prefix separates the two parts, so service
        names that themselves contain underscores are kept intact.

        >>> KeyLookupBuilder.extract_service("EDSL_SERVICE_TPM_DEEP_INFRA")
        ('deep_infra', 'tpm')

        Raises:
            ValueError: If the limit type or the service part is missing.
        """
        limit_type, _, service_raw = key.removeprefix("EDSL_SERVICE_").partition("_")
        if not limit_type or not service_raw:
            raise ValueError(f"Cannot extract service and limit type from '{key}'")
        return service_raw.lower(), limit_type.lower()

    def get_key_value_pairs(self) -> dict:
        """Get key-value pairs from configured sources.

        Returns a dict mapping each variable name to ``(value, source)``.
        Sources are read in ``fetch_order``; a later source overwrites the
        value of a name already seen.
        """
        fetching_functions = {
            "env": self._os_env_key_value_pairs,
            "coop": self._coop_key_value_pairs,
            "config": self._config_key_value_pairs,
        }
        d = {}
        for source in self.fetch_order:
            for k, v in fetching_functions[source]().items():
                d[k] = (v, source)
        return d

    def _entry_type(self, key, value) -> str:
        """Determine the type of entry from a key.

        >>> builder = KeyLookupBuilder()
        >>> builder._entry_type("EDSL_SERVICE_RPM_OPENAI", "60")
        'limit'
        >>> builder._entry_type("OPENAI_API_KEY", "sk-1234")
        'api_key'
        >>> builder._entry_type("AWS_ACCESS_KEY_ID", "AKIA1234")
        'api_id'
        >>> builder._entry_type("UNKNOWN_KEY", "value")
        'unknown'
        """
        if key.startswith("EDSL_SERVICE_"):
            return "limit"
        if key in api_keyname_to_service:
            return "api_key"
        if key in api_id_to_service:
            return "api_id"
        return "unknown"

    def _add_id(self, key: str, value: str, source: str) -> None:
        """Add an API ID to the id_data dictionary.

        Raises:
            ValueError: If an ID is already recorded for the service.

        >>> builder = KeyLookupBuilder()
        >>> builder._add_id("AWS_ACCESS_KEY_ID", "AKIA1234", "env")
        >>> builder.id_data["bedrock"].value
        'AKIA1234'
        >>> try:
        ...     builder._add_id("AWS_ACCESS_KEY_ID", "AKIA5678", "env")
        ... except ValueError as e:
        ...     str(e)
        'Duplicate ID for service bedrock'
        """
        service = api_id_to_service[key]
        if service in self.id_data:
            raise ValueError(f"Duplicate ID for service {service}")
        self.id_data[service] = APIIDEntry(
            service=service, name=key, value=value, source=source
        )

    def _add_limit(self, key: str, value: str, source: str) -> None:
        """Add a rate limit entry to the limit_data dictionary.

        The value is stored as given (no numeric conversion). The entry keeps
        the source of the first limit recorded for the service.

        >>> builder = KeyLookupBuilder()
        >>> builder._add_limit("EDSL_SERVICE_RPM_OPENAI", "60", "config")
        >>> builder.limit_data["openai"].rpm
        '60'
        >>> builder._add_limit("EDSL_SERVICE_TPM_OPENAI", "100000", "config")
        >>> builder.limit_data["openai"].tpm
        '100000'
        """
        service, limit_type = self.extract_service(key)
        if service not in self.limit_data:
            self.limit_data[service] = LimitEntry(
                service=service, rpm=None, tpm=None, source=source
            )
        setattr(self.limit_data[service], limit_type.lower(), value)

    def _add_api_key(self, key: str, value: str, source: str) -> None:
        """Add an API key entry to the key_data dictionary.

        >>> builder = KeyLookupBuilder()
        >>> builder._add_api_key("OPENAI_API_KEY", "sk-1234", "env")
        >>> 'sk-1234' == builder.key_data["openai"][-1].value
        True
        """
        service = api_keyname_to_service[key]
        new_entry = APIKeyEntry(service=service, name=key, value=value, source=source)
        self.key_data.setdefault(service, []).append(new_entry)

    def process_key_value_pairs(self) -> None:
        """Process all key-value pairs from the configured sources.

        Each pair is routed to the matching ``_add_*`` method; pairs of
        unknown type are ignored.
        """
        handlers = {
            "limit": self._add_limit,
            "api_key": self._add_api_key,
            "api_id": self._add_id,
        }
        for key, (value, source) in self.get_key_value_pairs().items():
            handler = handlers.get(self._entry_type(key, value))
            if handler is not None:
                handler(key, value, source)
@@ -0,0 +1,38 @@
1
+ from collections import UserDict
2
+
3
+ from edsl.language_models.key_management.KeyLookupBuilder import KeyLookupBuilder
4
+
5
+
6
class KeyLookupCollection(UserDict):
    """A singleton class that stores key-lookup objects.

    Once a KeyLookup has been created for a given fetch order there is no
    need to keep re-creating it, so built lookups are kept here, keyed by
    their fetch-order tuple.

    >>> collection = KeyLookupCollection()
    >>> collection2 = KeyLookupCollection()
    >>> collection is collection2  # Test singleton pattern
    True
    >>> collection.add_key_lookup(("config", "env"))
    >>> ("config", "env") in collection.data
    True
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        instance = cls._instance
        if instance is None:
            instance = cls._instance = super().__new__(cls)
        return instance

    def __init__(self, *args, **kwargs):
        """Initialise the storage only the first time the singleton is built."""
        # __init__ runs on every KeyLookupCollection() call; skipping it after
        # the first keeps the lookups that were already stored.
        if hasattr(self, "_initialized"):
            return
        self.data = {}
        self._initialized = True
        super().__init__(*args, **kwargs)

    def add_key_lookup(self, fetch_order=None):
        """Build and store the lookup for ``fetch_order`` unless already present.

        ``fetch_order`` defaults to ``("config", "env")``.
        """
        order = ("config", "env") if fetch_order is None else fetch_order
        if order in self.data:
            return
        self.data[order] = KeyLookupBuilder(fetch_order=order).build()
File without changes