edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +116 -197
- edsl/__init__.py +7 -15
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +147 -351
- edsl/agents/AgentList.py +73 -211
- edsl/agents/Invigilator.py +50 -101
- edsl/agents/InvigilatorBase.py +70 -62
- edsl/agents/PromptConstructor.py +225 -143
- edsl/agents/__init__.py +1 -0
- edsl/agents/prompt_helpers.py +3 -3
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/config.py +2 -22
- edsl/conversation/car_buying.py +1 -2
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +47 -125
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +27 -45
- edsl/data/CacheEntry.py +15 -12
- edsl/data/CacheHandler.py +12 -31
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/data/__init__.py +3 -4
- edsl/data_transfer_models.py +1 -2
- edsl/enums.py +0 -27
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +0 -12
- edsl/exceptions/questions.py +6 -24
- edsl/exceptions/scenarios.py +0 -7
- edsl/inference_services/AnthropicService.py +19 -38
- edsl/inference_services/AwsBedrock.py +2 -0
- edsl/inference_services/AzureAI.py +2 -0
- edsl/inference_services/GoogleService.py +12 -7
- edsl/inference_services/InferenceServiceABC.py +85 -18
- edsl/inference_services/InferenceServicesCollection.py +79 -120
- edsl/inference_services/MistralAIService.py +3 -0
- edsl/inference_services/OpenAIService.py +35 -47
- edsl/inference_services/PerplexityService.py +3 -0
- edsl/inference_services/TestService.py +10 -11
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/jobs/Answers.py +14 -1
- edsl/jobs/Jobs.py +431 -356
- edsl/jobs/JobsChecks.py +10 -35
- edsl/jobs/JobsPrompts.py +4 -6
- edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
- edsl/jobs/buckets/BucketCollection.py +3 -44
- edsl/jobs/buckets/TokenBucket.py +21 -53
- edsl/jobs/interviews/Interview.py +408 -143
- edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
- edsl/jobs/tasks/TaskHistory.py +18 -38
- edsl/jobs/tasks/task_status_enum.py +2 -0
- edsl/language_models/KeyLookup.py +30 -0
- edsl/language_models/LanguageModel.py +236 -194
- edsl/language_models/ModelList.py +19 -28
- edsl/language_models/__init__.py +2 -1
- edsl/language_models/registry.py +190 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/unused/ReplicateBase.py +83 -0
- edsl/language_models/utilities.py +4 -5
- edsl/notebooks/Notebook.py +14 -19
- edsl/prompts/Prompt.py +39 -29
- edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
- edsl/questions/QuestionBase.py +214 -68
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
- edsl/questions/QuestionBasePromptsMixin.py +3 -7
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +3 -2
- edsl/questions/QuestionList.py +18 -10
- edsl/questions/QuestionMultipleChoice.py +23 -67
- edsl/questions/QuestionNumerical.py +4 -2
- edsl/questions/QuestionRank.py +17 -7
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
- edsl/questions/SimpleAskMixin.py +3 -4
- edsl/questions/__init__.py +1 -2
- edsl/questions/derived/QuestionLinearScale.py +3 -6
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +3 -17
- edsl/questions/question_registry.py +1 -1
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +7 -170
- edsl/results/DatasetExportMixin.py +305 -168
- edsl/results/DatasetTree.py +8 -28
- edsl/results/Result.py +206 -298
- edsl/results/Results.py +131 -149
- edsl/results/ResultsDBMixin.py +238 -0
- edsl/results/ResultsExportMixin.py +0 -2
- edsl/results/{results_selector.py → Selector.py} +13 -23
- edsl/results/TableDisplay.py +171 -98
- edsl/results/__init__.py +1 -1
- edsl/scenarios/FileStore.py +239 -150
- edsl/scenarios/Scenario.py +193 -90
- edsl/scenarios/ScenarioHtmlMixin.py +3 -4
- edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
- edsl/scenarios/ScenarioList.py +244 -415
- edsl/scenarios/ScenarioListExportMixin.py +7 -0
- edsl/scenarios/ScenarioListPdfMixin.py +37 -15
- edsl/scenarios/__init__.py +2 -1
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +12 -5
- edsl/surveys/Rule.py +4 -5
- edsl/surveys/RuleCollection.py +27 -25
- edsl/surveys/Survey.py +791 -270
- edsl/surveys/SurveyCSS.py +8 -20
- edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
- edsl/surveys/__init__.py +2 -4
- edsl/surveys/descriptors.py +2 -6
- edsl/surveys/instructions/ChangeInstruction.py +2 -1
- edsl/surveys/instructions/Instruction.py +13 -4
- edsl/surveys/instructions/InstructionCollection.py +6 -11
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/utilities.py +23 -35
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
- edsl-0.1.39.dev1.dist-info/RECORD +277 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
- edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
- edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
- edsl/agents/question_option_processor.py +0 -172
- edsl/coop/CoopFunctionsMixin.py +0 -15
- edsl/coop/ExpectedParrotKeyHandler.py +0 -125
- edsl/exceptions/inference_services.py +0 -5
- edsl/inference_services/AvailableModelCacheHandler.py +0 -184
- edsl/inference_services/AvailableModelFetcher.py +0 -215
- edsl/inference_services/ServiceAvailability.py +0 -135
- edsl/inference_services/data_structures.py +0 -134
- edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
- edsl/jobs/FetchInvigilator.py +0 -47
- edsl/jobs/InterviewTaskManager.py +0 -98
- edsl/jobs/InterviewsConstructor.py +0 -50
- edsl/jobs/JobsComponentConstructor.py +0 -189
- edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
- edsl/jobs/RequestTokenEstimator.py +0 -30
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/buckets/TokenBucketAPI.py +0 -211
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/decorators.py +0 -35
- edsl/jobs/jobs_status_enums.py +0 -9
- edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/ComputeCost.py +0 -63
- edsl/language_models/PriceManager.py +0 -127
- edsl/language_models/RawResponseHandler.py +0 -106
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +0 -131
- edsl/language_models/model.py +0 -256
- edsl/notebooks/NotebookToLaTeX.py +0 -142
- edsl/questions/ExceptionExplainer.py +0 -77
- edsl/questions/HTMLQuestion.py +0 -103
- edsl/questions/QuestionMatrix.py +0 -265
- edsl/questions/data_structures.py +0 -20
- edsl/questions/loop_processor.py +0 -149
- edsl/questions/response_validator_factory.py +0 -34
- edsl/questions/templates/matrix/__init__.py +0 -1
- edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
- edsl/questions/templates/matrix/question_presentation.jinja +0 -20
- edsl/results/MarkdownToDocx.py +0 -122
- edsl/results/MarkdownToPDF.py +0 -111
- edsl/results/TextEditor.py +0 -50
- edsl/results/file_exports.py +0 -252
- edsl/results/smart_objects.py +0 -96
- edsl/results/table_data_class.py +0 -12
- edsl/results/table_renderers.py +0 -118
- edsl/scenarios/ConstructDownloadLink.py +0 -109
- edsl/scenarios/DocumentChunker.py +0 -102
- edsl/scenarios/DocxScenario.py +0 -16
- edsl/scenarios/PdfExtractor.py +0 -40
- edsl/scenarios/directory_scanner.py +0 -96
- edsl/scenarios/file_methods.py +0 -85
- edsl/scenarios/handlers/__init__.py +0 -13
- edsl/scenarios/handlers/csv.py +0 -49
- edsl/scenarios/handlers/docx.py +0 -76
- edsl/scenarios/handlers/html.py +0 -37
- edsl/scenarios/handlers/json.py +0 -111
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/scenarios/handlers/md.py +0 -51
- edsl/scenarios/handlers/pdf.py +0 -68
- edsl/scenarios/handlers/png.py +0 -39
- edsl/scenarios/handlers/pptx.py +0 -105
- edsl/scenarios/handlers/py.py +0 -294
- edsl/scenarios/handlers/sql.py +0 -313
- edsl/scenarios/handlers/sqlite.py +0 -149
- edsl/scenarios/handlers/txt.py +0 -33
- edsl/scenarios/scenario_selector.py +0 -156
- edsl/surveys/ConstructDAG.py +0 -92
- edsl/surveys/EditSurvey.py +0 -221
- edsl/surveys/InstructionHandler.py +0 -100
- edsl/surveys/MemoryManagement.py +0 -72
- edsl/surveys/RuleManager.py +0 -172
- edsl/surveys/Simulator.py +0 -75
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/utilities/PrettyList.py +0 -56
- edsl/utilities/is_notebook.py +0 -18
- edsl/utilities/is_valid_variable_name.py +0 -11
- edsl/utilities/remove_edsl_version.py +0 -24
- edsl-0.1.39.dist-info/RECORD +0 -358
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
@@ -1,127 +0,0 @@
|
|
1
|
-
from typing import Dict, Tuple, Optional, Union
|
2
|
-
|
3
|
-
|
4
|
-
class PriceManager:
    """Singleton that caches model prices fetched from the Coop service.

    Prices are kept in a dictionary keyed by ``(inference_service, model)``.
    ``calculate_cost`` reads the ``"input"`` and ``"output"`` entries of a
    price record, each of which carries a ``"one_usd_buys"`` value
    (presumably the number of tokens one US dollar buys; the string
    ``"infinity"`` is treated as zero cost).
    """

    _instance = None
    _price_lookup: Dict[Tuple[str, str], Dict] = {}
    _is_initialized = False

    def __new__(cls):
        # Always hand back the same object so the prices are fetched once.
        if cls._instance is None:
            cls._instance = super(PriceManager, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # Only initialize once, even if __init__ is called multiple times
        if not self._is_initialized:
            self._is_initialized = True
            self.refresh_prices()

    def refresh_prices(self) -> None:
        """
        Fetch fresh prices from the Coop service and update the internal price lookup.

        Failures are reported on stdout and otherwise ignored, leaving the
        previously cached prices in place.
        """
        from edsl.coop import Coop

        try:
            # Building the client is inside the ``try`` as well: a failure
            # there must not escape ``__init__`` after the instance has
            # already been flagged as initialized.
            self._price_lookup = Coop().fetch_prices()
        except Exception as e:
            print(f"Error fetching prices: {str(e)}")

    def get_price(self, inference_service: str, model: str) -> Optional[Dict]:
        """
        Get the price information for a specific service and model combination.

        Args:
            inference_service (str): The name of the inference service
            model (str): The model identifier

        Returns:
            Optional[Dict]: Price information if found, None otherwise
        """
        return self._price_lookup.get((inference_service, model))

    def get_all_prices(self) -> Dict[Tuple[str, str], Dict]:
        """
        Get the complete price lookup dictionary.

        Returns:
            Dict[Tuple[str, str], Dict]: A shallow copy of the price lookup dictionary
        """
        return self._price_lookup.copy()

    @staticmethod
    def _tokens_to_cost(token_count: int, one_usd_buys) -> Union[float, int]:
        """Convert a token count to a cost given a ``one_usd_buys`` value."""
        if one_usd_buys == "infinity":
            return 0
        return token_count / float(one_usd_buys)

    def calculate_cost(
        self,
        inference_service: str,
        model: str,
        usage: Dict[str, Union[str, int]],
        input_token_name: str,
        output_token_name: str,
    ) -> Union[float, str]:
        """
        Calculate the total cost for a model usage based on input and output tokens.

        Args:
            inference_service (str): The inference service identifier
            model (str): The model identifier
            usage (Dict[str, Union[str, int]]): Dictionary containing token usage information
            input_token_name (str): Key name for input tokens in the usage dict
            output_token_name (str): Key name for output tokens in the usage dict

        Returns:
            Union[float, str]: Total cost if calculation successful, error message string if not
        """
        relevant_prices = self.get_price(inference_service, model)
        if relevant_prices is None:
            return f"Could not find price for model {model} in the price lookup."

        # Extract token counts
        try:
            input_tokens = int(usage[input_token_name])
            output_tokens = int(usage[output_token_name])
        except Exception as e:
            return f"Could not fetch tokens from model response: {e}"

        # Extract price information
        try:
            inverse_output_price = relevant_prices["output"]["one_usd_buys"]
            inverse_input_price = relevant_prices["input"]["one_usd_buys"]
        except Exception as e:
            if "output" not in relevant_prices:
                return f"Could not fetch prices from {relevant_prices} - {e}; Missing 'output' key."
            if "input" not in relevant_prices:
                return f"Could not fetch prices from {relevant_prices} - {e}; Missing 'input' key."
            return f"Could not fetch prices from {relevant_prices} - {e}"

        # Calculate input cost
        try:
            input_cost = self._tokens_to_cost(input_tokens, inverse_input_price)
        except Exception as e:
            return f"Could not compute input price - {e}."

        # Calculate output cost
        try:
            output_cost = self._tokens_to_cost(output_tokens, inverse_output_price)
        except Exception as e:
            return f"Could not compute output price - {e}"

        return input_cost + output_cost

    @property
    def is_initialized(self) -> bool:
        """
        Check if the PriceManager has been initialized.

        Returns:
            bool: True if initialized, False otherwise
        """
        return self._is_initialized
|
@@ -1,106 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from typing import Optional, Any, List
|
3
|
-
from edsl.exceptions.language_models import LanguageModelBadResponseError
|
4
|
-
|
5
|
-
from json_repair import repair_json
|
6
|
-
|
7
|
-
|
8
|
-
def _extract_item_from_raw_response(data, sequence):
|
9
|
-
if isinstance(data, str):
|
10
|
-
try:
|
11
|
-
data = json.loads(data)
|
12
|
-
except json.JSONDecodeError as e:
|
13
|
-
return data
|
14
|
-
current_data = data
|
15
|
-
for i, key in enumerate(sequence):
|
16
|
-
try:
|
17
|
-
if isinstance(current_data, (list, tuple)):
|
18
|
-
if not isinstance(key, int):
|
19
|
-
raise TypeError(
|
20
|
-
f"Expected integer index for sequence at position {i}, got {type(key).__name__}"
|
21
|
-
)
|
22
|
-
if key < 0 or key >= len(current_data):
|
23
|
-
raise IndexError(
|
24
|
-
f"Index {key} out of range for sequence of length {len(current_data)} at position {i}"
|
25
|
-
)
|
26
|
-
elif isinstance(current_data, dict):
|
27
|
-
if key not in current_data:
|
28
|
-
raise KeyError(
|
29
|
-
f"Key '{key}' not found in dictionary at position {i}"
|
30
|
-
)
|
31
|
-
else:
|
32
|
-
raise TypeError(
|
33
|
-
f"Cannot index into {type(current_data).__name__} at position {i}. Full response is: {data} of type {type(data)}. Key sequence is: {sequence}"
|
34
|
-
)
|
35
|
-
|
36
|
-
current_data = current_data[key]
|
37
|
-
except Exception as e:
|
38
|
-
path = " -> ".join(map(str, sequence[: i + 1]))
|
39
|
-
if "error" in data:
|
40
|
-
msg = data["error"]
|
41
|
-
else:
|
42
|
-
msg = f"Error accessing path: {path}. {str(e)}. Full response is: '{data}'"
|
43
|
-
raise LanguageModelBadResponseError(message=msg, response_json=data)
|
44
|
-
if isinstance(current_data, str):
|
45
|
-
return current_data.strip()
|
46
|
-
else:
|
47
|
-
return current_data
|
48
|
-
|
49
|
-
|
50
|
-
class RawResponseHandler:
    """Class to handle raw responses from language models.

    ``key_sequence`` is the path to the generated text inside a raw
    response and ``usage_sequence`` (optional) is the path to the usage
    information; both are resolved with ``_extract_item_from_raw_response``.
    """

    def __init__(self, key_sequence: list, usage_sequence: Optional[list] = None):
        self.key_sequence = key_sequence
        self.usage_sequence = usage_sequence

    def get_generated_token_string(self, raw_response):
        """Return the item found at ``key_sequence`` in ``raw_response``."""
        return _extract_item_from_raw_response(raw_response, self.key_sequence)

    def get_usage_dict(self, raw_response):
        """Return the item at ``usage_sequence``, or ``{}`` when no path was configured."""
        if self.usage_sequence is None:
            return {}
        return _extract_item_from_raw_response(raw_response, self.usage_sequence)

    def parse_response(self, raw_response: dict[str, Any]) -> "EDSLOutput":
        """Parses the API response and returns the response text.

        The generated text is split at its last newline: everything before
        it is converted with ``convert_answer`` and used as the answer, the
        remainder (stripped) is the comment. Without a newline the whole
        text is the answer and the comment is ``None``.
        """

        from edsl.data_transfer_models import EDSLOutput

        generated_token_string = self.get_generated_token_string(raw_response)
        last_newline = generated_token_string.rfind("\n")

        if last_newline == -1:
            # There is no comment
            edsl_dict = {
                "answer": self.convert_answer(generated_token_string),
                "generated_tokens": generated_token_string,
                "comment": None,
            }
        else:
            edsl_dict = {
                "answer": self.convert_answer(generated_token_string[:last_newline]),
                "comment": generated_token_string[last_newline + 1 :].strip(),
                "generated_tokens": generated_token_string,
            }
        return EDSLOutput(**edsl_dict)

    @staticmethod
    def convert_answer(response_part):
        """Convert the answer portion of a response into a Python value.

        Returns ``None`` for the literal text ``"None"``. Otherwise the text
        is passed through ``repair_json`` and decoded as JSON; if the repair
        yields an empty JSON string, or decoding fails, the stripped
        original text is returned.
        """
        response_part = response_part.strip()

        if response_part == "None":
            return None

        repaired = repair_json(response_part)
        if repaired == '""':
            # it was a literal string
            return response_part

        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            # last resort
            return response_part
|
File without changes
|
@@ -1,63 +0,0 @@
|
|
1
|
-
from collections import UserDict
|
2
|
-
from dataclasses import asdict
|
3
|
-
from edsl.enums import service_to_api_keyname
|
4
|
-
|
5
|
-
from edsl.language_models.key_management.models import LanguageModelInput
|
6
|
-
|
7
|
-
|
8
|
-
class KeyLookup(UserDict):
    """A class for looking up API keys and related configuration.

    Maps a service name to the ``LanguageModelInput`` holding that service's
    API token, rate limits and optional API id.

    >>> from edsl.language_models.key_management.models import LanguageModelInput
    >>> lookup = KeyLookup()
    >>> lm_input = LanguageModelInput.example()
    >>> lookup['test'] = lm_input
    >>> lookup.to_dict()['test']['api_token']
    'sk-abcd123'
    >>> restored = KeyLookup.from_dict(lookup.to_dict())
    >>> restored['test'].api_token
    'sk-abcd123'
    """

    def to_dict(self):
        """Serialize every entry with ``dataclasses.asdict``.

        >>> kl = KeyLookup.example()
        >>> kl2 = KeyLookup.from_dict(kl.to_dict())
        >>> kl2 == kl
        True
        >>> kl2 is kl
        False
        """
        return {k: asdict(v) for k, v in self.data.items()}

    @classmethod
    def from_dict(cls, d):
        """Rebuild a lookup from the dictionary produced by ``to_dict``."""
        return cls({k: LanguageModelInput(**v) for k, v in d.items()})

    @classmethod
    def example(cls):
        """Return a lookup with example entries for ``test`` and ``openai``."""
        return cls(
            {
                "test": LanguageModelInput.example(),
                "openai": LanguageModelInput.example(),
            }
        )

    def to_dot_env(self):
        """Return a string representation of the key lookup collection for a .env file."""
        lines = []
        for service, lm_input in self.items():
            if service != "test":
                lines.append(f"EDSL_SERVICE_RPM_{service.upper()}={lm_input.rpm}")
                lines.append(f"EDSL_SERVICE_TPM_{service.upper()}={lm_input.tpm}")
                key_name = service_to_api_keyname.get(service, service)
                if isinstance(key_name, list):
                    # A service may accept several variable names; any one of
                    # them identifies the service, so emit the first (falling
                    # back to the service name if the list is empty).
                    key_name = key_name[0] if key_name else service
                lines.append(f"{key_name.upper()}={lm_input.api_token}")
                if lm_input.api_id is not None:
                    lines.append(f"{service.upper()}_API_ID={lm_input.api_id}")
        return "\n".join(lines)
|
58
|
-
|
59
|
-
|
60
|
-
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    import doctest

    doctest.testmod(optionflags=doctest.ELLIPSIS)
|
@@ -1,273 +0,0 @@
|
|
1
|
-
from typing import Optional, List
|
2
|
-
from collections import UserDict
|
3
|
-
import os
|
4
|
-
from functools import lru_cache
|
5
|
-
from dataclasses import dataclass, asdict
|
6
|
-
|
7
|
-
from edsl.enums import service_to_api_keyname
|
8
|
-
from edsl.exceptions.general import MissingAPIKeyError
|
9
|
-
|
10
|
-
from edsl.language_models.key_management.KeyLookup import KeyLookup
|
11
|
-
|
12
|
-
from edsl.language_models.key_management.models import (
|
13
|
-
APIKeyEntry,
|
14
|
-
LimitEntry,
|
15
|
-
APIIDEntry,
|
16
|
-
LanguageModelInput,
|
17
|
-
)
|
18
|
-
|
19
|
-
# Register the bedrock service: the variable holding its key and the
# variable holding its separate id.
service_to_api_keyname["bedrock"] = "AWS_SECRET_ACCESS_KEY"
service_to_api_id = {"bedrock": "AWS_ACCESS_KEY_ID"}

# Reverse index from key-variable name to service. A service may list
# several variable names, in which case each of them maps back to it.
api_keyname_to_service = {}

for service, key in service_to_api_keyname.items():
    names = key if isinstance(key, list) else [key]
    for k in names:
        api_keyname_to_service[k] = service

# Reverse index from id-variable name to service.
api_id_to_service = {"AWS_ACCESS_KEY_ID": "bedrock"}
|
32
|
-
|
33
|
-
|
34
|
-
class KeyLookupBuilder:
    """Builds KeyLookup options.

    Key/value pairs are collected from the sources named in ``fetch_order``
    (later sources overwrite earlier ones for the same key) and sorted into
    rate limits, API keys and API ids, from which ``build`` assembles a
    ``KeyLookup``.

    >>> builder = KeyLookupBuilder(fetch_order=("config", "env"))
    >>> builder.DEFAULT_RPM
    10
    >>> builder.DEFAULT_TPM
    2000000
    >>> builder.fetch_order
    ('config', 'env')

    Test invalid fetch_order:
    >>> try:
    ...     KeyLookupBuilder(fetch_order=["config", "env"])  # Should be tuple
    ... except ValueError as e:
    ...     str(e)
    'fetch_order must be a tuple'

    Test service extraction:
    >>> builder.extract_service("EDSL_SERVICE_RPM_OPENAI")
    ('openai', 'rpm')
    """

    DEFAULT_RPM = 10
    DEFAULT_TPM = 2000000

    def __init__(self, fetch_order: Optional[tuple[str, ...]] = None):
        if fetch_order is None:
            self.fetch_order = ("config", "env")
        else:
            self.fetch_order = fetch_order

        if not isinstance(self.fetch_order, tuple):
            raise ValueError("fetch_order must be a tuple")

        self.limit_data = {}
        self.key_data = {}
        self.id_data = {}
        # Result of the first ``build()`` call, reused by later calls.
        self._built_lookup = None
        self.process_key_value_pairs()

    @property
    def known_services(self):
        """Get the set of known services.

        >>> builder = KeyLookupBuilder()
        >>> isinstance(builder.known_services, set)
        True
        """
        return set(self.key_data.keys()) | set(self.limit_data.keys())

    def build(self) -> "KeyLookup":
        """Build a KeyLookup instance.

        The result is cached on the builder, so repeated calls return the
        same object. (A per-instance cache is used instead of ``lru_cache``
        on the method, which would keep every builder alive in a global
        cache.)

        >>> builder = KeyLookupBuilder()
        >>> lookup = builder.build()
        >>> isinstance(lookup, KeyLookup)
        True
        >>> lookup['test'].api_token  # Test service should always exist
        'test'
        """
        cached = getattr(self, "_built_lookup", None)
        if cached is not None:
            return cached

        d = {}
        for service in self.known_services:
            try:
                d[service] = self.get_language_model_input(service)
            except MissingAPIKeyError:
                # A service with limits but no key is simply left out.
                pass

        d.update({"test": LanguageModelInput(api_token="test", rpm=10, tpm=2000000)})
        self._built_lookup = KeyLookup(d)
        return self._built_lookup

    def get_language_model_input(self, service: str) -> "LanguageModelInput":
        """Get the language model input for a given service.

        Missing rate limits fall back to ``DEFAULT_RPM`` / ``DEFAULT_TPM``.

        Raises:
            MissingAPIKeyError: If no API key is recorded for ``service``.

        >>> builder = KeyLookupBuilder()
        >>> try:
        ...     builder.get_language_model_input("nonexistent_service")
        ... except MissingAPIKeyError as e:
        ...     str(e)
        "No key found for service 'nonexistent_service'"
        """
        key_entries = self.key_data.get(service)
        if not key_entries:
            raise MissingAPIKeyError(f"No key found for service '{service}'")

        # A service can have several key entries (one per accepted variable
        # name). Use the most recently added one so the name is always bound.
        # NOTE(review): "last wins" mirrors how later sources overwrite
        # earlier ones in get_key_value_pairs; confirm this is the intended
        # precedence between different variable names.
        api_key_entry = key_entries[-1]

        id_entry = self.id_data.get(service)
        id_source = id_entry.source if id_entry is not None else None
        api_id = id_entry.value if id_entry is not None else None

        if (limit_entry := self.limit_data.get(service)) is None:
            limit_entry = LimitEntry(
                service=service,
                rpm=self.DEFAULT_RPM,
                tpm=self.DEFAULT_TPM,
                source="default",
            )

        # Only one of the two limits may have been configured.
        if limit_entry.rpm is None:
            limit_entry.rpm = self.DEFAULT_RPM
        if limit_entry.tpm is None:
            limit_entry.tpm = self.DEFAULT_TPM

        return LanguageModelInput(
            api_token=api_key_entry.value,
            rpm=int(limit_entry.rpm),
            tpm=int(limit_entry.tpm),
            api_id=api_id,
            token_source=api_key_entry.source,
            limit_source=limit_entry.source,
            id_source=id_source,
        )

    def __repr__(self):
        return f"DataSource(key_data={self.key_data}, limit_data={self.limit_data}, id_data={self.id_data})"

    def _os_env_key_value_pairs(self):
        """Return a snapshot of the process environment."""
        return dict(os.environ.items())

    def _coop_key_value_pairs(self):
        """Return the rate-limit configuration variables fetched from Coop."""
        from edsl.coop import Coop

        c = Coop()
        return dict(c.fetch_rate_limit_config_vars().items())

    def _config_key_value_pairs(self):
        """Return the key/value pairs of the EDSL ``CONFIG`` object."""
        from edsl.config import CONFIG

        return dict(CONFIG.items())

    @staticmethod
    def extract_service(key: str) -> tuple[str, str]:
        """Extract the service and limit type from the key.

        The key has the form ``EDSL_SERVICE_<LIMIT>_<SERVICE>``. Only the
        first underscore after the prefix separates the two parts, so
        service names that themselves contain underscores are supported.

        Returns:
            ``(service, limit_type)``, both lower-cased.

        Raises:
            ValueError: If nothing follows the limit type.
        """
        limit_type, service_raw = key.replace("EDSL_SERVICE_", "").split("_", 1)
        return service_raw.lower(), limit_type.lower()

    def get_key_value_pairs(self) -> dict:
        """Get key-value pairs from configured sources.

        Returns a mapping ``key -> (value, source)``. Sources are read in
        ``fetch_order``, so a later source overwrites an earlier one.
        """
        fetching_functions = {
            "env": self._os_env_key_value_pairs,
            "coop": self._coop_key_value_pairs,
            "config": self._config_key_value_pairs,
        }
        d = {}
        for source in self.fetch_order:
            f = fetching_functions[source]
            new_data = f()
            for k, v in new_data.items():
                d[k] = (v, source)
        return d

    def _entry_type(self, key, value) -> str:
        """Determine the type of entry from a key.

        >>> builder = KeyLookupBuilder()
        >>> builder._entry_type("EDSL_SERVICE_RPM_OPENAI", "60")
        'limit'
        >>> builder._entry_type("OPENAI_API_KEY", "sk-1234")
        'api_key'
        >>> builder._entry_type("AWS_ACCESS_KEY_ID", "AKIA1234")
        'api_id'
        >>> builder._entry_type("UNKNOWN_KEY", "value")
        'unknown'
        """
        if key.startswith("EDSL_SERVICE_"):
            return "limit"
        elif key in api_keyname_to_service:
            return "api_key"
        elif key in api_id_to_service:
            return "api_id"
        return "unknown"

    def _add_id(self, key: str, value: str, source: str) -> None:
        """Add an API ID to the id_data dictionary.

        >>> builder = KeyLookupBuilder()
        >>> builder._add_id("AWS_ACCESS_KEY_ID", "AKIA1234", "env")
        >>> builder.id_data["bedrock"].value
        'AKIA1234'
        >>> try:
        ...     builder._add_id("AWS_ACCESS_KEY_ID", "AKIA5678", "env")
        ... except ValueError as e:
        ...     str(e)
        'Duplicate ID for service bedrock'
        """
        service = api_id_to_service[key]
        if service not in self.id_data:
            self.id_data[service] = APIIDEntry(
                service=service, name=key, value=value, source=source
            )
        else:
            raise ValueError(f"Duplicate ID for service {service}")

    def _add_limit(self, key: str, value: str, source: str) -> None:
        """Add a rate limit entry to the limit_data dictionary.

        >>> builder = KeyLookupBuilder()
        >>> builder._add_limit("EDSL_SERVICE_RPM_OPENAI", "60", "config")
        >>> builder.limit_data["openai"].rpm
        '60'
        >>> builder._add_limit("EDSL_SERVICE_TPM_OPENAI", "100000", "config")
        >>> builder.limit_data["openai"].tpm
        '100000'
        """
        service, limit_type = self.extract_service(key)
        if service in self.limit_data:
            setattr(self.limit_data[service], limit_type.lower(), value)
        else:
            new_limit_entry = LimitEntry(
                service=service, rpm=None, tpm=None, source=source
            )
            setattr(new_limit_entry, limit_type.lower(), value)
            self.limit_data[service] = new_limit_entry

    def _add_api_key(self, key: str, value: str, source: str) -> None:
        """Add an API key entry to the key_data dictionary.

        >>> builder = KeyLookupBuilder()
        >>> builder._add_api_key("OPENAI_API_KEY", "sk-1234", "env")
        >>> 'sk-1234' == builder.key_data["openai"][-1].value
        True
        """
        service = api_keyname_to_service[key]
        new_entry = APIKeyEntry(service=service, name=key, value=value, source=source)
        self.key_data.setdefault(service, []).append(new_entry)

    def process_key_value_pairs(self) -> None:
        """Process all key-value pairs from the configured sources."""
        for key, value_pair in self.get_key_value_pairs().items():
            value, source = value_pair
            if (entry_type := self._entry_type(key, value)) == "limit":
                self._add_limit(key, value, source)
            elif entry_type == "api_key":
                self._add_api_key(key, value, source)
            elif entry_type == "api_id":
                self._add_id(key, value, source)
|
@@ -1,38 +0,0 @@
|
|
1
|
-
from collections import UserDict
|
2
|
-
|
3
|
-
from edsl.language_models.key_management.KeyLookupBuilder import KeyLookupBuilder
|
4
|
-
|
5
|
-
|
6
|
-
class KeyLookupCollection(UserDict):
    """Singleton mapping from a fetch order to the key lookup built for it.

    A key lookup only needs to be built once per fetch order; afterwards
    the stored object is reused instead of being created again.

    >>> collection = KeyLookupCollection()
    >>> collection2 = KeyLookupCollection()
    >>> collection is collection2  # Test singleton pattern
    True
    >>> collection.add_key_lookup(("config", "env"))
    >>> ("config", "env") in collection.data
    True
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        # Create the shared instance on first use, then keep returning it.
        shared = cls._instance
        if shared is None:
            shared = super().__new__(cls)
            cls._instance = shared
        return shared

    def __init__(self, *args, **kwargs):
        # __init__ runs on every construction; set up the storage only once.
        if hasattr(self, "_initialized"):
            return
        self.data = {}
        self._initialized = True
        super().__init__(*args, **kwargs)

    def add_key_lookup(self, fetch_order=None):
        """Build and store the lookup for ``fetch_order`` unless already present.

        ``fetch_order`` defaults to ``("config", "env")``.
        """
        if fetch_order is None:
            fetch_order = ("config", "env")
        if fetch_order in self.data:
            return
        built = KeyLookupBuilder(fetch_order=fetch_order).build()
        self.data[fetch_order] = built
|
File without changes
|