edsl 0.1.40.dev2__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +1 -0
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1 -1
- edsl/agents/Invigilator.py +3 -2
- edsl/coop/coop.py +5 -2
- edsl/data/Cache.py +7 -0
- edsl/data/RemoteCacheSync.py +16 -16
- edsl/jobs/AnswerQuestionFunctionConstructor.py +1 -1
- edsl/jobs/Jobs.py +24 -23
- edsl/jobs/interviews/Interview.py +1 -5
- edsl/jobs/interviews/InterviewExceptionEntry.py +14 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +18 -6
- edsl/jobs/runners/JobsRunnerStatus.py +2 -1
- edsl/language_models/key_management/KeyLookupBuilder.py +7 -3
- edsl/questions/QuestionBudget.py +2 -2
- edsl/questions/QuestionDict.py +343 -0
- edsl/questions/QuestionExtract.py +1 -1
- edsl/questions/__init__.py +1 -0
- edsl/questions/answer_validator_mixin.py +29 -0
- edsl/questions/derived/QuestionLinearScale.py +1 -1
- edsl/questions/descriptors.py +44 -0
- edsl/questions/question_registry.py +1 -1
- edsl/questions/templates/dict/__init__.py +0 -0
- edsl/questions/templates/dict/answering_instructions.jinja +21 -0
- edsl/questions/templates/dict/question_presentation.jinja +1 -0
- edsl/results/Result.py +16 -0
- edsl/results/Results.py +16 -4
- edsl/scenarios/FileStore.py +32 -0
- edsl/scenarios/handlers/csv.py +11 -0
- edsl/surveys/Survey.py +4 -0
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.41.dist-info}/METADATA +2 -2
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.41.dist-info}/RECORD +34 -30
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.41.dist-info}/LICENSE +0 -0
- {edsl-0.1.40.dev2.dist-info → edsl-0.1.41.dist-info}/WHEEL +0 -0
edsl/__init__.py
CHANGED
@@ -21,6 +21,7 @@ from edsl.questions import QuestionFunctional
|
|
21
21
|
from edsl.questions import QuestionLikertFive
|
22
22
|
from edsl.questions import QuestionList
|
23
23
|
from edsl.questions import QuestionMatrix
|
24
|
+
from edsl.questions import QuestionDict
|
24
25
|
from edsl.questions import QuestionLinearScale
|
25
26
|
from edsl.questions import QuestionNumerical
|
26
27
|
from edsl.questions import QuestionYesNo
|
edsl/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.1.40.dev2"
|
1
|
+
__version__ = "0.1.41"
|
edsl/agents/Agent.py
CHANGED
@@ -906,7 +906,7 @@ class Agent(Base):
|
|
906
906
|
{'traits': {'age': 10, 'hair': 'brown', 'height': 5.5}, 'instruction': 'Have fun.', 'edsl_version': '...', 'edsl_class_name': 'Agent'}
|
907
907
|
"""
|
908
908
|
d = {}
|
909
|
-
d["traits"] = copy.deepcopy(self.
|
909
|
+
d["traits"] = copy.deepcopy(dict(self._traits))
|
910
910
|
if self.name:
|
911
911
|
d["name"] = self.name
|
912
912
|
if self.set_instructions:
|
edsl/agents/Invigilator.py
CHANGED
@@ -48,13 +48,14 @@ class InvigilatorAI(InvigilatorBase):
|
|
48
48
|
"""Store the response in the invigilator, in case it is needed later because of validation failure."""
|
49
49
|
self.raw_model_response = agent_response_dict.model_outputs.response
|
50
50
|
self.generated_tokens = agent_response_dict.edsl_dict.generated_tokens
|
51
|
+
self.cache_key = agent_response_dict.model_outputs.cache_key
|
51
52
|
|
52
|
-
async def async_answer_question(self) ->
|
53
|
+
async def async_answer_question(self) -> EDSLResultObjectInput:
|
53
54
|
"""Answer a question using the AI model.
|
54
55
|
|
55
56
|
>>> i = InvigilatorAI.example()
|
56
57
|
"""
|
57
|
-
agent_response_dict = await self.async_get_agent_response()
|
58
|
+
agent_response_dict: AgentResponseDict = await self.async_get_agent_response()
|
58
59
|
self.store_response(agent_response_dict)
|
59
60
|
return self._extract_edsl_result_entry_and_validate(agent_response_dict)
|
60
61
|
|
edsl/coop/coop.py
CHANGED
@@ -111,13 +111,13 @@ class Coop(CoopFunctionsMixin):
|
|
111
111
|
url = f"{self.api_url}/{uri}"
|
112
112
|
method = method.upper()
|
113
113
|
if payload is None:
|
114
|
-
timeout =
|
114
|
+
timeout = 40
|
115
115
|
elif (
|
116
116
|
method.upper() == "POST"
|
117
117
|
and "json_string" in payload
|
118
118
|
and payload.get("json_string") is not None
|
119
119
|
):
|
120
|
-
timeout = max(
|
120
|
+
timeout = max(40, (len(payload.get("json_string", "")) // (1024 * 1024)))
|
121
121
|
try:
|
122
122
|
if method in ["GET", "DELETE"]:
|
123
123
|
response = requests.request(
|
@@ -533,6 +533,7 @@ class Coop(CoopFunctionsMixin):
|
|
533
533
|
uri="api/v0/remote-cache/many",
|
534
534
|
method="POST",
|
535
535
|
payload=payload,
|
536
|
+
timeout=40,
|
536
537
|
)
|
537
538
|
self._resolve_server_response(response)
|
538
539
|
response_json = response.json()
|
@@ -563,6 +564,7 @@ class Coop(CoopFunctionsMixin):
|
|
563
564
|
uri="api/v0/remote-cache/get-many",
|
564
565
|
method="POST",
|
565
566
|
payload={"keys": exclude_keys},
|
567
|
+
timeout=40,
|
566
568
|
)
|
567
569
|
self._resolve_server_response(response)
|
568
570
|
return [
|
@@ -581,6 +583,7 @@ class Coop(CoopFunctionsMixin):
|
|
581
583
|
uri="api/v0/remote-cache/get-diff",
|
582
584
|
method="POST",
|
583
585
|
payload={"keys": client_cacheentry_keys},
|
586
|
+
timeout=40,
|
584
587
|
)
|
585
588
|
self._resolve_server_response(response)
|
586
589
|
response_json = response.json()
|
edsl/data/Cache.py
CHANGED
@@ -535,6 +535,13 @@ class Cache(Base):
|
|
535
535
|
"""
|
536
536
|
return html
|
537
537
|
|
538
|
+
def subset(self, keys: list[str]) -> Cache:
|
539
|
+
"""
|
540
|
+
Return a subset of the Cache with the specified keys.
|
541
|
+
"""
|
542
|
+
new_data = {k: v for k, v in self.data.items() if k in keys}
|
543
|
+
return Cache(data=new_data)
|
544
|
+
|
538
545
|
def view(self) -> None:
|
539
546
|
"""View the Cache in a new browser tab."""
|
540
547
|
import tempfile
|
edsl/data/RemoteCacheSync.py
CHANGED
@@ -112,18 +112,18 @@ class RemoteCacheSync(AbstractContextManager):
|
|
112
112
|
missing_count = len(diff.client_missing_entries)
|
113
113
|
|
114
114
|
if missing_count == 0:
|
115
|
-
|
115
|
+
# self._output("No new entries to add to local cache.")
|
116
116
|
return
|
117
117
|
|
118
|
-
self._output(
|
119
|
-
|
120
|
-
|
121
|
-
)
|
118
|
+
# self._output(
|
119
|
+
# f"Updating local cache with {missing_count:,} new "
|
120
|
+
# f"{'entry' if missing_count == 1 else 'entries'} from remote..."
|
121
|
+
# )
|
122
122
|
|
123
123
|
self.cache.add_from_dict(
|
124
124
|
{entry.key: entry for entry in diff.client_missing_entries}
|
125
125
|
)
|
126
|
-
self._output("Local cache updated!")
|
126
|
+
# self._output("Local cache updated!")
|
127
127
|
|
128
128
|
def _get_entries_to_upload(self, diff: CacheDifference) -> CacheEntriesList:
|
129
129
|
"""Determines which entries need to be uploaded to remote cache."""
|
@@ -154,23 +154,23 @@ class RemoteCacheSync(AbstractContextManager):
|
|
154
154
|
upload_count = len(entries_to_upload)
|
155
155
|
|
156
156
|
if upload_count > 0:
|
157
|
-
self._output(
|
158
|
-
|
159
|
-
|
160
|
-
)
|
157
|
+
# self._output(
|
158
|
+
# f"Updating remote cache with {upload_count:,} new "
|
159
|
+
# f"{'entry' if upload_count == 1 else 'entries'}..."
|
160
|
+
# )
|
161
161
|
|
162
162
|
self.coop.remote_cache_create_many(
|
163
163
|
entries_to_upload,
|
164
164
|
visibility="private",
|
165
165
|
description=self.remote_cache_description,
|
166
166
|
)
|
167
|
-
self._output("Remote cache updated!")
|
168
|
-
else:
|
169
|
-
self._output("No new entries to add to remote cache.")
|
167
|
+
# self._output("Remote cache updated!")
|
168
|
+
# else:
|
169
|
+
# self._output("No new entries to add to remote cache.")
|
170
170
|
|
171
|
-
self._output(
|
172
|
-
f"There are {len(self.cache.keys()):,} entries in the local cache."
|
173
|
-
)
|
171
|
+
# self._output(
|
172
|
+
# f"There are {len(self.cache.keys()):,} entries in the local cache."
|
173
|
+
# )
|
174
174
|
|
175
175
|
|
176
176
|
if __name__ == "__main__":
|
edsl/jobs/Jobs.py
CHANGED
@@ -499,7 +499,6 @@ class Jobs(Base):
|
|
499
499
|
jc.check_api_keys()
|
500
500
|
|
501
501
|
async def _execute_with_remote_cache(self, run_job_async: bool) -> Results:
|
502
|
-
|
503
502
|
use_remote_cache = self.use_remote_cache()
|
504
503
|
|
505
504
|
from edsl.coop.coop import Coop
|
@@ -508,22 +507,21 @@ class Jobs(Base):
|
|
508
507
|
|
509
508
|
assert isinstance(self.run_config.environment.cache, Cache)
|
510
509
|
|
511
|
-
with RemoteCacheSync(
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
):
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
510
|
+
# with RemoteCacheSync(
|
511
|
+
# coop=Coop(),
|
512
|
+
# cache=self.run_config.environment.cache,
|
513
|
+
# output_func=self._output,
|
514
|
+
# remote_cache=use_remote_cache,
|
515
|
+
# remote_cache_description=self.run_config.parameters.remote_cache_description,
|
516
|
+
# ):
|
517
|
+
runner = JobsRunnerAsyncio(self, environment=self.run_config.environment)
|
518
|
+
if run_job_async:
|
519
|
+
results = await runner.run_async(self.run_config.parameters)
|
520
|
+
else:
|
521
|
+
results = runner.run(self.run_config.parameters)
|
523
522
|
return results
|
524
523
|
|
525
524
|
def _setup_and_check(self) -> Tuple[RunConfig, Optional[Results]]:
|
526
|
-
|
527
525
|
self._prepare_to_run()
|
528
526
|
self._check_if_remote_keys_ok()
|
529
527
|
|
@@ -539,12 +537,16 @@ class Jobs(Base):
|
|
539
537
|
if self.run_config.parameters.n is None:
|
540
538
|
return len(self)
|
541
539
|
else:
|
542
|
-
len(self) * self.run_config.parameters.n
|
540
|
+
return len(self) * self.run_config.parameters.n
|
543
541
|
|
544
542
|
def _run(self, config: RunConfig):
|
545
543
|
"Shared code for run and run_async"
|
546
544
|
if config.environment.cache is not None:
|
547
545
|
self.run_config.environment.cache = config.environment.cache
|
546
|
+
if config.environment.jobs_runner_status is not None:
|
547
|
+
self.run_config.environment.jobs_runner_status = (
|
548
|
+
config.environment.jobs_runner_status
|
549
|
+
)
|
548
550
|
|
549
551
|
if config.environment.bucket_collection is not None:
|
550
552
|
self.run_config.environment.bucket_collection = (
|
@@ -646,20 +648,19 @@ class Jobs(Base):
|
|
646
648
|
}
|
647
649
|
|
648
650
|
def __len__(self) -> int:
|
649
|
-
"""Return the
|
650
|
-
|
651
|
+
"""Return the number of interviews that will be conducted for one iteration of this job.
|
652
|
+
An interview is the result of one survey, taken by one agent, with one model, with one scenario.
|
651
653
|
|
652
654
|
>>> from edsl.jobs import Jobs
|
653
655
|
>>> len(Jobs.example())
|
654
|
-
|
656
|
+
4
|
655
657
|
"""
|
656
|
-
|
658
|
+
number_of_interviews = (
|
657
659
|
len(self.agents or [1])
|
658
660
|
* len(self.scenarios or [1])
|
659
661
|
* len(self.models or [1])
|
660
|
-
* len(self.survey)
|
661
662
|
)
|
662
|
-
return
|
663
|
+
return number_of_interviews
|
663
664
|
|
664
665
|
def to_dict(self, add_edsl_version=True):
|
665
666
|
d = {
|
@@ -810,9 +811,9 @@ def main():
|
|
810
811
|
from edsl.data.Cache import Cache
|
811
812
|
|
812
813
|
job = Jobs.example()
|
813
|
-
len(job) ==
|
814
|
+
len(job) == 4
|
814
815
|
results = job.run(cache=Cache())
|
815
|
-
len(results) ==
|
816
|
+
len(results) == 4
|
816
817
|
results
|
817
818
|
|
818
819
|
|
edsl/jobs/interviews/Interview.py
CHANGED
@@ -213,10 +213,6 @@ class Interview:
|
|
213
213
|
async def async_conduct_interview(
|
214
214
|
self,
|
215
215
|
run_config: Optional["RunConfig"] = None,
|
216
|
-
# model_buckets: Optional[ModelBuckets] = None,
|
217
|
-
# stop_on_exception: bool = False,
|
218
|
-
# raise_validation_errors: bool = True,
|
219
|
-
# key_lookup: Optional[KeyLookup] = None,
|
220
216
|
) -> tuple["Answers", List[dict[str, Any]]]:
|
221
217
|
"""
|
222
218
|
Conduct an Interview asynchronously.
|
@@ -313,7 +309,7 @@ class Interview:
|
|
313
309
|
|
314
310
|
def handle_task(task, invigilator):
|
315
311
|
try:
|
316
|
-
result = task.result()
|
312
|
+
result: Answers = task.result()
|
317
313
|
except asyncio.CancelledError as e: # task was cancelled
|
318
314
|
result = invigilator.get_failed_task_result(
|
319
315
|
failure_reason="Task was cancelled."
|
edsl/jobs/interviews/InterviewExceptionEntry.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import traceback
|
2
2
|
import datetime
|
3
|
+
from edsl.agents.InvigilatorBase import InvigilatorBase
|
3
4
|
|
4
5
|
|
5
6
|
class InterviewExceptionEntry:
|
@@ -9,7 +10,7 @@ class InterviewExceptionEntry:
|
|
9
10
|
self,
|
10
11
|
*,
|
11
12
|
exception: Exception,
|
12
|
-
invigilator: "
|
13
|
+
invigilator: "InvigilatorBase",
|
13
14
|
traceback_format="text",
|
14
15
|
answers=None,
|
15
16
|
):
|
@@ -20,6 +21,8 @@ class InterviewExceptionEntry:
|
|
20
21
|
self.traceback_format = traceback_format
|
21
22
|
self.answers = answers
|
22
23
|
|
24
|
+
# breakpoint()
|
25
|
+
|
23
26
|
@property
|
24
27
|
def question_type(self):
|
25
28
|
# return self.failed_question.question.question_type
|
@@ -163,12 +166,16 @@ class InterviewExceptionEntry:
|
|
163
166
|
>>> entry = InterviewExceptionEntry.example()
|
164
167
|
>>> _ = entry.to_dict()
|
165
168
|
"""
|
166
|
-
|
169
|
+
invigilator = (
|
170
|
+
self.invigilator.to_dict() if self.invigilator is not None else None
|
171
|
+
)
|
172
|
+
d = {
|
167
173
|
"exception": self.serialize_exception(self.exception),
|
168
174
|
"time": self.time,
|
169
175
|
"traceback": self.traceback,
|
170
|
-
"invigilator":
|
176
|
+
"invigilator": invigilator,
|
171
177
|
}
|
178
|
+
return d
|
172
179
|
|
173
180
|
@classmethod
|
174
181
|
def from_dict(cls, data: dict) -> "InterviewExceptionEntry":
|
@@ -176,7 +183,10 @@ class InterviewExceptionEntry:
|
|
176
183
|
from edsl.agents.Invigilator import InvigilatorAI
|
177
184
|
|
178
185
|
exception = cls.deserialize_exception(data["exception"])
|
179
|
-
|
186
|
+
if data["invigilator"] is None:
|
187
|
+
invigilator = None
|
188
|
+
else:
|
189
|
+
invigilator = InvigilatorAI.from_dict(data["invigilator"])
|
180
190
|
return cls(exception=exception, invigilator=invigilator)
|
181
191
|
|
182
192
|
|
edsl/jobs/runners/JobsRunnerAsyncio.py
CHANGED
@@ -44,7 +44,16 @@ class JobsRunnerAsyncio:
|
|
44
44
|
data.append(result)
|
45
45
|
task_history.add_interview(interview)
|
46
46
|
|
47
|
-
|
47
|
+
results = Results(survey=self.jobs.survey, task_history=task_history, data=data)
|
48
|
+
|
49
|
+
relevant_cache = results.relevant_cache(self.environment.cache)
|
50
|
+
|
51
|
+
return Results(
|
52
|
+
survey=self.jobs.survey,
|
53
|
+
task_history=task_history,
|
54
|
+
data=data,
|
55
|
+
cache=relevant_cache,
|
56
|
+
)
|
48
57
|
|
49
58
|
def simple_run(self):
|
50
59
|
data = asyncio.run(self.run_async())
|
@@ -93,16 +102,16 @@ class JobsRunnerAsyncio:
|
|
93
102
|
|
94
103
|
self.completed = True
|
95
104
|
|
96
|
-
def run_progress_bar(stop_event) -> None:
|
105
|
+
def run_progress_bar(stop_event, jobs_runner_status) -> None:
|
97
106
|
"""Runs the progress bar in a separate thread."""
|
98
|
-
|
107
|
+
jobs_runner_status.update_progress(stop_event)
|
99
108
|
|
100
109
|
def set_up_progress_bar(progress_bar: bool, jobs_runner_status):
|
101
110
|
progress_thread = None
|
102
111
|
if progress_bar and jobs_runner_status.has_ep_api_key():
|
103
112
|
jobs_runner_status.setup()
|
104
113
|
progress_thread = threading.Thread(
|
105
|
-
target=run_progress_bar, args=(stop_event,)
|
114
|
+
target=run_progress_bar, args=(stop_event, jobs_runner_status)
|
106
115
|
)
|
107
116
|
progress_thread.start()
|
108
117
|
elif progress_bar:
|
@@ -115,8 +124,9 @@ class JobsRunnerAsyncio:
|
|
115
124
|
survey=self.jobs.survey,
|
116
125
|
data=[],
|
117
126
|
task_history=TaskHistory(),
|
118
|
-
cache=self.environment.cache.new_entries_cache(),
|
127
|
+
# cache=self.environment.cache.new_entries_cache(),
|
119
128
|
)
|
129
|
+
|
120
130
|
stop_event = threading.Event()
|
121
131
|
progress_thread = set_up_progress_bar(
|
122
132
|
parameters.progress_bar, run_config.environment.jobs_runner_status
|
@@ -140,7 +150,9 @@ class JobsRunnerAsyncio:
|
|
140
150
|
if exception_to_raise:
|
141
151
|
raise exception_to_raise
|
142
152
|
|
143
|
-
|
153
|
+
relevant_cache = results.relevant_cache(self.environment.cache)
|
154
|
+
results.cache = relevant_cache
|
155
|
+
# breakpoint()
|
144
156
|
results.bucket_collection = self.environment.bucket_collection
|
145
157
|
|
146
158
|
from edsl.jobs.results_exceptions_handler import ResultsExceptionsHandler
|
edsl/jobs/runners/JobsRunnerStatus.py
CHANGED
@@ -148,7 +148,8 @@ class JobsRunnerStatusBase(ABC):
|
|
148
148
|
}
|
149
149
|
|
150
150
|
model_queues = {}
|
151
|
-
for model, bucket in self.jobs_runner.bucket_collection.items():
|
151
|
+
# for model, bucket in self.jobs_runner.bucket_collection.items():
|
152
|
+
for model, bucket in self.jobs_runner.environment.bucket_collection.items():
|
152
153
|
model_name = model.model
|
153
154
|
model_queues[model_name] = {
|
154
155
|
"language_model_name": model_name,
|
edsl/language_models/key_management/KeyLookupBuilder.py
CHANGED
@@ -36,7 +36,7 @@ class KeyLookupBuilder:
|
|
36
36
|
|
37
37
|
>>> builder = KeyLookupBuilder(fetch_order=("config", "env"))
|
38
38
|
>>> builder.DEFAULT_RPM
|
39
|
-
|
39
|
+
100
|
40
40
|
>>> builder.DEFAULT_TPM
|
41
41
|
2000000
|
42
42
|
>>> builder.fetch_order
|
@@ -54,8 +54,12 @@ class KeyLookupBuilder:
|
|
54
54
|
('openai', 'rpm')
|
55
55
|
"""
|
56
56
|
|
57
|
-
DEFAULT_RPM = 10
|
58
|
-
DEFAULT_TPM = 2000000
|
57
|
+
# DEFAULT_RPM = 10
|
58
|
+
# DEFAULT_TPM = 2000000
|
59
|
+
from edsl.config import CONFIG
|
60
|
+
|
61
|
+
DEFAULT_RPM = int(CONFIG.get("EDSL_SERVICE_RPM_BASELINE"))
|
62
|
+
DEFAULT_TPM = int(CONFIG.get("EDSL_SERVICE_TPM_BASELINE"))
|
59
63
|
|
60
64
|
def __init__(self, fetch_order: Optional[tuple[str]] = None):
|
61
65
|
if fetch_order is None:
|
edsl/questions/QuestionBudget.py
CHANGED
@@ -8,7 +8,7 @@ from edsl.questions.descriptors import IntegerDescriptor, QuestionOptionsDescrip
|
|
8
8
|
from edsl.questions.response_validator_abc import ResponseValidatorABC
|
9
9
|
|
10
10
|
|
11
|
-
class
|
11
|
+
class BudgetResponseValidator(ResponseValidatorABC):
|
12
12
|
valid_examples = []
|
13
13
|
|
14
14
|
invalid_examples = []
|
@@ -64,7 +64,7 @@ class QuestionBudget(QuestionBase):
|
|
64
64
|
budget_sum: int = IntegerDescriptor(none_allowed=False)
|
65
65
|
question_options: list[str] = QuestionOptionsDescriptor(q_budget=True)
|
66
66
|
_response_model = None
|
67
|
-
response_validator_class =
|
67
|
+
response_validator_class = BudgetResponseValidator
|
68
68
|
|
69
69
|
def __init__(
|
70
70
|
self,
|