edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/base/data_transfer_models.py +15 -4
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/dataset/dataset_operations_mixin.py +216 -180
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/interviews/request_token_estimator.py +8 -0
- edsl/invigilators/invigilators.py +35 -13
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +154 -113
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +110 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +176 -71
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +115 -46
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +150 -9
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
edsl/jobs/jobs_runner_asyncio.py
DELETED
@@ -1,281 +0,0 @@
-"""
-Asynchronous execution engine for EDSL jobs.
-
-This module provides the core functionality for running interviews asynchronously,
-which is essential for efficient execution of large jobs. It handles the complex
-process of coordinating multiple concurrent interviews, managing progress tracking,
-and gracefully handling cancellations and errors.
-
-Key components:
-- JobsRunnerAsyncio: The main class that orchestrates async execution
-- Progress bar integration with remote status tracking
-- Error handling and graceful cancellation
-- Result collection and organization
-
-This module is primarily used internally by the Jobs class and is typically not
-accessed directly by end users, though advanced users may need to understand its
-behavior when customizing job execution.
-"""
-from __future__ import annotations
-import time
-import asyncio
-import threading
-import warnings
-from typing import TYPE_CHECKING, Optional
-
-from ..results import Results
-from ..tasks import TaskHistory
-from ..utilities.decorators import jupyter_nb_handler
-
-from .jobs_runner_status import JobsRunnerStatus
-from .async_interview_runner import AsyncInterviewRunner
-from .data_structures import RunEnvironment, RunParameters, RunConfig
-
-if TYPE_CHECKING:
-    from ..jobs import Jobs
-
-
-class JobsRunnerAsyncio:
-    """
-    Executes a collection of interviews asynchronously with progress tracking.
-
-    This class is the main execution engine for EDSL jobs. It manages the asynchronous
-    running of interviews, handles progress tracking, and organizes results. It is
-    instantiated by a Jobs object and handles the complex execution logic that makes
-    parallel interview processing efficient.
-
-    Key responsibilities:
-    1. Coordinating asynchronous execution of interviews
-    2. Tracking and reporting progress
-    3. Handling errors and cancellations
-    4. Collecting and organizing results
-
-    This class supports two main execution modes:
-    - run(): For synchronous contexts (returns after completion)
-    - run_async(): For asynchronous contexts (can be awaited)
-    """
-
-    def __init__(self, jobs: "Jobs", environment: RunEnvironment):
-        """
-        Initialize a JobsRunnerAsyncio instance.
-
-        Parameters:
-            jobs (Jobs): The Jobs instance containing the interviews to run
-            environment (RunEnvironment): The environment configuration containing
-                resources like cache, key_lookup, and bucket_collection
-
-        Notes:
-            - The Jobs instance provides the interviews to be executed
-            - The environment contains resources like caches and API keys
-            - Additional runtime state like completion status is initialized when run() is called
-        """
-        self.jobs = jobs
-        self.environment = environment
-        # These will be set when run() is called
-        self.start_time = None
-        self.completed = None
-
-    def __len__(self):
-        return len(self.jobs)
-
-    async def run_async(self, parameters: RunParameters) -> "Results":
-        """
-        Execute interviews asynchronously without progress tracking.
-
-        This method provides a simplified version of the run method, primarily used
-        by other modules that need direct access to asynchronous execution without
-        the full feature set of the main run() method. This is a lower-level interface
-        that doesn't include progress bars or advanced error handling.
-
-        Parameters:
-            parameters (RunParameters): Configuration parameters for the run
-
-        Returns:
-            Results: A Results object containing all responses and metadata
-
-        Notes:
-            - This method doesn't support progress bars or interactive cancellation
-            - It doesn't handle keyboard interrupts specially
-            - It's primarily meant for internal use by other EDSL components
-            - For most use cases, the main run() method is preferred
-        """
-        # Initialize a simple status tracker (no progress bar)
-        self.environment.jobs_runner_status = JobsRunnerStatus(self, n=parameters.n)
-        data = []
-        task_history = TaskHistory(include_traceback=False)
-
-        run_config = RunConfig(parameters=parameters, environment=self.environment)
-        result_generator = AsyncInterviewRunner(self.jobs, run_config)
-
-        # Process results as they come in
-        async for result, interview in result_generator.run():
-            data.append(result)
-            task_history.add_interview(interview)
-
-        # Build an interim results object so that only the cache entries
-        # relevant to these interviews are extracted
-        results = Results(survey=self.jobs.survey, task_history=task_history, data=data)
-        relevant_cache = results.relevant_cache(self.environment.cache)
-
-        return Results(
-            survey=self.jobs.survey,
-            task_history=task_history,
-            data=data,
-            cache=relevant_cache,
-        )
-
-    def simple_run(self, parameters: Optional[RunParameters] = None) -> Results:
-        """
-        Run interviews synchronously with minimal configuration.
-
-        This is a convenience method that provides a very simple synchronous interface
-        for running jobs. It's primarily used for quick tests or debugging, not for
-        production use.
-
-        Parameters:
-            parameters (RunParameters, optional): Configuration parameters for the run.
-                If not provided, default parameters will be used.
-
-        Returns:
-            Results: A Results object containing all responses and metadata
-
-        Notes:
-            - This method is synchronous (blocks until completion)
-            - It doesn't include progress tracking or advanced error handling
-            - For production use, use the main run() method instead
-        """
-        if parameters is None:
-            parameters = RunParameters()
-
-        # run_async already assembles a complete Results object
-        return asyncio.run(self.run_async(parameters))
-
-    @jupyter_nb_handler
-    async def run(self, parameters: RunParameters) -> Results:
-        """
-        Execute interviews asynchronously with full feature support.
-
-        This is the main method for running jobs with full feature support, including
-        progress tracking, error handling, and graceful cancellation. It's decorated
-        with @jupyter_nb_handler to ensure proper handling in notebook environments.
-
-        Parameters:
-            parameters (RunParameters): Configuration parameters for the run
-
-        Returns:
-            Results: A Results object containing all responses and metadata
-
-        Raises:
-            Exception: Any unhandled exception from interviews if stop_on_exception=True
-            KeyboardInterrupt: If the user interrupts execution and it can't be handled gracefully
-
-        Notes:
-            - Supports progress bars with remote tracking via Coop
-            - Handles keyboard interrupts gracefully
-            - Manages concurrent execution of multiple interviews
-            - Collects and consolidates results from all completed interviews
-            - Can be used in both async and sync contexts due to the @jupyter_nb_handler decorator
-        """
-        run_config = RunConfig(parameters=parameters, environment=self.environment)
-
-        self.start_time = time.monotonic()
-        self.completed = False
-
-        from ..coop import Coop
-
-        coop = Coop()
-        endpoint_url = coop.get_progress_bar_url()
-
-        def set_up_jobs_runner_status(jobs_runner_status):
-            if jobs_runner_status is not None:
-                return jobs_runner_status(
-                    self,
-                    n=parameters.n,
-                    endpoint_url=endpoint_url,
-                    job_uuid=parameters.job_uuid,
-                )
-            else:
-                return JobsRunnerStatus(
-                    self,
-                    n=parameters.n,
-                    endpoint_url=endpoint_url,
-                    job_uuid=parameters.job_uuid,
-                )
-
-        run_config.environment.jobs_runner_status = set_up_jobs_runner_status(
-            self.environment.jobs_runner_status
-        )
-
-        async def get_results(results) -> None:
-            """Conduct the interviews and append them to the results list."""
-            result_generator = AsyncInterviewRunner(self.jobs, run_config)
-            async for result, interview in result_generator.run():
-                results.append(result)
-                results.task_history.add_interview(interview)
-
-            self.completed = True
-
-        def run_progress_bar(stop_event, jobs_runner_status) -> None:
-            """Run the progress bar in a separate thread."""
-            jobs_runner_status.update_progress(stop_event)
-
-        def set_up_progress_bar(progress_bar: bool, jobs_runner_status):
-            progress_thread = None
-            if progress_bar and jobs_runner_status.has_ep_api_key():
-                jobs_runner_status.setup()
-                progress_thread = threading.Thread(
-                    target=run_progress_bar, args=(stop_event, jobs_runner_status)
-                )
-                progress_thread.start()
-            elif progress_bar:
-                warnings.warn(
-                    "You need an Expected Parrot API key to view job progress bars."
-                )
-            return progress_thread
-
-        results = Results(
-            survey=self.jobs.survey,
-            data=[],
-            task_history=TaskHistory(),
-        )
-
-        stop_event = threading.Event()
-        progress_thread = set_up_progress_bar(
-            parameters.progress_bar, run_config.environment.jobs_runner_status
-        )
-
-        exception_to_raise = None
-        try:
-            await get_results(results)
-        except KeyboardInterrupt:
-            print("Keyboard interrupt received. Stopping gracefully...")
-            stop_event.set()
-        except Exception as e:
-            if parameters.stop_on_exception:
-                exception_to_raise = e
-            stop_event.set()
-        finally:
-            # Always signal the progress thread to stop, then wait for it
-            stop_event.set()
-            if progress_thread is not None:
-                progress_thread.join()

-        if exception_to_raise:
-            raise exception_to_raise
-
-        # Attach only the cache entries relevant to these results
-        relevant_cache = results.relevant_cache(self.environment.cache)
-        results.cache = relevant_cache
-        results.bucket_collection = self.environment.bucket_collection
-
-        from .results_exceptions_handler import ResultsExceptionsHandler
-
-        results_exceptions_handler = ResultsExceptionsHandler(results, parameters)
-        results_exceptions_handler.handle_exceptions()
-        return results
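The deleted run() above coordinates an async result loop with a progress-bar thread through a shared threading.Event, signaling the thread in finally and joining it before returning. Below is a minimal, self-contained sketch of that coordination pattern using only the standard library; all names in it are illustrative, not part of edsl.

import asyncio
import threading
import time

def progress_loop(stop_event: threading.Event) -> None:
    # Poll until the main coroutine signals completion or cancellation.
    while not stop_event.is_set():
        print("...working")
        time.sleep(0.2)

async def gather_results(results: list) -> None:
    for i in range(3):
        await asyncio.sleep(0.3)  # stands in for running one interview
        results.append(i)

async def main() -> list:
    stop_event = threading.Event()
    thread = threading.Thread(target=progress_loop, args=(stop_event,))
    thread.start()
    results: list = []
    try:
        await gather_results(results)
    finally:
        # Mirror the deleted runner: always signal the thread, then join it,
        # whether the loop finished, raised, or was interrupted.
        stop_event.set()
        thread.join()
    return results

if __name__ == "__main__":
    print(asyncio.run(main()))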
edsl/language_models/unused/fake_openai_service.py
DELETED
@@ -1,60 +0,0 @@
-import threading
-import asyncio
-from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse
-import uvicorn
-import json
-
-app = FastAPI()
-
-
-async def generate_response(question_number: int) -> dict:
-    # Simulate some asynchronous work
-    await asyncio.sleep(1)
-    return {
-        "id": "chatcmpl-123",
-        "object": "chat.completion",
-        "created": 1677652288,
-        "model": "gpt-3.5-turbo-0613",
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": json.dumps(
-                        {"answer": f"SPAM for question {question_number}!"}
-                    ),
-                },
-                "finish_reason": "stop",
-            }
-        ],
-        "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
-    }
-
-
-@app.post("/v1/chat/completions")
-async def chat_completions(request: Request):
-    body = await request.json()
-    user_prompt = body["messages"][-1]["content"]
-    question_number = int(user_prompt.split("XX")[1])
-
-    response = await generate_response(question_number)
-    return JSONResponse(content=response)
-
-
-def run_server():
-    uvicorn.run(app, host="127.0.0.1", port=8000)
-
-
-if __name__ == "__main__":
-    # Start the server in a separate thread
-    server_thread = threading.Thread(target=run_server)
-    server_thread.start()
-
-    # Your main code here
-    # ...
-
-    # To use this with the OpenAI SDK:
-    # from openai import AsyncOpenAI
-    # client = AsyncOpenAI(base_url="http://127.0.0.1:8000/v1", api_key="fake_key")
-    # response = await client.chat.completions.create(model="gpt-3.5-turbo", messages=[...])
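The trailing comments in the deleted stub only gesture at client usage. Here is a slightly fuller client sketch, assuming openai>=1.0 is installed and the fake server above is already running on port 8000; the "XX7XX" marker in the message is required because the endpoint parses the question number via user_prompt.split("XX")[1].

import asyncio
from openai import AsyncOpenAI

async def main() -> None:
    # Point the SDK at the local fake server; any api_key string is accepted.
    client = AsyncOpenAI(base_url="http://127.0.0.1:8000/v1", api_key="fake_key")
    response = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        # The server extracts the question number between "XX" markers.
        messages=[{"role": "user", "content": "Question XX7XX: say anything"}],
    )
    # Prints the canned payload: {"answer": "SPAM for question 7!"}
    print(response.choices[0].message.content)

asyncio.run(main())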
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE: File without changes
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL: File without changes
{edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt: File without changes