edsl 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/buckets/__init__.py +8 -3
  8. edsl/buckets/bucket_collection.py +9 -3
  9. edsl/buckets/model_buckets.py +4 -2
  10. edsl/buckets/token_bucket.py +2 -2
  11. edsl/buckets/token_bucket_client.py +5 -3
  12. edsl/caching/cache.py +131 -62
  13. edsl/caching/cache_entry.py +70 -58
  14. edsl/caching/sql_dict.py +17 -0
  15. edsl/cli.py +99 -0
  16. edsl/config/config_class.py +16 -0
  17. edsl/conversation/__init__.py +31 -0
  18. edsl/coop/coop.py +276 -242
  19. edsl/coop/coop_jobs_objects.py +59 -0
  20. edsl/coop/coop_objects.py +29 -0
  21. edsl/coop/coop_regular_objects.py +26 -0
  22. edsl/coop/utils.py +24 -19
  23. edsl/dataset/dataset.py +338 -101
  24. edsl/db_list/sqlite_list.py +349 -0
  25. edsl/inference_services/__init__.py +40 -5
  26. edsl/inference_services/exceptions.py +11 -0
  27. edsl/inference_services/services/anthropic_service.py +5 -2
  28. edsl/inference_services/services/aws_bedrock.py +6 -2
  29. edsl/inference_services/services/azure_ai.py +6 -2
  30. edsl/inference_services/services/google_service.py +3 -2
  31. edsl/inference_services/services/mistral_ai_service.py +6 -2
  32. edsl/inference_services/services/open_ai_service.py +6 -2
  33. edsl/inference_services/services/perplexity_service.py +6 -2
  34. edsl/inference_services/services/test_service.py +94 -5
  35. edsl/interviews/answering_function.py +167 -59
  36. edsl/interviews/interview.py +124 -72
  37. edsl/interviews/interview_task_manager.py +10 -0
  38. edsl/invigilators/invigilators.py +9 -0
  39. edsl/jobs/async_interview_runner.py +146 -104
  40. edsl/jobs/data_structures.py +6 -4
  41. edsl/jobs/decorators.py +61 -0
  42. edsl/jobs/fetch_invigilator.py +61 -18
  43. edsl/jobs/html_table_job_logger.py +14 -2
  44. edsl/jobs/jobs.py +180 -104
  45. edsl/jobs/jobs_component_constructor.py +2 -2
  46. edsl/jobs/jobs_interview_constructor.py +2 -0
  47. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  48. edsl/jobs/jobs_runner_status.py +30 -25
  49. edsl/jobs/progress_bar_manager.py +79 -0
  50. edsl/jobs/remote_inference.py +35 -1
  51. edsl/key_management/key_lookup_builder.py +6 -1
  52. edsl/language_models/language_model.py +86 -6
  53. edsl/language_models/model.py +10 -3
  54. edsl/language_models/price_manager.py +45 -75
  55. edsl/language_models/registry.py +5 -0
  56. edsl/notebooks/notebook.py +77 -10
  57. edsl/questions/VALIDATION_README.md +134 -0
  58. edsl/questions/__init__.py +24 -1
  59. edsl/questions/exceptions.py +21 -0
  60. edsl/questions/question_dict.py +201 -16
  61. edsl/questions/question_multiple_choice_with_other.py +624 -0
  62. edsl/questions/question_registry.py +2 -1
  63. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  64. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  65. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  66. edsl/questions/validation_analysis.py +185 -0
  67. edsl/questions/validation_cli.py +131 -0
  68. edsl/questions/validation_html_report.py +404 -0
  69. edsl/questions/validation_logger.py +136 -0
  70. edsl/results/result.py +63 -16
  71. edsl/results/results.py +702 -171
  72. edsl/scenarios/construct_download_link.py +16 -3
  73. edsl/scenarios/directory_scanner.py +226 -226
  74. edsl/scenarios/file_methods.py +5 -0
  75. edsl/scenarios/file_store.py +117 -6
  76. edsl/scenarios/handlers/__init__.py +5 -1
  77. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  78. edsl/scenarios/handlers/webm_file_store.py +104 -0
  79. edsl/scenarios/scenario.py +120 -101
  80. edsl/scenarios/scenario_list.py +800 -727
  81. edsl/scenarios/scenario_list_gc_test.py +146 -0
  82. edsl/scenarios/scenario_list_memory_test.py +214 -0
  83. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  84. edsl/scenarios/scenario_selector.py +5 -4
  85. edsl/scenarios/scenario_source.py +1990 -0
  86. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  87. edsl/surveys/survey.py +22 -0
  88. edsl/tasks/__init__.py +4 -2
  89. edsl/tasks/task_history.py +198 -36
  90. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  91. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  92. edsl/utilities/__init__.py +2 -1
  93. edsl/utilities/decorators.py +121 -0
  94. edsl/utilities/memory_debugger.py +1010 -0
  95. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/METADATA +51 -76
  96. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/RECORD +99 -75
  97. edsl/jobs/jobs_runner_asyncio.py +0 -281
  98. edsl/language_models/unused/fake_openai_service.py +0 -60
  99. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
  100. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
  101. {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
@@ -1,281 +0,0 @@
1
- """
2
- Asynchronous execution engine for EDSL jobs.
3
-
4
- This module provides the core functionality for running interviews asynchronously,
5
- which is essential for efficient execution of large jobs. It handles the complex
6
- process of coordinating multiple concurrent interviews, managing progress tracking,
7
- and gracefully handling cancellations and errors.
8
-
9
- Key components:
10
- - JobsRunnerAsyncio: The main class that orchestrates async execution
11
- - Progress bar integration with remote status tracking
12
- - Error handling and graceful cancellation
13
- - Result collection and organization
14
-
15
- This module is primarily used internally by the Jobs class and is typically not
16
- accessed directly by end users, though advanced users may need to understand its
17
- behavior when customizing job execution.
18
- """
19
- from __future__ import annotations
20
- import time
21
- import asyncio
22
- import threading
23
- import warnings
24
- from typing import TYPE_CHECKING, Optional
25
-
26
- if TYPE_CHECKING:
27
- from ..results import Results
28
-
29
- from ..results import Results
30
- from ..tasks import TaskHistory
31
- from ..utilities.decorators import jupyter_nb_handler
32
-
33
- from .jobs_runner_status import JobsRunnerStatus
34
- from .async_interview_runner import AsyncInterviewRunner
35
- from .data_structures import RunEnvironment, RunParameters, RunConfig
36
-
37
- if TYPE_CHECKING:
38
- from ..jobs import Jobs
39
-
40
-
41
- class JobsRunnerAsyncio:
42
- """
43
- Executes a collection of interviews asynchronously with progress tracking.
44
-
45
- This class is the main execution engine for EDSL jobs. It manages the asynchronous
46
- running of interviews, handles progress tracking, and organizes results. It is
47
- instantiated by a Jobs object and handles the complex execution logic that makes
48
- parallel interview processing efficient.
49
-
50
- Key responsibilities:
51
- 1. Coordinating asynchronous execution of interviews
52
- 2. Tracking and reporting progress
53
- 3. Handling errors and cancellations
54
- 4. Collecting and organizing results
55
-
56
- This class supports two main execution modes:
57
- - run(): For synchronous contexts (returns after completion)
58
- - run_async(): For asynchronous contexts (can be awaited)
59
- """
60
-
61
- def __init__(self, jobs: "Jobs", environment: RunEnvironment):
62
- """
63
- Initialize a JobsRunnerAsyncio instance.
64
-
65
- Parameters:
66
- jobs (Jobs): The Jobs instance containing the interviews to run
67
- environment (RunEnvironment): The environment configuration containing
68
- resources like cache, key_lookup, and bucket_collection
69
-
70
- Notes:
71
- - The Jobs instance provides the interviews to be executed
72
- - The environment contains resources like caches and API keys
73
- - Additional runtime state like completion status is initialized when run() is called
74
- """
75
- self.jobs = jobs
76
- self.environment = environment
77
- # These will be set when run() is called
78
- self.start_time = None
79
- self.completed = None
80
-
81
- def __len__(self):
82
- return len(self.jobs)
83
-
84
- async def run_async(self, parameters: RunParameters) -> 'Results':
85
- """
86
- Execute interviews asynchronously without progress tracking.
87
-
88
- This method provides a simplified version of the run method, primarily used
89
- by other modules that need direct access to asynchronous execution without
90
- the full feature set of the main run() method. This is a lower-level interface
91
- that doesn't include progress bars or advanced error handling.
92
-
93
- Parameters:
94
- parameters (RunParameters): Configuration parameters for the run
95
-
96
- Returns:
97
- Results: A Results object containing all responses and metadata
98
-
99
- Notes:
100
- - This method doesn't support progress bars or interactive cancellation
101
- - It doesn't handle keyboard interrupts specially
102
- - It's primarily meant for internal use by other EDSL components
103
- - For most use cases, the main run() method is preferred
104
- """
105
- # Initialize a simple status tracker (no progress bar)
106
- self.environment.jobs_runner_status = JobsRunnerStatus(self, n=parameters.n)
107
- data = []
108
- task_history = TaskHistory(include_traceback=False)
109
-
110
- run_config = RunConfig(parameters=parameters, environment=self.environment)
111
- result_generator = AsyncInterviewRunner(self.jobs, run_config)
112
-
113
- # Process results as they come in
114
- async for result, interview in result_generator.run():
115
- data.append(result)
116
- task_history.add_interview(interview)
117
-
118
- # Create the results object
119
- results = Results(survey=self.jobs.survey, task_history=task_history, data=data)
120
-
121
- # Extract only the relevant cache entries
122
- relevant_cache = results.relevant_cache(self.environment.cache)
123
-
124
- return Results(
125
- survey=self.jobs.survey,
126
- task_history=task_history,
127
- data=data,
128
- cache=relevant_cache,
129
- )
130
-
131
- def simple_run(self, parameters: Optional[RunParameters] = None) -> Results:
132
- """
133
- Run interviews synchronously with minimal configuration.
134
-
135
- This is a convenience method that provides a very simple synchronous interface
136
- for running jobs. It's primarily used for quick tests or debugging, not for
137
- production use.
138
-
139
- Parameters:
140
- parameters (RunParameters, optional): Configuration parameters for the run.
141
- If not provided, default parameters will be used.
142
-
143
- Returns:
144
- Results: A Results object containing all responses and metadata
145
-
146
- Notes:
147
- - This method is synchronous (blocks until completion)
148
- - It doesn't include progress tracking or advanced error handling
149
- - For production use, use the main run() method instead
150
- """
151
- if parameters is None:
152
- parameters = RunParameters()
153
-
154
- data = asyncio.run(self.run_async(parameters))
155
- return Results(survey=self.jobs.survey, data=data)
156
-
157
- @jupyter_nb_handler
158
- async def run(self, parameters: RunParameters) -> Results:
159
- """
160
- Execute interviews asynchronously with full feature support.
161
-
162
- This is the main method for running jobs with full feature support, including
163
- progress tracking, error handling, and graceful cancellation. It's decorated
164
- with @jupyter_nb_handler to ensure proper handling in notebook environments.
165
-
166
- Parameters:
167
- parameters (RunParameters): Configuration parameters for the run
168
-
169
- Returns:
170
- Results: A Results object containing all responses and metadata
171
-
172
- Raises:
173
- Exception: Any unhandled exception from interviews if stop_on_exception=True
174
- KeyboardInterrupt: If the user interrupts execution and it can't be handled gracefully
175
-
176
- Notes:
177
- - Supports progress bars with remote tracking via Coop
178
- - Handles keyboard interrupts gracefully
179
- - Manages concurrent execution of multiple interviews
180
- - Collects and consolidates results from all completed interviews
181
- - Can be used in both async and sync contexts due to the @jupyter_nb_handler decorator
182
- """
183
-
184
- run_config = RunConfig(parameters=parameters, environment=self.environment)
185
-
186
- self.start_time = time.monotonic()
187
- self.completed = False
188
-
189
- from ..coop import Coop
190
-
191
- coop = Coop()
192
- endpoint_url = coop.get_progress_bar_url()
193
-
194
- def set_up_jobs_runner_status(jobs_runner_status):
195
- if jobs_runner_status is not None:
196
- return jobs_runner_status(
197
- self,
198
- n=parameters.n,
199
- endpoint_url=endpoint_url,
200
- job_uuid=parameters.job_uuid,
201
- )
202
- else:
203
- return JobsRunnerStatus(
204
- self,
205
- n=parameters.n,
206
- endpoint_url=endpoint_url,
207
- job_uuid=parameters.job_uuid,
208
- )
209
-
210
- run_config.environment.jobs_runner_status = set_up_jobs_runner_status(
211
- self.environment.jobs_runner_status
212
- )
213
-
214
- async def get_results(results) -> None:
215
- """Conducted the interviews and append to the results list."""
216
- result_generator = AsyncInterviewRunner(self.jobs, run_config)
217
- async for result, interview in result_generator.run():
218
- results.append(result)
219
- results.task_history.add_interview(interview)
220
-
221
- self.completed = True
222
-
223
- def run_progress_bar(stop_event, jobs_runner_status) -> None:
224
- """Runs the progress bar in a separate thread."""
225
- jobs_runner_status.update_progress(stop_event)
226
-
227
- def set_up_progress_bar(progress_bar: bool, jobs_runner_status):
228
- progress_thread = None
229
- if progress_bar and jobs_runner_status.has_ep_api_key():
230
- jobs_runner_status.setup()
231
- progress_thread = threading.Thread(
232
- target=run_progress_bar, args=(stop_event, jobs_runner_status)
233
- )
234
- progress_thread.start()
235
- elif progress_bar:
236
- warnings.warn(
237
- "You need an Expected Parrot API key to view job progress bars."
238
- )
239
- return progress_thread
240
-
241
- results = Results(
242
- survey=self.jobs.survey,
243
- data=[],
244
- task_history=TaskHistory(),
245
- # cache=self.environment.cache.new_entries_cache(),
246
- )
247
-
248
- stop_event = threading.Event()
249
- progress_thread = set_up_progress_bar(
250
- parameters.progress_bar, run_config.environment.jobs_runner_status
251
- )
252
-
253
- exception_to_raise = None
254
- try:
255
- await get_results(results)
256
- except KeyboardInterrupt:
257
- print("Keyboard interrupt received. Stopping gracefully...")
258
- stop_event.set()
259
- except Exception as e:
260
- if parameters.stop_on_exception:
261
- exception_to_raise = e
262
- stop_event.set()
263
- finally:
264
- stop_event.set()
265
- if progress_thread is not None:
266
- progress_thread.join()
267
-
268
- if exception_to_raise:
269
- raise exception_to_raise
270
-
271
- relevant_cache = results.relevant_cache(self.environment.cache)
272
- results.cache = relevant_cache
273
- # breakpoint()
274
- results.bucket_collection = self.environment.bucket_collection
275
-
276
- from .results_exceptions_handler import ResultsExceptionsHandler
277
-
278
- results_exceptions_handler = ResultsExceptionsHandler(results, parameters)
279
-
280
- results_exceptions_handler.handle_exceptions()
281
- return results
@@ -1,60 +0,0 @@
1
- import threading
2
- import asyncio
3
- from fastapi import FastAPI, Request
4
- from fastapi.responses import JSONResponse
5
- import uvicorn
6
- import json
7
-
8
- app = FastAPI()
9
-
10
-
11
- async def generate_response(question_number: int) -> dict:
12
- # Simulate some asynchronous work
13
- await asyncio.sleep(1)
14
- return {
15
- "id": "chatcmpl-123",
16
- "object": "chat.completion",
17
- "created": 1677652288,
18
- "model": "gpt-3.5-turbo-0613",
19
- "choices": [
20
- {
21
- "index": 0,
22
- "message": {
23
- "role": "assistant",
24
- "content": json.dumps(
25
- {"answer": f"SPAM for question {question_number}!"}
26
- ),
27
- },
28
- "finish_reason": "stop",
29
- }
30
- ],
31
- "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
32
- }
33
-
34
-
35
- @app.post("/v1/chat/completions")
36
- async def chat_completions(request: Request):
37
- body = await request.json()
38
- user_prompt = body["messages"][-1]["content"]
39
- question_number = int(user_prompt.split("XX")[1])
40
-
41
- response = await generate_response(question_number)
42
- return JSONResponse(content=response)
43
-
44
-
45
- def run_server():
46
- uvicorn.run(app, host="127.0.0.1", port=8000)
47
-
48
-
49
- if __name__ == "__main__":
50
- # Start the server in a separate thread
51
- server_thread = threading.Thread(target=run_server)
52
- server_thread.start()
53
-
54
- # Your main code here
55
- # ...
56
-
57
- # To use this with the OpenAI SDK:
58
- # from openai import AsyncOpenAI
59
- # client = AsyncOpenAI(base_url="http://127.0.0.1:8000/v1", api_key="fake_key")
60
- # response = await client.chat.completions.create(model="gpt-3.5-turbo", messages=[...])
File without changes
File without changes