edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +124 -53
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +21 -21
- edsl/agents/agent_list.py +2 -5
- edsl/agents/exceptions.py +119 -5
- edsl/base/__init__.py +10 -35
- edsl/base/base_class.py +71 -36
- edsl/base/base_exception.py +204 -0
- edsl/base/data_transfer_models.py +1 -1
- edsl/base/exceptions.py +94 -0
- edsl/buckets/__init__.py +15 -1
- edsl/buckets/bucket_collection.py +3 -4
- edsl/buckets/exceptions.py +107 -0
- edsl/buckets/model_buckets.py +1 -2
- edsl/buckets/token_bucket.py +11 -6
- edsl/buckets/token_bucket_api.py +27 -12
- edsl/buckets/token_bucket_client.py +9 -7
- edsl/caching/cache.py +12 -4
- edsl/caching/cache_entry.py +10 -9
- edsl/caching/exceptions.py +113 -7
- edsl/caching/remote_cache_sync.py +6 -7
- edsl/caching/sql_dict.py +20 -14
- edsl/cli.py +43 -0
- edsl/config/__init__.py +1 -1
- edsl/config/config_class.py +32 -6
- edsl/conversation/Conversation.py +8 -4
- edsl/conversation/car_buying.py +1 -3
- edsl/conversation/exceptions.py +58 -0
- edsl/conversation/mug_negotiation.py +2 -8
- edsl/coop/__init__.py +28 -6
- edsl/coop/coop.py +120 -29
- edsl/coop/coop_functions.py +1 -1
- edsl/coop/ep_key_handling.py +1 -1
- edsl/coop/exceptions.py +188 -9
- edsl/coop/price_fetcher.py +5 -8
- edsl/coop/utils.py +4 -6
- edsl/dataset/__init__.py +5 -4
- edsl/dataset/dataset.py +177 -86
- edsl/dataset/dataset_operations_mixin.py +98 -76
- edsl/dataset/dataset_tree.py +11 -7
- edsl/dataset/display/table_display.py +0 -2
- edsl/dataset/display/table_renderers.py +6 -4
- edsl/dataset/exceptions.py +125 -0
- edsl/dataset/file_exports.py +18 -11
- edsl/dataset/r/ggplot.py +13 -6
- edsl/display/__init__.py +27 -0
- edsl/display/core.py +147 -0
- edsl/display/plugin.py +189 -0
- edsl/display/utils.py +52 -0
- edsl/inference_services/__init__.py +9 -1
- edsl/inference_services/available_model_cache_handler.py +1 -1
- edsl/inference_services/available_model_fetcher.py +5 -6
- edsl/inference_services/data_structures.py +10 -7
- edsl/inference_services/exceptions.py +132 -1
- edsl/inference_services/inference_service_abc.py +2 -2
- edsl/inference_services/inference_services_collection.py +2 -6
- edsl/inference_services/registry.py +4 -3
- edsl/inference_services/service_availability.py +4 -3
- edsl/inference_services/services/anthropic_service.py +4 -1
- edsl/inference_services/services/aws_bedrock.py +13 -12
- edsl/inference_services/services/azure_ai.py +12 -10
- edsl/inference_services/services/deep_infra_service.py +1 -4
- edsl/inference_services/services/deep_seek_service.py +1 -5
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/groq_service.py +1 -1
- edsl/inference_services/services/mistral_ai_service.py +4 -2
- edsl/inference_services/services/ollama_service.py +1 -1
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +8 -7
- edsl/inference_services/services/together_ai_service.py +2 -3
- edsl/inference_services/services/xai_service.py +1 -1
- edsl/instructions/__init__.py +1 -1
- edsl/instructions/change_instruction.py +7 -5
- edsl/instructions/exceptions.py +61 -0
- edsl/instructions/instruction.py +6 -2
- edsl/instructions/instruction_collection.py +6 -4
- edsl/instructions/instruction_handler.py +12 -15
- edsl/interviews/ReportErrors.py +0 -3
- edsl/interviews/__init__.py +9 -2
- edsl/interviews/answering_function.py +11 -13
- edsl/interviews/exception_tracking.py +15 -8
- edsl/interviews/exceptions.py +79 -0
- edsl/interviews/interview.py +33 -30
- edsl/interviews/interview_status_dictionary.py +4 -2
- edsl/interviews/interview_status_log.py +2 -1
- edsl/interviews/interview_task_manager.py +5 -5
- edsl/interviews/request_token_estimator.py +5 -2
- edsl/interviews/statistics.py +3 -4
- edsl/invigilators/__init__.py +7 -1
- edsl/invigilators/exceptions.py +79 -0
- edsl/invigilators/invigilator_base.py +0 -1
- edsl/invigilators/invigilators.py +9 -13
- edsl/invigilators/prompt_constructor.py +1 -5
- edsl/invigilators/prompt_helpers.py +8 -4
- edsl/invigilators/question_instructions_prompt_builder.py +1 -1
- edsl/invigilators/question_option_processor.py +9 -5
- edsl/invigilators/question_template_replacements_builder.py +3 -2
- edsl/jobs/__init__.py +42 -5
- edsl/jobs/async_interview_runner.py +25 -23
- edsl/jobs/check_survey_scenario_compatibility.py +11 -10
- edsl/jobs/data_structures.py +8 -5
- edsl/jobs/exceptions.py +177 -8
- edsl/jobs/fetch_invigilator.py +1 -1
- edsl/jobs/jobs.py +74 -69
- edsl/jobs/jobs_checks.py +6 -7
- edsl/jobs/jobs_component_constructor.py +4 -4
- edsl/jobs/jobs_pricing_estimation.py +4 -3
- edsl/jobs/jobs_remote_inference_logger.py +5 -4
- edsl/jobs/jobs_runner_asyncio.py +3 -4
- edsl/jobs/jobs_runner_status.py +8 -9
- edsl/jobs/remote_inference.py +27 -24
- edsl/jobs/results_exceptions_handler.py +10 -7
- edsl/key_management/__init__.py +3 -1
- edsl/key_management/exceptions.py +62 -0
- edsl/key_management/key_lookup.py +1 -1
- edsl/key_management/key_lookup_builder.py +37 -14
- edsl/key_management/key_lookup_collection.py +2 -0
- edsl/language_models/__init__.py +1 -1
- edsl/language_models/exceptions.py +302 -14
- edsl/language_models/language_model.py +9 -8
- edsl/language_models/model.py +4 -4
- edsl/language_models/model_list.py +1 -1
- edsl/language_models/price_manager.py +1 -1
- edsl/language_models/raw_response_handler.py +14 -9
- edsl/language_models/registry.py +17 -21
- edsl/language_models/repair.py +0 -6
- edsl/language_models/unused/fake_openai_service.py +0 -1
- edsl/load_plugins.py +69 -0
- edsl/logger.py +146 -0
- edsl/notebooks/__init__.py +24 -1
- edsl/notebooks/exceptions.py +82 -0
- edsl/notebooks/notebook.py +7 -3
- edsl/notebooks/notebook_to_latex.py +1 -2
- edsl/plugins/__init__.py +63 -0
- edsl/plugins/built_in/export_example.py +50 -0
- edsl/plugins/built_in/pig_latin.py +67 -0
- edsl/plugins/cli.py +372 -0
- edsl/plugins/cli_typer.py +283 -0
- edsl/plugins/exceptions.py +31 -0
- edsl/plugins/hookspec.py +51 -0
- edsl/plugins/plugin_host.py +128 -0
- edsl/plugins/plugin_manager.py +633 -0
- edsl/plugins/plugins_registry.py +168 -0
- edsl/prompts/__init__.py +24 -1
- edsl/prompts/exceptions.py +107 -5
- edsl/prompts/prompt.py +15 -7
- edsl/questions/HTMLQuestion.py +5 -11
- edsl/questions/Quick.py +0 -1
- edsl/questions/__init__.py +6 -4
- edsl/questions/answer_validator_mixin.py +318 -323
- edsl/questions/compose_questions.py +3 -3
- edsl/questions/descriptors.py +11 -50
- edsl/questions/exceptions.py +278 -22
- edsl/questions/loop_processor.py +7 -5
- edsl/questions/prompt_templates/question_list.jinja +3 -0
- edsl/questions/question_base.py +46 -19
- edsl/questions/question_base_gen_mixin.py +2 -2
- edsl/questions/question_base_prompts_mixin.py +13 -7
- edsl/questions/question_budget.py +503 -98
- edsl/questions/question_check_box.py +660 -160
- edsl/questions/question_dict.py +345 -194
- edsl/questions/question_extract.py +401 -61
- edsl/questions/question_free_text.py +80 -14
- edsl/questions/question_functional.py +119 -9
- edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
- edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
- edsl/questions/question_list.py +275 -28
- edsl/questions/question_matrix.py +643 -96
- edsl/questions/question_multiple_choice.py +219 -51
- edsl/questions/question_numerical.py +361 -32
- edsl/questions/question_rank.py +401 -124
- edsl/questions/question_registry.py +7 -5
- edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
- edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
- edsl/questions/register_questions_meta.py +2 -2
- edsl/questions/response_validator_abc.py +13 -15
- edsl/questions/response_validator_factory.py +10 -12
- edsl/questions/templates/dict/answering_instructions.jinja +1 -0
- edsl/questions/templates/rank/question_presentation.jinja +1 -1
- edsl/results/__init__.py +1 -1
- edsl/results/exceptions.py +141 -7
- edsl/results/report.py +1 -2
- edsl/results/result.py +11 -9
- edsl/results/results.py +480 -321
- edsl/results/results_selector.py +8 -4
- edsl/scenarios/PdfExtractor.py +2 -2
- edsl/scenarios/construct_download_link.py +69 -35
- edsl/scenarios/directory_scanner.py +33 -14
- edsl/scenarios/document_chunker.py +1 -1
- edsl/scenarios/exceptions.py +238 -14
- edsl/scenarios/file_methods.py +1 -1
- edsl/scenarios/file_store.py +7 -3
- edsl/scenarios/handlers/__init__.py +17 -0
- edsl/scenarios/handlers/docx_file_store.py +0 -5
- edsl/scenarios/handlers/pdf_file_store.py +0 -1
- edsl/scenarios/handlers/pptx_file_store.py +0 -5
- edsl/scenarios/handlers/py_file_store.py +0 -1
- edsl/scenarios/handlers/sql_file_store.py +1 -4
- edsl/scenarios/handlers/sqlite_file_store.py +0 -1
- edsl/scenarios/handlers/txt_file_store.py +1 -1
- edsl/scenarios/scenario.py +1 -3
- edsl/scenarios/scenario_list.py +179 -27
- edsl/scenarios/scenario_list_pdf_tools.py +1 -0
- edsl/scenarios/scenario_selector.py +0 -1
- edsl/surveys/__init__.py +3 -4
- edsl/surveys/dag/__init__.py +4 -2
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/edit_survey.py +1 -0
- edsl/surveys/exceptions.py +165 -9
- edsl/surveys/memory/__init__.py +5 -3
- edsl/surveys/memory/memory_management.py +1 -0
- edsl/surveys/memory/memory_plan.py +6 -15
- edsl/surveys/rules/__init__.py +5 -3
- edsl/surveys/rules/rule.py +1 -2
- edsl/surveys/rules/rule_collection.py +1 -1
- edsl/surveys/survey.py +12 -24
- edsl/surveys/survey_css.py +3 -3
- edsl/surveys/survey_export.py +6 -3
- edsl/surveys/survey_flow_visualization.py +10 -1
- edsl/surveys/survey_simulator.py +2 -1
- edsl/tasks/__init__.py +23 -1
- edsl/tasks/exceptions.py +72 -0
- edsl/tasks/question_task_creator.py +3 -3
- edsl/tasks/task_creators.py +1 -3
- edsl/tasks/task_history.py +8 -10
- edsl/tasks/task_status_log.py +1 -2
- edsl/tokens/__init__.py +29 -1
- edsl/tokens/exceptions.py +37 -0
- edsl/tokens/interview_token_usage.py +3 -2
- edsl/tokens/token_usage.py +4 -3
- edsl/utilities/__init__.py +21 -1
- edsl/utilities/decorators.py +1 -2
- edsl/utilities/markdown_to_docx.py +2 -2
- edsl/utilities/markdown_to_pdf.py +1 -1
- edsl/utilities/repair_functions.py +0 -1
- edsl/utilities/restricted_python.py +0 -1
- edsl/utilities/template_loader.py +2 -3
- edsl/utilities/utilities.py +8 -29
- {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
- edsl-0.1.51.dist-info/RECORD +365 -0
- edsl-0.1.51.dist-info/entry_points.txt +3 -0
- edsl/dataset/smart_objects.py +0 -96
- edsl/exceptions/BaseException.py +0 -21
- edsl/exceptions/__init__.py +0 -54
- edsl/exceptions/configuration.py +0 -16
- edsl/exceptions/general.py +0 -34
- edsl/questions/derived/__init__.py +0 -0
- edsl/study/ObjectEntry.py +0 -173
- edsl/study/ProofOfWork.py +0 -113
- edsl/study/SnapShot.py +0 -80
- edsl/study/Study.py +0 -520
- edsl/study/__init__.py +0 -6
- edsl/utilities/interface.py +0 -135
- edsl-0.1.49.dist-info/RECORD +0 -347
- {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
- {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0
edsl/results/results.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
The Results module provides tools for working with collections of Result objects.
|
1
|
+
"""The Results module provides tools for working with collections of Result objects.
|
3
2
|
|
4
3
|
The Results class is the primary container for analyzing and manipulating data obtained
|
5
4
|
from running surveys with language models. It implements a powerful data analysis interface
|
@@ -41,16 +40,15 @@ import json
|
|
41
40
|
import random
|
42
41
|
import warnings
|
43
42
|
from collections import UserList, defaultdict
|
44
|
-
from typing import Optional, Callable, Any,
|
43
|
+
from typing import Optional, Callable, Any, Union, List, TYPE_CHECKING
|
45
44
|
from bisect import bisect_left
|
46
45
|
|
47
46
|
from ..base import Base
|
48
47
|
|
49
48
|
if TYPE_CHECKING:
|
50
49
|
from ..surveys import Survey
|
51
|
-
from ..
|
50
|
+
from ..caching import Cache
|
52
51
|
from ..agents import AgentList
|
53
|
-
from ..language_models import Model
|
54
52
|
from ..scenarios import ScenarioList
|
55
53
|
from ..results import Result
|
56
54
|
from ..tasks import TaskHistory
|
@@ -71,23 +69,43 @@ from .exceptions import (
|
|
71
69
|
ResultsDeserializationError,
|
72
70
|
)
|
73
71
|
|
72
|
+
|
74
73
|
def ensure_fetched(method):
|
75
|
-
"""A decorator that checks if remote data is loaded, and if not, attempts to fetch it.
|
74
|
+
"""A decorator that checks if remote data is loaded, and if not, attempts to fetch it.
|
75
|
+
|
76
|
+
Args:
|
77
|
+
method: The method to decorate.
|
78
|
+
|
79
|
+
Returns:
|
80
|
+
The wrapped method that will ensure data is fetched before execution.
|
81
|
+
"""
|
82
|
+
|
76
83
|
def wrapper(self, *args, **kwargs):
|
77
84
|
if not self._fetched:
|
78
85
|
# If not fetched, try fetching now.
|
79
86
|
# (If you know you have job info stored in self.job_info)
|
80
87
|
self.fetch_remote(self.job_info)
|
81
88
|
return method(self, *args, **kwargs)
|
89
|
+
|
82
90
|
return wrapper
|
83
91
|
|
92
|
+
|
84
93
|
def ensure_ready(method):
|
85
|
-
"""
|
86
|
-
|
87
|
-
|
94
|
+
"""Decorator for Results methods to handle not-ready state.
|
95
|
+
|
88
96
|
If the Results object is not ready, for most methods we return a NotReadyObject.
|
89
97
|
However, for __repr__ (and other methods that need to return a string), we return
|
90
98
|
the string representation of NotReadyObject.
|
99
|
+
|
100
|
+
Args:
|
101
|
+
method: The method to decorate.
|
102
|
+
|
103
|
+
Returns:
|
104
|
+
The wrapped method that will handle not-ready Results objects appropriately.
|
105
|
+
|
106
|
+
Raises:
|
107
|
+
Exception: Any exception from fetch_remote will be caught and printed.
|
108
|
+
|
91
109
|
"""
|
92
110
|
from functools import wraps
|
93
111
|
|
@@ -102,7 +120,7 @@ def ensure_ready(method):
|
|
102
120
|
except Exception as e:
|
103
121
|
print(f"Error during fetch_remote in {method.__name__}: {e}")
|
104
122
|
if not self.completed:
|
105
|
-
not_ready = NotReadyObject(name
|
123
|
+
not_ready = NotReadyObject(name=method.__name__, job_info=self.job_info)
|
106
124
|
# For __repr__, ensure we return a string
|
107
125
|
if method.__name__ == "__repr__" or method.__name__ == "__str__":
|
108
126
|
return not_ready.__repr__()
|
@@ -111,59 +129,115 @@ def ensure_ready(method):
|
|
111
129
|
|
112
130
|
return wrapper
|
113
131
|
|
132
|
+
|
114
133
|
class NotReadyObject:
|
115
|
-
"""A placeholder object that
|
116
|
-
|
134
|
+
"""A placeholder object that indicates results are not ready yet.
|
135
|
+
|
136
|
+
This class returns itself for all attribute accesses and method calls,
|
137
|
+
displaying a message about the job's running status when represented as a string.
|
138
|
+
|
139
|
+
Attributes:
|
140
|
+
name: The name of the method that was originally called.
|
141
|
+
job_info: Information about the running job.
|
142
|
+
|
143
|
+
"""
|
144
|
+
|
145
|
+
def __init__(self, name: str, job_info: "Any"):
|
146
|
+
"""Initialize a NotReadyObject.
|
147
|
+
|
148
|
+
Args:
|
149
|
+
name: The name of the method that was attempted to be called.
|
150
|
+
job_info: Information about the running job.
|
151
|
+
"""
|
117
152
|
self.name = name
|
118
153
|
self.job_info = job_info
|
119
|
-
#print(f"Not ready to call {name}")
|
154
|
+
# print(f"Not ready to call {name}")
|
120
155
|
|
121
156
|
def __repr__(self):
|
122
|
-
|
157
|
+
"""Generate a string representation showing the job is still running.
|
158
|
+
|
159
|
+
Returns:
|
160
|
+
str: A message indicating the job is still running, along with job details.
|
161
|
+
"""
|
162
|
+
message = """Results not ready - job still running on server."""
|
123
163
|
for key, value in self.job_info.creation_data.items():
|
124
164
|
message += f"\n{key}: {value}"
|
125
165
|
return message
|
126
166
|
|
127
167
|
def __getattr__(self, _):
|
168
|
+
"""Return self for any attribute access.
|
169
|
+
|
170
|
+
Args:
|
171
|
+
_: The attribute name (ignored).
|
172
|
+
|
173
|
+
Returns:
|
174
|
+
NotReadyObject: Returns self for chaining.
|
175
|
+
"""
|
128
176
|
return self
|
129
|
-
|
177
|
+
|
130
178
|
def __call__(self, *args, **kwargs):
|
179
|
+
"""Return self when called as a function.
|
180
|
+
|
181
|
+
Args:
|
182
|
+
*args: Positional arguments (ignored).
|
183
|
+
**kwargs: Keyword arguments (ignored).
|
184
|
+
|
185
|
+
Returns:
|
186
|
+
NotReadyObject: Returns self for chaining.
|
187
|
+
"""
|
131
188
|
return self
|
132
189
|
|
133
190
|
|
134
191
|
class Results(UserList, ResultsOperationsMixin, Base):
|
135
|
-
"""
|
136
|
-
|
137
|
-
|
192
|
+
"""A collection of Result objects with powerful data analysis capabilities.
|
193
|
+
|
138
194
|
The Results class is the primary container for working with data from EDSL surveys.
|
139
195
|
It provides a rich set of methods for data analysis, transformation, and visualization
|
140
|
-
inspired by data manipulation libraries like dplyr and pandas. The Results class
|
141
|
-
implements a functional, fluent interface for data manipulation where each method
|
196
|
+
inspired by data manipulation libraries like dplyr and pandas. The Results class
|
197
|
+
implements a functional, fluent interface for data manipulation where each method
|
142
198
|
returns a new Results object, allowing method chaining.
|
143
|
-
|
199
|
+
|
200
|
+
Attributes:
|
201
|
+
survey: The Survey object containing the questions used to generate results.
|
202
|
+
data: A list of Result objects containing the responses.
|
203
|
+
created_columns: A list of column names created through transformations.
|
204
|
+
cache: A Cache object for storing model responses.
|
205
|
+
completed: Whether the Results object is ready for use.
|
206
|
+
task_history: A TaskHistory object containing information about the tasks.
|
207
|
+
known_data_types: List of valid data type strings for accessing data.
|
208
|
+
|
144
209
|
Key features:
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
210
|
+
- List-like interface for accessing individual Result objects
|
211
|
+
- Selection of specific data columns with `select()`
|
212
|
+
- Filtering results with boolean expressions using `filter()`
|
213
|
+
- Creating new derived columns with `mutate()`
|
214
|
+
- Recoding values with `recode()` and `answer_truncate()`
|
215
|
+
- Sorting results with `order_by()`
|
216
|
+
- Converting to other formats (dataset, table, pandas DataFrame)
|
217
|
+
- Serialization for storage and retrieval
|
218
|
+
- Support for remote execution and result retrieval
|
219
|
+
|
156
220
|
Results objects have a hierarchical structure with the following components:
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
221
|
+
1. Each Results object contains multiple Result objects
|
222
|
+
2. Each Result object contains data organized by type (agent, scenario, model, answer, etc.)
|
223
|
+
3. Each data type contains multiple attributes (e.g., "how_feeling" in the answer type)
|
224
|
+
|
162
225
|
You can access data in a Results object using dot notation (`answer.how_feeling`) or
|
163
226
|
using just the attribute name if it's not ambiguous (`how_feeling`).
|
164
|
-
|
227
|
+
|
165
228
|
The Results class also tracks "created columns" - new derived values that aren't
|
166
229
|
part of the original data but were created through transformations.
|
230
|
+
|
231
|
+
Examples:
|
232
|
+
>>> # Create a simple Results object from example data
|
233
|
+
>>> r = Results.example()
|
234
|
+
>>> len(r) > 0 # Contains Result objects
|
235
|
+
True
|
236
|
+
>>> # Filter and transform data
|
237
|
+
>>> filtered = r.filter("how_feeling == 'Great'")
|
238
|
+
>>> # Access hierarchical data
|
239
|
+
>>> 'agent' in r.known_data_types
|
240
|
+
True
|
167
241
|
"""
|
168
242
|
|
169
243
|
__documentation__ = "https://docs.expectedparrot.com/en/latest/results.html"
|
@@ -186,9 +260,28 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
186
260
|
]
|
187
261
|
|
188
262
|
@classmethod
|
189
|
-
def from_job_info(cls, job_info: dict) -> Results:
|
190
|
-
"""
|
191
|
-
|
263
|
+
def from_job_info(cls, job_info: dict) -> "Results":
|
264
|
+
"""Instantiate a Results object from a job info dictionary.
|
265
|
+
|
266
|
+
This method creates a Results object in a not-ready state that will
|
267
|
+
fetch its data from a remote source when methods are called on it.
|
268
|
+
|
269
|
+
Args:
|
270
|
+
job_info: Dictionary containing information about a remote job.
|
271
|
+
|
272
|
+
Returns:
|
273
|
+
Results: A new Results instance with completed=False that will
|
274
|
+
fetch remote data when needed.
|
275
|
+
|
276
|
+
Examples:
|
277
|
+
>>> # Create a job info dictionary
|
278
|
+
>>> job_info = {'job_uuid': '12345', 'creation_data': {'model': 'gpt-4'}}
|
279
|
+
>>> # Create a Results object from the job info
|
280
|
+
>>> results = Results.from_job_info(job_info)
|
281
|
+
>>> results.completed
|
282
|
+
False
|
283
|
+
>>> hasattr(results, 'job_info')
|
284
|
+
True
|
192
285
|
"""
|
193
286
|
results = cls()
|
194
287
|
results.completed = False
|
@@ -205,14 +298,37 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
205
298
|
total_results: Optional[int] = None,
|
206
299
|
task_history: Optional[TaskHistory] = None,
|
207
300
|
):
|
208
|
-
"""Instantiate a
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
:
|
215
|
-
|
301
|
+
"""Instantiate a Results object with a survey and a list of Result objects.
|
302
|
+
|
303
|
+
This initializes a completed Results object with the provided data.
|
304
|
+
For creating a not-ready Results object from a job info dictionary,
|
305
|
+
use the from_job_info class method instead.
|
306
|
+
|
307
|
+
Args:
|
308
|
+
survey: A Survey object containing the questions used to generate results.
|
309
|
+
data: A list of Result objects containing the responses.
|
310
|
+
created_columns: A list of column names created through transformations.
|
311
|
+
cache: A Cache object for storing model responses.
|
312
|
+
job_uuid: A string representing the job UUID.
|
313
|
+
total_results: An integer representing the total number of results.
|
314
|
+
task_history: A TaskHistory object containing information about the tasks.
|
315
|
+
|
316
|
+
Examples:
|
317
|
+
>>> from ..results import Result
|
318
|
+
>>> # Create an empty Results object
|
319
|
+
>>> r = Results()
|
320
|
+
>>> r.completed
|
321
|
+
True
|
322
|
+
>>> len(r.created_columns)
|
323
|
+
0
|
324
|
+
|
325
|
+
>>> # Create a Results object with data
|
326
|
+
>>> from unittest.mock import Mock
|
327
|
+
>>> mock_survey = Mock()
|
328
|
+
>>> mock_result = Mock(spec=Result)
|
329
|
+
>>> r = Results(survey=mock_survey, data=[mock_result])
|
330
|
+
>>> len(r)
|
331
|
+
1
|
216
332
|
"""
|
217
333
|
self.completed = True
|
218
334
|
self._fetching = False
|
@@ -231,39 +347,26 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
231
347
|
if hasattr(self, "_add_output_functions"):
|
232
348
|
self._add_output_functions()
|
233
349
|
|
234
|
-
def long(self):
|
235
|
-
return self.table().long()
|
236
|
-
|
237
|
-
def print_long(self, max_rows: int = None) -> None:
|
238
|
-
"""Print the results in long format.
|
239
|
-
|
240
|
-
>>> from edsl.results import Results
|
241
|
-
>>> r = Results.example()
|
242
|
-
>>> r.select('how_feeling').print_long(max_rows = 2)
|
243
|
-
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
|
244
|
-
┃ Result index ┃ Key ┃ Value ┃
|
245
|
-
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
|
246
|
-
│ 0 │ how_feeling │ OK │
|
247
|
-
│ 1 │ how_feeling │ Great │
|
248
|
-
└──────────────┴─────────────┴───────┘
|
249
|
-
"""
|
250
|
-
from edsl.utilities.interface import print_results_long
|
251
|
-
|
252
|
-
print_results_long(self, max_rows=max_rows)
|
253
|
-
|
254
|
-
|
255
350
|
def _fetch_list(self, data_type: str, key: str) -> list:
|
256
|
-
"""
|
257
|
-
Return a list of values from the data for a given data type and key.
|
351
|
+
"""Return a list of values from the data for a given data type and key.
|
258
352
|
|
259
353
|
Uses the filtered data, not the original data.
|
260
354
|
|
261
|
-
|
355
|
+
Args:
|
356
|
+
data_type: The type of data to fetch (e.g., 'answer', 'agent', 'scenario').
|
357
|
+
key: The key to fetch from each data type dictionary.
|
262
358
|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
359
|
+
Returns:
|
360
|
+
list: A list of values, one from each result in the data.
|
361
|
+
|
362
|
+
Examples:
|
363
|
+
>>> from edsl.results import Results
|
364
|
+
>>> r = Results.example()
|
365
|
+
>>> values = r._fetch_list('answer', 'how_feeling')
|
366
|
+
>>> len(values) == len(r)
|
367
|
+
True
|
368
|
+
>>> all(isinstance(v, (str, type(None))) for v in values)
|
369
|
+
True
|
267
370
|
"""
|
268
371
|
returned_list = []
|
269
372
|
for row in self.data:
|
@@ -271,6 +374,25 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
271
374
|
|
272
375
|
return returned_list
|
273
376
|
|
377
|
+
def get_answers(self, question_name: str) -> list:
|
378
|
+
"""Get the answers for a given question name.
|
379
|
+
|
380
|
+
Args:
|
381
|
+
question_name: The name of the question to fetch answers for.
|
382
|
+
|
383
|
+
Returns:
|
384
|
+
list: A list of answers, one from each result in the data.
|
385
|
+
|
386
|
+
Examples:
|
387
|
+
>>> from edsl.results import Results
|
388
|
+
>>> r = Results.example()
|
389
|
+
>>> answers = r.get_answers('how_feeling')
|
390
|
+
>>> isinstance(answers, list)
|
391
|
+
True
|
392
|
+
>>> len(answers) == len(r)
|
393
|
+
True
|
394
|
+
"""
|
395
|
+
return self._fetch_list("answer", question_name)
|
274
396
|
|
275
397
|
def _summary(self) -> dict:
|
276
398
|
import reprlib
|
@@ -322,8 +444,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
322
444
|
self.insert(item)
|
323
445
|
|
324
446
|
def compute_job_cost(self, include_cached_responses_in_cost: bool = False) -> float:
|
325
|
-
"""
|
326
|
-
|
447
|
+
"""Compute the cost of a completed job in USD.
|
448
|
+
|
449
|
+
This method calculates the total cost of all model responses in the results.
|
450
|
+
By default, it only counts the cost of responses that were not cached.
|
451
|
+
|
452
|
+
Args:
|
453
|
+
include_cached_responses_in_cost: Whether to include the cost of cached
|
454
|
+
responses in the total. Defaults to False.
|
455
|
+
|
456
|
+
Returns:
|
457
|
+
float: The total cost in USD.
|
458
|
+
|
459
|
+
Examples:
|
460
|
+
>>> from edsl.results import Results
|
461
|
+
>>> r = Results.example()
|
462
|
+
>>> r.compute_job_cost()
|
463
|
+
0
|
327
464
|
"""
|
328
465
|
total_cost = 0
|
329
466
|
for result in self:
|
@@ -342,88 +479,55 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
342
479
|
|
343
480
|
return total_cost
|
344
481
|
|
345
|
-
# def leaves(self):
|
346
|
-
# leaves = []
|
347
|
-
# for result in self:
|
348
|
-
# leaves.extend(result.leaves())
|
349
|
-
# return leaves
|
350
|
-
|
351
|
-
# def tree(self, node_list: Optional[List[str]] = None):
|
352
|
-
# return self.to_scenario_list().tree(node_list)
|
353
|
-
|
354
|
-
# def interactive_tree(
|
355
|
-
# self,
|
356
|
-
# fold_attributes: Optional[List[str]] = None,
|
357
|
-
# drop: Optional[List[str]] = None,
|
358
|
-
# open_file=True,
|
359
|
-
# ) -> dict:
|
360
|
-
# """Return the results as a tree."""
|
361
|
-
# from edsl.results.tree_explore import FoldableHTMLTableGenerator
|
362
|
-
|
363
|
-
# if drop is None:
|
364
|
-
# drop = []
|
365
|
-
|
366
|
-
# valid_attributes = [
|
367
|
-
# "model",
|
368
|
-
# "scenario",
|
369
|
-
# "agent",
|
370
|
-
# "answer",
|
371
|
-
# "question",
|
372
|
-
# "iteration",
|
373
|
-
# ]
|
374
|
-
# if fold_attributes is None:
|
375
|
-
# fold_attributes = []
|
376
|
-
|
377
|
-
# for attribute in fold_attributes:
|
378
|
-
# if attribute not in valid_attributes:
|
379
|
-
# raise ValueError(
|
380
|
-
# f"Invalid fold attribute: {attribute}; must be in {valid_attributes}"
|
381
|
-
# )
|
382
|
-
# data = self.leaves()
|
383
|
-
# generator = FoldableHTMLTableGenerator(data)
|
384
|
-
# tree = generator.tree(fold_attributes=fold_attributes, drop=drop)
|
385
|
-
# html_content = generator.generate_html(tree, fold_attributes)
|
386
|
-
# import tempfile
|
387
|
-
# from edsl.utilities.utilities import is_notebook
|
388
|
-
|
389
|
-
# from IPython.display import display, HTML
|
390
|
-
|
391
|
-
# if is_notebook():
|
392
|
-
# import html
|
393
|
-
# from IPython.display import display, HTML
|
394
|
-
|
395
|
-
# height = 1000
|
396
|
-
# width = 1000
|
397
|
-
# escaped_output = html.escape(html_content)
|
398
|
-
# # escaped_output = rendered_html
|
399
|
-
# iframe = f""""
|
400
|
-
# <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
|
401
|
-
# """
|
402
|
-
# display(HTML(iframe))
|
403
|
-
# return None
|
404
|
-
|
405
|
-
# with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
|
406
|
-
# f.write(html_content.encode())
|
407
|
-
# print(f"HTML file has been generated: {f.name}")
|
408
|
-
|
409
|
-
# if open_file:
|
410
|
-
# import webbrowser
|
411
|
-
# import time
|
412
|
-
|
413
|
-
# time.sleep(1) # Wait for 1 second
|
414
|
-
# # webbrowser.open(f.name)
|
415
|
-
# import os
|
416
|
-
|
417
|
-
# filename = f.name
|
418
|
-
# webbrowser.open(f"file://{os.path.abspath(filename)}")
|
419
|
-
|
420
|
-
# else:
|
421
|
-
# return html_content
|
422
|
-
|
423
482
|
def code(self):
|
424
|
-
|
483
|
+
"""Method for generating code representations.
|
484
|
+
|
485
|
+
Raises:
|
486
|
+
ResultsError: This method is not implemented for Results objects.
|
487
|
+
|
488
|
+
Examples:
|
489
|
+
>>> from edsl.results import Results
|
490
|
+
>>> r = Results.example()
|
491
|
+
>>> try:
|
492
|
+
... r.code()
|
493
|
+
... except ResultsError as e:
|
494
|
+
... str(e).startswith("The code() method is not implemented")
|
495
|
+
True
|
496
|
+
"""
|
497
|
+
raise ResultsError("The code() method is not implemented for Results objects")
|
425
498
|
|
426
499
|
def __getitem__(self, i):
|
500
|
+
"""Get an item from the Results object by index, slice, or key.
|
501
|
+
|
502
|
+
Args:
|
503
|
+
i: An integer index, a slice, or a string key.
|
504
|
+
|
505
|
+
Returns:
|
506
|
+
The requested item, slice of results, or dictionary value.
|
507
|
+
|
508
|
+
Raises:
|
509
|
+
ResultsError: If the argument type is invalid for indexing.
|
510
|
+
|
511
|
+
Examples:
|
512
|
+
>>> from edsl.results import Results
|
513
|
+
>>> r = Results.example()
|
514
|
+
>>> # Get by integer index
|
515
|
+
>>> result = r[0]
|
516
|
+
>>> # Get by slice
|
517
|
+
>>> subset = r[0:2]
|
518
|
+
>>> len(subset) == 2
|
519
|
+
True
|
520
|
+
>>> # Get by string key
|
521
|
+
>>> data = r["data"]
|
522
|
+
>>> isinstance(data, list)
|
523
|
+
True
|
524
|
+
>>> # Invalid index type
|
525
|
+
>>> try:
|
526
|
+
... r[1.5]
|
527
|
+
... except ResultsError:
|
528
|
+
... True
|
529
|
+
True
|
530
|
+
"""
|
427
531
|
if isinstance(i, int):
|
428
532
|
return self.data[i]
|
429
533
|
|
@@ -433,18 +537,40 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
433
537
|
if isinstance(i, str):
|
434
538
|
return self.to_dict()[i]
|
435
539
|
|
436
|
-
raise
|
540
|
+
raise ResultsError("Invalid argument type for indexing Results object")
|
437
541
|
|
438
542
|
def __add__(self, other: Results) -> Results:
|
439
543
|
"""Add two Results objects together.
|
440
|
-
They must have the same survey and created columns.
|
441
|
-
:param other: A Results object.
|
442
544
|
|
443
|
-
|
545
|
+
Combines two Results objects into a new one. Both objects must have the same
|
546
|
+
survey and created columns.
|
444
547
|
|
445
|
-
|
446
|
-
|
447
|
-
|
548
|
+
Args:
|
549
|
+
other: A Results object to add to this one.
|
550
|
+
|
551
|
+
Returns:
|
552
|
+
A new Results object containing data from both objects.
|
553
|
+
|
554
|
+
Raises:
|
555
|
+
ResultsError: If the surveys or created columns of the two objects don't match.
|
556
|
+
|
557
|
+
Examples:
|
558
|
+
>>> from edsl.results import Results
|
559
|
+
>>> r1 = Results.example()
|
560
|
+
>>> r2 = Results.example()
|
561
|
+
>>> # Combine two Results objects
|
562
|
+
>>> r3 = r1 + r2
|
563
|
+
>>> len(r3) == len(r1) + len(r2)
|
564
|
+
True
|
565
|
+
|
566
|
+
>>> # Attempting to add incompatible Results
|
567
|
+
>>> from unittest.mock import Mock
|
568
|
+
>>> r4 = Results(survey=Mock()) # Different survey
|
569
|
+
>>> try:
|
570
|
+
... r1 + r4
|
571
|
+
... except ResultsError:
|
572
|
+
... True
|
573
|
+
True
|
448
574
|
"""
|
449
575
|
if self.survey != other.survey:
|
450
576
|
raise ResultsError(
|
@@ -460,21 +586,17 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
460
586
|
data=self.data + other.data,
|
461
587
|
created_columns=self.created_columns,
|
462
588
|
)
|
463
|
-
|
589
|
+
|
464
590
|
def _repr_html_(self):
|
465
591
|
if not self.completed:
|
466
592
|
if hasattr(self, "job_info"):
|
467
593
|
self.fetch_remote(self.job_info)
|
468
|
-
|
594
|
+
|
469
595
|
if not self.completed:
|
470
|
-
return
|
471
|
-
|
596
|
+
return "Results not ready to call"
|
597
|
+
|
472
598
|
return super()._repr_html_()
|
473
599
|
|
474
|
-
# @ensure_ready
|
475
|
-
# def __str__(self):
|
476
|
-
# super().__str__()
|
477
|
-
|
478
600
|
@ensure_ready
|
479
601
|
def __repr__(self) -> str:
|
480
602
|
return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
|
@@ -482,7 +604,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
482
604
|
def table(
|
483
605
|
self,
|
484
606
|
*fields,
|
485
|
-
tablefmt: Optional[str] =
|
607
|
+
tablefmt: Optional[str] = "rich",
|
486
608
|
pretty_labels: Optional[dict] = None,
|
487
609
|
print_parameters: Optional[dict] = None,
|
488
610
|
):
|
@@ -516,8 +638,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
516
638
|
print_parameters=print_parameters,
|
517
639
|
)
|
518
640
|
)
|
519
|
-
|
520
|
-
def to_dataset(self) ->
|
641
|
+
|
642
|
+
def to_dataset(self) -> "Dataset":
|
521
643
|
return self.select()
|
522
644
|
|
523
645
|
def to_dict(
|
@@ -561,7 +683,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
561
683
|
d.update({"task_history": self.task_history.to_dict()})
|
562
684
|
|
563
685
|
if add_edsl_version:
|
564
|
-
from
|
686
|
+
from .. import __version__
|
565
687
|
|
566
688
|
d["edsl_version"] = __version__
|
567
689
|
d["edsl_class_name"] = "Results"
|
@@ -590,7 +712,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
590
712
|
return self.task_history.has_unfixed_exceptions
|
591
713
|
|
592
714
|
def __hash__(self) -> int:
|
593
|
-
|
715
|
+
|
594
716
|
return dict_hash(
|
595
717
|
self.to_dict(sort=True, add_edsl_version=False, include_cache_info=False)
|
596
718
|
)
|
@@ -599,7 +721,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
599
721
|
def hashes(self) -> set:
|
600
722
|
return set(hash(result) for result in self.data)
|
601
723
|
|
602
|
-
def
|
724
|
+
def _sample_legacy(self, n: int) -> Results:
|
603
725
|
"""Return a random sample of the results.
|
604
726
|
|
605
727
|
:param n: The number of samples to return.
|
@@ -643,7 +765,6 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
643
765
|
from ..caching import Cache
|
644
766
|
from ..results import Result
|
645
767
|
from ..tasks import TaskHistory
|
646
|
-
from ..agents import Agent
|
647
768
|
|
648
769
|
survey = Survey.from_dict(data["survey"])
|
649
770
|
results_data = [Result.from_dict(r) for r in data["data"]]
|
@@ -717,7 +838,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
717
838
|
['agent.agent_index', ...]
|
718
839
|
"""
|
719
840
|
column_names = [f"{v}.{k}" for k, v in self._key_to_data_type.items()]
|
720
|
-
from
|
841
|
+
from ..utilities.PrettyList import PrettyList
|
721
842
|
|
722
843
|
return PrettyList(sorted(column_names))
|
723
844
|
|
@@ -731,7 +852,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
731
852
|
>>> r.answer_keys
|
732
853
|
{'how_feeling': 'How are you this {{ period }}?', 'how_feeling_yesterday': 'How were you feeling yesterday {{ period }}?'}
|
733
854
|
"""
|
734
|
-
from
|
855
|
+
from ..utilities.utilities import shorten_string
|
735
856
|
|
736
857
|
if not self.survey:
|
737
858
|
raise ResultsError("Survey is not defined so no answer keys are available.")
|
@@ -756,7 +877,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
756
877
|
>>> r.agents
|
757
878
|
AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'}), Agent(traits = {'status': 'Sad'})])
|
758
879
|
"""
|
759
|
-
from
|
880
|
+
from ..agents import AgentList
|
760
881
|
|
761
882
|
return AgentList([r.agent for r in self.data])
|
762
883
|
|
@@ -867,7 +988,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
867
988
|
return self.data[0]
|
868
989
|
|
869
990
|
def answer_truncate(
|
870
|
-
self, column: str, top_n: int = 5, new_var_name: str = None
|
991
|
+
self, column: str, top_n: int = 5, new_var_name: Optional[str] = None
|
871
992
|
) -> Results:
|
872
993
|
"""Create a new variable that truncates the answers to the top_n.
|
873
994
|
|
@@ -998,24 +1119,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
998
1119
|
def mutate(
|
999
1120
|
self, new_var_string: str, functions_dict: Optional[dict] = None
|
1000
1121
|
) -> Results:
|
1001
|
-
"""
|
1002
|
-
|
1003
|
-
|
1122
|
+
"""Create a new column based on a computational expression.
|
1123
|
+
|
1004
1124
|
The mutate method allows you to create new derived variables based on existing data.
|
1005
1125
|
You provide an assignment expression where the left side is the new column name
|
1006
1126
|
and the right side is a Python expression that computes the value. The expression
|
1007
1127
|
can reference any existing columns in the Results object.
|
1008
|
-
|
1009
|
-
|
1010
|
-
new_var_string: A string containing an assignment expression in the form
|
1011
|
-
|
1012
|
-
|
1013
|
-
functions_dict: Optional dictionary of custom functions that can be used in
|
1014
|
-
|
1015
|
-
|
1128
|
+
|
1129
|
+
Args:
|
1130
|
+
new_var_string: A string containing an assignment expression in the form
|
1131
|
+
"new_column_name = expression". The expression can reference
|
1132
|
+
any existing column and use standard Python syntax.
|
1133
|
+
functions_dict: Optional dictionary of custom functions that can be used in
|
1134
|
+
the expression. Keys are function names, values are function objects.
|
1135
|
+
|
1016
1136
|
Returns:
|
1017
1137
|
A new Results object with the additional column.
|
1018
|
-
|
1138
|
+
|
1019
1139
|
Notes:
|
1020
1140
|
- The expression must contain an equals sign (=) separating the new column name
|
1021
1141
|
from the computation expression
|
@@ -1024,22 +1144,22 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1024
1144
|
- The expression can access any data in the Result object using the column names
|
1025
1145
|
- New columns are added to the "answer" data type
|
1026
1146
|
- Created columns are tracked in the `created_columns` property
|
1027
|
-
|
1147
|
+
|
1028
1148
|
Examples:
|
1029
1149
|
>>> r = Results.example()
|
1030
|
-
|
1031
|
-
# Create a simple derived column
|
1150
|
+
|
1151
|
+
>>> # Create a simple derived column
|
1032
1152
|
>>> r.mutate('how_feeling_x = how_feeling + "x"').select('how_feeling_x')
|
1033
1153
|
Dataset([{'answer.how_feeling_x': ['OKx', 'Greatx', 'Terriblex', 'OKx']}])
|
1034
|
-
|
1035
|
-
# Create a binary indicator column
|
1154
|
+
|
1155
|
+
>>> # Create a binary indicator column
|
1036
1156
|
>>> r.mutate('is_great = 1 if how_feeling == "Great" else 0').select('is_great')
|
1037
1157
|
Dataset([{'answer.is_great': [0, 1, 0, 0]}])
|
1038
|
-
|
1039
|
-
# Create a column with custom functions
|
1158
|
+
|
1159
|
+
>>> # Create a column with custom functions
|
1040
1160
|
>>> def sentiment(text):
|
1041
1161
|
... return len(text) > 5
|
1042
|
-
>>> r.mutate('is_long = sentiment(how_feeling)',
|
1162
|
+
>>> r.mutate('is_long = sentiment(how_feeling)',
|
1043
1163
|
... functions_dict={'sentiment': sentiment}).select('is_long')
|
1044
1164
|
Dataset([{'answer.is_long': [False, False, True, False]}])
|
1045
1165
|
"""
|
@@ -1050,7 +1170,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1050
1170
|
)
|
1051
1171
|
raw_var_name, expression = new_var_string.split("=", 1)
|
1052
1172
|
var_name = raw_var_name.strip()
|
1053
|
-
from
|
1173
|
+
from ..utilities.utilities import is_valid_variable_name
|
1054
1174
|
|
1055
1175
|
if not is_valid_variable_name(var_name):
|
1056
1176
|
raise ResultsInvalidNameError(f"{var_name} is not a valid variable name.")
|
@@ -1076,26 +1196,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1076
1196
|
created_columns=self.created_columns + [var_name],
|
1077
1197
|
)
|
1078
1198
|
|
1079
|
-
|
1080
|
-
def add_column(self, column_name: str, values: list) -> Results:
|
1081
|
-
"""Adds columns to Results
|
1082
|
-
|
1083
|
-
>>> r = Results.example()
|
1084
|
-
>>> r.add_column('a', [1,2,3, 4]).select('a')
|
1085
|
-
Dataset([{'answer.a': [1, 2, 3, 4]}])
|
1086
|
-
"""
|
1087
|
-
|
1088
|
-
assert len(values) == len(
|
1089
|
-
self.data
|
1090
|
-
), "The number of values must match the number of results."
|
1091
|
-
new_results = self.data.copy()
|
1092
|
-
for i, result in enumerate(new_results):
|
1093
|
-
result["answer"][column_name] = values[i]
|
1094
|
-
return Results(
|
1095
|
-
survey=self.survey,
|
1096
|
-
data=new_results,
|
1097
|
-
created_columns=self.created_columns + [column_name],
|
1098
|
-
)
|
1199
|
+
# Method removed due to duplication (F811)
|
1099
1200
|
|
1100
1201
|
@ensure_ready
|
1101
1202
|
def rename(self, old_name: str, new_name: str) -> Results:
|
@@ -1157,10 +1258,14 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1157
1258
|
random.seed(seed)
|
1158
1259
|
|
1159
1260
|
if n is None and frac is None:
|
1160
|
-
|
1261
|
+
from .exceptions import ResultsError
|
1262
|
+
|
1263
|
+
raise ResultsError("You must specify either n or frac.")
|
1161
1264
|
|
1162
1265
|
if n is not None and frac is not None:
|
1163
|
-
|
1266
|
+
from .exceptions import ResultsError
|
1267
|
+
|
1268
|
+
raise ResultsError("You cannot specify both n and frac.")
|
1164
1269
|
|
1165
1270
|
if frac is not None and n is None:
|
1166
1271
|
n = int(frac * len(self.data))
|
@@ -1173,61 +1278,62 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1173
1278
|
return Results(survey=self.survey, data=new_data, created_columns=None)
|
1174
1279
|
|
1175
1280
|
@ensure_ready
|
1176
|
-
def select(self, *columns: Union[str, list[str]]) ->
|
1177
|
-
"""
|
1178
|
-
|
1179
|
-
|
1281
|
+
def select(self, *columns: Union[str, list[str]]) -> "Dataset":
|
1282
|
+
"""Extract specific columns from the Results into a Dataset.
|
1283
|
+
|
1180
1284
|
This method allows you to select specific columns from the Results object
|
1181
1285
|
and transforms the data into a Dataset for further analysis and visualization.
|
1182
1286
|
A Dataset is a more general-purpose data structure optimized for analysis
|
1183
1287
|
operations rather than the hierarchical structure of Result objects.
|
1184
|
-
|
1185
|
-
|
1288
|
+
|
1289
|
+
Args:
|
1186
1290
|
*columns: Column names to select. Each column can be:
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1291
|
+
- A simple attribute name (e.g., "how_feeling")
|
1292
|
+
- A fully qualified name with type (e.g., "answer.how_feeling")
|
1293
|
+
- A wildcard pattern (e.g., "answer.*" to select all answer fields)
|
1294
|
+
If no columns are provided, selects all data.
|
1295
|
+
|
1192
1296
|
Returns:
|
1193
1297
|
A Dataset object containing the selected data.
|
1194
|
-
|
1298
|
+
|
1195
1299
|
Notes:
|
1196
1300
|
- Column names are automatically disambiguated if needed
|
1197
1301
|
- When column names are ambiguous, specify the full path with data type
|
1198
1302
|
- You can use wildcard patterns with "*" to select multiple related fields
|
1199
1303
|
- Selecting with no arguments returns all data
|
1200
1304
|
- Results are restructured in a columnar format in the Dataset
|
1201
|
-
|
1305
|
+
|
1202
1306
|
Examples:
|
1203
1307
|
>>> results = Results.example()
|
1204
|
-
|
1205
|
-
# Select a single column by name
|
1308
|
+
|
1309
|
+
>>> # Select a single column by name
|
1206
1310
|
>>> results.select('how_feeling')
|
1207
1311
|
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
1208
|
-
|
1209
|
-
# Select multiple columns
|
1312
|
+
|
1313
|
+
>>> # Select multiple columns
|
1210
1314
|
>>> ds = results.select('how_feeling', 'how_feeling_yesterday')
|
1211
1315
|
>>> sorted([list(d.keys())[0] for d in ds])
|
1212
1316
|
['answer.how_feeling', 'answer.how_feeling_yesterday']
|
1213
|
-
|
1214
|
-
# Using fully qualified names with data type
|
1317
|
+
|
1318
|
+
>>> # Using fully qualified names with data type
|
1215
1319
|
>>> results.select('answer.how_feeling')
|
1216
1320
|
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
1217
|
-
|
1218
|
-
# Using partial matching for column names
|
1321
|
+
|
1322
|
+
>>> # Using partial matching for column names
|
1219
1323
|
>>> results.select('answer.how_feeling_y')
|
1220
1324
|
Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
|
1221
|
-
|
1222
|
-
# Select all columns (same as calling select with no arguments)
|
1223
|
-
>>> results.select('*.*')
|
1325
|
+
|
1326
|
+
>>> # Select all columns (same as calling select with no arguments)
|
1327
|
+
>>> results.select('*.*')
|
1224
1328
|
Dataset([...])
|
1225
1329
|
"""
|
1226
1330
|
|
1227
|
-
from
|
1331
|
+
from .results_selector import Selector
|
1228
1332
|
|
1229
1333
|
if len(self) == 0:
|
1230
|
-
|
1334
|
+
from .exceptions import ResultsError
|
1335
|
+
|
1336
|
+
raise ResultsError("No data to select from---the Results object is empty.")
|
1231
1337
|
|
1232
1338
|
selector = Selector(
|
1233
1339
|
known_data_types=self.known_data_types,
|
@@ -1275,7 +1381,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1275
1381
|
def to_numeric_if_possible(v):
|
1276
1382
|
try:
|
1277
1383
|
return float(v)
|
1278
|
-
except:
|
1384
|
+
except (ValueError, TypeError):
|
1279
1385
|
return v
|
1280
1386
|
|
1281
1387
|
def sort_key(item):
|
@@ -1291,21 +1397,24 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1291
1397
|
|
1292
1398
|
@ensure_ready
|
1293
1399
|
def filter(self, expression: str) -> Results:
|
1294
|
-
"""
|
1295
|
-
|
1296
|
-
|
1400
|
+
"""Filter results based on a boolean expression.
|
1401
|
+
|
1297
1402
|
This method evaluates a boolean expression against each Result object in the
|
1298
1403
|
collection and returns a new Results object containing only those that match.
|
1299
1404
|
The expression can reference any column in the data and supports standard
|
1300
1405
|
Python operators and syntax.
|
1301
|
-
|
1302
|
-
|
1406
|
+
|
1407
|
+
Args:
|
1303
1408
|
expression: A string containing a Python expression that evaluates to a boolean.
|
1304
1409
|
The expression is applied to each Result object individually.
|
1305
|
-
|
1410
|
+
|
1306
1411
|
Returns:
|
1307
1412
|
A new Results object containing only the Result objects that satisfy the expression.
|
1308
|
-
|
1413
|
+
|
1414
|
+
Raises:
|
1415
|
+
ResultsFilterError: If the expression is invalid or uses improper syntax
|
1416
|
+
(like using '=' instead of '==').
|
1417
|
+
|
1309
1418
|
Notes:
|
1310
1419
|
- Column names can be specified with or without their data type prefix
|
1311
1420
|
(e.g., both "how_feeling" and "answer.how_feeling" work if unambiguous)
|
@@ -1314,23 +1423,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1314
1423
|
- You can use comparison operators like '==', '!=', '>', '<', '>=', '<='
|
1315
1424
|
- You can use membership tests with 'in'
|
1316
1425
|
- You can use string methods like '.startswith()', '.contains()', etc.
|
1317
|
-
|
1426
|
+
|
1318
1427
|
Examples:
|
1319
1428
|
>>> r = Results.example()
|
1320
|
-
|
1321
|
-
# Simple equality filter
|
1429
|
+
|
1430
|
+
>>> # Simple equality filter
|
1322
1431
|
>>> r.filter("how_feeling == 'Great'").select('how_feeling')
|
1323
1432
|
Dataset([{'answer.how_feeling': ['Great']}])
|
1324
|
-
|
1325
|
-
# Using OR condition
|
1433
|
+
|
1434
|
+
>>> # Using OR condition
|
1326
1435
|
>>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling')
|
1327
1436
|
Dataset([{'answer.how_feeling': ['Great', 'Terrible']}])
|
1328
|
-
|
1329
|
-
# Filter on agent properties
|
1437
|
+
|
1438
|
+
>>> # Filter on agent properties
|
1330
1439
|
>>> r.filter("agent.status == 'Joyful'").select('agent.status')
|
1331
1440
|
Dataset([{'agent.status': ['Joyful', 'Joyful']}])
|
1332
|
-
|
1333
|
-
# Common error: using = instead of ==
|
1441
|
+
|
1442
|
+
>>> # Common error: using = instead of ==
|
1334
1443
|
>>> try:
|
1335
1444
|
... r.filter("how_feeling = 'Great'")
|
1336
1445
|
... except Exception as e:
|
@@ -1435,45 +1544,58 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1435
1544
|
[1, 1, 0, 0]
|
1436
1545
|
"""
|
1437
1546
|
return [r.score(f) for r in self.data]
|
1438
|
-
|
1547
|
+
|
1439
1548
|
def score_with_answer_key(self, answer_key: dict) -> list:
|
1440
1549
|
"""Score the results using an answer key.
|
1441
1550
|
|
1442
1551
|
:param answer_key: A dictionary that maps answer values to scores.
|
1443
1552
|
"""
|
1444
1553
|
return [r.score_with_answer_key(answer_key) for r in self.data]
|
1445
|
-
|
1446
1554
|
|
1447
|
-
def fetch_remote(self, job_info:
|
1448
|
-
"""
|
1449
|
-
|
1450
|
-
|
1451
|
-
This is useful when you have a Results object that was created locally but want to sync it with
|
1555
|
+
def fetch_remote(self, job_info: Any) -> None:
|
1556
|
+
"""Fetch remote Results object and update this instance with the data.
|
1557
|
+
|
1558
|
+
This is useful when you have a Results object that was created locally but want to sync it with
|
1452
1559
|
the latest data from the remote server.
|
1453
|
-
|
1560
|
+
|
1454
1561
|
Args:
|
1455
1562
|
job_info: RemoteJobInfo object containing the job_uuid and other remote job details
|
1456
|
-
|
1563
|
+
|
1564
|
+
Returns:
|
1565
|
+
bool: True if the fetch was successful, False if the job is not yet completed.
|
1566
|
+
|
1567
|
+
Raises:
|
1568
|
+
ResultsError: If there's an error during the fetch process.
|
1569
|
+
|
1570
|
+
Examples:
|
1571
|
+
>>> # This is a simplified example since we can't actually test this without a remote server
|
1572
|
+
>>> from unittest.mock import Mock, patch
|
1573
|
+
>>> # Create a mock job_info and Results
|
1574
|
+
>>> job_info = Mock()
|
1575
|
+
>>> job_info.job_uuid = "test_uuid"
|
1576
|
+
>>> results = Results()
|
1577
|
+
>>> # In a real scenario:
|
1578
|
+
>>> # results.fetch_remote(job_info)
|
1579
|
+
>>> # results.completed # Would be True if successful
|
1457
1580
|
"""
|
1458
|
-
#print("Calling fetch_remote")
|
1459
1581
|
try:
|
1460
1582
|
from ..coop import Coop
|
1461
1583
|
from ..jobs import JobsRemoteInferenceHandler
|
1462
|
-
|
1584
|
+
|
1463
1585
|
# Get the remote job data
|
1464
1586
|
remote_job_data = JobsRemoteInferenceHandler.check_status(job_info.job_uuid)
|
1465
|
-
|
1587
|
+
|
1466
1588
|
if remote_job_data.get("status") not in ["completed", "failed"]:
|
1467
1589
|
return False
|
1468
|
-
#
|
1590
|
+
#
|
1469
1591
|
results_uuid = remote_job_data.get("results_uuid")
|
1470
1592
|
if not results_uuid:
|
1471
1593
|
raise ResultsError("No results_uuid found in remote job data")
|
1472
|
-
|
1594
|
+
|
1473
1595
|
# Fetch the remote Results object
|
1474
1596
|
coop = Coop()
|
1475
1597
|
remote_results = coop.get(results_uuid, expected_object_type="results")
|
1476
|
-
|
1598
|
+
|
1477
1599
|
# Update this instance with remote data
|
1478
1600
|
self.data = remote_results.data
|
1479
1601
|
self.survey = remote_results.survey
|
@@ -1481,10 +1603,10 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1481
1603
|
self.cache = remote_results.cache
|
1482
1604
|
self.task_history = remote_results.task_history
|
1483
1605
|
self.completed = True
|
1484
|
-
|
1606
|
+
|
1485
1607
|
# Set job_uuid and results_uuid from remote data
|
1486
1608
|
self.job_uuid = job_info.job_uuid
|
1487
|
-
if hasattr(remote_results,
|
1609
|
+
if hasattr(remote_results, "results_uuid"):
|
1488
1610
|
self.results_uuid = remote_results.results_uuid
|
1489
1611
|
|
1490
1612
|
return True
|
@@ -1492,39 +1614,60 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1492
1614
|
except Exception as e:
|
1493
1615
|
raise ResultsError(f"Failed to fetch remote results: {str(e)}")
|
1494
1616
|
|
1495
|
-
def fetch(self, polling_interval: [float, int] = 1.0) -> Results:
|
1496
|
-
"""
|
1497
|
-
|
1498
|
-
|
1617
|
+
def fetch(self, polling_interval: Union[float, int] = 1.0) -> Results:
|
1618
|
+
"""Poll the server for job completion and update this Results instance.
|
1619
|
+
|
1620
|
+
This method continuously polls the remote server until the job is completed or
|
1621
|
+
fails, then updates this Results object with the final data.
|
1622
|
+
|
1499
1623
|
Args:
|
1500
1624
|
polling_interval: Number of seconds to wait between polling attempts (default: 1.0)
|
1501
|
-
|
1625
|
+
|
1502
1626
|
Returns:
|
1503
1627
|
self: The updated Results instance
|
1628
|
+
|
1629
|
+
Raises:
|
1630
|
+
ResultsError: If no job info is available or if there's an error during fetch.
|
1631
|
+
|
1632
|
+
Examples:
|
1633
|
+
>>> # This is a simplified example since we can't actually test polling
|
1634
|
+
>>> from unittest.mock import Mock, patch
|
1635
|
+
>>> # Create a mock results object
|
1636
|
+
>>> results = Results()
|
1637
|
+
>>> # In a real scenario with a running job:
|
1638
|
+
>>> # results.job_info = remote_job_info
|
1639
|
+
>>> # results.fetch() # Would poll until complete
|
1640
|
+
>>> # results.completed # Would be True if successful
|
1504
1641
|
"""
|
1505
1642
|
if not hasattr(self, "job_info"):
|
1506
|
-
raise ResultsError(
|
1507
|
-
|
1643
|
+
raise ResultsError(
|
1644
|
+
"No job info available - this Results object wasn't created from a remote job"
|
1645
|
+
)
|
1646
|
+
|
1508
1647
|
from ..jobs import JobsRemoteInferenceHandler
|
1509
|
-
|
1648
|
+
|
1510
1649
|
try:
|
1511
1650
|
# Get the remote job data
|
1512
|
-
remote_job_data = JobsRemoteInferenceHandler.check_status(
|
1513
|
-
|
1651
|
+
remote_job_data = JobsRemoteInferenceHandler.check_status(
|
1652
|
+
self.job_info.job_uuid
|
1653
|
+
)
|
1654
|
+
|
1514
1655
|
while remote_job_data.get("status") not in ["completed", "failed"]:
|
1515
1656
|
print("Waiting for remote job to complete...")
|
1516
1657
|
import time
|
1658
|
+
|
1517
1659
|
time.sleep(polling_interval)
|
1518
|
-
remote_job_data = JobsRemoteInferenceHandler.check_status(
|
1519
|
-
|
1660
|
+
remote_job_data = JobsRemoteInferenceHandler.check_status(
|
1661
|
+
self.job_info.job_uuid
|
1662
|
+
)
|
1663
|
+
|
1520
1664
|
# Once complete, fetch the full results
|
1521
1665
|
self.fetch_remote(self.job_info)
|
1522
1666
|
return self
|
1523
|
-
|
1667
|
+
|
1524
1668
|
except Exception as e:
|
1525
1669
|
raise ResultsError(f"Failed to fetch remote results: {str(e)}")
|
1526
1670
|
|
1527
|
-
|
1528
1671
|
def spot_issues(self, models: Optional[ModelList] = None) -> Results:
|
1529
1672
|
"""Run a survey to spot issues and suggest improvements for prompts that had no model response, returning a new Results object.
|
1530
1673
|
Future version: Allow user to optionally pass a list of questions to review, regardless of whether they had a null model response.
|
@@ -1532,60 +1675,75 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1532
1675
|
from ..questions import QuestionFreeText, QuestionDict
|
1533
1676
|
from ..surveys import Survey
|
1534
1677
|
from ..scenarios import Scenario, ScenarioList
|
1535
|
-
from ..language_models import
|
1678
|
+
from ..language_models import ModelList
|
1536
1679
|
import pandas as pd
|
1537
1680
|
|
1538
|
-
df = self.select(
|
1681
|
+
df = self.select(
|
1682
|
+
"agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*"
|
1683
|
+
).to_pandas()
|
1539
1684
|
scenario_list = []
|
1540
1685
|
|
1541
1686
|
for _, row in df.iterrows():
|
1542
1687
|
for col in df.columns:
|
1543
1688
|
if col.endswith("_raw_model_response") and pd.isna(row[col]):
|
1544
|
-
q = col.split("_raw_model_response")[0].replace(
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1689
|
+
q = col.split("_raw_model_response")[0].replace(
|
1690
|
+
"raw_model_response.", ""
|
1691
|
+
)
|
1692
|
+
|
1693
|
+
s = Scenario(
|
1694
|
+
{
|
1695
|
+
"original_question": q,
|
1696
|
+
"original_agent_index": row["agent.agent_index"],
|
1697
|
+
"original_scenario_index": row["scenario.scenario_index"],
|
1698
|
+
"original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}",
|
1699
|
+
}
|
1700
|
+
)
|
1701
|
+
|
1553
1702
|
scenario_list.append(s)
|
1554
1703
|
|
1555
1704
|
sl = ScenarioList(set(scenario_list))
|
1556
1705
|
|
1557
1706
|
q1 = QuestionFreeText(
|
1558
|
-
question_name
|
1559
|
-
question_text
|
1707
|
+
question_name="issues",
|
1708
|
+
question_text="""
|
1560
1709
|
The following prompts generated a bad or null response: '{{ original_prompts }}'
|
1561
1710
|
What do you think was the likely issue(s)?
|
1562
|
-
"""
|
1711
|
+
""",
|
1563
1712
|
)
|
1564
1713
|
|
1565
1714
|
q2 = QuestionDict(
|
1566
|
-
question_name
|
1567
|
-
question_text
|
1715
|
+
question_name="revised",
|
1716
|
+
question_text="""
|
1568
1717
|
The following prompts generated a bad or null response: '{{ original_prompts }}'
|
1569
1718
|
You identified the issue(s) as '{{ issues.answer }}'.
|
1570
1719
|
Please revise the prompts to address the issue(s).
|
1571
1720
|
""",
|
1572
|
-
answer_keys
|
1721
|
+
answer_keys=["revised_user_prompt", "revised_system_prompt"],
|
1573
1722
|
)
|
1574
1723
|
|
1575
|
-
survey = Survey(questions
|
1724
|
+
survey = Survey(questions=[q1, q2])
|
1576
1725
|
|
1577
1726
|
if models is not None:
|
1578
1727
|
if not isinstance(models, ModelList):
|
1579
1728
|
raise ResultsError("models must be a ModelList")
|
1580
1729
|
results = survey.by(sl).by(models).run()
|
1581
1730
|
else:
|
1582
|
-
results = survey.by(sl).run()
|
1731
|
+
results = survey.by(sl).run() # use the default model
|
1583
1732
|
|
1584
1733
|
return results
|
1585
1734
|
|
1586
1735
|
|
1587
1736
|
def main(): # pragma: no cover
|
1588
|
-
"""
|
1737
|
+
"""Run example operations on a Results object.
|
1738
|
+
|
1739
|
+
This function demonstrates basic filtering and mutation operations on
|
1740
|
+
a Results object, printing the output.
|
1741
|
+
|
1742
|
+
Examples:
|
1743
|
+
>>> # This can be run directly as a script
|
1744
|
+
>>> # python -m edsl.results.results
|
1745
|
+
>>> # It will create example results and show filtering and mutation
|
1746
|
+
"""
|
1589
1747
|
from ..results import Results
|
1590
1748
|
|
1591
1749
|
results = Results.example(debug=True)
|
@@ -1595,4 +1753,5 @@ def main(): # pragma: no cover
|
|
1595
1753
|
|
1596
1754
|
if __name__ == "__main__":
|
1597
1755
|
import doctest
|
1756
|
+
|
1598
1757
|
doctest.testmod(optionflags=doctest.ELLIPSIS)
|