edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/base/data_transfer_models.py +15 -4
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/dataset/dataset_operations_mixin.py +216 -180
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/interviews/request_token_estimator.py +8 -0
- edsl/invigilators/invigilators.py +35 -13
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +154 -113
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +110 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +176 -71
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +115 -46
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +150 -9
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
edsl/results/results.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""The Results module provides tools for working with collections of Result objects.
|
2
2
|
|
3
|
-
The Results class is the primary container for analyzing and manipulating data obtained
|
4
|
-
from running surveys with language models. It implements a powerful data analysis interface
|
3
|
+
The Results class is the primary container for analyzing and manipulating data obtained
|
4
|
+
from running surveys with language models. It implements a powerful data analysis interface
|
5
5
|
with methods for filtering, selecting, mutating, and visualizing your results, similar to
|
6
6
|
data manipulation libraries like dplyr or pandas.
|
7
7
|
|
@@ -11,7 +11,7 @@ Key components:
|
|
11
11
|
2. Report - A flexible reporting system for generating formatted output from Results
|
12
12
|
3. Selectors - Tools for efficiently extracting specific data from Results
|
13
13
|
|
14
|
-
The Results class is not typically instantiated directly; instead, it's returned by the
|
14
|
+
The Results class is not typically instantiated directly; instead, it's returned by the
|
15
15
|
run() method of a Job object. Once you have a Results object, you can use its methods
|
16
16
|
to analyze and extract insights from your survey data.
|
17
17
|
|
@@ -39,9 +39,10 @@ from __future__ import annotations
|
|
39
39
|
import json
|
40
40
|
import random
|
41
41
|
import warnings
|
42
|
-
from collections import
|
42
|
+
from collections import defaultdict
|
43
43
|
from typing import Optional, Callable, Any, Union, List, TYPE_CHECKING
|
44
44
|
from bisect import bisect_left
|
45
|
+
from collections.abc import MutableSequence
|
45
46
|
|
46
47
|
from ..base import Base
|
47
48
|
from ..caching import Cache, CacheEntry
|
@@ -59,6 +60,9 @@ if TYPE_CHECKING:
|
|
59
60
|
from ..utilities import remove_edsl_version, dict_hash
|
60
61
|
from ..dataset import ResultsOperationsMixin
|
61
62
|
|
63
|
+
from .result import Result
|
64
|
+
from ..db_list.sqlite_list import SQLiteList
|
65
|
+
|
62
66
|
from .exceptions import (
|
63
67
|
ResultsError,
|
64
68
|
ResultsBadMutationstringError,
|
@@ -70,6 +74,18 @@ from .exceptions import (
|
|
70
74
|
)
|
71
75
|
|
72
76
|
|
77
|
+
class ResultsSQLList(SQLiteList):
|
78
|
+
def serialize(self, obj):
|
79
|
+
return json.dumps(obj.to_dict()) if hasattr(obj, "to_dict") else json.dumps(obj)
|
80
|
+
|
81
|
+
def deserialize(self, data):
|
82
|
+
return (
|
83
|
+
Result.from_dict(json.loads(data))
|
84
|
+
if hasattr(Result, "from_dict")
|
85
|
+
else json.loads(data)
|
86
|
+
)
|
87
|
+
|
88
|
+
|
73
89
|
def ensure_fetched(method):
|
74
90
|
"""A decorator that checks if remote data is loaded, and if not, attempts to fetch it.
|
75
91
|
|
@@ -188,7 +204,7 @@ class NotReadyObject:
|
|
188
204
|
return self
|
189
205
|
|
190
206
|
|
191
|
-
class Results(
|
207
|
+
class Results(MutableSequence, ResultsOperationsMixin, Base):
|
192
208
|
"""A collection of Result objects with powerful data analysis capabilities.
|
193
209
|
|
194
210
|
The Results class is the primary container for working with data from EDSL surveys.
|
@@ -297,13 +313,11 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
297
313
|
job_uuid: Optional[str] = None,
|
298
314
|
total_results: Optional[int] = None,
|
299
315
|
task_history: Optional[TaskHistory] = None,
|
316
|
+
sort_by_iteration: bool = False,
|
317
|
+
data_class: Optional[type] = list, # ResultsSQLList,
|
300
318
|
):
|
301
319
|
"""Instantiate a Results object with a survey and a list of Result objects.
|
302
320
|
|
303
|
-
This initializes a completed Results object with the provided data.
|
304
|
-
For creating a not-ready Results object from a job info dictionary,
|
305
|
-
use the from_job_info class method instead.
|
306
|
-
|
307
321
|
Args:
|
308
322
|
survey: A Survey object containing the questions used to generate results.
|
309
323
|
data: A list of Result objects containing the responses.
|
@@ -312,29 +326,49 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
312
326
|
job_uuid: A string representing the job UUID.
|
313
327
|
total_results: An integer representing the total number of results.
|
314
328
|
task_history: A TaskHistory object containing information about the tasks.
|
315
|
-
|
316
|
-
|
317
|
-
>>> from ..results import Result
|
318
|
-
>>> # Create an empty Results object
|
319
|
-
>>> r = Results()
|
320
|
-
>>> r.completed
|
321
|
-
True
|
322
|
-
>>> len(r.created_columns)
|
323
|
-
0
|
324
|
-
|
325
|
-
>>> # Create a Results object with data
|
326
|
-
>>> from unittest.mock import Mock
|
327
|
-
>>> mock_survey = Mock()
|
328
|
-
>>> mock_result = Mock(spec=Result)
|
329
|
-
>>> r = Results(survey=mock_survey, data=[mock_result])
|
330
|
-
>>> len(r)
|
331
|
-
1
|
329
|
+
sort_by_iteration: Whether to sort data by iteration before initializing.
|
330
|
+
data_class: The class to use for the data container (default: list).
|
332
331
|
"""
|
333
332
|
self.completed = True
|
334
333
|
self._fetching = False
|
335
|
-
|
334
|
+
|
335
|
+
# Determine the data class to use
|
336
|
+
if data is not None:
|
337
|
+
# Use the class of the provided data if it's not a basic list
|
338
|
+
self._data_class = (
|
339
|
+
data.__class__ if not isinstance(data, list) else data_class
|
340
|
+
)
|
341
|
+
else:
|
342
|
+
self._data_class = data_class
|
343
|
+
|
344
|
+
# Sort data appropriately before initialization if needed
|
345
|
+
if data and sort_by_iteration:
|
346
|
+
# First try to sort by order attribute if present on any result
|
347
|
+
has_order = any(hasattr(item, "order") for item in data)
|
348
|
+
if has_order:
|
349
|
+
|
350
|
+
def get_order(item):
|
351
|
+
if hasattr(item, "order"):
|
352
|
+
return item.order
|
353
|
+
return item.data.get("iteration", 0) * 1000
|
354
|
+
|
355
|
+
data = sorted(data, key=get_order)
|
356
|
+
else:
|
357
|
+
data = sorted(data, key=lambda x: x.data.get("iteration", 0))
|
358
|
+
|
359
|
+
# Initialize data with the appropriate class
|
360
|
+
self.data = self._data_class(data or [])
|
361
|
+
|
336
362
|
from ..caching import Cache
|
337
363
|
from ..tasks import TaskHistory
|
364
|
+
import tempfile
|
365
|
+
import os
|
366
|
+
|
367
|
+
# Create a unique shelve path in the system temp directory
|
368
|
+
self._shelve_path = os.path.join(
|
369
|
+
tempfile.gettempdir(), f"edsl_results_{os.getpid()}"
|
370
|
+
)
|
371
|
+
self._shelf_keys = set() # Track shelved result keys
|
338
372
|
|
339
373
|
self.survey = survey
|
340
374
|
self.created_columns = created_columns or []
|
@@ -347,6 +381,9 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
347
381
|
if hasattr(self, "_add_output_functions"):
|
348
382
|
self._add_output_functions()
|
349
383
|
|
384
|
+
def add_task_history_entry(self, interview: "Interview") -> None:
|
385
|
+
self.task_history.add_interview(interview)
|
386
|
+
|
350
387
|
def _fetch_list(self, data_type: str, key: str) -> list:
|
351
388
|
"""Return a list of values from the data for a given data type and key.
|
352
389
|
|
@@ -395,6 +432,32 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
395
432
|
return self._fetch_list("answer", question_name)
|
396
433
|
|
397
434
|
def _summary(self) -> dict:
|
435
|
+
"""Return a dictionary containing summary statistics about the Results object.
|
436
|
+
|
437
|
+
The summary includes:
|
438
|
+
- Number of observations (results)
|
439
|
+
- Number of unique agents
|
440
|
+
- Number of unique models
|
441
|
+
- Number of unique scenarios
|
442
|
+
- Number of questions in the survey
|
443
|
+
- Survey question names (truncated for readability)
|
444
|
+
|
445
|
+
Returns:
|
446
|
+
dict: A dictionary containing the summary statistics
|
447
|
+
|
448
|
+
Examples:
|
449
|
+
>>> from edsl.results import Results
|
450
|
+
>>> r = Results.example()
|
451
|
+
>>> summary = r._summary()
|
452
|
+
>>> isinstance(summary, dict)
|
453
|
+
True
|
454
|
+
>>> all(key in summary for key in ['observations', 'agents', 'models', 'scenarios', 'questions', 'Survey question names'])
|
455
|
+
True
|
456
|
+
>>> summary['observations'] > 0
|
457
|
+
True
|
458
|
+
>>> summary['questions'] > 0
|
459
|
+
True
|
460
|
+
"""
|
398
461
|
import reprlib
|
399
462
|
|
400
463
|
d = {
|
@@ -407,7 +470,22 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
407
470
|
}
|
408
471
|
return d
|
409
472
|
|
410
|
-
def _cache_keys(self):
|
473
|
+
def _cache_keys(self) -> List[str]: # -> list:
|
474
|
+
"""Return a list of all cache keys from the results.
|
475
|
+
|
476
|
+
This method collects all cache keys by iterating through each result in the data
|
477
|
+
and extracting the values from the 'cache_keys' dictionary. These keys can be used
|
478
|
+
to identify cached responses and manage the cache effectively.
|
479
|
+
|
480
|
+
Returns:
|
481
|
+
List[str]: A list of cache keys from all results.
|
482
|
+
|
483
|
+
Examples:
|
484
|
+
>>> from edsl.results import Results
|
485
|
+
>>> r = Results.example()
|
486
|
+
>>> all([type(s) == str for s in r._cache_keys()])
|
487
|
+
True
|
488
|
+
"""
|
411
489
|
cache_keys = []
|
412
490
|
for result in self:
|
413
491
|
cache_keys.extend(list(result["cache_keys"].values()))
|
@@ -417,31 +495,57 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
417
495
|
cache_keys = self._cache_keys()
|
418
496
|
return cache.subset(cache_keys)
|
419
497
|
|
420
|
-
def insert(self, item):
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
498
|
+
# def insert(self, item):
|
499
|
+
# """Insert a Result object into the Results list in the correct order.
|
500
|
+
|
501
|
+
# If the Result has an 'order' attribute, it uses that for ordering.
|
502
|
+
# Otherwise, it falls back to ordering by the 'iteration' attribute.
|
503
|
+
|
504
|
+
# >>> from edsl.results import Result
|
505
|
+
# >>> rnew = Result.example()
|
506
|
+
# >>> results = Results.example()
|
507
|
+
# >>> results.insert(rnew)
|
508
|
+
# >>> results[0] == rnew
|
509
|
+
# True
|
510
|
+
# >>> results = Results.example()
|
511
|
+
# >>> rnew.order = 100
|
512
|
+
# >>> results.insert(rnew)
|
513
|
+
# >>> results[-1] == rnew # The new result is at the end
|
514
|
+
# True
|
515
|
+
# """
|
516
|
+
|
517
|
+
# def get_sort_key(result):
|
518
|
+
# if hasattr(result, "order"):
|
519
|
+
# return result.order
|
520
|
+
# return result.data["iteration"]
|
521
|
+
|
522
|
+
# # Find insertion point using bisect with custom key function
|
523
|
+
# index = bisect_left([get_sort_key(x) for x in self.data], get_sort_key(item))
|
524
|
+
|
525
|
+
# # Call the parent class's insert directly
|
526
|
+
# MutableSequence.insert(self, index, item)
|
527
|
+
|
528
|
+
def extend_sorted(self, other):
|
529
|
+
"""Extend the Results list with items from another iterable.
|
530
|
+
|
531
|
+
This method preserves ordering based on 'order' attribute if present,
|
532
|
+
otherwise falls back to 'iteration' attribute.
|
533
|
+
"""
|
534
|
+
# Collect all items (existing and new)
|
535
|
+
all_items = list(self.data)
|
536
|
+
all_items.extend(other)
|
438
537
|
|
439
|
-
|
440
|
-
|
538
|
+
# Sort combined list by order attribute if available, otherwise by iteration
|
539
|
+
def get_sort_key(item):
|
540
|
+
if hasattr(item, "order"):
|
541
|
+
return (0, item.order) # Order attribute takes precedence
|
542
|
+
return (1, item.data["iteration"]) # Iteration is secondary
|
441
543
|
|
442
|
-
|
443
|
-
|
444
|
-
|
544
|
+
all_items.sort(key=get_sort_key)
|
545
|
+
|
546
|
+
# Clear and refill with sorted items
|
547
|
+
self.data.clear()
|
548
|
+
self.data.extend(all_items)
|
445
549
|
|
446
550
|
def compute_job_cost(self, include_cached_responses_in_cost: bool = False) -> float:
|
447
551
|
"""Compute the cost of a completed job in USD.
|
@@ -468,8 +572,16 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
468
572
|
if key.endswith("_cost"):
|
469
573
|
result_cost = result["raw_model_response"][key]
|
470
574
|
|
575
|
+
# Extract the question name from the key
|
471
576
|
question_name = key.removesuffix("_cost")
|
472
|
-
|
577
|
+
|
578
|
+
# Get cache status safely - default to False if not found
|
579
|
+
cache_used = False
|
580
|
+
if (
|
581
|
+
"cache_used_dict" in result
|
582
|
+
and question_name in result["cache_used_dict"]
|
583
|
+
):
|
584
|
+
cache_used = result["cache_used_dict"][question_name]
|
473
585
|
|
474
586
|
if isinstance(result_cost, (int, float)):
|
475
587
|
if include_cached_responses_in_cost:
|
@@ -496,48 +608,59 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
496
608
|
"""
|
497
609
|
raise ResultsError("The code() method is not implemented for Results objects")
|
498
610
|
|
611
|
+
@ensure_ready
|
499
612
|
def __getitem__(self, i):
|
500
|
-
|
613
|
+
if isinstance(i, int):
|
614
|
+
return self.data[i]
|
615
|
+
if isinstance(i, slice):
|
616
|
+
return self.__class__(survey=self.survey, data=self.data[i])
|
617
|
+
if isinstance(i, str):
|
618
|
+
return self.to_dict()[i]
|
619
|
+
raise ResultsError("Invalid argument type for indexing Results object")
|
501
620
|
|
502
|
-
|
503
|
-
|
621
|
+
@ensure_ready
|
622
|
+
def __setitem__(self, i, item):
|
623
|
+
self.data[i] = item
|
504
624
|
|
505
|
-
|
506
|
-
|
625
|
+
@ensure_ready
|
626
|
+
def __delitem__(self, i):
|
627
|
+
del self.data[i]
|
507
628
|
|
508
|
-
|
509
|
-
|
629
|
+
@ensure_ready
|
630
|
+
def __len__(self):
|
631
|
+
return len(self.data)
|
510
632
|
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
... r[1.5]
|
527
|
-
... except ResultsError:
|
528
|
-
... True
|
529
|
-
True
|
633
|
+
@ensure_ready
|
634
|
+
def insert(self, index, item):
|
635
|
+
self.data.insert(index, item)
|
636
|
+
|
637
|
+
@ensure_ready
|
638
|
+
def extend(self, other):
|
639
|
+
"""Extend the Results list with items from another iterable."""
|
640
|
+
self.data.extend(other)
|
641
|
+
|
642
|
+
@ensure_ready
|
643
|
+
def extend_sorted(self, other):
|
644
|
+
"""Extend the Results list with items from another iterable, maintaining sort order.
|
645
|
+
|
646
|
+
This method preserves ordering based on 'order' attribute if present,
|
647
|
+
otherwise falls back to 'iteration' attribute.
|
530
648
|
"""
|
531
|
-
|
532
|
-
|
649
|
+
# Collect all items (existing and new)
|
650
|
+
all_items = list(self.data)
|
651
|
+
all_items.extend(other)
|
533
652
|
|
534
|
-
if
|
535
|
-
|
653
|
+
# Sort combined list by order attribute if available, otherwise by iteration
|
654
|
+
def get_sort_key(item):
|
655
|
+
if hasattr(item, "order"):
|
656
|
+
return (0, item.order) # Order attribute takes precedence
|
657
|
+
return (1, item.data["iteration"]) # Iteration is secondary
|
536
658
|
|
537
|
-
|
538
|
-
return self.to_dict()[i]
|
659
|
+
all_items.sort(key=get_sort_key)
|
539
660
|
|
540
|
-
|
661
|
+
# Clear and refill with sorted items
|
662
|
+
self.data.clear()
|
663
|
+
self.data.extend(all_items)
|
541
664
|
|
542
665
|
def __add__(self, other: Results) -> Results:
|
543
666
|
"""Add two Results objects together.
|
@@ -581,9 +704,15 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
581
704
|
"The created columns are not the same so they cannot be added together."
|
582
705
|
)
|
583
706
|
|
707
|
+
# Create a new ResultsSQLList with the combined data
|
708
|
+
# combined_data = ResultsSQLList()
|
709
|
+
combined_data = self._data_class()
|
710
|
+
combined_data.extend(self.data)
|
711
|
+
combined_data.extend(other.data)
|
712
|
+
|
584
713
|
return Results(
|
585
714
|
survey=self.survey,
|
586
|
-
data=
|
715
|
+
data=combined_data,
|
587
716
|
created_columns=self.created_columns,
|
588
717
|
)
|
589
718
|
|
@@ -743,7 +872,12 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
743
872
|
|
744
873
|
def __hash__(self) -> int:
|
745
874
|
return dict_hash(
|
746
|
-
self.to_dict(
|
875
|
+
self.to_dict(
|
876
|
+
sort=True,
|
877
|
+
add_edsl_version=False,
|
878
|
+
include_cache=False,
|
879
|
+
include_cache_info=False,
|
880
|
+
)
|
747
881
|
)
|
748
882
|
|
749
883
|
@property
|
@@ -792,10 +926,11 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
792
926
|
"""
|
793
927
|
from ..surveys import Survey
|
794
928
|
from ..caching import Cache
|
795
|
-
from
|
929
|
+
from .result import Result
|
796
930
|
from ..tasks import TaskHistory
|
797
931
|
|
798
932
|
survey = Survey.from_dict(data["survey"])
|
933
|
+
# Convert dictionaries to Result objects
|
799
934
|
results_data = [Result.from_dict(r) for r in data["data"]]
|
800
935
|
created_columns = data.get("created_columns", None)
|
801
936
|
cache = Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
|
@@ -804,9 +939,12 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
804
939
|
if "task_history" in data
|
805
940
|
else TaskHistory(interviews=[])
|
806
941
|
)
|
942
|
+
|
943
|
+
# Create a Results object with original order preserved
|
944
|
+
# using the empty data list initially
|
807
945
|
params = {
|
808
946
|
"survey": survey,
|
809
|
-
"data":
|
947
|
+
"data": [], # Start with empty data
|
810
948
|
"created_columns": created_columns,
|
811
949
|
"cache": cache,
|
812
950
|
"task_history": task_history,
|
@@ -814,6 +952,9 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
814
952
|
|
815
953
|
try:
|
816
954
|
results = cls(**params)
|
955
|
+
# Add each result individually to respect order attributes
|
956
|
+
for result in results_data:
|
957
|
+
results.append(result)
|
817
958
|
except Exception as e:
|
818
959
|
raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
|
819
960
|
return results
|
@@ -1081,19 +1222,26 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1081
1222
|
>>> r.add_column('a', [1,2,3, 4]).select('a')
|
1082
1223
|
Dataset([{'answer.a': [1, 2, 3, 4]}])
|
1083
1224
|
"""
|
1084
|
-
|
1085
1225
|
assert len(values) == len(
|
1086
1226
|
self.data
|
1087
1227
|
), "The number of values must match the number of results."
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
return Results(
|
1228
|
+
|
1229
|
+
# Create new Results object with same properties but empty data
|
1230
|
+
new_results = Results(
|
1092
1231
|
survey=self.survey,
|
1093
|
-
data=
|
1232
|
+
data=[],
|
1094
1233
|
created_columns=self.created_columns + [column_name],
|
1234
|
+
data_class=self._data_class,
|
1095
1235
|
)
|
1096
1236
|
|
1237
|
+
# Process one result at a time
|
1238
|
+
for i, result in enumerate(self.data):
|
1239
|
+
new_result = result.copy()
|
1240
|
+
new_result["answer"][column_name] = values[i]
|
1241
|
+
new_results.append(new_result)
|
1242
|
+
|
1243
|
+
return new_results
|
1244
|
+
|
1097
1245
|
@ensure_ready
|
1098
1246
|
def add_columns_from_dict(self, columns: List[dict]) -> Results:
|
1099
1247
|
"""Adds columns to Results from a list of dictionaries.
|
@@ -1234,33 +1382,63 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1234
1382
|
>>> s = Results.example()
|
1235
1383
|
>>> s.rename('how_feeling', 'how_feeling_new').select('how_feeling_new')
|
1236
1384
|
Dataset([{'answer.how_feeling_new': ['OK', 'Great', 'Terrible', 'OK']}])
|
1237
|
-
|
1238
|
-
# TODO: Should we allow renaming of scenario fields as well? Probably.
|
1239
|
-
|
1240
1385
|
"""
|
1386
|
+
# Create new Results object with same properties but empty data
|
1387
|
+
new_results = Results(
|
1388
|
+
survey=self.survey,
|
1389
|
+
data=[],
|
1390
|
+
created_columns=self.created_columns,
|
1391
|
+
data_class=self._data_class,
|
1392
|
+
)
|
1393
|
+
|
1394
|
+
# Update created_columns if old_name was in there
|
1395
|
+
if old_name in new_results.created_columns:
|
1396
|
+
new_results.created_columns.remove(old_name)
|
1397
|
+
new_results.created_columns.append(new_name)
|
1241
1398
|
|
1399
|
+
# Process one result at a time
|
1242
1400
|
for obs in self.data:
|
1243
|
-
|
1244
|
-
|
1401
|
+
new_result = obs.copy()
|
1402
|
+
new_result["answer"][new_name] = new_result["answer"][old_name]
|
1403
|
+
del new_result["answer"][old_name]
|
1404
|
+
new_results.append(new_result)
|
1245
1405
|
|
1246
|
-
return
|
1406
|
+
return new_results
|
1247
1407
|
|
1248
1408
|
@ensure_ready
|
1249
1409
|
def shuffle(self, seed: Optional[str] = "edsl") -> Results:
|
1250
|
-
"""
|
1410
|
+
"""Return a shuffled copy of the results using Fisher-Yates algorithm.
|
1251
1411
|
|
1252
|
-
|
1412
|
+
Args:
|
1413
|
+
seed: Random seed for reproducibility.
|
1253
1414
|
|
1254
|
-
|
1255
|
-
|
1256
|
-
Result(...)
|
1415
|
+
Returns:
|
1416
|
+
Results: A new Results object with shuffled data.
|
1257
1417
|
"""
|
1258
1418
|
if seed != "edsl":
|
1259
|
-
|
1419
|
+
random.seed(seed)
|
1260
1420
|
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1421
|
+
# Create new Results object with same properties but empty data
|
1422
|
+
shuffled_results = Results(
|
1423
|
+
survey=self.survey,
|
1424
|
+
data=[],
|
1425
|
+
created_columns=self.created_columns,
|
1426
|
+
data_class=self._data_class,
|
1427
|
+
)
|
1428
|
+
|
1429
|
+
# First pass: copy data while tracking indices
|
1430
|
+
indices = list(range(len(self.data)))
|
1431
|
+
|
1432
|
+
# Second pass: Fisher-Yates shuffle on indices
|
1433
|
+
for i in range(len(indices) - 1, 0, -1):
|
1434
|
+
j = random.randrange(i + 1)
|
1435
|
+
indices[i], indices[j] = indices[j], indices[i]
|
1436
|
+
|
1437
|
+
# Final pass: append items in shuffled order
|
1438
|
+
for idx in indices:
|
1439
|
+
shuffled_results.append(self.data[idx])
|
1440
|
+
|
1441
|
+
return shuffled_results
|
1264
1442
|
|
1265
1443
|
@ensure_ready
|
1266
1444
|
def sample(
|
@@ -1270,41 +1448,61 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1270
1448
|
with_replacement: bool = True,
|
1271
1449
|
seed: Optional[str] = None,
|
1272
1450
|
) -> Results:
|
1273
|
-
"""
|
1274
|
-
|
1275
|
-
:param n: An integer representing the number of samples to take.
|
1276
|
-
:param frac: A float representing the fraction of samples to take.
|
1277
|
-
:param with_replacement: A boolean representing whether to sample with replacement.
|
1278
|
-
:param seed: An integer representing the seed for the random number generator.
|
1451
|
+
"""Return a random sample of the results.
|
1279
1452
|
|
1280
|
-
|
1453
|
+
Args:
|
1454
|
+
n: The number of samples to take.
|
1455
|
+
frac: The fraction of samples to take (alternative to n).
|
1456
|
+
with_replacement: Whether to sample with replacement.
|
1457
|
+
seed: Random seed for reproducibility.
|
1281
1458
|
|
1282
|
-
|
1283
|
-
|
1284
|
-
2
|
1459
|
+
Returns:
|
1460
|
+
Results: A new Results object containing the sampled data.
|
1285
1461
|
"""
|
1286
1462
|
if seed:
|
1287
1463
|
random.seed(seed)
|
1288
1464
|
|
1289
1465
|
if n is None and frac is None:
|
1290
|
-
from .exceptions import ResultsError
|
1291
|
-
|
1292
1466
|
raise ResultsError("You must specify either n or frac.")
|
1293
1467
|
|
1294
1468
|
if n is not None and frac is not None:
|
1295
|
-
from .exceptions import ResultsError
|
1296
|
-
|
1297
1469
|
raise ResultsError("You cannot specify both n and frac.")
|
1298
1470
|
|
1299
|
-
if frac is not None
|
1471
|
+
if frac is not None:
|
1300
1472
|
n = int(frac * len(self.data))
|
1301
1473
|
|
1474
|
+
# Create new Results object with same properties but empty data
|
1475
|
+
sampled_results = Results(
|
1476
|
+
survey=self.survey,
|
1477
|
+
data=[],
|
1478
|
+
created_columns=self.created_columns,
|
1479
|
+
data_class=self._data_class,
|
1480
|
+
)
|
1481
|
+
|
1302
1482
|
if with_replacement:
|
1303
|
-
|
1483
|
+
# For sampling with replacement, we can generate indices and sample one at a time
|
1484
|
+
indices = (random.randrange(len(self.data)) for _ in range(n))
|
1485
|
+
for i in indices:
|
1486
|
+
sampled_results.append(self.data[i])
|
1304
1487
|
else:
|
1305
|
-
|
1488
|
+
# For sampling without replacement, use reservoir sampling
|
1489
|
+
if n > len(self.data):
|
1490
|
+
raise ResultsError(
|
1491
|
+
f"Cannot sample {n} items from a list of length {len(self.data)}."
|
1492
|
+
)
|
1493
|
+
|
1494
|
+
# Reservoir sampling algorithm
|
1495
|
+
for i, item in enumerate(self.data):
|
1496
|
+
if i < n:
|
1497
|
+
# Fill the reservoir initially
|
1498
|
+
sampled_results.append(item)
|
1499
|
+
else:
|
1500
|
+
# Randomly replace items with decreasing probability
|
1501
|
+
j = random.randrange(i + 1)
|
1502
|
+
if j < n:
|
1503
|
+
sampled_results.data[j] = item
|
1306
1504
|
|
1307
|
-
return
|
1505
|
+
return sampled_results
|
1308
1506
|
|
1309
1507
|
@ensure_ready
|
1310
1508
|
def select(self, *columns: Union[str, list[str]]) -> "Dataset":
|
@@ -1391,20 +1589,12 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1391
1589
|
def order_by(self, *columns: str, reverse: bool = False) -> Results:
|
1392
1590
|
"""Sort the results by one or more columns.
|
1393
1591
|
|
1394
|
-
:
|
1395
|
-
|
1396
|
-
|
1397
|
-
Each column name can be a single key, e.g. "how_feeling", or a dot-separated string, e.g. "answer.how_feeling".
|
1398
|
-
|
1399
|
-
Example:
|
1400
|
-
|
1401
|
-
>>> r = Results.example()
|
1402
|
-
>>> r.sort_by('how_feeling', reverse=False).select('how_feeling')
|
1403
|
-
Dataset([{'answer.how_feeling': ['Great', 'OK', 'OK', 'Terrible']}])
|
1404
|
-
|
1405
|
-
>>> r.sort_by('how_feeling', reverse=True).select('how_feeling')
|
1406
|
-
Dataset([{'answer.how_feeling': ['Terrible', 'OK', 'OK', 'Great']}])
|
1592
|
+
Args:
|
1593
|
+
columns: One or more column names as strings.
|
1594
|
+
reverse: A boolean that determines whether to sort in reverse order.
|
1407
1595
|
|
1596
|
+
Returns:
|
1597
|
+
Results: A new Results object with sorted data.
|
1408
1598
|
"""
|
1409
1599
|
|
1410
1600
|
def to_numeric_if_possible(v):
|
@@ -1418,11 +1608,52 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1418
1608
|
for col in columns:
|
1419
1609
|
data_type, key = self._parse_column(col)
|
1420
1610
|
value = item.get_value(data_type, key)
|
1421
|
-
|
1611
|
+
if isinstance(value, (str, bytes)):
|
1612
|
+
key_components.append(str(value))
|
1613
|
+
else:
|
1614
|
+
key_components.append(to_numeric_if_possible(value))
|
1422
1615
|
return tuple(key_components)
|
1423
1616
|
|
1424
|
-
|
1425
|
-
|
1617
|
+
# Create a new sorted view of the data without materializing it
|
1618
|
+
sorted_data = sorted(self.data, key=sort_key, reverse=reverse)
|
1619
|
+
|
1620
|
+
# Create new Results object that uses the sorted iterator
|
1621
|
+
return Results(
|
1622
|
+
survey=self.survey,
|
1623
|
+
data=sorted_data, # This will be an iterator, not a materialized list
|
1624
|
+
created_columns=self.created_columns,
|
1625
|
+
data_class=self._data_class,
|
1626
|
+
sort_by_iteration=False,
|
1627
|
+
)
|
1628
|
+
|
1629
|
+
@staticmethod
def has_single_equals(expression: str) -> bool:
    """Check if an expression contains a single equals sign not part of ==, >=, <=, or !=.

    Equals signs that appear inside quoted string literals are ignored, so
    comparing against a value such as ``'a=b'`` is not mistaken for an
    assignment.

    Args:
        expression: String expression to check

    Returns:
        bool: True if there is a standalone = sign

    Examples:
        >>> Results.has_single_equals("x = 1")
        True
        >>> Results.has_single_equals("x == 1")
        False
        >>> Results.has_single_equals("x >= 1")
        False
        >>> Results.has_single_equals("x <= 1")
        False
        >>> Results.has_single_equals("x != 1")
        False
        >>> Results.has_single_equals("x == 'a=b'")
        False
    """
    import re

    # Blank out quoted literals first: an "=" that is part of a string value
    # is data, not an operator. Each literal is replaced by an empty pair of
    # quotes so the tokens on either side of it are not glued together.
    single_quoted = r"'(?:\\.|[^'\\])*'"
    double_quoted = r'"(?:\\.|[^"\\])*"'
    without_literals = re.sub(f"{single_quoted}|{double_quoted}", '""', expression)

    # Then remove the valid operators that contain "="; any "=" still left
    # is a standalone one.
    cleaned = (
        without_literals.replace("==", "")
        .replace(">=", "")
        .replace("<=", "")
        .replace("!=", "")
    )
    return "=" in cleaned
|
1426
1657
|
|
1427
1658
|
@ensure_ready
|
1428
1659
|
def filter(self, expression: str) -> Results:
|
@@ -1436,6 +1667,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1436
1667
|
Args:
|
1437
1668
|
expression: A string containing a Python expression that evaluates to a boolean.
|
1438
1669
|
The expression is applied to each Result object individually.
|
1670
|
+
Can be a multi-line string for better readability.
|
1671
|
+
Supports template-style syntax with {{ field }} notation.
|
1439
1672
|
|
1440
1673
|
Returns:
|
1441
1674
|
A new Results object containing only the Result objects that satisfy the expression.
|
@@ -1452,6 +1685,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1452
1685
|
- You can use comparison operators like '==', '!=', '>', '<', '>=', '<='
|
1453
1686
|
- You can use membership tests with 'in'
|
1454
1687
|
- You can use string methods like '.startswith()', '.contains()', etc.
|
1688
|
+
- The expression can be a multi-line string for improved readability
|
1689
|
+
- You can use template-style syntax with double curly braces: {{ field }}
|
1455
1690
|
|
1456
1691
|
Examples:
|
1457
1692
|
>>> r = Results.example()
|
@@ -1468,6 +1703,17 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1468
1703
|
>>> r.filter("agent.status == 'Joyful'").select('agent.status')
|
1469
1704
|
Dataset([{'agent.status': ['Joyful', 'Joyful']}])
|
1470
1705
|
|
1706
|
+
>>> # Using multi-line string for complex conditions
|
1707
|
+
>>> r.filter('''
|
1708
|
+
... how_feeling == 'Great'
|
1709
|
+
... or how_feeling == 'Terrible'
|
1710
|
+
... ''').select('how_feeling')
|
1711
|
+
Dataset([{'answer.how_feeling': ['Great', 'Terrible']}])
|
1712
|
+
|
1713
|
+
>>> # Using template-style syntax with {{}}
|
1714
|
+
>>> r.filter("{{ answer.how_feeling }} == 'Great'").select('how_feeling')
|
1715
|
+
Dataset([{'answer.how_feeling': ['Great']}])
|
1716
|
+
|
1471
1717
|
>>> # Common error: using = instead of ==
|
1472
1718
|
>>> try:
|
1473
1719
|
... r.filter("how_feeling = 'Great'")
|
@@ -1475,28 +1721,43 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1475
1721
|
... print("ResultsFilterError: You must use '==' instead of '=' in the filter expression.")
|
1476
1722
|
ResultsFilterError: You must use '==' instead of '=' in the filter expression.
|
1477
1723
|
"""
|
1724
|
+
# Normalize expression by removing extra whitespace and newlines
|
1725
|
+
normalized_expression = " ".join(expression.strip().split())
|
1478
1726
|
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1482
|
-
|
1483
|
-
"==" in string or "<=" in string or ">=" in string
|
1484
|
-
):
|
1485
|
-
return True
|
1727
|
+
# Remove template-style syntax (double curly braces)
|
1728
|
+
normalized_expression = normalized_expression.replace("{{", "").replace(
|
1729
|
+
"}}", ""
|
1730
|
+
)
|
1486
1731
|
|
1487
|
-
if has_single_equals(
|
1732
|
+
if self.has_single_equals(normalized_expression):
|
1488
1733
|
raise ResultsFilterError(
|
1489
1734
|
"You must use '==' instead of '=' in the filter expression."
|
1490
1735
|
)
|
1491
1736
|
|
1492
1737
|
try:
|
1493
|
-
#
|
1494
|
-
|
1738
|
+
# Create new Results object with same class as original but empty data
|
1739
|
+
filtered_results = Results(
|
1740
|
+
survey=self.survey,
|
1741
|
+
data=[], # Empty data list
|
1742
|
+
created_columns=self.created_columns,
|
1743
|
+
data_class=self._data_class, # Preserve the original data class
|
1744
|
+
)
|
1745
|
+
|
1746
|
+
# Process one result at a time
|
1495
1747
|
for result in self.data:
|
1496
1748
|
evaluator = self._create_evaluator(result)
|
1497
|
-
result.check_expression(
|
1498
|
-
if evaluator.eval(
|
1499
|
-
|
1749
|
+
result.check_expression(normalized_expression) # check expression
|
1750
|
+
if evaluator.eval(normalized_expression):
|
1751
|
+
filtered_results.append(
|
1752
|
+
result
|
1753
|
+
) # Use append method to add matching results
|
1754
|
+
|
1755
|
+
if len(filtered_results) == 0:
|
1756
|
+
import warnings
|
1757
|
+
|
1758
|
+
warnings.warn("No results remain after applying the filter.")
|
1759
|
+
|
1760
|
+
return filtered_results
|
1500
1761
|
|
1501
1762
|
except ValueError as e:
|
1502
1763
|
raise ResultsFilterError(
|
@@ -1506,21 +1767,14 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1506
1767
|
)
|
1507
1768
|
except Exception as e:
|
1508
1769
|
raise ResultsFilterError(
|
1509
|
-
f"
|
1510
|
-
f"
|
1511
|
-
"
|
1512
|
-
|
1513
|
-
"
|
1514
|
-
"
|
1770
|
+
f"Error in filter. Exception:{e}.",
|
1771
|
+
f"The expression you provided was: {expression}.",
|
1772
|
+
"Please make sure that the expression is a valid Python expression that evaluates to a boolean.",
|
1773
|
+
'For example, \'how_feeling == "Great"\' is a valid expression, as is \'how_feeling in ["Great", "Terrible"]\'.',
|
1774
|
+
"However, 'how_feeling = \"Great\"' is not a valid expression.",
|
1775
|
+
"See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.",
|
1515
1776
|
)
|
1516
1777
|
|
1517
|
-
if len(new_data) == 0:
|
1518
|
-
import warnings
|
1519
|
-
|
1520
|
-
warnings.warn("No results remain after applying the filter.")
|
1521
|
-
|
1522
|
-
return Results(survey=self.survey, data=new_data, created_columns=None)
|
1523
|
-
|
1524
1778
|
@classmethod
|
1525
1779
|
def example(cls, randomize: bool = False) -> Results:
|
1526
1780
|
"""Return an example `Results` object.
|
@@ -1529,7 +1783,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1529
1783
|
|
1530
1784
|
>>> r = Results.example()
|
1531
1785
|
|
1532
|
-
:param
|
1786
|
+
:param randomize: if True, randomizes agent and scenario combinations
|
1533
1787
|
"""
|
1534
1788
|
from ..jobs import Jobs
|
1535
1789
|
from ..caching import Cache
|
@@ -1544,6 +1798,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1544
1798
|
disable_remote_cache=True,
|
1545
1799
|
disable_remote_inference=True,
|
1546
1800
|
)
|
1801
|
+
|
1547
1802
|
return results
|
1548
1803
|
|
1549
1804
|
def rich_print(self):
|
@@ -1761,6 +2016,282 @@ class Results(UserList, ResultsOperationsMixin, Base):
|
|
1761
2016
|
|
1762
2017
|
return results
|
1763
2018
|
|
2019
|
+
def shelve_result(self, result: "Result") -> str:
    """Store a Result object in persistent storage using its hash as the key.

    The result is serialized with ``result.to_dict()`` and written to the
    shelve database at ``self._shelve_path``. An existing entry stored under
    the same key is overwritten.

    Args:
        result: A Result object to store

    Returns:
        str: The hash key for retrieving the result later

    Raises:
        ResultsError: If there's an error storing the Result
    """
    import shelve

    key = str(hash(result))
    try:
        with shelve.open(self._shelve_path) as shelf:
            shelf[key] = result.to_dict()
        # Track the key only once the shelf has been closed without error,
        # so the tracking set never names an entry that was not written.
        self._shelf_keys.add(key)
    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise ResultsError(
            f"Error storing Result in shelve database: {str(e)}"
        ) from e
    return key
|
2041
|
+
|
2042
|
+
def get_shelved_result(self, key: str) -> "Result":
    """Retrieve a Result object from persistent storage.

    Only keys recorded in ``self._shelf_keys`` are looked up; the stored
    dictionary is turned back into a Result with ``Result.from_dict``.

    Args:
        key: The hash key of the Result to retrieve

    Returns:
        Result: The stored Result object

    Raises:
        ResultsError: If the key doesn't exist or if there's an error retrieving the Result
    """
    import shelve
    from .result import Result

    # Unknown keys are rejected up front, without opening the shelf.
    if key not in self._shelf_keys:
        raise ResultsError(f"No result found with key: {key}")

    try:
        with shelve.open(self._shelve_path) as shelf:
            return Result.from_dict(shelf[key])
    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise ResultsError(
            f"Error retrieving Result from shelve database: {str(e)}"
        ) from e
|
2067
|
+
|
2068
|
+
@property
def shelf_keys(self) -> set:
    """Snapshot of the keys currently tracked as shelved.

    A copy is handed back, so mutating the returned set does not alter the
    tracking set held by this object.
    """
    snapshot = self._shelf_keys.copy()
    return snapshot
|
2072
|
+
|
2073
|
+
@ensure_ready
def insert_sorted(self, item: "Result") -> None:
    """Place a Result into ``self.data`` at the position its sort key dictates.

    Results that have an ``order`` attribute are ranked by it and come before
    results ranked by ``data["iteration"]``. The insertion index is found with
    ``bisect_left`` over the sort keys of the current contents, so the
    existing data is expected to already be in that order.

    Args:
        item: A Result object to insert

    Examples:
        >>> r = Results.example()
        >>> new_result = r[0].copy()
        >>> new_result.order = 1.5  # Insert between items
        >>> r.insert_sorted(new_result)
    """
    from bisect import bisect_left

    def ranking(entry):
        # Group 1: no explicit order, rank by iteration.
        if not hasattr(entry, "order"):
            return (1, entry.data["iteration"])
        # Group 0: an explicit order always sorts ahead of group 1.
        return (0, entry.order)

    new_rank = ranking(item)
    existing_ranks = list(map(ranking, self.data))

    # bisect_left puts the new item before any existing item of equal rank.
    position = bisect_left(existing_ranks, new_rank)
    self.data.insert(position, item)
|
2107
|
+
|
2108
|
+
def insert_from_shelf(self) -> None:
    """Move all shelved results into memory using insert_sorted method.

    Each shelved entry is deserialized, inserted with ``insert_sorted``, and
    then removed from both the shelf and the tracking set before the next
    entry is processed. If a failure interrupts the move, the entries already
    moved are no longer shelved, so calling this method again does not insert
    them a second time.

    This method preserves the original order of results by using their 'order'
    attribute if available, which ensures consistent ordering even after
    serialization/deserialization.

    Raises:
        ResultsError: If there's an error accessing or clearing the shelf
    """
    # Nothing shelved: return before importing anything or opening the shelf.
    if not self._shelf_keys:
        return

    import shelve
    from .result import Result

    try:
        with shelve.open(self._shelve_path) as shelf:
            # Iterate over a snapshot because the tracking set shrinks as
            # each key is moved.
            for key in list(self._shelf_keys):
                self.insert_sorted(Result.from_dict(shelf[key]))
                del shelf[key]
                self._shelf_keys.discard(key)
    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise ResultsError(
            f"Error moving results from shelf to memory: {str(e)}"
        ) from e
|
2143
|
+
|
2144
|
+
def to_disk(self, filepath: str) -> None:
    """Serialize the Results object to a zip file, preserving the SQLite database.

    This method creates a zip file containing:
    1. results.db - the SQLite database file holding the result data
    2. metadata.json - the survey, created_columns, cache, task history,
       completion flag, job UUID and total result count

    Args:
        filepath: Path where the zip file should be saved

    Raises:
        ResultsError: If there's an error during serialization
    """
    import json
    import os
    import shutil
    import tempfile
    import zipfile
    from pathlib import Path

    data_class = ResultsSQLList

    try:
        # Stage the files in a temporary directory before zipping them.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            # 1. Handle the SQLite database
            db_path = temp_path / "results.db"

            # A plain list never has a backing database; anything else may
            # expose one through `db_path`.
            existing_db = (
                None
                if isinstance(self.data, list)
                else getattr(self.data, "db_path", None)
            )
            if existing_db is not None and os.path.exists(existing_db):
                # Data is already database-backed: copy its file as-is.
                shutil.copy2(existing_db, db_path)
            else:
                # No usable database file: write the data into a fresh one.
                new_db = data_class()
                new_db.extend(self.data)
                shutil.copy2(new_db.db_path, db_path)

            # 2. Create metadata.json
            # Optional attributes may be absent or None; both are stored as null.
            cache = getattr(self, "cache", None)
            task_history = getattr(self, "task_history", None)
            metadata = {
                "survey": self.survey.to_dict() if self.survey else None,
                "created_columns": self.created_columns,
                "cache": cache.to_dict() if cache is not None else None,
                "task_history": task_history.to_dict()
                if task_history is not None
                else None,
                "completed": self.completed,
                "job_uuid": getattr(self, "_job_uuid", None),
                "total_results": getattr(self, "_total_results", None),
            }

            metadata_path = temp_path / "metadata.json"
            metadata_path.write_text(json.dumps(metadata, indent=4))

            # 3. Create the zip file
            with zipfile.ZipFile(filepath, "w", zipfile.ZIP_DEFLATED) as zipf:
                # Add all files from temp directory to zip
                for file in temp_path.glob("*"):
                    zipf.write(file, file.name)

    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise ResultsError(f"Error saving Results to disk: {str(e)}") from e
|
2221
|
+
|
2222
|
+
@classmethod
def from_disk(cls, filepath: str) -> "Results":
    """Load a Results object from a zip file.

    This method:
    1. Extracts the archive into a temporary directory
    2. Loads metadata.json and builds a new Results instance from it
    3. If results.db is present, copies its contents into a new
       ResultsSQLList and uses that as the instance's data

    Args:
        filepath: Path to the zip file containing the serialized Results

    Returns:
        Results: A new Results instance with the restored data

    Raises:
        ResultsError: If there's an error during deserialization
    """
    import zipfile
    import json
    import tempfile
    from pathlib import Path
    from ..surveys import Survey
    from ..caching import Cache
    from ..tasks import TaskHistory

    data_class = ResultsSQLList

    try:
        # Create a temporary directory to extract files
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            # Extract the zip file
            with zipfile.ZipFile(filepath, "r") as zipf:
                zipf.extractall(temp_path)

            # 1. Load metadata
            metadata_path = temp_path / "metadata.json"
            metadata = json.loads(metadata_path.read_text())

            # 2. Create a new Results instance; entries stored as null stay None
            results = cls(
                survey=Survey.from_dict(metadata["survey"])
                if metadata["survey"]
                else None,
                created_columns=metadata["created_columns"],
                cache=Cache.from_dict(metadata["cache"])
                if metadata["cache"]
                else None,
                task_history=TaskHistory.from_dict(metadata["task_history"])
                if metadata["task_history"]
                else None,
                job_uuid=metadata["job_uuid"],
                total_results=metadata["total_results"],
            )

            # 3. Restore the result data if the archive carried a database
            db_path = temp_path / "results.db"
            if db_path.exists():
                # The extracted file disappears with the temporary directory,
                # so its contents are copied into a new ResultsSQLList.
                new_db = data_class()
                # copy_from is given the path as a string
                new_db.copy_from(str(db_path))
                results.data = new_db

            results.completed = metadata["completed"]
            return results

    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise ResultsError(f"Error loading Results from disk: {str(e)}") from e
|
2294
|
+
|
1764
2295
|
|
1765
2296
|
def main(): # pragma: no cover
|
1766
2297
|
"""Run example operations on a Results object.
|