edsl 0.1.53__py3-none-any.whl → 0.1.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/buckets/__init__.py +8 -3
  8. edsl/buckets/bucket_collection.py +9 -3
  9. edsl/buckets/model_buckets.py +4 -2
  10. edsl/buckets/token_bucket.py +2 -2
  11. edsl/buckets/token_bucket_client.py +5 -3
  12. edsl/caching/cache.py +131 -62
  13. edsl/caching/cache_entry.py +70 -58
  14. edsl/caching/sql_dict.py +17 -0
  15. edsl/cli.py +99 -0
  16. edsl/config/config_class.py +16 -0
  17. edsl/conversation/__init__.py +31 -0
  18. edsl/coop/coop.py +276 -242
  19. edsl/coop/coop_jobs_objects.py +59 -0
  20. edsl/coop/coop_objects.py +29 -0
  21. edsl/coop/coop_regular_objects.py +26 -0
  22. edsl/coop/utils.py +24 -19
  23. edsl/dataset/dataset.py +338 -101
  24. edsl/db_list/sqlite_list.py +349 -0
  25. edsl/inference_services/__init__.py +40 -5
  26. edsl/inference_services/exceptions.py +11 -0
  27. edsl/inference_services/services/anthropic_service.py +5 -2
  28. edsl/inference_services/services/aws_bedrock.py +6 -2
  29. edsl/inference_services/services/azure_ai.py +6 -2
  30. edsl/inference_services/services/google_service.py +3 -2
  31. edsl/inference_services/services/mistral_ai_service.py +6 -2
  32. edsl/inference_services/services/open_ai_service.py +6 -2
  33. edsl/inference_services/services/perplexity_service.py +6 -2
  34. edsl/inference_services/services/test_service.py +105 -7
  35. edsl/interviews/answering_function.py +167 -59
  36. edsl/interviews/interview.py +124 -72
  37. edsl/interviews/interview_task_manager.py +10 -0
  38. edsl/invigilators/invigilators.py +10 -1
  39. edsl/jobs/async_interview_runner.py +146 -104
  40. edsl/jobs/data_structures.py +6 -4
  41. edsl/jobs/decorators.py +61 -0
  42. edsl/jobs/fetch_invigilator.py +61 -18
  43. edsl/jobs/html_table_job_logger.py +14 -2
  44. edsl/jobs/jobs.py +180 -104
  45. edsl/jobs/jobs_component_constructor.py +2 -2
  46. edsl/jobs/jobs_interview_constructor.py +2 -0
  47. edsl/jobs/jobs_pricing_estimation.py +127 -46
  48. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  49. edsl/jobs/jobs_runner_status.py +30 -25
  50. edsl/jobs/progress_bar_manager.py +79 -0
  51. edsl/jobs/remote_inference.py +35 -1
  52. edsl/key_management/key_lookup_builder.py +6 -1
  53. edsl/language_models/language_model.py +102 -12
  54. edsl/language_models/model.py +10 -3
  55. edsl/language_models/price_manager.py +45 -75
  56. edsl/language_models/registry.py +5 -0
  57. edsl/language_models/utilities.py +2 -1
  58. edsl/notebooks/notebook.py +77 -10
  59. edsl/questions/VALIDATION_README.md +134 -0
  60. edsl/questions/__init__.py +24 -1
  61. edsl/questions/exceptions.py +21 -0
  62. edsl/questions/question_check_box.py +171 -149
  63. edsl/questions/question_dict.py +243 -51
  64. edsl/questions/question_multiple_choice_with_other.py +624 -0
  65. edsl/questions/question_registry.py +2 -1
  66. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  67. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  68. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  69. edsl/questions/validation_analysis.py +185 -0
  70. edsl/questions/validation_cli.py +131 -0
  71. edsl/questions/validation_html_report.py +404 -0
  72. edsl/questions/validation_logger.py +136 -0
  73. edsl/results/result.py +63 -16
  74. edsl/results/results.py +702 -171
  75. edsl/scenarios/construct_download_link.py +16 -3
  76. edsl/scenarios/directory_scanner.py +226 -226
  77. edsl/scenarios/file_methods.py +5 -0
  78. edsl/scenarios/file_store.py +117 -6
  79. edsl/scenarios/handlers/__init__.py +5 -1
  80. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  81. edsl/scenarios/handlers/webm_file_store.py +104 -0
  82. edsl/scenarios/scenario.py +120 -101
  83. edsl/scenarios/scenario_list.py +800 -727
  84. edsl/scenarios/scenario_list_gc_test.py +146 -0
  85. edsl/scenarios/scenario_list_memory_test.py +214 -0
  86. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  87. edsl/scenarios/scenario_selector.py +5 -4
  88. edsl/scenarios/scenario_source.py +1990 -0
  89. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  90. edsl/surveys/survey.py +22 -0
  91. edsl/tasks/__init__.py +4 -2
  92. edsl/tasks/task_history.py +198 -36
  93. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  94. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  95. edsl/utilities/__init__.py +2 -1
  96. edsl/utilities/decorators.py +121 -0
  97. edsl/utilities/memory_debugger.py +1010 -0
  98. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/METADATA +52 -76
  99. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/RECORD +102 -78
  100. edsl/jobs/jobs_runner_asyncio.py +0 -281
  101. edsl/language_models/unused/fake_openai_service.py +0 -60
  102. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
  103. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
  104. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
edsl/results/results.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """The Results module provides tools for working with collections of Result objects.
2
2
 
3
- The Results class is the primary container for analyzing and manipulating data obtained
4
- from running surveys with language models. It implements a powerful data analysis interface
3
+ The Results class is the primary container for analyzing and manipulating data obtained
4
+ from running surveys with language models. It implements a powerful data analysis interface
5
5
  with methods for filtering, selecting, mutating, and visualizing your results, similar to
6
6
  data manipulation libraries like dplyr or pandas.
7
7
 
@@ -11,7 +11,7 @@ Key components:
11
11
  2. Report - A flexible reporting system for generating formatted output from Results
12
12
  3. Selectors - Tools for efficiently extracting specific data from Results
13
13
 
14
- The Results class is not typically instantiated directly; instead, it's returned by the
14
+ The Results class is not typically instantiated directly; instead, it's returned by the
15
15
  run() method of a Job object. Once you have a Results object, you can use its methods
16
16
  to analyze and extract insights from your survey data.
17
17
 
@@ -39,9 +39,10 @@ from __future__ import annotations
39
39
  import json
40
40
  import random
41
41
  import warnings
42
- from collections import UserList, defaultdict
42
+ from collections import defaultdict
43
43
  from typing import Optional, Callable, Any, Union, List, TYPE_CHECKING
44
44
  from bisect import bisect_left
45
+ from collections.abc import MutableSequence
45
46
 
46
47
  from ..base import Base
47
48
  from ..caching import Cache, CacheEntry
@@ -59,6 +60,9 @@ if TYPE_CHECKING:
59
60
  from ..utilities import remove_edsl_version, dict_hash
60
61
  from ..dataset import ResultsOperationsMixin
61
62
 
63
+ from .result import Result
64
+ from ..db_list.sqlite_list import SQLiteList
65
+
62
66
  from .exceptions import (
63
67
  ResultsError,
64
68
  ResultsBadMutationstringError,
@@ -70,6 +74,18 @@ from .exceptions import (
70
74
  )
71
75
 
72
76
 
77
+ class ResultsSQLList(SQLiteList):
78
+ def serialize(self, obj):
79
+ return json.dumps(obj.to_dict()) if hasattr(obj, "to_dict") else json.dumps(obj)
80
+
81
+ def deserialize(self, data):
82
+ return (
83
+ Result.from_dict(json.loads(data))
84
+ if hasattr(Result, "from_dict")
85
+ else json.loads(data)
86
+ )
87
+
88
+
73
89
  def ensure_fetched(method):
74
90
  """A decorator that checks if remote data is loaded, and if not, attempts to fetch it.
75
91
 
@@ -188,7 +204,7 @@ class NotReadyObject:
188
204
  return self
189
205
 
190
206
 
191
- class Results(UserList, ResultsOperationsMixin, Base):
207
+ class Results(MutableSequence, ResultsOperationsMixin, Base):
192
208
  """A collection of Result objects with powerful data analysis capabilities.
193
209
 
194
210
  The Results class is the primary container for working with data from EDSL surveys.
@@ -297,13 +313,11 @@ class Results(UserList, ResultsOperationsMixin, Base):
297
313
  job_uuid: Optional[str] = None,
298
314
  total_results: Optional[int] = None,
299
315
  task_history: Optional[TaskHistory] = None,
316
+ sort_by_iteration: bool = False,
317
+ data_class: Optional[type] = list, # ResultsSQLList,
300
318
  ):
301
319
  """Instantiate a Results object with a survey and a list of Result objects.
302
320
 
303
- This initializes a completed Results object with the provided data.
304
- For creating a not-ready Results object from a job info dictionary,
305
- use the from_job_info class method instead.
306
-
307
321
  Args:
308
322
  survey: A Survey object containing the questions used to generate results.
309
323
  data: A list of Result objects containing the responses.
@@ -312,29 +326,49 @@ class Results(UserList, ResultsOperationsMixin, Base):
312
326
  job_uuid: A string representing the job UUID.
313
327
  total_results: An integer representing the total number of results.
314
328
  task_history: A TaskHistory object containing information about the tasks.
315
-
316
- Examples:
317
- >>> from ..results import Result
318
- >>> # Create an empty Results object
319
- >>> r = Results()
320
- >>> r.completed
321
- True
322
- >>> len(r.created_columns)
323
- 0
324
-
325
- >>> # Create a Results object with data
326
- >>> from unittest.mock import Mock
327
- >>> mock_survey = Mock()
328
- >>> mock_result = Mock(spec=Result)
329
- >>> r = Results(survey=mock_survey, data=[mock_result])
330
- >>> len(r)
331
- 1
329
+ sort_by_iteration: Whether to sort data by iteration before initializing.
330
+ data_class: The class to use for the data container (default: list).
332
331
  """
333
332
  self.completed = True
334
333
  self._fetching = False
335
- super().__init__(data)
334
+
335
+ # Determine the data class to use
336
+ if data is not None:
337
+ # Use the class of the provided data if it's not a basic list
338
+ self._data_class = (
339
+ data.__class__ if not isinstance(data, list) else data_class
340
+ )
341
+ else:
342
+ self._data_class = data_class
343
+
344
+ # Sort data appropriately before initialization if needed
345
+ if data and sort_by_iteration:
346
+ # First try to sort by order attribute if present on any result
347
+ has_order = any(hasattr(item, "order") for item in data)
348
+ if has_order:
349
+
350
+ def get_order(item):
351
+ if hasattr(item, "order"):
352
+ return item.order
353
+ return item.data.get("iteration", 0) * 1000
354
+
355
+ data = sorted(data, key=get_order)
356
+ else:
357
+ data = sorted(data, key=lambda x: x.data.get("iteration", 0))
358
+
359
+ # Initialize data with the appropriate class
360
+ self.data = self._data_class(data or [])
361
+
336
362
  from ..caching import Cache
337
363
  from ..tasks import TaskHistory
364
+ import tempfile
365
+ import os
366
+
367
+ # Create a unique shelve path in the system temp directory
368
+ self._shelve_path = os.path.join(
369
+ tempfile.gettempdir(), f"edsl_results_{os.getpid()}"
370
+ )
371
+ self._shelf_keys = set() # Track shelved result keys
338
372
 
339
373
  self.survey = survey
340
374
  self.created_columns = created_columns or []
@@ -347,6 +381,9 @@ class Results(UserList, ResultsOperationsMixin, Base):
347
381
  if hasattr(self, "_add_output_functions"):
348
382
  self._add_output_functions()
349
383
 
384
+ def add_task_history_entry(self, interview: "Interview") -> None:
385
+ self.task_history.add_interview(interview)
386
+
350
387
  def _fetch_list(self, data_type: str, key: str) -> list:
351
388
  """Return a list of values from the data for a given data type and key.
352
389
 
@@ -395,6 +432,32 @@ class Results(UserList, ResultsOperationsMixin, Base):
395
432
  return self._fetch_list("answer", question_name)
396
433
 
397
434
  def _summary(self) -> dict:
435
+ """Return a dictionary containing summary statistics about the Results object.
436
+
437
+ The summary includes:
438
+ - Number of observations (results)
439
+ - Number of unique agents
440
+ - Number of unique models
441
+ - Number of unique scenarios
442
+ - Number of questions in the survey
443
+ - Survey question names (truncated for readability)
444
+
445
+ Returns:
446
+ dict: A dictionary containing the summary statistics
447
+
448
+ Examples:
449
+ >>> from edsl.results import Results
450
+ >>> r = Results.example()
451
+ >>> summary = r._summary()
452
+ >>> isinstance(summary, dict)
453
+ True
454
+ >>> all(key in summary for key in ['observations', 'agents', 'models', 'scenarios', 'questions', 'Survey question names'])
455
+ True
456
+ >>> summary['observations'] > 0
457
+ True
458
+ >>> summary['questions'] > 0
459
+ True
460
+ """
398
461
  import reprlib
399
462
 
400
463
  d = {
@@ -407,7 +470,22 @@ class Results(UserList, ResultsOperationsMixin, Base):
407
470
  }
408
471
  return d
409
472
 
410
- def _cache_keys(self):
473
+ def _cache_keys(self) -> List[str]: # -> list:
474
+ """Return a list of all cache keys from the results.
475
+
476
+ This method collects all cache keys by iterating through each result in the data
477
+ and extracting the values from the 'cache_keys' dictionary. These keys can be used
478
+ to identify cached responses and manage the cache effectively.
479
+
480
+ Returns:
481
+ List[str]: A list of cache keys from all results.
482
+
483
+ Examples:
484
+ >>> from edsl.results import Results
485
+ >>> r = Results.example()
486
+ >>> all([type(s) == str for s in r._cache_keys()])
487
+ True
488
+ """
411
489
  cache_keys = []
412
490
  for result in self:
413
491
  cache_keys.extend(list(result["cache_keys"].values()))
@@ -417,31 +495,57 @@ class Results(UserList, ResultsOperationsMixin, Base):
417
495
  cache_keys = self._cache_keys()
418
496
  return cache.subset(cache_keys)
419
497
 
420
- def insert(self, item):
421
- item_order = getattr(item, "order", None)
422
- if item_order is not None:
423
- # Get list of orders, putting None at the end
424
- orders = [getattr(x, "order", None) for x in self]
425
- # Filter to just the non-None orders for bisect
426
- sorted_orders = [x for x in orders if x is not None]
427
- if sorted_orders:
428
- index = bisect_left(sorted_orders, item_order)
429
- # Account for any None values before this position
430
- index += orders[:index].count(None)
431
- else:
432
- # If no sorted items yet, insert before any unordered items
433
- index = 0
434
- self.data.insert(index, item)
435
- else:
436
- # No order - append to end
437
- self.data.append(item)
498
+ # def insert(self, item):
499
+ # """Insert a Result object into the Results list in the correct order.
500
+
501
+ # If the Result has an 'order' attribute, it uses that for ordering.
502
+ # Otherwise, it falls back to ordering by the 'iteration' attribute.
503
+
504
+ # >>> from edsl.results import Result
505
+ # >>> rnew = Result.example()
506
+ # >>> results = Results.example()
507
+ # >>> results.insert(rnew)
508
+ # >>> results[0] == rnew
509
+ # True
510
+ # >>> results = Results.example()
511
+ # >>> rnew.order = 100
512
+ # >>> results.insert(rnew)
513
+ # >>> results[-1] == rnew # The new result is at the end
514
+ # True
515
+ # """
516
+
517
+ # def get_sort_key(result):
518
+ # if hasattr(result, "order"):
519
+ # return result.order
520
+ # return result.data["iteration"]
521
+
522
+ # # Find insertion point using bisect with custom key function
523
+ # index = bisect_left([get_sort_key(x) for x in self.data], get_sort_key(item))
524
+
525
+ # # Call the parent class's insert directly
526
+ # MutableSequence.insert(self, index, item)
527
+
528
+ def extend_sorted(self, other):
529
+ """Extend the Results list with items from another iterable.
530
+
531
+ This method preserves ordering based on 'order' attribute if present,
532
+ otherwise falls back to 'iteration' attribute.
533
+ """
534
+ # Collect all items (existing and new)
535
+ all_items = list(self.data)
536
+ all_items.extend(other)
438
537
 
439
- def append(self, item):
440
- self.insert(item)
538
+ # Sort combined list by order attribute if available, otherwise by iteration
539
+ def get_sort_key(item):
540
+ if hasattr(item, "order"):
541
+ return (0, item.order) # Order attribute takes precedence
542
+ return (1, item.data["iteration"]) # Iteration is secondary
441
543
 
442
- def extend(self, other):
443
- for item in other:
444
- self.insert(item)
544
+ all_items.sort(key=get_sort_key)
545
+
546
+ # Clear and refill with sorted items
547
+ self.data.clear()
548
+ self.data.extend(all_items)
445
549
 
446
550
  def compute_job_cost(self, include_cached_responses_in_cost: bool = False) -> float:
447
551
  """Compute the cost of a completed job in USD.
@@ -468,8 +572,16 @@ class Results(UserList, ResultsOperationsMixin, Base):
468
572
  if key.endswith("_cost"):
469
573
  result_cost = result["raw_model_response"][key]
470
574
 
575
+ # Extract the question name from the key
471
576
  question_name = key.removesuffix("_cost")
472
- cache_used = result["cache_used_dict"][question_name]
577
+
578
+ # Get cache status safely - default to False if not found
579
+ cache_used = False
580
+ if (
581
+ "cache_used_dict" in result
582
+ and question_name in result["cache_used_dict"]
583
+ ):
584
+ cache_used = result["cache_used_dict"][question_name]
473
585
 
474
586
  if isinstance(result_cost, (int, float)):
475
587
  if include_cached_responses_in_cost:
@@ -496,48 +608,59 @@ class Results(UserList, ResultsOperationsMixin, Base):
496
608
  """
497
609
  raise ResultsError("The code() method is not implemented for Results objects")
498
610
 
611
+ @ensure_ready
499
612
  def __getitem__(self, i):
500
- """Get an item from the Results object by index, slice, or key.
613
+ if isinstance(i, int):
614
+ return self.data[i]
615
+ if isinstance(i, slice):
616
+ return self.__class__(survey=self.survey, data=self.data[i])
617
+ if isinstance(i, str):
618
+ return self.to_dict()[i]
619
+ raise ResultsError("Invalid argument type for indexing Results object")
501
620
 
502
- Args:
503
- i: An integer index, a slice, or a string key.
621
+ @ensure_ready
622
+ def __setitem__(self, i, item):
623
+ self.data[i] = item
504
624
 
505
- Returns:
506
- The requested item, slice of results, or dictionary value.
625
+ @ensure_ready
626
+ def __delitem__(self, i):
627
+ del self.data[i]
507
628
 
508
- Raises:
509
- ResultsError: If the argument type is invalid for indexing.
629
+ @ensure_ready
630
+ def __len__(self):
631
+ return len(self.data)
510
632
 
511
- Examples:
512
- >>> from edsl.results import Results
513
- >>> r = Results.example()
514
- >>> # Get by integer index
515
- >>> result = r[0]
516
- >>> # Get by slice
517
- >>> subset = r[0:2]
518
- >>> len(subset) == 2
519
- True
520
- >>> # Get by string key
521
- >>> data = r["data"]
522
- >>> isinstance(data, list)
523
- True
524
- >>> # Invalid index type
525
- >>> try:
526
- ... r[1.5]
527
- ... except ResultsError:
528
- ... True
529
- True
633
+ @ensure_ready
634
+ def insert(self, index, item):
635
+ self.data.insert(index, item)
636
+
637
+ @ensure_ready
638
+ def extend(self, other):
639
+ """Extend the Results list with items from another iterable."""
640
+ self.data.extend(other)
641
+
642
+ @ensure_ready
643
+ def extend_sorted(self, other):
644
+ """Extend the Results list with items from another iterable, maintaining sort order.
645
+
646
+ This method preserves ordering based on 'order' attribute if present,
647
+ otherwise falls back to 'iteration' attribute.
530
648
  """
531
- if isinstance(i, int):
532
- return self.data[i]
649
+ # Collect all items (existing and new)
650
+ all_items = list(self.data)
651
+ all_items.extend(other)
533
652
 
534
- if isinstance(i, slice):
535
- return self.__class__(survey=self.survey, data=self.data[i])
653
+ # Sort combined list by order attribute if available, otherwise by iteration
654
+ def get_sort_key(item):
655
+ if hasattr(item, "order"):
656
+ return (0, item.order) # Order attribute takes precedence
657
+ return (1, item.data["iteration"]) # Iteration is secondary
536
658
 
537
- if isinstance(i, str):
538
- return self.to_dict()[i]
659
+ all_items.sort(key=get_sort_key)
539
660
 
540
- raise ResultsError("Invalid argument type for indexing Results object")
661
+ # Clear and refill with sorted items
662
+ self.data.clear()
663
+ self.data.extend(all_items)
541
664
 
542
665
  def __add__(self, other: Results) -> Results:
543
666
  """Add two Results objects together.
@@ -581,9 +704,15 @@ class Results(UserList, ResultsOperationsMixin, Base):
581
704
  "The created columns are not the same so they cannot be added together."
582
705
  )
583
706
 
707
+ # Create a new ResultsSQLList with the combined data
708
+ # combined_data = ResultsSQLList()
709
+ combined_data = self._data_class()
710
+ combined_data.extend(self.data)
711
+ combined_data.extend(other.data)
712
+
584
713
  return Results(
585
714
  survey=self.survey,
586
- data=self.data + other.data,
715
+ data=combined_data,
587
716
  created_columns=self.created_columns,
588
717
  )
589
718
 
@@ -743,7 +872,12 @@ class Results(UserList, ResultsOperationsMixin, Base):
743
872
 
744
873
  def __hash__(self) -> int:
745
874
  return dict_hash(
746
- self.to_dict(sort=True, add_edsl_version=False, include_cache_info=False)
875
+ self.to_dict(
876
+ sort=True,
877
+ add_edsl_version=False,
878
+ include_cache=False,
879
+ include_cache_info=False,
880
+ )
747
881
  )
748
882
 
749
883
  @property
@@ -792,10 +926,11 @@ class Results(UserList, ResultsOperationsMixin, Base):
792
926
  """
793
927
  from ..surveys import Survey
794
928
  from ..caching import Cache
795
- from ..results import Result
929
+ from .result import Result
796
930
  from ..tasks import TaskHistory
797
931
 
798
932
  survey = Survey.from_dict(data["survey"])
933
+ # Convert dictionaries to Result objects
799
934
  results_data = [Result.from_dict(r) for r in data["data"]]
800
935
  created_columns = data.get("created_columns", None)
801
936
  cache = Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
@@ -804,9 +939,12 @@ class Results(UserList, ResultsOperationsMixin, Base):
804
939
  if "task_history" in data
805
940
  else TaskHistory(interviews=[])
806
941
  )
942
+
943
+ # Create a Results object with original order preserved
944
+ # using the empty data list initially
807
945
  params = {
808
946
  "survey": survey,
809
- "data": results_data,
947
+ "data": [], # Start with empty data
810
948
  "created_columns": created_columns,
811
949
  "cache": cache,
812
950
  "task_history": task_history,
@@ -814,6 +952,9 @@ class Results(UserList, ResultsOperationsMixin, Base):
814
952
 
815
953
  try:
816
954
  results = cls(**params)
955
+ # Add each result individually to respect order attributes
956
+ for result in results_data:
957
+ results.append(result)
817
958
  except Exception as e:
818
959
  raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
819
960
  return results
@@ -1081,19 +1222,26 @@ class Results(UserList, ResultsOperationsMixin, Base):
1081
1222
  >>> r.add_column('a', [1,2,3, 4]).select('a')
1082
1223
  Dataset([{'answer.a': [1, 2, 3, 4]}])
1083
1224
  """
1084
-
1085
1225
  assert len(values) == len(
1086
1226
  self.data
1087
1227
  ), "The number of values must match the number of results."
1088
- new_results = self.data.copy()
1089
- for i, result in enumerate(new_results):
1090
- result["answer"][column_name] = values[i]
1091
- return Results(
1228
+
1229
+ # Create new Results object with same properties but empty data
1230
+ new_results = Results(
1092
1231
  survey=self.survey,
1093
- data=new_results,
1232
+ data=[],
1094
1233
  created_columns=self.created_columns + [column_name],
1234
+ data_class=self._data_class,
1095
1235
  )
1096
1236
 
1237
+ # Process one result at a time
1238
+ for i, result in enumerate(self.data):
1239
+ new_result = result.copy()
1240
+ new_result["answer"][column_name] = values[i]
1241
+ new_results.append(new_result)
1242
+
1243
+ return new_results
1244
+
1097
1245
  @ensure_ready
1098
1246
  def add_columns_from_dict(self, columns: List[dict]) -> Results:
1099
1247
  """Adds columns to Results from a list of dictionaries.
@@ -1234,33 +1382,63 @@ class Results(UserList, ResultsOperationsMixin, Base):
1234
1382
  >>> s = Results.example()
1235
1383
  >>> s.rename('how_feeling', 'how_feeling_new').select('how_feeling_new')
1236
1384
  Dataset([{'answer.how_feeling_new': ['OK', 'Great', 'Terrible', 'OK']}])
1237
-
1238
- # TODO: Should we allow renaming of scenario fields as well? Probably.
1239
-
1240
1385
  """
1386
+ # Create new Results object with same properties but empty data
1387
+ new_results = Results(
1388
+ survey=self.survey,
1389
+ data=[],
1390
+ created_columns=self.created_columns,
1391
+ data_class=self._data_class,
1392
+ )
1393
+
1394
+ # Update created_columns if old_name was in there
1395
+ if old_name in new_results.created_columns:
1396
+ new_results.created_columns.remove(old_name)
1397
+ new_results.created_columns.append(new_name)
1241
1398
 
1399
+ # Process one result at a time
1242
1400
  for obs in self.data:
1243
- obs["answer"][new_name] = obs["answer"][old_name]
1244
- del obs["answer"][old_name]
1401
+ new_result = obs.copy()
1402
+ new_result["answer"][new_name] = new_result["answer"][old_name]
1403
+ del new_result["answer"][old_name]
1404
+ new_results.append(new_result)
1245
1405
 
1246
- return self
1406
+ return new_results
1247
1407
 
1248
1408
  @ensure_ready
1249
1409
  def shuffle(self, seed: Optional[str] = "edsl") -> Results:
1250
- """Shuffle the results.
1410
+ """Return a shuffled copy of the results using Fisher-Yates algorithm.
1251
1411
 
1252
- Example:
1412
+ Args:
1413
+ seed: Random seed for reproducibility.
1253
1414
 
1254
- >>> r = Results.example()
1255
- >>> r.shuffle(seed = 1)[0]
1256
- Result(...)
1415
+ Returns:
1416
+ Results: A new Results object with shuffled data.
1257
1417
  """
1258
1418
  if seed != "edsl":
1259
- seed = random.seed(seed)
1419
+ random.seed(seed)
1260
1420
 
1261
- new_data = self.data.copy()
1262
- random.shuffle(new_data)
1263
- return Results(survey=self.survey, data=new_data, created_columns=None)
1421
+ # Create new Results object with same properties but empty data
1422
+ shuffled_results = Results(
1423
+ survey=self.survey,
1424
+ data=[],
1425
+ created_columns=self.created_columns,
1426
+ data_class=self._data_class,
1427
+ )
1428
+
1429
+ # First pass: copy data while tracking indices
1430
+ indices = list(range(len(self.data)))
1431
+
1432
+ # Second pass: Fisher-Yates shuffle on indices
1433
+ for i in range(len(indices) - 1, 0, -1):
1434
+ j = random.randrange(i + 1)
1435
+ indices[i], indices[j] = indices[j], indices[i]
1436
+
1437
+ # Final pass: append items in shuffled order
1438
+ for idx in indices:
1439
+ shuffled_results.append(self.data[idx])
1440
+
1441
+ return shuffled_results
1264
1442
 
1265
1443
  @ensure_ready
1266
1444
  def sample(
@@ -1270,41 +1448,61 @@ class Results(UserList, ResultsOperationsMixin, Base):
1270
1448
  with_replacement: bool = True,
1271
1449
  seed: Optional[str] = None,
1272
1450
  ) -> Results:
1273
- """Sample the results.
1274
-
1275
- :param n: An integer representing the number of samples to take.
1276
- :param frac: A float representing the fraction of samples to take.
1277
- :param with_replacement: A boolean representing whether to sample with replacement.
1278
- :param seed: An integer representing the seed for the random number generator.
1451
+ """Return a random sample of the results.
1279
1452
 
1280
- Example:
1453
+ Args:
1454
+ n: The number of samples to take.
1455
+ frac: The fraction of samples to take (alternative to n).
1456
+ with_replacement: Whether to sample with replacement.
1457
+ seed: Random seed for reproducibility.
1281
1458
 
1282
- >>> r = Results.example()
1283
- >>> len(r.sample(2))
1284
- 2
1459
+ Returns:
1460
+ Results: A new Results object containing the sampled data.
1285
1461
  """
1286
1462
  if seed:
1287
1463
  random.seed(seed)
1288
1464
 
1289
1465
  if n is None and frac is None:
1290
- from .exceptions import ResultsError
1291
-
1292
1466
  raise ResultsError("You must specify either n or frac.")
1293
1467
 
1294
1468
  if n is not None and frac is not None:
1295
- from .exceptions import ResultsError
1296
-
1297
1469
  raise ResultsError("You cannot specify both n and frac.")
1298
1470
 
1299
- if frac is not None and n is None:
1471
+ if frac is not None:
1300
1472
  n = int(frac * len(self.data))
1301
1473
 
1474
+ # Create new Results object with same properties but empty data
1475
+ sampled_results = Results(
1476
+ survey=self.survey,
1477
+ data=[],
1478
+ created_columns=self.created_columns,
1479
+ data_class=self._data_class,
1480
+ )
1481
+
1302
1482
  if with_replacement:
1303
- new_data = random.choices(self.data, k=n)
1483
+ # For sampling with replacement, we can generate indices and sample one at a time
1484
+ indices = (random.randrange(len(self.data)) for _ in range(n))
1485
+ for i in indices:
1486
+ sampled_results.append(self.data[i])
1304
1487
  else:
1305
- new_data = random.sample(self.data, n)
1488
+ # For sampling without replacement, use reservoir sampling
1489
+ if n > len(self.data):
1490
+ raise ResultsError(
1491
+ f"Cannot sample {n} items from a list of length {len(self.data)}."
1492
+ )
1493
+
1494
+ # Reservoir sampling algorithm
1495
+ for i, item in enumerate(self.data):
1496
+ if i < n:
1497
+ # Fill the reservoir initially
1498
+ sampled_results.append(item)
1499
+ else:
1500
+ # Randomly replace items with decreasing probability
1501
+ j = random.randrange(i + 1)
1502
+ if j < n:
1503
+ sampled_results.data[j] = item
1306
1504
 
1307
- return Results(survey=self.survey, data=new_data, created_columns=None)
1505
+ return sampled_results
1308
1506
 
1309
1507
  @ensure_ready
1310
1508
  def select(self, *columns: Union[str, list[str]]) -> "Dataset":
@@ -1391,20 +1589,12 @@ class Results(UserList, ResultsOperationsMixin, Base):
1391
1589
  def order_by(self, *columns: str, reverse: bool = False) -> Results:
1392
1590
  """Sort the results by one or more columns.
1393
1591
 
1394
- :param columns: One or more column names as strings.
1395
- :param reverse: A boolean that determines whether to sort in reverse order.
1396
-
1397
- Each column name can be a single key, e.g. "how_feeling", or a dot-separated string, e.g. "answer.how_feeling".
1398
-
1399
- Example:
1400
-
1401
- >>> r = Results.example()
1402
- >>> r.sort_by('how_feeling', reverse=False).select('how_feeling')
1403
- Dataset([{'answer.how_feeling': ['Great', 'OK', 'OK', 'Terrible']}])
1404
-
1405
- >>> r.sort_by('how_feeling', reverse=True).select('how_feeling')
1406
- Dataset([{'answer.how_feeling': ['Terrible', 'OK', 'OK', 'Great']}])
1592
+ Args:
1593
+ columns: One or more column names as strings.
1594
+ reverse: A boolean that determines whether to sort in reverse order.
1407
1595
 
1596
+ Returns:
1597
+ Results: A new Results object with sorted data.
1408
1598
  """
1409
1599
 
1410
1600
  def to_numeric_if_possible(v):
@@ -1418,11 +1608,52 @@ class Results(UserList, ResultsOperationsMixin, Base):
1418
1608
  for col in columns:
1419
1609
  data_type, key = self._parse_column(col)
1420
1610
  value = item.get_value(data_type, key)
1421
- key_components.append(to_numeric_if_possible(value))
1611
+ if isinstance(value, (str, bytes)):
1612
+ key_components.append(str(value))
1613
+ else:
1614
+ key_components.append(to_numeric_if_possible(value))
1422
1615
  return tuple(key_components)
1423
1616
 
1424
- new_data = sorted(self.data, key=sort_key, reverse=reverse)
1425
- return Results(survey=self.survey, data=new_data, created_columns=None)
1617
+ # Create a new sorted view of the data without materializing it
1618
+ sorted_data = sorted(self.data, key=sort_key, reverse=reverse)
1619
+
1620
+ # Create new Results object that uses the sorted iterator
1621
+ return Results(
1622
+ survey=self.survey,
1623
+ data=sorted_data, # This will be an iterator, not a materialized list
1624
+ created_columns=self.created_columns,
1625
+ data_class=self._data_class,
1626
+ sort_by_iteration=False,
1627
+ )
1628
+
1629
+ @staticmethod
1630
+ def has_single_equals(expression: str) -> bool:
1631
+ """Check if an expression contains a single equals sign not part of ==, >=, or <=.
1632
+
1633
+ Args:
1634
+ expression: String expression to check
1635
+
1636
+ Returns:
1637
+ bool: True if there is a standalone = sign
1638
+
1639
+ Examples:
1640
+ >>> Results.has_single_equals("x = 1")
1641
+ True
1642
+ >>> Results.has_single_equals("x == 1")
1643
+ False
1644
+ >>> Results.has_single_equals("x >= 1")
1645
+ False
1646
+ >>> Results.has_single_equals("x <= 1")
1647
+ False
1648
+ """
1649
+ # First remove valid operators that contain =
1650
+ cleaned = (
1651
+ expression.replace("==", "")
1652
+ .replace(">=", "")
1653
+ .replace("<=", "")
1654
+ .replace("!=", "")
1655
+ )
1656
+ return "=" in cleaned
1426
1657
 
1427
1658
  @ensure_ready
1428
1659
  def filter(self, expression: str) -> Results:
@@ -1436,6 +1667,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
1436
1667
  Args:
1437
1668
  expression: A string containing a Python expression that evaluates to a boolean.
1438
1669
  The expression is applied to each Result object individually.
1670
+ Can be a multi-line string for better readability.
1671
+ Supports template-style syntax with {{ field }} notation.
1439
1672
 
1440
1673
  Returns:
1441
1674
  A new Results object containing only the Result objects that satisfy the expression.
@@ -1452,6 +1685,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
1452
1685
  - You can use comparison operators like '==', '!=', '>', '<', '>=', '<='
1453
1686
  - You can use membership tests with 'in'
1454
1687
  - You can use string methods like '.startswith()', '.contains()', etc.
1688
+ - The expression can be a multi-line string for improved readability
1689
+ - You can use template-style syntax with double curly braces: {{ field }}
1455
1690
 
1456
1691
  Examples:
1457
1692
  >>> r = Results.example()
@@ -1468,6 +1703,17 @@ class Results(UserList, ResultsOperationsMixin, Base):
1468
1703
  >>> r.filter("agent.status == 'Joyful'").select('agent.status')
1469
1704
  Dataset([{'agent.status': ['Joyful', 'Joyful']}])
1470
1705
 
1706
+ >>> # Using multi-line string for complex conditions
1707
+ >>> r.filter('''
1708
+ ... how_feeling == 'Great'
1709
+ ... or how_feeling == 'Terrible'
1710
+ ... ''').select('how_feeling')
1711
+ Dataset([{'answer.how_feeling': ['Great', 'Terrible']}])
1712
+
1713
+ >>> # Using template-style syntax with {{}}
1714
+ >>> r.filter("{{ answer.how_feeling }} == 'Great'").select('how_feeling')
1715
+ Dataset([{'answer.how_feeling': ['Great']}])
1716
+
1471
1717
  >>> # Common error: using = instead of ==
1472
1718
  >>> try:
1473
1719
  ... r.filter("how_feeling = 'Great'")
@@ -1475,28 +1721,43 @@ class Results(UserList, ResultsOperationsMixin, Base):
1475
1721
  ... print("ResultsFilterError: You must use '==' instead of '=' in the filter expression.")
1476
1722
  ResultsFilterError: You must use '==' instead of '=' in the filter expression.
1477
1723
  """
1724
+ # Normalize expression by removing extra whitespace and newlines
1725
+ normalized_expression = " ".join(expression.strip().split())
1478
1726
 
1479
- def has_single_equals(string):
1480
- if "!=" in string:
1481
- return False
1482
- if "=" in string and not (
1483
- "==" in string or "<=" in string or ">=" in string
1484
- ):
1485
- return True
1727
+ # Remove template-style syntax (double curly braces)
1728
+ normalized_expression = normalized_expression.replace("{{", "").replace(
1729
+ "}}", ""
1730
+ )
1486
1731
 
1487
- if has_single_equals(expression):
1732
+ if self.has_single_equals(normalized_expression):
1488
1733
  raise ResultsFilterError(
1489
1734
  "You must use '==' instead of '=' in the filter expression."
1490
1735
  )
1491
1736
 
1492
1737
  try:
1493
- # iterates through all the results and evaluates the expression
1494
- new_data = []
1738
+ # Create new Results object with same class as original but empty data
1739
+ filtered_results = Results(
1740
+ survey=self.survey,
1741
+ data=[], # Empty data list
1742
+ created_columns=self.created_columns,
1743
+ data_class=self._data_class, # Preserve the original data class
1744
+ )
1745
+
1746
+ # Process one result at a time
1495
1747
  for result in self.data:
1496
1748
  evaluator = self._create_evaluator(result)
1497
- result.check_expression(expression) # check expression
1498
- if evaluator.eval(expression):
1499
- new_data.append(result)
1749
+ result.check_expression(normalized_expression) # check expression
1750
+ if evaluator.eval(normalized_expression):
1751
+ filtered_results.append(
1752
+ result
1753
+ ) # Use append method to add matching results
1754
+
1755
+ if len(filtered_results) == 0:
1756
+ import warnings
1757
+
1758
+ warnings.warn("No results remain after applying the filter.")
1759
+
1760
+ return filtered_results
1500
1761
 
1501
1762
  except ValueError as e:
1502
1763
  raise ResultsFilterError(
@@ -1506,21 +1767,14 @@ class Results(UserList, ResultsOperationsMixin, Base):
1506
1767
  )
1507
1768
  except Exception as e:
1508
1769
  raise ResultsFilterError(
1509
- f"""Error in filter. Exception:{e}.""",
1510
- f"""The expression you provided was: {expression}.""",
1511
- """Please make sure that the expression is a valid Python expression that evaluates to a boolean.""",
1512
- """For example, 'how_feeling == "Great"' is a valid expression, as is 'how_feeling in ["Great", "Terrible"]'., """,
1513
- """However, 'how_feeling = "Great"' is not a valid expression.""",
1514
- """See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.""",
1770
+ f"Error in filter. Exception:{e}.",
1771
+ f"The expression you provided was: {expression}.",
1772
+ "Please make sure that the expression is a valid Python expression that evaluates to a boolean.",
1773
+ 'For example, \'how_feeling == "Great"\' is a valid expression, as is \'how_feeling in ["Great", "Terrible"]\'.',
1774
+ "However, 'how_feeling = \"Great\"' is not a valid expression.",
1775
+ "See https://docs.expectedparrot.com/en/latest/results.html#filtering-results for more details.",
1515
1776
  )
1516
1777
 
1517
- if len(new_data) == 0:
1518
- import warnings
1519
-
1520
- warnings.warn("No results remain after applying the filter.")
1521
-
1522
- return Results(survey=self.survey, data=new_data, created_columns=None)
1523
-
1524
1778
  @classmethod
1525
1779
  def example(cls, randomize: bool = False) -> Results:
1526
1780
  """Return an example `Results` object.
@@ -1529,7 +1783,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
1529
1783
 
1530
1784
  >>> r = Results.example()
1531
1785
 
1532
- :param debug: if False, uses actual API calls
1786
+ :param randomize: if True, randomizes agent and scenario combinations
1533
1787
  """
1534
1788
  from ..jobs import Jobs
1535
1789
  from ..caching import Cache
@@ -1544,6 +1798,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
1544
1798
  disable_remote_cache=True,
1545
1799
  disable_remote_inference=True,
1546
1800
  )
1801
+
1547
1802
  return results
1548
1803
 
1549
1804
  def rich_print(self):
@@ -1761,6 +2016,282 @@ class Results(UserList, ResultsOperationsMixin, Base):
1761
2016
 
1762
2017
  return results
1763
2018
 
2019
+ def shelve_result(self, result: "Result") -> str:
2020
+ """Store a Result object in persistent storage using its hash as the key.
2021
+
2022
+ Args:
2023
+ result: A Result object to store
2024
+
2025
+ Returns:
2026
+ str: The hash key for retrieving the result later
2027
+
2028
+ Raises:
2029
+ ResultsError: If there's an error storing the Result
2030
+ """
2031
+ import shelve
2032
+
2033
+ key = str(hash(result))
2034
+ try:
2035
+ with shelve.open(self._shelve_path) as shelf:
2036
+ shelf[key] = result.to_dict()
2037
+ self._shelf_keys.add(key)
2038
+ return key
2039
+ except Exception as e:
2040
+ raise ResultsError(f"Error storing Result in shelve database: {str(e)}")
2041
+
2042
+ def get_shelved_result(self, key: str) -> "Result":
2043
+ """Retrieve a Result object from persistent storage.
2044
+
2045
+ Args:
2046
+ key: The hash key of the Result to retrieve
2047
+
2048
+ Returns:
2049
+ Result: The stored Result object
2050
+
2051
+ Raises:
2052
+ ResultsError: If the key doesn't exist or if there's an error retrieving the Result
2053
+ """
2054
+ import shelve
2055
+ from .result import Result
2056
+
2057
+ if key not in self._shelf_keys:
2058
+ raise ResultsError(f"No result found with key: {key}")
2059
+
2060
+ try:
2061
+ with shelve.open(self._shelve_path) as shelf:
2062
+ return Result.from_dict(shelf[key])
2063
+ except Exception as e:
2064
+ raise ResultsError(
2065
+ f"Error retrieving Result from shelve database: {str(e)}"
2066
+ )
2067
+
2068
+ @property
2069
+ def shelf_keys(self) -> set:
2070
+ """Return a copy of the set of shelved result keys."""
2071
+ return self._shelf_keys.copy()
2072
+
2073
+ @ensure_ready
2074
+ def insert_sorted(self, item: "Result") -> None:
2075
+ """Insert a Result object into the Results list while maintaining sort order.
2076
+
2077
+ Uses the 'order' attribute if present, otherwise falls back to 'iteration' attribute.
2078
+ Utilizes bisect for efficient insertion point finding.
2079
+
2080
+ Args:
2081
+ item: A Result object to insert
2082
+
2083
+ Examples:
2084
+ >>> r = Results.example()
2085
+ >>> new_result = r[0].copy()
2086
+ >>> new_result.order = 1.5 # Insert between items
2087
+ >>> r.insert_sorted(new_result)
2088
+ """
2089
+ from bisect import bisect_left
2090
+
2091
+ def get_sort_key(result):
2092
+ if hasattr(result, "order"):
2093
+ return (0, result.order) # Order attribute takes precedence
2094
+ return (1, result.data["iteration"]) # Iteration is secondary
2095
+
2096
+ # Get the sort key for the new item
2097
+ item_key = get_sort_key(item)
2098
+
2099
+ # Get list of sort keys for existing items
2100
+ keys = [get_sort_key(x) for x in self.data]
2101
+
2102
+ # Find insertion point
2103
+ index = bisect_left(keys, item_key)
2104
+
2105
+ # Insert at the found position
2106
+ self.data.insert(index, item)
2107
+
2108
+ def insert_from_shelf(self) -> None:
2109
+ """Move all shelved results into memory using insert_sorted method.
2110
+ Clears the shelf after successful insertion.
2111
+
2112
+ This method preserves the original order of results by using their 'order'
2113
+ attribute if available, which ensures consistent ordering even after
2114
+ serialization/deserialization.
2115
+
2116
+ Raises:
2117
+ ResultsError: If there's an error accessing or clearing the shelf
2118
+ """
2119
+ import shelve
2120
+ from .result import Result
2121
+
2122
+ if not self._shelf_keys:
2123
+ return
2124
+
2125
+ try:
2126
+ # First collect all results from shelf
2127
+ with shelve.open(self._shelve_path) as shelf:
2128
+ # Get and insert all results first
2129
+ for key in self._shelf_keys:
2130
+ result_dict = shelf[key]
2131
+ result = Result.from_dict(result_dict)
2132
+ self.insert_sorted(result)
2133
+
2134
+ # Now clear the shelf
2135
+ for key in self._shelf_keys:
2136
+ del shelf[key]
2137
+
2138
+ # Clear the tracking set
2139
+ self._shelf_keys.clear()
2140
+
2141
+ except Exception as e:
2142
+ raise ResultsError(f"Error moving results from shelf to memory: {str(e)}")
2143
+
2144
+ def to_disk(self, filepath: str) -> None:
2145
+ """Serialize the Results object to a zip file, preserving the SQLite database.
2146
+
2147
+ This method creates a zip file containing:
2148
+ 1. The SQLite database file from the data container
2149
+ 2. A metadata.json file with the survey, created_columns, and other non-data info
2150
+ 3. The cache data if present
2151
+
2152
+ Args:
2153
+ filepath: Path where the zip file should be saved
2154
+
2155
+ Raises:
2156
+ ResultsError: If there's an error during serialization
2157
+ """
2158
+ import zipfile
2159
+ import json
2160
+ import os
2161
+ import tempfile
2162
+ from pathlib import Path
2163
+ import sqlite3
2164
+ import shutil
2165
+
2166
+ data_class = ResultsSQLList
2167
+
2168
+ try:
2169
+ # Create a temporary directory to store files before zipping
2170
+ with tempfile.TemporaryDirectory() as temp_dir:
2171
+ temp_path = Path(temp_dir)
2172
+
2173
+ # 1. Handle the SQLite database
2174
+ db_path = temp_path / "results.db"
2175
+
2176
+ if isinstance(self.data, list):
2177
+ # If data is a list, create a new SQLiteList
2178
+ # from .sqlite_list import SQLiteList
2179
+ new_db = data_class()
2180
+ new_db.extend(self.data)
2181
+ shutil.copy2(new_db.db_path, db_path)
2182
+ elif hasattr(self.data, "db_path") and os.path.exists(
2183
+ self.data.db_path
2184
+ ):
2185
+ # If data is already a SQLiteList, copy its database
2186
+ shutil.copy2(self.data.db_path, db_path)
2187
+ else:
2188
+ # If no database exists, create a new one
2189
+ # from .sqlite_list import SQLiteList
2190
+ # new_db = SQLiteList()
2191
+ new_db = data_class()
2192
+ new_db.extend(self.data)
2193
+ shutil.copy2(new_db.db_path, db_path)
2194
+
2195
+ # 2. Create metadata.json
2196
+ metadata = {
2197
+ "survey": self.survey.to_dict() if self.survey else None,
2198
+ "created_columns": self.created_columns,
2199
+ "cache": self.cache.to_dict() if hasattr(self, "cache") else None,
2200
+ "task_history": self.task_history.to_dict()
2201
+ if hasattr(self, "task_history")
2202
+ else None,
2203
+ "completed": self.completed,
2204
+ "job_uuid": self._job_uuid if hasattr(self, "_job_uuid") else None,
2205
+ "total_results": self._total_results
2206
+ if hasattr(self, "_total_results")
2207
+ else None,
2208
+ }
2209
+
2210
+ metadata_path = temp_path / "metadata.json"
2211
+ metadata_path.write_text(json.dumps(metadata, indent=4))
2212
+
2213
+ # 3. Create the zip file
2214
+ with zipfile.ZipFile(filepath, "w", zipfile.ZIP_DEFLATED) as zipf:
2215
+ # Add all files from temp directory to zip
2216
+ for file in temp_path.glob("*"):
2217
+ zipf.write(file, file.name)
2218
+
2219
+ except Exception as e:
2220
+ raise ResultsError(f"Error saving Results to disk: {str(e)}")
2221
+
2222
+ @classmethod
2223
+ def from_disk(cls, filepath: str) -> "Results":
2224
+ """Load a Results object from a zip file.
2225
+
2226
+ This method:
2227
+ 1. Extracts the SQLite database file
2228
+ 2. Loads the metadata
2229
+ 3. Creates a new Results instance with the restored data
2230
+
2231
+ Args:
2232
+ filepath: Path to the zip file containing the serialized Results
2233
+
2234
+ Returns:
2235
+ Results: A new Results instance with the restored data
2236
+
2237
+ Raises:
2238
+ ResultsError: If there's an error during deserialization
2239
+ """
2240
+ import zipfile
2241
+ import json
2242
+ import tempfile
2243
+ from pathlib import Path
2244
+ from ..surveys import Survey
2245
+ from ..caching import Cache
2246
+ from ..tasks import TaskHistory
2247
+
2248
+ data_class = ResultsSQLList
2249
+
2250
+ try:
2251
+ # Create a temporary directory to extract files
2252
+ with tempfile.TemporaryDirectory() as temp_dir:
2253
+ temp_path = Path(temp_dir)
2254
+
2255
+ # Extract the zip file
2256
+ with zipfile.ZipFile(filepath, "r") as zipf:
2257
+ zipf.extractall(temp_path)
2258
+
2259
+ # 1. Load metadata
2260
+ metadata_path = temp_path / "metadata.json"
2261
+ metadata = json.loads(metadata_path.read_text())
2262
+
2263
+ # 2. Create a new Results instance
2264
+ results = cls(
2265
+ survey=Survey.from_dict(metadata["survey"])
2266
+ if metadata["survey"]
2267
+ else None,
2268
+ created_columns=metadata["created_columns"],
2269
+ cache=Cache.from_dict(metadata["cache"])
2270
+ if metadata["cache"]
2271
+ else None,
2272
+ task_history=TaskHistory.from_dict(metadata["task_history"])
2273
+ if metadata["task_history"]
2274
+ else None,
2275
+ job_uuid=metadata["job_uuid"],
2276
+ total_results=metadata["total_results"],
2277
+ )
2278
+
2279
+ # 3. Set the SQLite database path if it exists
2280
+ db_path = temp_path / "results.db"
2281
+ if db_path.exists():
2282
+ # Create a new ResultsSQLList instance
2283
+ new_db = data_class()
2284
+ # Copy data from the source database - convert Path to string
2285
+ new_db.copy_from(str(db_path))
2286
+ # Set the new database as the results data
2287
+ results.data = new_db
2288
+
2289
+ results.completed = metadata["completed"]
2290
+ return results
2291
+
2292
+ except Exception as e:
2293
+ raise ResultsError(f"Error loading Results from disk: {str(e)}")
2294
+
1764
2295
 
1765
2296
  def main(): # pragma: no cover
1766
2297
  """Run example operations on a Results object.