edsl 0.1.53__py3-none-any.whl → 0.1.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/buckets/__init__.py +8 -3
  8. edsl/buckets/bucket_collection.py +9 -3
  9. edsl/buckets/model_buckets.py +4 -2
  10. edsl/buckets/token_bucket.py +2 -2
  11. edsl/buckets/token_bucket_client.py +5 -3
  12. edsl/caching/cache.py +131 -62
  13. edsl/caching/cache_entry.py +70 -58
  14. edsl/caching/sql_dict.py +17 -0
  15. edsl/cli.py +99 -0
  16. edsl/config/config_class.py +16 -0
  17. edsl/conversation/__init__.py +31 -0
  18. edsl/coop/coop.py +276 -242
  19. edsl/coop/coop_jobs_objects.py +59 -0
  20. edsl/coop/coop_objects.py +29 -0
  21. edsl/coop/coop_regular_objects.py +26 -0
  22. edsl/coop/utils.py +24 -19
  23. edsl/dataset/dataset.py +338 -101
  24. edsl/db_list/sqlite_list.py +349 -0
  25. edsl/inference_services/__init__.py +40 -5
  26. edsl/inference_services/exceptions.py +11 -0
  27. edsl/inference_services/services/anthropic_service.py +5 -2
  28. edsl/inference_services/services/aws_bedrock.py +6 -2
  29. edsl/inference_services/services/azure_ai.py +6 -2
  30. edsl/inference_services/services/google_service.py +3 -2
  31. edsl/inference_services/services/mistral_ai_service.py +6 -2
  32. edsl/inference_services/services/open_ai_service.py +6 -2
  33. edsl/inference_services/services/perplexity_service.py +6 -2
  34. edsl/inference_services/services/test_service.py +105 -7
  35. edsl/interviews/answering_function.py +167 -59
  36. edsl/interviews/interview.py +124 -72
  37. edsl/interviews/interview_task_manager.py +10 -0
  38. edsl/invigilators/invigilators.py +10 -1
  39. edsl/jobs/async_interview_runner.py +146 -104
  40. edsl/jobs/data_structures.py +6 -4
  41. edsl/jobs/decorators.py +61 -0
  42. edsl/jobs/fetch_invigilator.py +61 -18
  43. edsl/jobs/html_table_job_logger.py +14 -2
  44. edsl/jobs/jobs.py +180 -104
  45. edsl/jobs/jobs_component_constructor.py +2 -2
  46. edsl/jobs/jobs_interview_constructor.py +2 -0
  47. edsl/jobs/jobs_pricing_estimation.py +127 -46
  48. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  49. edsl/jobs/jobs_runner_status.py +30 -25
  50. edsl/jobs/progress_bar_manager.py +79 -0
  51. edsl/jobs/remote_inference.py +35 -1
  52. edsl/key_management/key_lookup_builder.py +6 -1
  53. edsl/language_models/language_model.py +102 -12
  54. edsl/language_models/model.py +10 -3
  55. edsl/language_models/price_manager.py +45 -75
  56. edsl/language_models/registry.py +5 -0
  57. edsl/language_models/utilities.py +2 -1
  58. edsl/notebooks/notebook.py +77 -10
  59. edsl/questions/VALIDATION_README.md +134 -0
  60. edsl/questions/__init__.py +24 -1
  61. edsl/questions/exceptions.py +21 -0
  62. edsl/questions/question_check_box.py +171 -149
  63. edsl/questions/question_dict.py +243 -51
  64. edsl/questions/question_multiple_choice_with_other.py +624 -0
  65. edsl/questions/question_registry.py +2 -1
  66. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  67. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  68. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  69. edsl/questions/validation_analysis.py +185 -0
  70. edsl/questions/validation_cli.py +131 -0
  71. edsl/questions/validation_html_report.py +404 -0
  72. edsl/questions/validation_logger.py +136 -0
  73. edsl/results/result.py +63 -16
  74. edsl/results/results.py +702 -171
  75. edsl/scenarios/construct_download_link.py +16 -3
  76. edsl/scenarios/directory_scanner.py +226 -226
  77. edsl/scenarios/file_methods.py +5 -0
  78. edsl/scenarios/file_store.py +117 -6
  79. edsl/scenarios/handlers/__init__.py +5 -1
  80. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  81. edsl/scenarios/handlers/webm_file_store.py +104 -0
  82. edsl/scenarios/scenario.py +120 -101
  83. edsl/scenarios/scenario_list.py +800 -727
  84. edsl/scenarios/scenario_list_gc_test.py +146 -0
  85. edsl/scenarios/scenario_list_memory_test.py +214 -0
  86. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  87. edsl/scenarios/scenario_selector.py +5 -4
  88. edsl/scenarios/scenario_source.py +1990 -0
  89. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  90. edsl/surveys/survey.py +22 -0
  91. edsl/tasks/__init__.py +4 -2
  92. edsl/tasks/task_history.py +198 -36
  93. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  94. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  95. edsl/utilities/__init__.py +2 -1
  96. edsl/utilities/decorators.py +121 -0
  97. edsl/utilities/memory_debugger.py +1010 -0
  98. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/METADATA +52 -76
  99. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/RECORD +102 -78
  100. edsl/jobs/jobs_runner_asyncio.py +0 -281
  101. edsl/language_models/unused/fake_openai_service.py +0 -60
  102. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
  103. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
  104. {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,134 @@
1
+ # EDSL Validation Logging System
2
+
3
+ This system logs validation failures that occur during question answering and provides tools to analyze these failures to improve the "fix" methods for various question types.
4
+
5
+ ## Background
6
+
7
+ When a language model's response to a question fails validation (e.g., the response doesn't match the expected format or constraints), EDSL raises a `QuestionAnswerValidationError`. To make validation more robust, we've added a system that logs these failures and helps analyze common patterns in them.
8
+
9
+ ## Features
10
+
11
+ - **Validation Logging**: Automatically logs validation failures to a local file
12
+ - **Log Analysis**: Tools to analyze validation failures by question type and error message
13
+ - **Fix Method Suggestions**: Generates suggestions for improving fix methods based on common failure patterns
14
+ - **CLI Interface**: Command-line tools for managing and analyzing validation logs
15
+
16
+ ## Usage
17
+
18
+ ### Command Line Interface
19
+
20
+ The validation logging system is integrated with the EDSL CLI:
21
+
22
+ ```bash
23
+ # Show recent validation failure logs
24
+ edsl validation logs
25
+
26
+ # Show recent logs filtered by question type
27
+ edsl validation logs --type QuestionMultipleChoice
28
+
29
+ # Save logs to a file
30
+ edsl validation logs --output validation_logs.json
31
+
32
+ # Clear all validation logs
33
+ edsl validation clear
34
+
35
+ # Show validation failure statistics
36
+ edsl validation stats
37
+
38
+ # Get suggestions for improving fix methods
39
+ edsl validation suggest
40
+
41
+ # Filter suggestions for a specific question type
42
+ edsl validation suggest --type QuestionMultipleChoice
43
+
44
+ # Generate a comprehensive JSON report
45
+ edsl validation report
46
+
47
+ # Generate an HTML report and open it in browser
48
+ edsl validation html-report
49
+
50
+ # Generate HTML report without opening browser
51
+ edsl validation html-report --no-open
52
+ ```
53
+
54
+ You can also use the `make` command to generate reports:
55
+
56
+ ```bash
57
+ # Generate and open HTML validation report
58
+ make validation-report
59
+
60
+ # Show validation statistics
61
+ make validation-stats
62
+ ```
63
+
64
+ ### Programmatic Usage
65
+
66
+ You can also use the validation logging system programmatically:
67
+
68
+ ```python
69
+ from edsl.questions import (
70
+ log_validation_failure,
71
+ get_validation_failure_logs,
72
+ clear_validation_logs,
73
+ get_validation_failure_stats,
74
+ suggest_fix_improvements,
75
+ export_improvements_report,
76
+ generate_html_report,
77
+ generate_and_open_report
78
+ )
79
+
80
+ # Get recent validation failure logs
81
+ logs = get_validation_failure_logs(n=10)
82
+
83
+ # Get validation failure statistics
84
+ stats = get_validation_failure_stats()
85
+
86
+ # Get suggestions for improving fix methods
87
+ suggestions = suggest_fix_improvements()
88
+
89
+ # Generate a JSON report
90
+ report_path = export_improvements_report()
91
+
92
+ # Generate an HTML report
93
+ html_report_path = generate_html_report()
94
+
95
+ # Generate and open HTML report in browser
96
+ generate_and_open_report()
97
+ ```
98
+
99
+ ## Implementation Details
100
+
101
+ The validation logging system consists of the following components:
102
+
103
+ 1. **Validation Logger**: Logs validation failures to a local file
104
+ 2. **Validation Analysis**: Analyzes logs to identify patterns and suggest improvements
105
+ 3. **HTML Report Generator**: Creates user-friendly HTML reports with visualizations
106
+ 4. **CLI Integration**: Provides command-line tools for working with validation logs
107
+
108
+ ### Log Format
109
+
110
+ Validation failure logs include the following information:
111
+
112
+ - Timestamp
113
+ - Question type and name
114
+ - Error message
115
+ - Invalid data that failed validation
116
+ - Model schema used for validation
117
+ - Question details (if available)
118
+ - Stack trace
119
+
120
+ ### Storage Location
121
+
122
+ Logs are stored in the default EDSL log directory:
123
+
124
+ - Linux/macOS: `~/.edsl/logs/validation_failures.log`
125
+ - Windows: `%USERPROFILE%\.edsl\logs\validation_failures.log`
126
+
127
+ ## Future Improvements
128
+
129
+ Potential future improvements to the validation logging system:
130
+
131
+ 1. Integration with Coop for cloud storage and analysis of validation failures
132
+ 2. Machine learning to automatically suggest fix method improvements
133
+ 3. Automated tests using common validation failure patterns
134
+ 4. A web-based dashboard for visualizing validation failure statistics
@@ -34,6 +34,7 @@ Derived Question Types:
34
34
  - QuestionLinearScale: Linear scale with customizable range and labels
35
35
  - QuestionYesNo: Simple binary yes/no response
36
36
  - QuestionTopK: Selection of top K items from a list of options
37
+ - QuestionMultipleChoiceWithOther: Multiple choice with an option to specify a custom "Other" response
37
38
 
38
39
  Technical Architecture:
39
40
  ---------------------
@@ -124,9 +125,19 @@ from .question_likert_five import QuestionLikertFive
124
125
  from .question_linear_scale import QuestionLinearScale
125
126
  from .question_yes_no import QuestionYesNo
126
127
  from .question_top_k import QuestionTopK
128
+ from .question_multiple_choice_with_other import QuestionMultipleChoiceWithOther
127
129
 
128
130
  from .exceptions import QuestionScenarioRenderError
129
131
 
132
+ # Import validation modules
133
+ from .validation_logger import log_validation_failure, get_validation_failure_logs, clear_validation_logs
134
+ from .validation_analysis import (
135
+ get_validation_failure_stats,
136
+ suggest_fix_improvements,
137
+ export_improvements_report
138
+ )
139
+ from .validation_html_report import generate_html_report, generate_and_open_report
140
+
130
141
  __all__ = [
131
142
  # Exceptions
132
143
  "QuestionScenarioRenderError",
@@ -156,4 +167,16 @@ __all__ = [
156
167
  "QuestionTopK",
157
168
  "QuestionLikertFive",
158
169
  "QuestionYesNo",
159
- ]
170
+ "QuestionMultipleChoiceWithOther",
171
+ "QuestionMultipleChoiceWithOther",
172
+
173
+ # Validation utilities
174
+ "log_validation_failure",
175
+ "get_validation_failure_logs",
176
+ "clear_validation_logs",
177
+ "get_validation_failure_stats",
178
+ "suggest_fix_improvements",
179
+ "export_improvements_report",
180
+ "generate_html_report",
181
+ "generate_and_open_report",
182
+ ]
@@ -72,6 +72,27 @@ class QuestionAnswerValidationError(QuestionErrors):
72
72
  self.data = data
73
73
  self.model = model
74
74
  super().__init__(self.message)
75
+
76
+ # Log validation failure for analysis
77
+ try:
78
+ from .validation_logger import log_validation_failure
79
+
80
+ # Get question type and name if available
81
+ question_type = getattr(model, "question_type", "unknown")
82
+ question_name = getattr(model, "question_name", "unknown")
83
+
84
+ # Log the validation failure
85
+ log_validation_failure(
86
+ question_type=question_type,
87
+ question_name=question_name,
88
+ error_message=str(message),
89
+ invalid_data=data,
90
+ model_schema=model.model_json_schema(),
91
+ question_dict=getattr(model, "to_dict", lambda: None)(),
92
+ )
93
+ except Exception:
94
+ # Silently ignore logging errors to not disrupt normal operation
95
+ pass
75
96
 
76
97
  def __str__(self):
77
98
  if isinstance(self.message, ValidationError):