edsl 0.1.53__py3-none-any.whl → 0.1.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +3 -2
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +105 -7
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/invigilators/invigilators.py +10 -1
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +127 -46
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +102 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +45 -75
- edsl/language_models/registry.py +5 -0
- edsl/language_models/utilities.py +2 -1
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_check_box.py +171 -149
- edsl/questions/question_dict.py +243 -51
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +63 -16
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +117 -6
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/METADATA +52 -76
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/RECORD +102 -78
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
# EDSL Validation Logging System
|
2
|
+
|
3
|
+
This system logs validation failures that occur during question answering and provides tools for analyzing those failures, so that the "fix" methods of the various question types can be improved.
|
4
|
+
|
5
|
+
## Background
|
6
|
+
|
7
|
+
When a language model's response to a question fails validation (e.g., the response doesn't match the expected format or constraints), EDSL raises a `QuestionAnswerValidationError`. To make validation more robust, we've added a system that logs these failures and helps identify common patterns in them.
|
8
|
+
|
9
|
+
## Features
|
10
|
+
|
11
|
+
- **Validation Logging**: Automatically logs validation failures to a local file
|
12
|
+
- **Log Analysis**: Tools to analyze validation failures by question type and error message
|
13
|
+
- **Fix Method Suggestions**: Generates suggestions for improving fix methods based on common failure patterns
|
14
|
+
- **CLI Interface**: Command-line tools for managing and analyzing validation logs
|
15
|
+
|
16
|
+
## Usage
|
17
|
+
|
18
|
+
### Command Line Interface
|
19
|
+
|
20
|
+
The validation logging system is integrated with the EDSL CLI:
|
21
|
+
|
22
|
+
```bash
|
23
|
+
# Show recent validation failure logs
|
24
|
+
edsl validation logs
|
25
|
+
|
26
|
+
# Show recent logs filtered by question type
|
27
|
+
edsl validation logs --type QuestionMultipleChoice
|
28
|
+
|
29
|
+
# Save logs to a file
|
30
|
+
edsl validation logs --output validation_logs.json
|
31
|
+
|
32
|
+
# Clear all validation logs
|
33
|
+
edsl validation clear
|
34
|
+
|
35
|
+
# Show validation failure statistics
|
36
|
+
edsl validation stats
|
37
|
+
|
38
|
+
# Get suggestions for improving fix methods
|
39
|
+
edsl validation suggest
|
40
|
+
|
41
|
+
# Filter suggestions for a specific question type
|
42
|
+
edsl validation suggest --type QuestionMultipleChoice
|
43
|
+
|
44
|
+
# Generate a comprehensive JSON report
|
45
|
+
edsl validation report
|
46
|
+
|
47
|
+
# Generate an HTML report and open it in browser
|
48
|
+
edsl validation html-report
|
49
|
+
|
50
|
+
# Generate HTML report without opening browser
|
51
|
+
edsl validation html-report --no-open
|
52
|
+
```
|
53
|
+
|
54
|
+
You can also use the `make` command to generate reports:
|
55
|
+
|
56
|
+
```bash
|
57
|
+
# Generate and open HTML validation report
|
58
|
+
make validation-report
|
59
|
+
|
60
|
+
# Show validation statistics
|
61
|
+
make validation-stats
|
62
|
+
```
|
63
|
+
|
64
|
+
### Programmatic Usage
|
65
|
+
|
66
|
+
You can also use the validation logging system programmatically:
|
67
|
+
|
68
|
+
```python
|
69
|
+
from edsl.questions import (
|
70
|
+
log_validation_failure,
|
71
|
+
get_validation_failure_logs,
|
72
|
+
clear_validation_logs,
|
73
|
+
get_validation_failure_stats,
|
74
|
+
suggest_fix_improvements,
|
75
|
+
export_improvements_report,
|
76
|
+
generate_html_report,
|
77
|
+
generate_and_open_report
|
78
|
+
)
|
79
|
+
|
80
|
+
# Get recent validation failure logs
|
81
|
+
logs = get_validation_failure_logs(n=10)
|
82
|
+
|
83
|
+
# Get validation failure statistics
|
84
|
+
stats = get_validation_failure_stats()
|
85
|
+
|
86
|
+
# Get suggestions for improving fix methods
|
87
|
+
suggestions = suggest_fix_improvements()
|
88
|
+
|
89
|
+
# Generate a JSON report
|
90
|
+
report_path = export_improvements_report()
|
91
|
+
|
92
|
+
# Generate an HTML report
|
93
|
+
html_report_path = generate_html_report()
|
94
|
+
|
95
|
+
# Generate and open HTML report in browser
|
96
|
+
generate_and_open_report()
|
97
|
+
```
|
98
|
+
|
99
|
+
## Implementation Details
|
100
|
+
|
101
|
+
The validation logging system consists of the following components:
|
102
|
+
|
103
|
+
1. **Validation Logger**: Logs validation failures to a local file
|
104
|
+
2. **Validation Analysis**: Analyzes logs to identify patterns and suggest improvements
|
105
|
+
3. **HTML Report Generator**: Creates user-friendly HTML reports with visualizations
|
106
|
+
4. **CLI Integration**: Provides command-line tools for working with validation logs
|
107
|
+
|
108
|
+
### Log Format
|
109
|
+
|
110
|
+
Validation failure logs include the following information:
|
111
|
+
|
112
|
+
- Timestamp
|
113
|
+
- Question type and name
|
114
|
+
- Error message
|
115
|
+
- Invalid data that failed validation
|
116
|
+
- Model schema used for validation
|
117
|
+
- Question details (if available)
|
118
|
+
- Stack trace
|
119
|
+
|
120
|
+
### Storage Location
|
121
|
+
|
122
|
+
Logs are stored in the default EDSL log directory:
|
123
|
+
|
124
|
+
- Linux/macOS: `~/.edsl/logs/validation_failures.log`
|
125
|
+
- Windows: `%USERPROFILE%\.edsl\logs\validation_failures.log`
|
126
|
+
|
127
|
+
## Future Improvements
|
128
|
+
|
129
|
+
Potential future improvements to the validation logging system:
|
130
|
+
|
131
|
+
1. Integration with Coop for cloud storage and analysis of validation failures
|
132
|
+
2. Machine learning to automatically suggest fix method improvements
|
133
|
+
3. Automated tests using common validation failure patterns
|
134
|
+
4. A web-based dashboard for visualizing validation failure statistics
|
edsl/questions/__init__.py
CHANGED
@@ -34,6 +34,7 @@ Derived Question Types:
|
|
34
34
|
- QuestionLinearScale: Linear scale with customizable range and labels
|
35
35
|
- QuestionYesNo: Simple binary yes/no response
|
36
36
|
- QuestionTopK: Selection of top K items from a list of options
|
37
|
+
- QuestionMultipleChoiceWithOther: Multiple choice with option to specify "Other" custom response
|
37
38
|
|
38
39
|
Technical Architecture:
|
39
40
|
---------------------
|
@@ -124,9 +125,19 @@ from .question_likert_five import QuestionLikertFive
|
|
124
125
|
from .question_linear_scale import QuestionLinearScale
|
125
126
|
from .question_yes_no import QuestionYesNo
|
126
127
|
from .question_top_k import QuestionTopK
|
128
|
+
from .question_multiple_choice_with_other import QuestionMultipleChoiceWithOther
|
127
129
|
|
128
130
|
from .exceptions import QuestionScenarioRenderError
|
129
131
|
|
132
|
+
# Import validation modules
|
133
|
+
from .validation_logger import log_validation_failure, get_validation_failure_logs, clear_validation_logs
|
134
|
+
from .validation_analysis import (
|
135
|
+
get_validation_failure_stats,
|
136
|
+
suggest_fix_improvements,
|
137
|
+
export_improvements_report
|
138
|
+
)
|
139
|
+
from .validation_html_report import generate_html_report, generate_and_open_report
|
140
|
+
|
130
141
|
__all__ = [
|
131
142
|
# Exceptions
|
132
143
|
"QuestionScenarioRenderError",
|
@@ -156,4 +167,16 @@ __all__ = [
|
|
156
167
|
"QuestionTopK",
|
157
168
|
"QuestionLikertFive",
|
158
169
|
"QuestionYesNo",
|
159
|
-
|
170
|
+
"QuestionMultipleChoiceWithOther",
|
171
|
+
"QuestionMultipleChoiceWithOther",
|
172
|
+
|
173
|
+
# Validation utilities
|
174
|
+
"log_validation_failure",
|
175
|
+
"get_validation_failure_logs",
|
176
|
+
"clear_validation_logs",
|
177
|
+
"get_validation_failure_stats",
|
178
|
+
"suggest_fix_improvements",
|
179
|
+
"export_improvements_report",
|
180
|
+
"generate_html_report",
|
181
|
+
"generate_and_open_report",
|
182
|
+
]
|
edsl/questions/exceptions.py
CHANGED
@@ -72,6 +72,27 @@ class QuestionAnswerValidationError(QuestionErrors):
|
|
72
72
|
self.data = data
|
73
73
|
self.model = model
|
74
74
|
super().__init__(self.message)
|
75
|
+
|
76
|
+
# Log validation failure for analysis
|
77
|
+
try:
|
78
|
+
from .validation_logger import log_validation_failure
|
79
|
+
|
80
|
+
# Get question type and name if available
|
81
|
+
question_type = getattr(model, "question_type", "unknown")
|
82
|
+
question_name = getattr(model, "question_name", "unknown")
|
83
|
+
|
84
|
+
# Log the validation failure
|
85
|
+
log_validation_failure(
|
86
|
+
question_type=question_type,
|
87
|
+
question_name=question_name,
|
88
|
+
error_message=str(message),
|
89
|
+
invalid_data=data,
|
90
|
+
model_schema=model.model_json_schema(),
|
91
|
+
question_dict=getattr(model, "to_dict", lambda: None)(),
|
92
|
+
)
|
93
|
+
except Exception:
|
94
|
+
# Silently ignore logging errors to not disrupt normal operation
|
95
|
+
pass
|
75
96
|
|
76
97
|
def __str__(self):
|
77
98
|
if isinstance(self.message, ValidationError):
|