edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/base/data_transfer_models.py +15 -4
  8. edsl/buckets/__init__.py +8 -3
  9. edsl/buckets/bucket_collection.py +9 -3
  10. edsl/buckets/model_buckets.py +4 -2
  11. edsl/buckets/token_bucket.py +2 -2
  12. edsl/buckets/token_bucket_client.py +5 -3
  13. edsl/caching/cache.py +131 -62
  14. edsl/caching/cache_entry.py +70 -58
  15. edsl/caching/sql_dict.py +17 -0
  16. edsl/cli.py +99 -0
  17. edsl/config/config_class.py +16 -0
  18. edsl/conversation/__init__.py +31 -0
  19. edsl/coop/coop.py +276 -242
  20. edsl/coop/coop_jobs_objects.py +59 -0
  21. edsl/coop/coop_objects.py +29 -0
  22. edsl/coop/coop_regular_objects.py +26 -0
  23. edsl/coop/utils.py +24 -19
  24. edsl/dataset/dataset.py +338 -101
  25. edsl/dataset/dataset_operations_mixin.py +216 -180
  26. edsl/db_list/sqlite_list.py +349 -0
  27. edsl/inference_services/__init__.py +40 -5
  28. edsl/inference_services/exceptions.py +11 -0
  29. edsl/inference_services/services/anthropic_service.py +5 -2
  30. edsl/inference_services/services/aws_bedrock.py +6 -2
  31. edsl/inference_services/services/azure_ai.py +6 -2
  32. edsl/inference_services/services/google_service.py +7 -3
  33. edsl/inference_services/services/mistral_ai_service.py +6 -2
  34. edsl/inference_services/services/open_ai_service.py +6 -2
  35. edsl/inference_services/services/perplexity_service.py +6 -2
  36. edsl/inference_services/services/test_service.py +94 -5
  37. edsl/interviews/answering_function.py +167 -59
  38. edsl/interviews/interview.py +124 -72
  39. edsl/interviews/interview_task_manager.py +10 -0
  40. edsl/interviews/request_token_estimator.py +8 -0
  41. edsl/invigilators/invigilators.py +35 -13
  42. edsl/jobs/async_interview_runner.py +146 -104
  43. edsl/jobs/data_structures.py +6 -4
  44. edsl/jobs/decorators.py +61 -0
  45. edsl/jobs/fetch_invigilator.py +61 -18
  46. edsl/jobs/html_table_job_logger.py +14 -2
  47. edsl/jobs/jobs.py +180 -104
  48. edsl/jobs/jobs_component_constructor.py +2 -2
  49. edsl/jobs/jobs_interview_constructor.py +2 -0
  50. edsl/jobs/jobs_pricing_estimation.py +154 -113
  51. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  52. edsl/jobs/jobs_runner_status.py +30 -25
  53. edsl/jobs/progress_bar_manager.py +79 -0
  54. edsl/jobs/remote_inference.py +35 -1
  55. edsl/key_management/key_lookup_builder.py +6 -1
  56. edsl/language_models/language_model.py +110 -12
  57. edsl/language_models/model.py +10 -3
  58. edsl/language_models/price_manager.py +176 -71
  59. edsl/language_models/registry.py +5 -0
  60. edsl/notebooks/notebook.py +77 -10
  61. edsl/questions/VALIDATION_README.md +134 -0
  62. edsl/questions/__init__.py +24 -1
  63. edsl/questions/exceptions.py +21 -0
  64. edsl/questions/question_dict.py +201 -16
  65. edsl/questions/question_multiple_choice_with_other.py +624 -0
  66. edsl/questions/question_registry.py +2 -1
  67. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  68. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  69. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  70. edsl/questions/validation_analysis.py +185 -0
  71. edsl/questions/validation_cli.py +131 -0
  72. edsl/questions/validation_html_report.py +404 -0
  73. edsl/questions/validation_logger.py +136 -0
  74. edsl/results/result.py +115 -46
  75. edsl/results/results.py +702 -171
  76. edsl/scenarios/construct_download_link.py +16 -3
  77. edsl/scenarios/directory_scanner.py +226 -226
  78. edsl/scenarios/file_methods.py +5 -0
  79. edsl/scenarios/file_store.py +150 -9
  80. edsl/scenarios/handlers/__init__.py +5 -1
  81. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  82. edsl/scenarios/handlers/webm_file_store.py +104 -0
  83. edsl/scenarios/scenario.py +120 -101
  84. edsl/scenarios/scenario_list.py +800 -727
  85. edsl/scenarios/scenario_list_gc_test.py +146 -0
  86. edsl/scenarios/scenario_list_memory_test.py +214 -0
  87. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  88. edsl/scenarios/scenario_selector.py +5 -4
  89. edsl/scenarios/scenario_source.py +1990 -0
  90. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  91. edsl/surveys/survey.py +22 -0
  92. edsl/tasks/__init__.py +4 -2
  93. edsl/tasks/task_history.py +198 -36
  94. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  95. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  96. edsl/utilities/__init__.py +2 -1
  97. edsl/utilities/decorators.py +121 -0
  98. edsl/utilities/memory_debugger.py +1010 -0
  99. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
  100. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
  101. edsl/jobs/jobs_runner_asyncio.py +0 -281
  102. edsl/language_models/unused/fake_openai_service.py +0 -60
  103. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
  104. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
  105. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,185 @@
1
+ """Analyze validation failures to improve fix methods.
2
+
3
+ This module provides tools to analyze validation failures that have been logged
4
+ and suggest improvements to the fix methods for various question types.
5
+ """
6
+
7
+ import collections
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional, Tuple
11
+
12
+ from ..config import CONFIG
13
+ from .validation_logger import get_validation_failure_logs
14
+
15
+
16
def get_validation_failure_stats() -> Dict[str, Dict[str, int]]:
    """
    Tally logged validation failures by question type and by error message.

    Requests up to 1000 entries from ``get_validation_failure_logs`` and
    counts them in a single pass. Entries lacking a ``question_type`` or
    ``error_message`` key are counted under ``"unknown"``.

    Returns:
        A dictionary with two keys:
        ``"by_question_type"`` maps each question type to its failure count;
        ``"by_error_message"`` maps each question type to a dictionary of
        error message -> count.
    """
    entries = get_validation_failure_logs(n=1000)

    per_type = collections.Counter()
    per_type_and_error = collections.defaultdict(collections.Counter)

    for entry in entries:
        q_type = entry.get("question_type", "unknown")
        message = entry.get("error_message", "unknown")
        per_type[q_type] += 1
        per_type_and_error[q_type][message] += 1

    # Plain dicts serialize and print more cleanly than Counter/defaultdict.
    return {
        "by_question_type": dict(per_type),
        "by_error_message": {
            q_type: dict(messages) for q_type, messages in per_type_and_error.items()
        },
    }
45
+
46
+
47
def suggest_fix_improvements(question_type: Optional[str] = None) -> Dict[str, List[Dict]]:
    """
    Analyze validation failures and suggest improvements for fix methods.

    Requests up to 1000 entries from ``get_validation_failure_logs``, groups
    them by question type, and for each type reports the (at most) five most
    frequent non-empty error messages together with a suggestion produced by
    ``_generate_suggestion`` and the ``invalid_data`` of the first log entry
    that showed that error.

    Args:
        question_type: Optional filter for a specific question type. When
            falsy, all question types are analyzed.

    Returns:
        Dictionary mapping each question type to a list of suggestion
        dictionaries with the keys ``error_message``, ``occurrence_count``,
        ``suggestion`` and ``example_data``. Question types without any
        non-empty error message are omitted.
    """
    all_logs = get_validation_failure_logs(n=1000)

    # Filter by question_type if provided
    if question_type:
        all_logs = [log for log in all_logs if log.get("question_type") == question_type]

    # Group by question type
    grouped_logs = collections.defaultdict(list)
    for log in all_logs:
        grouped_logs[log.get("question_type", "unknown")].append(log)

    suggestions = {}

    for qt, qt_logs in grouped_logs.items():
        # One pass: count each error message and remember its first example.
        # Empty/missing messages are dropped here so they cannot occupy one
        # of the top-5 slots and silently reduce the number of suggestions.
        error_counter = collections.Counter()
        first_example = {}
        for log in qt_logs:
            error_msg = log.get("error_message")
            if not error_msg:
                continue
            error_counter[error_msg] += 1
            first_example.setdefault(error_msg, log)

        qt_suggestions = [
            {
                "error_message": error_msg,
                "occurrence_count": count,
                "suggestion": _generate_suggestion(qt, error_msg, first_example[error_msg]),
                "example_data": first_example[error_msg].get("invalid_data"),
            }
            for error_msg, count in error_counter.most_common(5)  # Top 5 errors
        ]

        if qt_suggestions:
            suggestions[qt] = qt_suggestions

    return suggestions
100
+
101
+
102
+ def _generate_suggestion(question_type: str, error_msg: str, example: Optional[Dict]) -> str:
103
+ """
104
+ Generate a suggestion for improving fix methods based on the error pattern.
105
+
106
+ Args:
107
+ question_type: The question type
108
+ error_msg: The error message
109
+ example: An example log entry containing the error
110
+
111
+ Returns:
112
+ A suggestion string for improving the fix method
113
+ """
114
+ # Common validation error patterns and suggestions
115
+ if "missing" in error_msg.lower() and "key" in error_msg.lower():
116
+ return (
117
+ f"The fix method for {question_type} should check for missing keys "
118
+ f"in the answer and add them with default values."
119
+ )
120
+
121
+ if "not a valid" in error_msg.lower() and any(t in error_msg.lower() for t in ["integer", "number", "float"]):
122
+ return (
123
+ f"The fix method for {question_type} should convert string values to the expected "
124
+ f"numeric type (int/float) and handle non-numeric strings."
125
+ )
126
+
127
+ if "must be" in error_msg.lower() and "length" in error_msg.lower():
128
+ return (
129
+ f"The fix method for {question_type} should ensure the answer has the correct length "
130
+ f"requirements, potentially truncating or padding as needed."
131
+ )
132
+
133
+ if "not a valid" in error_msg.lower() and "list" in error_msg.lower():
134
+ return (
135
+ f"The fix method for {question_type} should ensure the answer is a valid list, "
136
+ f"potentially converting single items to lists when needed."
137
+ )
138
+
139
+ if "greater than" in error_msg.lower() or "less than" in error_msg.lower():
140
+ return (
141
+ f"The fix method for {question_type} should enforce value range constraints "
142
+ f"by clamping values to the allowed min/max range."
143
+ )
144
+
145
+ # Default suggestion
146
+ return (
147
+ f"Review the validation failures for {question_type} and update the fix method "
148
+ f"to handle common error patterns more effectively."
149
+ )
150
+
151
+
152
def export_improvements_report(output_path: Optional[Path] = None) -> Path:
    """
    Generate a JSON report with improvement suggestions for fix methods.

    The report combines the output of ``get_validation_failure_stats`` and
    ``suggest_fix_improvements`` (all question types) and is written as
    indented JSON. Missing parent directories of the target file are created.

    Args:
        output_path: Optional custom path for the report. When ``None``, the
            report is written to ``fix_methods_improvements.json`` inside the
            directory named by the ``EDSL_LOG_DIR`` config entry, falling back
            to ``~/.edsl/logs`` if that entry cannot be read.

    Returns:
        Path to the generated report
    """
    if output_path is None:
        default_log_dir = Path.home() / ".edsl" / "logs"
        try:
            report_dir = Path(CONFIG.get("EDSL_LOG_DIR"))
        except Exception:
            # Deliberately broad: whatever CONFIG raises for an undefined
            # EDSL_LOG_DIR (or Path raises for an unusable value), fall back
            # to the default location instead of failing the report.
            report_dir = default_log_dir
        output_path = report_dir / "fix_methods_improvements.json"

    # Build the report before touching the filesystem so a failure while
    # gathering data does not leave empty directories behind.
    report = {
        "validation_failure_stats": get_validation_failure_stats(),
        "fix_method_improvement_suggestions": suggest_fix_improvements(),
    }

    # The log directory may not exist yet (e.g. first run, or a custom path);
    # without this, open() would raise FileNotFoundError.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    return output_path
@@ -0,0 +1,131 @@
1
+ """Command-line interface for validation logging and analysis.
2
+
3
+ This module provides a command-line interface for managing validation logs,
4
+ generating reports, and suggesting improvements to fix methods.
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ from .validation_analysis import (
13
+ export_improvements_report,
14
+ get_validation_failure_stats,
15
+ suggest_fix_improvements,
16
+ )
17
+ from .validation_logger import clear_validation_logs, get_validation_failure_logs
18
+
19
+
20
def _write_or_print(text, destination):
    """Write *text* to the file at *destination*, or print it when none is given."""
    if not destination:
        print(text)
        return
    with open(destination, "w") as handle:
        handle.write(text)


def main():
    """Run the validation CLI.

    Parses the command line into one of the subcommands ``list``, ``clear``,
    ``stats``, ``suggest`` or ``report`` and dispatches to the matching
    logging/analysis function.

    Returns:
        0 after a subcommand has run; 1 when no subcommand was given, in
        which case the help text is printed.
    """
    cli = argparse.ArgumentParser(
        description="Manage and analyze question validation failures"
    )
    commands = cli.add_subparsers(dest="command", help="Command to run")

    # Options shared verbatim by several subcommands.
    type_option = {"type": str, "help": "Filter by question type"}
    output_option = {"type": str, "help": "Output file path (default: stdout)"}

    # list: show recent validation failure logs
    list_cmd = commands.add_parser("list", help="List validation failure logs")
    list_cmd.add_argument(
        "-n", "--count", type=int, default=10,
        help="Number of logs to show (default: 10)"
    )
    list_cmd.add_argument("-t", "--type", **type_option)
    list_cmd.add_argument("-o", "--output", **output_option)

    # clear: remove all validation failure logs
    commands.add_parser("clear", help="Clear validation failure logs")

    # stats: aggregate counts
    stats_cmd = commands.add_parser("stats", help="Show validation failure statistics")
    stats_cmd.add_argument("-o", "--output", **output_option)

    # suggest: fix-method improvement suggestions
    suggest_cmd = commands.add_parser(
        "suggest", help="Suggest improvements for fix methods"
    )
    suggest_cmd.add_argument("-t", "--type", **type_option)
    suggest_cmd.add_argument("-o", "--output", **output_option)

    # report: combined stats + suggestions written to a file
    report_cmd = commands.add_parser(
        "report", help="Generate a comprehensive report"
    )
    report_cmd.add_argument(
        "-o", "--output", type=str,
        help="Output file path (default: ~/.edsl/logs/fix_methods_improvements.json)"
    )

    opts = cli.parse_args()
    chosen = opts.command

    if chosen == "list":
        entries = get_validation_failure_logs(n=opts.count)
        # The type filter is applied to the entries already fetched.
        if opts.type:
            entries = [
                entry for entry in entries
                if entry.get("question_type") == opts.type
            ]
        _write_or_print(json.dumps(entries, indent=2), opts.output)
        return 0

    if chosen == "clear":
        clear_validation_logs()
        print("Validation logs cleared.")
        return 0

    if chosen == "stats":
        rendered = json.dumps(get_validation_failure_stats(), indent=2)
        _write_or_print(rendered, opts.output)
        return 0

    if chosen == "suggest":
        found = suggest_fix_improvements(question_type=opts.type)
        _write_or_print(json.dumps(found, indent=2), opts.output)
        return 0

    if chosen == "report":
        target = Path(opts.output) if opts.output else None
        report_path = export_improvements_report(output_path=target)
        print(f"Report generated at: {report_path}")
        return 0

    # No subcommand supplied: show usage and signal failure.
    cli.print_help()
    return 1
128
+
129
+
130
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())