edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257)
  1. edsl/__init__.py +124 -53
  2. edsl/__version__.py +1 -1
  3. edsl/agents/agent.py +21 -21
  4. edsl/agents/agent_list.py +2 -5
  5. edsl/agents/exceptions.py +119 -5
  6. edsl/base/__init__.py +10 -35
  7. edsl/base/base_class.py +71 -36
  8. edsl/base/base_exception.py +204 -0
  9. edsl/base/data_transfer_models.py +1 -1
  10. edsl/base/exceptions.py +94 -0
  11. edsl/buckets/__init__.py +15 -1
  12. edsl/buckets/bucket_collection.py +3 -4
  13. edsl/buckets/exceptions.py +107 -0
  14. edsl/buckets/model_buckets.py +1 -2
  15. edsl/buckets/token_bucket.py +11 -6
  16. edsl/buckets/token_bucket_api.py +27 -12
  17. edsl/buckets/token_bucket_client.py +9 -7
  18. edsl/caching/cache.py +12 -4
  19. edsl/caching/cache_entry.py +10 -9
  20. edsl/caching/exceptions.py +113 -7
  21. edsl/caching/remote_cache_sync.py +6 -7
  22. edsl/caching/sql_dict.py +20 -14
  23. edsl/cli.py +43 -0
  24. edsl/config/__init__.py +1 -1
  25. edsl/config/config_class.py +32 -6
  26. edsl/conversation/Conversation.py +8 -4
  27. edsl/conversation/car_buying.py +1 -3
  28. edsl/conversation/exceptions.py +58 -0
  29. edsl/conversation/mug_negotiation.py +2 -8
  30. edsl/coop/__init__.py +28 -6
  31. edsl/coop/coop.py +120 -29
  32. edsl/coop/coop_functions.py +1 -1
  33. edsl/coop/ep_key_handling.py +1 -1
  34. edsl/coop/exceptions.py +188 -9
  35. edsl/coop/price_fetcher.py +5 -8
  36. edsl/coop/utils.py +4 -6
  37. edsl/dataset/__init__.py +5 -4
  38. edsl/dataset/dataset.py +177 -86
  39. edsl/dataset/dataset_operations_mixin.py +98 -76
  40. edsl/dataset/dataset_tree.py +11 -7
  41. edsl/dataset/display/table_display.py +0 -2
  42. edsl/dataset/display/table_renderers.py +6 -4
  43. edsl/dataset/exceptions.py +125 -0
  44. edsl/dataset/file_exports.py +18 -11
  45. edsl/dataset/r/ggplot.py +13 -6
  46. edsl/display/__init__.py +27 -0
  47. edsl/display/core.py +147 -0
  48. edsl/display/plugin.py +189 -0
  49. edsl/display/utils.py +52 -0
  50. edsl/inference_services/__init__.py +9 -1
  51. edsl/inference_services/available_model_cache_handler.py +1 -1
  52. edsl/inference_services/available_model_fetcher.py +5 -6
  53. edsl/inference_services/data_structures.py +10 -7
  54. edsl/inference_services/exceptions.py +132 -1
  55. edsl/inference_services/inference_service_abc.py +2 -2
  56. edsl/inference_services/inference_services_collection.py +2 -6
  57. edsl/inference_services/registry.py +4 -3
  58. edsl/inference_services/service_availability.py +4 -3
  59. edsl/inference_services/services/anthropic_service.py +4 -1
  60. edsl/inference_services/services/aws_bedrock.py +13 -12
  61. edsl/inference_services/services/azure_ai.py +12 -10
  62. edsl/inference_services/services/deep_infra_service.py +1 -4
  63. edsl/inference_services/services/deep_seek_service.py +1 -5
  64. edsl/inference_services/services/google_service.py +7 -3
  65. edsl/inference_services/services/groq_service.py +1 -1
  66. edsl/inference_services/services/mistral_ai_service.py +4 -2
  67. edsl/inference_services/services/ollama_service.py +1 -1
  68. edsl/inference_services/services/open_ai_service.py +7 -5
  69. edsl/inference_services/services/perplexity_service.py +6 -2
  70. edsl/inference_services/services/test_service.py +8 -7
  71. edsl/inference_services/services/together_ai_service.py +2 -3
  72. edsl/inference_services/services/xai_service.py +1 -1
  73. edsl/instructions/__init__.py +1 -1
  74. edsl/instructions/change_instruction.py +7 -5
  75. edsl/instructions/exceptions.py +61 -0
  76. edsl/instructions/instruction.py +6 -2
  77. edsl/instructions/instruction_collection.py +6 -4
  78. edsl/instructions/instruction_handler.py +12 -15
  79. edsl/interviews/ReportErrors.py +0 -3
  80. edsl/interviews/__init__.py +9 -2
  81. edsl/interviews/answering_function.py +11 -13
  82. edsl/interviews/exception_tracking.py +15 -8
  83. edsl/interviews/exceptions.py +79 -0
  84. edsl/interviews/interview.py +33 -30
  85. edsl/interviews/interview_status_dictionary.py +4 -2
  86. edsl/interviews/interview_status_log.py +2 -1
  87. edsl/interviews/interview_task_manager.py +5 -5
  88. edsl/interviews/request_token_estimator.py +5 -2
  89. edsl/interviews/statistics.py +3 -4
  90. edsl/invigilators/__init__.py +7 -1
  91. edsl/invigilators/exceptions.py +79 -0
  92. edsl/invigilators/invigilator_base.py +0 -1
  93. edsl/invigilators/invigilators.py +9 -13
  94. edsl/invigilators/prompt_constructor.py +1 -5
  95. edsl/invigilators/prompt_helpers.py +8 -4
  96. edsl/invigilators/question_instructions_prompt_builder.py +1 -1
  97. edsl/invigilators/question_option_processor.py +9 -5
  98. edsl/invigilators/question_template_replacements_builder.py +3 -2
  99. edsl/jobs/__init__.py +42 -5
  100. edsl/jobs/async_interview_runner.py +25 -23
  101. edsl/jobs/check_survey_scenario_compatibility.py +11 -10
  102. edsl/jobs/data_structures.py +8 -5
  103. edsl/jobs/exceptions.py +177 -8
  104. edsl/jobs/fetch_invigilator.py +1 -1
  105. edsl/jobs/jobs.py +74 -69
  106. edsl/jobs/jobs_checks.py +6 -7
  107. edsl/jobs/jobs_component_constructor.py +4 -4
  108. edsl/jobs/jobs_pricing_estimation.py +4 -3
  109. edsl/jobs/jobs_remote_inference_logger.py +5 -4
  110. edsl/jobs/jobs_runner_asyncio.py +3 -4
  111. edsl/jobs/jobs_runner_status.py +8 -9
  112. edsl/jobs/remote_inference.py +27 -24
  113. edsl/jobs/results_exceptions_handler.py +10 -7
  114. edsl/key_management/__init__.py +3 -1
  115. edsl/key_management/exceptions.py +62 -0
  116. edsl/key_management/key_lookup.py +1 -1
  117. edsl/key_management/key_lookup_builder.py +37 -14
  118. edsl/key_management/key_lookup_collection.py +2 -0
  119. edsl/language_models/__init__.py +1 -1
  120. edsl/language_models/exceptions.py +302 -14
  121. edsl/language_models/language_model.py +9 -8
  122. edsl/language_models/model.py +4 -4
  123. edsl/language_models/model_list.py +1 -1
  124. edsl/language_models/price_manager.py +1 -1
  125. edsl/language_models/raw_response_handler.py +14 -9
  126. edsl/language_models/registry.py +17 -21
  127. edsl/language_models/repair.py +0 -6
  128. edsl/language_models/unused/fake_openai_service.py +0 -1
  129. edsl/load_plugins.py +69 -0
  130. edsl/logger.py +146 -0
  131. edsl/notebooks/__init__.py +24 -1
  132. edsl/notebooks/exceptions.py +82 -0
  133. edsl/notebooks/notebook.py +7 -3
  134. edsl/notebooks/notebook_to_latex.py +1 -2
  135. edsl/plugins/__init__.py +63 -0
  136. edsl/plugins/built_in/export_example.py +50 -0
  137. edsl/plugins/built_in/pig_latin.py +67 -0
  138. edsl/plugins/cli.py +372 -0
  139. edsl/plugins/cli_typer.py +283 -0
  140. edsl/plugins/exceptions.py +31 -0
  141. edsl/plugins/hookspec.py +51 -0
  142. edsl/plugins/plugin_host.py +128 -0
  143. edsl/plugins/plugin_manager.py +633 -0
  144. edsl/plugins/plugins_registry.py +168 -0
  145. edsl/prompts/__init__.py +24 -1
  146. edsl/prompts/exceptions.py +107 -5
  147. edsl/prompts/prompt.py +15 -7
  148. edsl/questions/HTMLQuestion.py +5 -11
  149. edsl/questions/Quick.py +0 -1
  150. edsl/questions/__init__.py +6 -4
  151. edsl/questions/answer_validator_mixin.py +318 -323
  152. edsl/questions/compose_questions.py +3 -3
  153. edsl/questions/descriptors.py +11 -50
  154. edsl/questions/exceptions.py +278 -22
  155. edsl/questions/loop_processor.py +7 -5
  156. edsl/questions/prompt_templates/question_list.jinja +3 -0
  157. edsl/questions/question_base.py +46 -19
  158. edsl/questions/question_base_gen_mixin.py +2 -2
  159. edsl/questions/question_base_prompts_mixin.py +13 -7
  160. edsl/questions/question_budget.py +503 -98
  161. edsl/questions/question_check_box.py +660 -160
  162. edsl/questions/question_dict.py +345 -194
  163. edsl/questions/question_extract.py +401 -61
  164. edsl/questions/question_free_text.py +80 -14
  165. edsl/questions/question_functional.py +119 -9
  166. edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
  167. edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
  168. edsl/questions/question_list.py +275 -28
  169. edsl/questions/question_matrix.py +643 -96
  170. edsl/questions/question_multiple_choice.py +219 -51
  171. edsl/questions/question_numerical.py +361 -32
  172. edsl/questions/question_rank.py +401 -124
  173. edsl/questions/question_registry.py +7 -5
  174. edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
  175. edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
  176. edsl/questions/register_questions_meta.py +2 -2
  177. edsl/questions/response_validator_abc.py +13 -15
  178. edsl/questions/response_validator_factory.py +10 -12
  179. edsl/questions/templates/dict/answering_instructions.jinja +1 -0
  180. edsl/questions/templates/rank/question_presentation.jinja +1 -1
  181. edsl/results/__init__.py +1 -1
  182. edsl/results/exceptions.py +141 -7
  183. edsl/results/report.py +1 -2
  184. edsl/results/result.py +11 -9
  185. edsl/results/results.py +480 -321
  186. edsl/results/results_selector.py +8 -4
  187. edsl/scenarios/PdfExtractor.py +2 -2
  188. edsl/scenarios/construct_download_link.py +69 -35
  189. edsl/scenarios/directory_scanner.py +33 -14
  190. edsl/scenarios/document_chunker.py +1 -1
  191. edsl/scenarios/exceptions.py +238 -14
  192. edsl/scenarios/file_methods.py +1 -1
  193. edsl/scenarios/file_store.py +7 -3
  194. edsl/scenarios/handlers/__init__.py +17 -0
  195. edsl/scenarios/handlers/docx_file_store.py +0 -5
  196. edsl/scenarios/handlers/pdf_file_store.py +0 -1
  197. edsl/scenarios/handlers/pptx_file_store.py +0 -5
  198. edsl/scenarios/handlers/py_file_store.py +0 -1
  199. edsl/scenarios/handlers/sql_file_store.py +1 -4
  200. edsl/scenarios/handlers/sqlite_file_store.py +0 -1
  201. edsl/scenarios/handlers/txt_file_store.py +1 -1
  202. edsl/scenarios/scenario.py +1 -3
  203. edsl/scenarios/scenario_list.py +179 -27
  204. edsl/scenarios/scenario_list_pdf_tools.py +1 -0
  205. edsl/scenarios/scenario_selector.py +0 -1
  206. edsl/surveys/__init__.py +3 -4
  207. edsl/surveys/dag/__init__.py +4 -2
  208. edsl/surveys/descriptors.py +1 -1
  209. edsl/surveys/edit_survey.py +1 -0
  210. edsl/surveys/exceptions.py +165 -9
  211. edsl/surveys/memory/__init__.py +5 -3
  212. edsl/surveys/memory/memory_management.py +1 -0
  213. edsl/surveys/memory/memory_plan.py +6 -15
  214. edsl/surveys/rules/__init__.py +5 -3
  215. edsl/surveys/rules/rule.py +1 -2
  216. edsl/surveys/rules/rule_collection.py +1 -1
  217. edsl/surveys/survey.py +12 -24
  218. edsl/surveys/survey_css.py +3 -3
  219. edsl/surveys/survey_export.py +6 -3
  220. edsl/surveys/survey_flow_visualization.py +10 -1
  221. edsl/surveys/survey_simulator.py +2 -1
  222. edsl/tasks/__init__.py +23 -1
  223. edsl/tasks/exceptions.py +72 -0
  224. edsl/tasks/question_task_creator.py +3 -3
  225. edsl/tasks/task_creators.py +1 -3
  226. edsl/tasks/task_history.py +8 -10
  227. edsl/tasks/task_status_log.py +1 -2
  228. edsl/tokens/__init__.py +29 -1
  229. edsl/tokens/exceptions.py +37 -0
  230. edsl/tokens/interview_token_usage.py +3 -2
  231. edsl/tokens/token_usage.py +4 -3
  232. edsl/utilities/__init__.py +21 -1
  233. edsl/utilities/decorators.py +1 -2
  234. edsl/utilities/markdown_to_docx.py +2 -2
  235. edsl/utilities/markdown_to_pdf.py +1 -1
  236. edsl/utilities/repair_functions.py +0 -1
  237. edsl/utilities/restricted_python.py +0 -1
  238. edsl/utilities/template_loader.py +2 -3
  239. edsl/utilities/utilities.py +8 -29
  240. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
  241. edsl-0.1.51.dist-info/RECORD +365 -0
  242. edsl-0.1.51.dist-info/entry_points.txt +3 -0
  243. edsl/dataset/smart_objects.py +0 -96
  244. edsl/exceptions/BaseException.py +0 -21
  245. edsl/exceptions/__init__.py +0 -54
  246. edsl/exceptions/configuration.py +0 -16
  247. edsl/exceptions/general.py +0 -34
  248. edsl/questions/derived/__init__.py +0 -0
  249. edsl/study/ObjectEntry.py +0 -173
  250. edsl/study/ProofOfWork.py +0 -113
  251. edsl/study/SnapShot.py +0 -80
  252. edsl/study/Study.py +0 -520
  253. edsl/study/__init__.py +0 -6
  254. edsl/utilities/interface.py +0 -135
  255. edsl-0.1.49.dist-info/RECORD +0 -347
  256. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
  257. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0
@@ -1,9 +1,26 @@
1
+ """
2
+ question_matrix.py
3
+
4
+ Module implementing the matrix question type with Pydantic validation
5
+ """
6
+
1
7
  from __future__ import annotations
2
- from typing import Union, Optional, Dict, List, Any
8
+ from typing import (
9
+ Union,
10
+ Optional,
11
+ Dict,
12
+ List,
13
+ Any,
14
+ Type,
15
+ Literal
16
+ )
17
+ import random
18
+ import json
19
+ import re
3
20
 
4
- from pydantic import BaseModel, Field, field_validator
21
+ from pydantic import BaseModel, Field, create_model, ValidationError, model_validator
5
22
  from jinja2 import Template
6
- import random
23
+
7
24
  from .question_base import QuestionBase
8
25
  from .descriptors import (
9
26
  QuestionOptionsDescriptor,
@@ -14,56 +31,201 @@ from .response_validator_abc import ResponseValidatorABC
14
31
  from .decorators import inject_exception
15
32
 
16
33
  from .exceptions import (
17
- QuestionAnswerValidationError,
18
34
  QuestionCreationValidationError,
35
+ QuestionAnswerValidationError,
19
36
  )
20
37
 
21
38
 
39
+ class MatrixResponseBase(BaseModel):
40
+ """
41
+ Base model for matrix question responses.
42
+
43
+ Attributes:
44
+ answer: A dictionary mapping each item to a selected option
45
+ comment: Optional comment about the selections
46
+ generated_tokens: Optional token usage data
47
+
48
+ Examples:
49
+ >>> # Valid response with two items
50
+ >>> model = MatrixResponseBase(answer={"Item1": 1, "Item2": 2})
51
+ >>> model.answer
52
+ {'Item1': 1, 'Item2': 2}
53
+
54
+ >>> # Valid response with a comment
55
+ >>> model = MatrixResponseBase(
56
+ ... answer={"Item1": "Yes", "Item2": "No"},
57
+ ... comment="This is my reasoning"
58
+ ... )
59
+ >>> model.comment
60
+ 'This is my reasoning'
61
+ """
62
+ answer: Dict[str, Any]
63
+ comment: Optional[str] = None
64
+ generated_tokens: Optional[Any] = None
65
+
66
+
22
67
  def create_matrix_response(
23
68
  question_items: List[str],
24
69
  question_options: List[Union[int, str, float]],
25
70
  permissive: bool = False,
26
- ):
27
- """Create a response model for matrix questions.
28
-
29
- The response model validates that:
30
- 1. All question items are answered
31
- 2. Each answer is from the allowed options
71
+ ) -> Type[BaseModel]:
32
72
  """
33
-
34
- if permissive:
35
-
36
- class MatrixResponse(BaseModel):
37
- answer: Dict[str, Any]
38
- comment: Optional[str] = None
39
- generated_tokens: Optional[Any] = None
40
-
73
+ Create a dynamic Pydantic model for matrix questions with appropriate validation.
74
+
75
+ Args:
76
+ question_items: List of items that need responses
77
+ question_options: List of allowed options for each item
78
+ permissive: If True, allows any values and additional items
79
+
80
+ Returns:
81
+ A Pydantic model class for validating matrix responses
82
+
83
+ Examples:
84
+ >>> # Create a model for a 2x3 matrix
85
+ >>> Model = create_matrix_response(
86
+ ... ["Item1", "Item2"],
87
+ ... [1, 2, 3]
88
+ ... )
89
+ >>> # Valid response
90
+ >>> response = Model(answer={"Item1": 1, "Item2": 2})
91
+ >>> isinstance(response.answer, BaseModel)
92
+ True
93
+ >>> response.answer.Item1
94
+ 1
95
+ >>> response.answer.Item2
96
+ 2
97
+
98
+ >>> # Invalid: missing an item
99
+ >>> try:
100
+ ... Model(answer={"Item1": 1})
101
+ ... except Exception:
102
+ ... print("Validation error occurred")
103
+ Validation error occurred
104
+
105
+ >>> # Invalid: invalid option value
106
+ >>> try:
107
+ ... Model(answer={"Item1": 4, "Item2": 2})
108
+ ... except Exception:
109
+ ... print("Validation error occurred")
110
+ Validation error occurred
111
+ """
112
+ # Convert question_options to a tuple for Literal type
113
+ option_tuple = tuple(question_options)
114
+
115
+ # If non-permissive, build a Literal for each valid option
116
+ # e.g. Literal[1,2,3] or Literal["Yes","No"] or a mix
117
+ if not permissive:
118
+ # If question_options is empty (edge case), fall back to 'Any'
119
+ if question_options:
120
+ AllowedOptions = Literal[option_tuple] # type: ignore
121
+ else:
122
+ AllowedOptions = Any
41
123
  else:
42
-
43
- class MatrixResponse(BaseModel):
44
- answer: Dict[str, Union[int, str, float]] = Field(
45
- ..., description="Mapping of items to selected options"
46
- )
47
- comment: Optional[str] = None
48
- generated_tokens: Optional[Any] = None
49
-
50
- @field_validator("answer")
51
- def validate_answer(cls, v, values, **kwargs):
52
- # Check that all items have responses
53
- if not all(item in v for item in question_items):
54
- missing = set(question_items) - set(v.keys())
55
- raise ValueError(f"Missing responses for items: {missing}")
56
-
57
- # Check that all responses are valid options
58
- if not all(answer in question_options for answer in v.values()):
59
- invalid = [ans for ans in v.values() if ans not in question_options]
60
- raise ValueError(f"Invalid options selected: {invalid}")
61
- return v
124
+ # Permissive => let each item be anything
125
+ AllowedOptions = Any
126
+
127
+ # Build field definitions for the answer submodel
128
+ field_definitions = {}
129
+ for item in question_items:
130
+ field_definitions[item] = (AllowedOptions, Field(...)) # required field
131
+
132
+ # Dynamically create the submodel
133
+ MatrixAnswerSubModel = create_model(
134
+ "MatrixAnswerSubModel",
135
+ __base__=BaseModel,
136
+ **field_definitions
137
+ )
138
+
139
+ # Create the full response model with custom validation
140
+ class MatrixResponse(MatrixResponseBase):
141
+ """
142
+ Model for matrix question responses with validation for specific items and options.
143
+ """
144
+ answer: MatrixAnswerSubModel # Use the dynamically created submodel
145
+
146
+ @model_validator(mode='after')
147
+ def validate_matrix_constraints(self):
148
+ """
149
+ Validates that:
150
+ 1. All required items have responses
151
+ 2. All responses are valid options
152
+ 3. No unexpected items are included (unless permissive)
153
+ """
154
+ matrix_answer = self.answer.model_dump()
155
+
156
+ # Check that all required items have responses
157
+ missing_items = [item for item in question_items if item not in matrix_answer]
158
+ if missing_items and not permissive:
159
+ missing_str = ", ".join(missing_items)
160
+ validation_error = ValidationError.from_exception_data(
161
+ title='MatrixResponse',
162
+ line_errors=[{
163
+ 'type': 'value_error',
164
+ 'loc': ('answer',),
165
+ 'msg': f'Missing responses for items: {missing_str}',
166
+ 'input': matrix_answer,
167
+ 'ctx': {'missing_items': missing_items}
168
+ }]
169
+ )
170
+ raise QuestionAnswerValidationError(
171
+ message=f"Missing responses for items: {missing_str}",
172
+ data=self.model_dump(),
173
+ model=self.__class__,
174
+ pydantic_error=validation_error
175
+ )
176
+
177
+ # Check that all responses are valid options
178
+ if not permissive:
179
+ invalid_items = {}
180
+ for item, value in matrix_answer.items():
181
+ if value not in option_tuple:
182
+ invalid_items[item] = value
183
+
184
+ if invalid_items:
185
+ items_str = ", ".join(f"{k}: {v}" for k, v in invalid_items.items())
186
+ validation_error = ValidationError.from_exception_data(
187
+ title='MatrixResponse',
188
+ line_errors=[{
189
+ 'type': 'value_error',
190
+ 'loc': ('answer',),
191
+ 'msg': f'Invalid options selected: {items_str}',
192
+ 'input': matrix_answer,
193
+ 'ctx': {'invalid_items': invalid_items, 'allowed_options': option_tuple}
194
+ }]
195
+ )
196
+ raise QuestionAnswerValidationError(
197
+ message=f"Invalid options selected: {items_str}. Allowed options: {option_tuple}",
198
+ data=self.model_dump(),
199
+ model=self.__class__,
200
+ pydantic_error=validation_error
201
+ )
202
+
203
+ return self
204
+
205
+ class Config:
206
+ # If permissive=True, allow extra fields in the answer dict
207
+ extra = "allow" if permissive else "forbid"
208
+
209
+ @staticmethod
210
+ def json_schema_extra(schema: dict, model: BaseModel) -> None:
211
+ # Add the options to the schema for better documentation
212
+ if "properties" in schema and "answer" in schema["properties"]:
213
+ schema["properties"]["answer"]["description"] = "Matrix responses for each item"
214
+ if "properties" in schema["properties"]["answer"]:
215
+ for _, prop in schema["properties"]["answer"]["properties"].items():
216
+ prop["enum"] = list(question_options)
62
217
 
63
218
  return MatrixResponse
64
219
 
65
220
 
66
221
  class MatrixResponseValidator(ResponseValidatorABC):
222
+ """
223
+ Validator for matrix question responses that attempts to fix invalid responses.
224
+
225
+ This validator tries multiple approaches to recover valid matrix responses from
226
+ malformed inputs, including JSON parsing, remapping numeric keys, and extracting
227
+ structured data from text.
228
+ """
67
229
  required_params = ["question_items", "question_options", "permissive"]
68
230
 
69
231
  valid_examples = [
@@ -72,8 +234,17 @@ class MatrixResponseValidator(ResponseValidatorABC):
72
234
  {
73
235
  "question_items": ["Item1", "Item2"],
74
236
  "question_options": [1, 2, 3],
237
+ "permissive": False
75
238
  },
76
- )
239
+ ),
240
+ (
241
+ {"answer": {"Item1": "Yes", "Item2": "No"}},
242
+ {
243
+ "question_items": ["Item1", "Item2"],
244
+ "question_options": ["Yes", "No", "Maybe"],
245
+ "permissive": False
246
+ },
247
+ ),
77
248
  ]
78
249
 
79
250
  invalid_examples = [
@@ -82,57 +253,415 @@ class MatrixResponseValidator(ResponseValidatorABC):
82
253
  {
83
254
  "question_items": ["Item1", "Item2"],
84
255
  "question_options": [1, 2, 3],
256
+ "permissive": False
85
257
  },
86
- "Missing responses for some items",
258
+ "Missing responses for items",
87
259
  ),
88
260
  (
89
261
  {"answer": {"Item1": 4, "Item2": 5}},
90
262
  {
91
263
  "question_items": ["Item1", "Item2"],
92
264
  "question_options": [1, 2, 3],
265
+ "permissive": False
93
266
  },
94
267
  "Invalid options selected",
95
268
  ),
96
269
  ]
97
270
 
98
271
  def fix(self, response, verbose=False):
272
+ """
273
+ Attempts to fix an invalid matrix response by trying multiple parsing strategies.
274
+
275
+ Args:
276
+ response: The invalid response to fix
277
+ verbose: Whether to print verbose debugging information
278
+
279
+ Returns:
280
+ A fixed response dict if fixable, otherwise the original response
281
+ """
99
282
  if verbose:
100
283
  print(f"Fixing matrix response: {response}")
101
-
102
- # If we have generated tokens, try to parse them
103
- if "generated_tokens" in response:
284
+
285
+ # If response doesn't have an answer field, nothing to do
286
+ if "answer" not in response:
287
+ if verbose:
288
+ print("Response has no answer field, cannot fix")
289
+ return response
290
+
291
+ # Strategy 1: If we have generated_tokens, try to parse them as JSON
292
+ if "generated_tokens" in response and response["generated_tokens"]:
104
293
  try:
105
- import json
106
-
107
- fixed = json.loads(response["generated_tokens"])
108
- if isinstance(fixed, dict):
109
- # Map numeric keys to question items
110
- mapped_answer = {}
111
- for idx, item in enumerate(self.question_items):
112
- if str(idx) in fixed:
113
- mapped_answer[item] = fixed[str(idx)]
114
- if (
115
- mapped_answer
116
- ): # Only return if we successfully mapped some answers
117
- return {"answer": mapped_answer}
118
- except:
119
- pass
120
-
121
- # If answer uses numeric keys, map them to question items
122
- if "answer" in response and isinstance(response["answer"], dict):
123
- if all(str(key).isdigit() for key in response["answer"].keys()):
294
+ # Try to parse generated_tokens as JSON
295
+ tokens_text = str(response["generated_tokens"])
296
+ json_match = re.search(r'\{.*\}', tokens_text, re.DOTALL)
297
+
298
+ if json_match:
299
+ json_str = json_match.group(0)
300
+ fixed = json.loads(json_str)
301
+
302
+ if isinstance(fixed, dict):
303
+ # Map numeric keys to question items if needed
304
+ if all(str(k).isdigit() for k in fixed.keys()):
305
+ if verbose:
306
+ print(f"JSON extraction found numeric keys: {fixed}")
307
+ print(f"Question items: {self.question_items}")
308
+ print(f"Question options: {self.question_options}")
309
+
310
+ # Special handling for case when numeric keys directly represent option indices
311
+ # This is the case we're trying to fix: {"0": 1, "1": 3, "2": 0} maps to options at those indices
312
+ direct_mapped_answer = {}
313
+ if verbose:
314
+ print(f"Attempting to map numeric key/value format in JSON: {fixed}")
315
+
316
+ for idx, item in enumerate(self.question_items):
317
+ if str(idx) in fixed:
318
+ # Get the option index directly from the value
319
+ option_idx = fixed[str(idx)]
320
+
321
+ # Convert to int if needed
322
+ if isinstance(option_idx, str) and option_idx.isdigit():
323
+ option_idx = int(option_idx)
324
+
325
+ if verbose:
326
+ print(f"Item {item} at index {idx} maps to value {option_idx}")
327
+
328
+ if isinstance(option_idx, (int, float)) and 0 <= option_idx < len(self.question_options):
329
+ direct_mapped_answer[item] = self.question_options[option_idx]
330
+ if verbose:
331
+ print(f"Mapped option_idx {option_idx} to {self.question_options[option_idx]}")
332
+
333
+ if direct_mapped_answer and len(direct_mapped_answer) == len(self.question_items):
334
+ proposed_data = {
335
+ "answer": direct_mapped_answer,
336
+ "comment": response.get("comment"),
337
+ "generated_tokens": response.get("generated_tokens")
338
+ }
339
+ if verbose:
340
+ print(f"Created direct option mapping from JSON: {proposed_data}")
341
+ try:
342
+ self.response_model(**proposed_data)
343
+ if verbose:
344
+ print(f"Successfully fixed with direct option mapping from JSON: {proposed_data}")
345
+ return proposed_data
346
+ except Exception as e:
347
+ if verbose:
348
+ print(f"Direct option mapping from JSON failed validation: {e}")
349
+
350
+ # Try the standard approach as well
351
+ mapped_answer = {}
352
+ for idx, item in enumerate(self.question_items):
353
+ if str(idx) in fixed:
354
+ # Get the value (column index) from the response
355
+ value_idx = fixed[str(idx)]
356
+
357
+ # Convert to int if it's a digit string
358
+ if isinstance(value_idx, str) and value_idx.isdigit():
359
+ value_idx = int(value_idx)
360
+
361
+ # Convert column index to actual option value
362
+ if isinstance(value_idx, (int, float)) and 0 <= value_idx < len(self.question_options):
363
+ option_value = self.question_options[value_idx]
364
+ mapped_answer[item] = option_value
365
+ else:
366
+ # If the value is already a valid option, use it directly
367
+ if value_idx in self.question_options:
368
+ mapped_answer[item] = value_idx
369
+ else:
370
+ # Last resort - try to use it as a direct value even if not in options
371
+ mapped_answer[item] = value_idx
372
+
373
+ if mapped_answer and (len(mapped_answer) == len(self.question_items) or self.permissive):
374
+ proposed_data = {
375
+ "answer": mapped_answer,
376
+ "comment": response.get("comment"),
377
+ "generated_tokens": response.get("generated_tokens")
378
+ }
379
+ try:
380
+ # Validate the fixed response
381
+ self.response_model(**proposed_data)
382
+ if verbose:
383
+ print(f"Successfully fixed by parsing JSON: {proposed_data}")
384
+ return proposed_data
385
+ except Exception as e:
386
+ if verbose:
387
+ print(f"Fixed response failed validation: {e}")
388
+
389
+ # Try again with string values for all options
390
+ text_mapped_answer = {}
391
+ for item_name, option_value in mapped_answer.items():
392
+ text_mapped_answer[item_name] = str(option_value)
393
+
394
+ proposed_data = {
395
+ "answer": text_mapped_answer,
396
+ "comment": response.get("comment"),
397
+ "generated_tokens": response.get("generated_tokens")
398
+ }
399
+ try:
400
+ self.response_model(**proposed_data)
401
+ if verbose:
402
+ print(f"Successfully fixed with string conversion from JSON: {proposed_data}")
403
+ return proposed_data
404
+ except Exception as e:
405
+ if verbose:
406
+ print(f"String conversion from JSON failed validation: {e}")
407
+ else:
408
+ # The JSON already has string keys, use directly
409
+ proposed_data = {
410
+ "answer": fixed,
411
+ "comment": response.get("comment"),
412
+ "generated_tokens": response.get("generated_tokens")
413
+ }
414
+ try:
415
+ self.response_model(**proposed_data)
416
+ if verbose:
417
+ print(f"Successfully fixed by direct JSON: {proposed_data}")
418
+ return proposed_data
419
+ except Exception as e:
420
+ if verbose:
421
+ print(f"Fixed response failed validation: {e}")
422
+
423
+ # If validation failed, check if we need to map string keys to item names
424
+ # This handles cases where the model responded with something like {"Row 0": 1, "Row 1": 2}
425
+ # instead of using the exact item names
426
+ item_map = {}
427
+ for item in self.question_items:
428
+ # Create various forms of the item name that might appear in responses
429
+ item_variants = [
430
+ item.lower(),
431
+ item.upper(),
432
+ item.strip(),
433
+ f"Row {self.question_items.index(item)}",
434
+ f"Item {self.question_items.index(item)}",
435
+ f"{self.question_items.index(item)}"
436
+ ]
437
+ for key in fixed.keys():
438
+ if isinstance(key, str):
439
+ key_lower = key.lower().strip()
440
+ if key_lower in item_variants or item.lower() in key_lower:
441
+ item_map[key] = item
442
+
443
+ if item_map:
444
+ mapped_answer = {}
445
+ for key, value in fixed.items():
446
+ if key in item_map:
447
+ # Handle both numeric indices and direct values
448
+ if isinstance(value, (int, float)) and 0 <= value < len(self.question_options):
449
+ mapped_answer[item_map[key]] = self.question_options[value]
450
+ else:
451
+ mapped_answer[item_map[key]] = value
452
+
453
+ if mapped_answer:
454
+ proposed_data = {
455
+ "answer": mapped_answer,
456
+ "comment": response.get("comment"),
457
+ "generated_tokens": response.get("generated_tokens")
458
+ }
459
+ try:
460
+ self.response_model(**proposed_data)
461
+ if verbose:
462
+ print(f"Successfully fixed by mapping item names: {proposed_data}")
463
+ return proposed_data
464
+ except Exception as e:
465
+ if verbose:
466
+ print(f"Item-mapped response failed validation: {e}")
467
+ except (ValueError, KeyError, TypeError, json.JSONDecodeError) as e:
468
+ if verbose:
469
+ print(f"JSON parsing failed: {e}")
470
+ # Continue to other strategies
471
+
472
+ # Strategy 2: If answer uses numeric keys, map them to question items
473
+ if isinstance(response.get("answer"), dict):
474
+ answer_dict = response["answer"]
475
+
476
+ if all(str(k).isdigit() for k in answer_dict.keys()):
477
+ if verbose:
478
+ print(f"Processing answer with numeric keys: {answer_dict}")
479
+ print(f"Question items: {self.question_items}")
480
+ print(f"Question options: {self.question_options}")
481
+
124
482
  mapped_answer = {}
125
483
  for idx, item in enumerate(self.question_items):
126
- if str(idx) in response["answer"]:
127
- mapped_answer[item] = response["answer"][str(idx)]
128
- if mapped_answer: # Only update if we successfully mapped some answers
129
- response["answer"] = mapped_answer
130
-
484
+ if str(idx) in answer_dict:
485
+ # Get the value (column index) from the response
486
+ value_idx = answer_dict[str(idx)]
487
+
488
+ # Convert to int if it's a digit string
489
+ if isinstance(value_idx, str) and value_idx.isdigit():
490
+ value_idx = int(value_idx)
491
+
492
+ if verbose:
493
+ print(f"Processing item {item} at index {idx}, value_idx={value_idx}")
494
+
495
+ # Convert column index to actual option value
496
+ if isinstance(value_idx, (int, float)) and 0 <= value_idx < len(self.question_options):
497
+ option_value = self.question_options[value_idx]
498
+ mapped_answer[item] = option_value
499
+ if verbose:
500
+ print(f"Mapped column index {value_idx} to option '{option_value}'")
501
+ else:
502
+ # If the value is already a valid option, use it directly
503
+ if value_idx in self.question_options:
504
+ mapped_answer[item] = value_idx
505
+ if verbose:
506
+ print(f"Used direct option value '{value_idx}'")
507
+ else:
508
+ # Last resort - try to use it as a direct value even if not in options
509
+ # (this helps with permissive mode)
510
+ mapped_answer[item] = value_idx
511
+ if verbose:
512
+ print(f"Used non-option value '{value_idx}' as direct value")
513
+
514
+ if mapped_answer and len(mapped_answer) == len(self.question_items):
515
+ if verbose:
516
+ print(f"Created complete mapped answer: {mapped_answer}")
517
+
518
+ proposed_data = {
519
+ "answer": mapped_answer,
520
+ "comment": response.get("comment"),
521
+ "generated_tokens": response.get("generated_tokens")
522
+ }
523
+ try:
524
+ self.response_model(**proposed_data)
525
+ if verbose:
526
+ print(f"Successfully fixed by mapping numeric keys: {proposed_data}")
527
+ return proposed_data
528
+ except Exception as e:
529
+ if verbose:
530
+ print(f"Fixed response failed validation: {e}")
531
+
532
+ # Try again with string values for the options
533
+ text_mapped_answer = {}
534
+ for item_name, option_value in mapped_answer.items():
535
+ text_mapped_answer[item_name] = str(option_value)
536
+
537
+ proposed_data = {
538
+ "answer": text_mapped_answer,
539
+ "comment": response.get("comment"),
540
+ "generated_tokens": response.get("generated_tokens")
541
+ }
542
+ try:
543
+ self.response_model(**proposed_data)
544
+ if verbose:
545
+ print(f"Successfully fixed with string conversion: {proposed_data}")
546
+ return proposed_data
547
+ except Exception as e:
548
+ if verbose:
549
+ print(f"String conversion failed validation: {e}")
550
+
551
+ # Special handling for case when numeric keys directly represent option indices
552
+ # This is the case we're trying to fix: {"0": 1, "1": 3, "2": 0} maps to options at those indices
553
+ direct_mapped_answer = {}
554
+ if verbose:
555
+ print(f"Attempting to map numeric key/value format in answer: {answer_dict}")
556
+
557
+ for idx, item in enumerate(self.question_items):
558
+ if str(idx) in answer_dict:
559
+ # Get the option index directly from the value
560
+ option_idx = answer_dict[str(idx)]
561
+
562
+ # Convert to int if needed
563
+ if isinstance(option_idx, str) and option_idx.isdigit():
564
+ option_idx = int(option_idx)
565
+
566
+ if verbose:
567
+ print(f"Item {item} at index {idx} maps to value {option_idx}")
568
+
569
+ if isinstance(option_idx, (int, float)) and 0 <= option_idx < len(self.question_options):
570
+ direct_mapped_answer[item] = self.question_options[option_idx]
571
+ if verbose:
572
+ print(f"Mapped option_idx {option_idx} to {self.question_options[option_idx]}")
573
+
574
+ if direct_mapped_answer and len(direct_mapped_answer) == len(self.question_items):
575
+ proposed_data = {
576
+ "answer": direct_mapped_answer,
577
+ "comment": response.get("comment"),
578
+ "generated_tokens": response.get("generated_tokens")
579
+ }
580
+ if verbose:
581
+ print(f"Created direct option mapping: {proposed_data}")
582
+ try:
583
+ self.response_model(**proposed_data)
584
+ if verbose:
585
+ print(f"Successfully fixed with direct option mapping: {proposed_data}")
586
+ return proposed_data
587
+ except Exception as e:
588
+ if verbose:
589
+ print(f"Direct option mapping failed validation: {e}")
590
+
591
+ # Strategy 3: If answer is a string, try to extract a structured response
592
+ if isinstance(response.get("answer"), str):
593
+ answer_text = response["answer"]
594
+
595
+ # Try to extract item-option pairs using regex
596
+ pairs = re.findall(r'([^:,]+):\s*([^,]+)', answer_text)
597
+ if pairs:
598
+ extracted = {}
599
+ for item, option in pairs:
600
+ item = item.strip()
601
+ option = option.strip()
602
+
603
+ # Match the item name with the closest question item
604
+ best_match = None
605
+ for q_item in self.question_items:
606
+ if q_item.lower() in item.lower():
607
+ best_match = q_item
608
+ break
609
+
610
+ if best_match:
611
+ # Try to match the option with question options
612
+ matched_option = None
613
+ for q_option in self.question_options:
614
+ q_option_str = str(q_option)
615
+ if q_option_str == option or q_option_str in option:
616
+ matched_option = q_option
617
+ break
618
+
619
+ if matched_option is not None:
620
+ extracted[best_match] = matched_option
621
+
622
+ if extracted and (len(extracted) == len(self.question_items) or self.permissive):
623
+ proposed_data = {
624
+ "answer": extracted,
625
+ "comment": response.get("comment"),
626
+ "generated_tokens": response.get("generated_tokens")
627
+ }
628
+ try:
629
+ self.response_model(**proposed_data)
630
+ if verbose:
631
+ print(f"Successfully fixed by extracting pairs: {proposed_data}")
632
+ return proposed_data
633
+ except Exception as e:
634
+ if verbose:
635
+ print(f"Fixed response failed validation: {e}")
636
+
637
+ # If we got here, we couldn't fix the response
638
+ if verbose:
639
+ print("Could not fix matrix response, returning original")
131
640
  return response
132
641
 
133
642
 
134
643
  class QuestionMatrix(QuestionBase):
135
- """A question that presents a matrix/grid where multiple items are rated using the same scale."""
644
+ """
645
+ A question that presents a matrix/grid where multiple items are rated
646
+ or selected from the same set of options.
647
+
648
+ This question type allows respondents to provide an answer for each row
649
+ in a grid, selecting from the same set of options for each row. It's often
650
+ used for Likert scales, ratings grids, or any scenario where multiple items
651
+ need to be rated using the same scale.
652
+
653
+ Examples:
654
+ >>> # Create a happiness rating matrix
655
+ >>> question = QuestionMatrix(
656
+ ... question_name="happiness_matrix",
657
+ ... question_text="Rate your happiness with each aspect:",
658
+ ... question_items=["Work", "Family", "Social life"],
659
+ ... question_options=[1, 2, 3, 4, 5],
660
+ ... option_labels={1: "Very unhappy", 3: "Neutral", 5: "Very happy"}
661
+ ... )
662
+ >>> # The response is a dict matching each item to a rating
663
+ >>> response = {"answer": {"Work": 4, "Family": 5, "Social life": 3}}
664
+ """
136
665
 
137
666
  question_type = "matrix"
138
667
  question_text: str = QuestionTextDescriptor()
@@ -155,18 +684,19 @@ class QuestionMatrix(QuestionBase):
155
684
  question_presentation: Optional[str] = None,
156
685
  permissive: bool = False,
157
686
  ):
158
- """Initialize a matrix question.
687
+ """
688
+ Initialize a matrix question.
159
689
 
160
690
  Args:
161
691
  question_name: The name of the question
162
692
  question_text: The text of the question
163
- question_items: List of items to be rated
164
- question_options: List of rating options
165
- option_labels: Optional mapping of options to their labels
693
+ question_items: List of items to be rated or answered (rows)
694
+ question_options: Possible answer options for each item (columns)
695
+ option_labels: Optional mapping of options to labels (e.g. {1: "Sad", 5: "Happy"})
166
696
  include_comment: Whether to include a comment field
167
- answering_instructions: Optional custom instructions
168
- question_presentation: Optional custom presentation
169
- permissive: Whether to strictly validate responses
697
+ answering_instructions: Custom instructions template
698
+ question_presentation: Custom presentation template
699
+ permissive: Whether to allow any values & extra items instead of strictly checking
170
700
  """
171
701
  self.question_name = question_name
172
702
 
@@ -186,14 +716,42 @@ class QuestionMatrix(QuestionBase):
186
716
  self.question_presentation = question_presentation
187
717
  self.permissive = permissive
188
718
 
189
- def create_response_model(self):
719
+ def create_response_model(self) -> Type[BaseModel]:
720
+ """
721
+ Returns the pydantic model for validating responses to this question.
722
+
723
+ The model is dynamically created based on the question's configuration,
724
+ including allowed items, options, and permissiveness.
725
+ """
190
726
  return create_matrix_response(
191
- self.question_items, self.question_options, self.permissive
727
+ self.question_items,
728
+ self.question_options,
729
+ self.permissive
192
730
  )
193
731
 
732
+ def _simulate_answer(self) -> dict:
733
+ """
734
+ Simulate a random valid answer for testing purposes.
735
+
736
+ Returns:
737
+ A valid simulated response with random selections
738
+ """
739
+ return {
740
+ "answer": {
741
+ item: random.choice(self.question_options)
742
+ for item in self.question_items
743
+ },
744
+ "comment": "Sample matrix response"
745
+ }
746
+
194
747
  @property
195
748
  def question_html_content(self) -> str:
196
- """Generate HTML representation of the matrix question."""
749
+ """
750
+ Generate an HTML representation of the matrix question.
751
+
752
+ Returns:
753
+ HTML content string for rendering the question
754
+ """
197
755
  template = Template(
198
756
  """
199
757
  <table class="matrix-question">
@@ -225,7 +783,6 @@ class QuestionMatrix(QuestionBase):
225
783
  </table>
226
784
  """
227
785
  )
228
-
229
786
  return template.render(
230
787
  question_name=self.question_name,
231
788
  question_items=self.question_items,
@@ -236,7 +793,12 @@ class QuestionMatrix(QuestionBase):
236
793
  @classmethod
237
794
  @inject_exception
238
795
  def example(cls) -> QuestionMatrix:
239
- """Return an example matrix question."""
796
+ """
797
+ Return an example matrix question.
798
+
799
+ Returns:
800
+ An example QuestionMatrix instance for happiness ratings by family size
801
+ """
240
802
  return cls(
241
803
  question_name="child_happiness",
242
804
  question_text="How happy would you be with different numbers of children?",
@@ -248,19 +810,4 @@ class QuestionMatrix(QuestionBase):
248
810
  ],
249
811
  question_options=[1, 2, 3, 4, 5],
250
812
  option_labels={1: "Very sad", 3: "Neutral", 5: "Extremely happy"},
251
- )
252
-
253
- def _simulate_answer(self) -> dict:
254
- """Simulate a random valid answer."""
255
- return {
256
- "answer": {
257
- item: random.choice(self.question_options)
258
- for item in self.question_items
259
- }
260
- }
261
-
262
-
263
- if __name__ == "__main__":
264
- import doctest
265
-
266
- doctest.testmod(optionflags=doctest.ELLIPSIS)
813
+ )