edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. edsl/Base.py +197 -116
  2. edsl/__init__.py +15 -7
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +351 -147
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +101 -50
  7. edsl/agents/InvigilatorBase.py +62 -70
  8. edsl/agents/PromptConstructor.py +143 -225
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  11. edsl/agents/__init__.py +0 -1
  12. edsl/agents/prompt_helpers.py +3 -3
  13. edsl/agents/question_option_processor.py +172 -0
  14. edsl/auto/AutoStudy.py +18 -5
  15. edsl/auto/StageBase.py +53 -40
  16. edsl/auto/StageQuestions.py +2 -1
  17. edsl/auto/utilities.py +0 -6
  18. edsl/config.py +22 -2
  19. edsl/conversation/car_buying.py +2 -1
  20. edsl/coop/CoopFunctionsMixin.py +15 -0
  21. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  22. edsl/coop/PriceFetcher.py +1 -1
  23. edsl/coop/coop.py +125 -47
  24. edsl/coop/utils.py +14 -14
  25. edsl/data/Cache.py +45 -27
  26. edsl/data/CacheEntry.py +12 -15
  27. edsl/data/CacheHandler.py +31 -12
  28. edsl/data/RemoteCacheSync.py +154 -46
  29. edsl/data/__init__.py +4 -3
  30. edsl/data_transfer_models.py +2 -1
  31. edsl/enums.py +27 -0
  32. edsl/exceptions/__init__.py +50 -50
  33. edsl/exceptions/agents.py +12 -0
  34. edsl/exceptions/inference_services.py +5 -0
  35. edsl/exceptions/questions.py +24 -6
  36. edsl/exceptions/scenarios.py +7 -0
  37. edsl/inference_services/AnthropicService.py +38 -19
  38. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  39. edsl/inference_services/AvailableModelFetcher.py +215 -0
  40. edsl/inference_services/AwsBedrock.py +0 -2
  41. edsl/inference_services/AzureAI.py +0 -2
  42. edsl/inference_services/GoogleService.py +7 -12
  43. edsl/inference_services/InferenceServiceABC.py +18 -85
  44. edsl/inference_services/InferenceServicesCollection.py +120 -79
  45. edsl/inference_services/MistralAIService.py +0 -3
  46. edsl/inference_services/OpenAIService.py +47 -35
  47. edsl/inference_services/PerplexityService.py +0 -3
  48. edsl/inference_services/ServiceAvailability.py +135 -0
  49. edsl/inference_services/TestService.py +11 -10
  50. edsl/inference_services/TogetherAIService.py +5 -3
  51. edsl/inference_services/data_structures.py +134 -0
  52. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  53. edsl/jobs/Answers.py +1 -14
  54. edsl/jobs/FetchInvigilator.py +47 -0
  55. edsl/jobs/InterviewTaskManager.py +98 -0
  56. edsl/jobs/InterviewsConstructor.py +50 -0
  57. edsl/jobs/Jobs.py +356 -431
  58. edsl/jobs/JobsChecks.py +35 -10
  59. edsl/jobs/JobsComponentConstructor.py +189 -0
  60. edsl/jobs/JobsPrompts.py +6 -4
  61. edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
  62. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  63. edsl/jobs/RequestTokenEstimator.py +30 -0
  64. edsl/jobs/async_interview_runner.py +138 -0
  65. edsl/jobs/buckets/BucketCollection.py +44 -3
  66. edsl/jobs/buckets/TokenBucket.py +53 -21
  67. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  68. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  69. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  70. edsl/jobs/data_structures.py +120 -0
  71. edsl/jobs/decorators.py +35 -0
  72. edsl/jobs/interviews/Interview.py +143 -408
  73. edsl/jobs/jobs_status_enums.py +9 -0
  74. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  75. edsl/jobs/results_exceptions_handler.py +98 -0
  76. edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
  77. edsl/jobs/runners/JobsRunnerStatus.py +133 -165
  78. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  79. edsl/jobs/tasks/TaskHistory.py +38 -18
  80. edsl/jobs/tasks/task_status_enum.py +0 -2
  81. edsl/language_models/ComputeCost.py +63 -0
  82. edsl/language_models/LanguageModel.py +194 -236
  83. edsl/language_models/ModelList.py +28 -19
  84. edsl/language_models/PriceManager.py +127 -0
  85. edsl/language_models/RawResponseHandler.py +106 -0
  86. edsl/language_models/ServiceDataSources.py +0 -0
  87. edsl/language_models/__init__.py +1 -2
  88. edsl/language_models/key_management/KeyLookup.py +63 -0
  89. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  90. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  91. edsl/language_models/key_management/__init__.py +0 -0
  92. edsl/language_models/key_management/models.py +131 -0
  93. edsl/language_models/model.py +256 -0
  94. edsl/language_models/repair.py +2 -2
  95. edsl/language_models/utilities.py +5 -4
  96. edsl/notebooks/Notebook.py +19 -14
  97. edsl/notebooks/NotebookToLaTeX.py +142 -0
  98. edsl/prompts/Prompt.py +29 -39
  99. edsl/questions/ExceptionExplainer.py +77 -0
  100. edsl/questions/HTMLQuestion.py +103 -0
  101. edsl/questions/QuestionBase.py +68 -214
  102. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  103. edsl/questions/QuestionBudget.py +1 -1
  104. edsl/questions/QuestionCheckBox.py +3 -3
  105. edsl/questions/QuestionExtract.py +5 -7
  106. edsl/questions/QuestionFreeText.py +2 -3
  107. edsl/questions/QuestionList.py +10 -18
  108. edsl/questions/QuestionMatrix.py +265 -0
  109. edsl/questions/QuestionMultipleChoice.py +67 -23
  110. edsl/questions/QuestionNumerical.py +2 -4
  111. edsl/questions/QuestionRank.py +7 -17
  112. edsl/questions/SimpleAskMixin.py +4 -3
  113. edsl/questions/__init__.py +2 -1
  114. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
  115. edsl/questions/data_structures.py +20 -0
  116. edsl/questions/derived/QuestionLinearScale.py +6 -3
  117. edsl/questions/derived/QuestionTopK.py +1 -1
  118. edsl/questions/descriptors.py +17 -3
  119. edsl/questions/loop_processor.py +149 -0
  120. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
  121. edsl/questions/question_registry.py +1 -1
  122. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
  123. edsl/questions/response_validator_factory.py +34 -0
  124. edsl/questions/templates/matrix/__init__.py +1 -0
  125. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  126. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  127. edsl/results/CSSParameterizer.py +1 -1
  128. edsl/results/Dataset.py +170 -7
  129. edsl/results/DatasetExportMixin.py +168 -305
  130. edsl/results/DatasetTree.py +28 -8
  131. edsl/results/MarkdownToDocx.py +122 -0
  132. edsl/results/MarkdownToPDF.py +111 -0
  133. edsl/results/Result.py +298 -206
  134. edsl/results/Results.py +149 -131
  135. edsl/results/ResultsExportMixin.py +2 -0
  136. edsl/results/TableDisplay.py +98 -171
  137. edsl/results/TextEditor.py +50 -0
  138. edsl/results/__init__.py +1 -1
  139. edsl/results/file_exports.py +252 -0
  140. edsl/results/{Selector.py → results_selector.py} +23 -13
  141. edsl/results/smart_objects.py +96 -0
  142. edsl/results/table_data_class.py +12 -0
  143. edsl/results/table_renderers.py +118 -0
  144. edsl/scenarios/ConstructDownloadLink.py +109 -0
  145. edsl/scenarios/DocumentChunker.py +102 -0
  146. edsl/scenarios/DocxScenario.py +16 -0
  147. edsl/scenarios/FileStore.py +150 -239
  148. edsl/scenarios/PdfExtractor.py +40 -0
  149. edsl/scenarios/Scenario.py +90 -193
  150. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  151. edsl/scenarios/ScenarioList.py +415 -244
  152. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  153. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  154. edsl/scenarios/__init__.py +1 -2
  155. edsl/scenarios/directory_scanner.py +96 -0
  156. edsl/scenarios/file_methods.py +85 -0
  157. edsl/scenarios/handlers/__init__.py +13 -0
  158. edsl/scenarios/handlers/csv.py +49 -0
  159. edsl/scenarios/handlers/docx.py +76 -0
  160. edsl/scenarios/handlers/html.py +37 -0
  161. edsl/scenarios/handlers/json.py +111 -0
  162. edsl/scenarios/handlers/latex.py +5 -0
  163. edsl/scenarios/handlers/md.py +51 -0
  164. edsl/scenarios/handlers/pdf.py +68 -0
  165. edsl/scenarios/handlers/png.py +39 -0
  166. edsl/scenarios/handlers/pptx.py +105 -0
  167. edsl/scenarios/handlers/py.py +294 -0
  168. edsl/scenarios/handlers/sql.py +313 -0
  169. edsl/scenarios/handlers/sqlite.py +149 -0
  170. edsl/scenarios/handlers/txt.py +33 -0
  171. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
  172. edsl/scenarios/scenario_selector.py +156 -0
  173. edsl/study/ObjectEntry.py +1 -1
  174. edsl/study/SnapShot.py +1 -1
  175. edsl/study/Study.py +5 -12
  176. edsl/surveys/ConstructDAG.py +92 -0
  177. edsl/surveys/EditSurvey.py +221 -0
  178. edsl/surveys/InstructionHandler.py +100 -0
  179. edsl/surveys/MemoryManagement.py +72 -0
  180. edsl/surveys/Rule.py +5 -4
  181. edsl/surveys/RuleCollection.py +25 -27
  182. edsl/surveys/RuleManager.py +172 -0
  183. edsl/surveys/Simulator.py +75 -0
  184. edsl/surveys/Survey.py +270 -791
  185. edsl/surveys/SurveyCSS.py +20 -8
  186. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  187. edsl/surveys/SurveyToApp.py +141 -0
  188. edsl/surveys/__init__.py +4 -2
  189. edsl/surveys/descriptors.py +6 -2
  190. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  191. edsl/surveys/instructions/Instruction.py +4 -13
  192. edsl/surveys/instructions/InstructionCollection.py +11 -6
  193. edsl/templates/error_reporting/interview_details.html +1 -1
  194. edsl/templates/error_reporting/report.html +1 -1
  195. edsl/tools/plotting.py +1 -1
  196. edsl/utilities/PrettyList.py +56 -0
  197. edsl/utilities/is_notebook.py +18 -0
  198. edsl/utilities/is_valid_variable_name.py +11 -0
  199. edsl/utilities/remove_edsl_version.py +24 -0
  200. edsl/utilities/utilities.py +35 -23
  201. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
  202. edsl-0.1.39.dist-info/RECORD +358 -0
  203. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  204. edsl/language_models/KeyLookup.py +0 -30
  205. edsl/language_models/registry.py +0 -190
  206. edsl/language_models/unused/ReplicateBase.py +0 -83
  207. edsl/results/ResultsDBMixin.py +0 -238
  208. edsl-0.1.38.dev4.dist-info/RECORD +0 -277
  209. /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
  210. /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
  211. /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
  212. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
@@ -0,0 +1,96 @@
1
+ from typing import Optional
2
+
3
+
4
class SmartInt(int):
    """An ``int`` subclass used as a wrapper type; it adds no behaviour of its own."""
6
+
7
+
8
class SmartFloat(float):
    """A ``float`` subclass used as a wrapper type; it adds no behaviour of its own."""
10
+
11
+
12
class SmartStr(str):
    """A ``str`` subclass with helpers for copying, saving, converting and editing text."""

    def clipboard(self) -> None:
        """Copy the text to the system clipboard using ``pyperclip``.

        If ``pyperclip`` cannot be imported, an installation hint is printed
        and nothing is copied.
        """
        try:
            import pyperclip
        except ImportError:
            print(
                "pyperclip is not installed. Run `pip install pyperclip` to install it."
            )
            return None

        pyperclip.copy(self)
        print("Text copied to clipboard.")

    def write(self, filename: str):
        """Write the text to ``filename``, overwriting any existing file.

        :param filename: Path of the file to write.
        """
        with open(filename, "w") as f:
            f.write(str(self))
        return None

    def _repr_html_(self):
        """Return ``None``: a plain ``SmartStr`` provides no HTML representation."""
        return None

    def markdown(self):
        """Return the same text wrapped as a ``SmartMarkdown`` instance."""
        return SmartMarkdown(self)

    def pdf(self, filename: Optional[str] = None):  # Markdown will have this as well
        """Render the text (treated as markdown) to a PDF and return its preview.

        :param filename: Optional output filename passed through to ``MarkdownToPDF``.
        """
        from edsl.results.MarkdownToPDF import MarkdownToPDF

        return MarkdownToPDF(self, filename).preview()

    def docx(self, filename: Optional[str] = None):
        """Render the text (treated as markdown) to a docx and return its preview.

        :param filename: Optional output filename passed through to ``MarkdownToDocx``.
        """
        from edsl.results.MarkdownToDocx import MarkdownToDocx

        return MarkdownToDocx(self, filename).preview()

    def edit(self):
        """Open the text in ``TextEditor`` and return the edited text.

        Strings are immutable, so the instance cannot be updated in place;
        rebinding ``self`` inside the method (as was done previously) silently
        discarded the edit. The edited text is returned as a new instance of
        the same class instead.
        """
        from edsl.results.TextEditor import TextEditor

        editor = TextEditor(self)
        return self.__class__(editor.edit_gui())
54
+
55
+
56
class SmartMarkdown(SmartStr):
    """A ``SmartStr`` whose content is rendered as markdown."""

    def _repr_markdown_(self):
        """Return the text itself as the markdown representation."""
        return self

    def _repr_html_(self):
        """Display the text as markdown via IPython; returns ``None``."""
        from IPython.display import Markdown, display

        rendered = Markdown(self)
        display(rendered)
64
+
65
+
66
class SmartLaTeX(SmartStr):
    """A ``SmartStr`` holding LaTeX source, with LaTeX-specific export helpers."""

    def _repr_html_(self):
        """Print the raw LaTeX source; returns ``None``."""
        print(self)

    def pdf(self, filename: Optional[str] = None):
        """Render the LaTeX source to a PDF and return its preview.

        :param filename: Optional output filename passed through to ``LaTeXToPDF``.
        """
        from edsl.results.LaTeXToPDF import LaTeXToPDF

        return LaTeXToPDF(self, filename).preview()

    def docx(self, filename: Optional[str] = None):
        """Render the LaTeX source to a docx and return its preview.

        :param filename: Optional output filename passed through to ``LaTeXToDocx``.
        """
        from edsl.results.LaTeXToDocx import LaTeXToDocx

        return LaTeXToDocx(self, filename).preview()

    def edit(self):
        """Open the LaTeX source in ``TextEditor`` and return the edited text.

        Strings are immutable, so rebinding ``self`` (as was done previously)
        discarded the edit; the edited text is returned as a new instance of
        the same class instead.
        """
        from edsl.results.TextEditor import TextEditor

        editor = TextEditor(self)
        return self.__class__(editor.edit_gui())
86
+
87
+
88
class FirstObject:
    """Factory that wraps a plain value in the matching "smart" subclass.

    Calling ``FirstObject(value)`` never returns a ``FirstObject`` instance:
    ints become ``SmartInt``, floats ``SmartFloat``, strings ``SmartStr``, and
    every other value is returned unchanged.
    """

    def __new__(cls, value):
        # bool is a subclass of int; without this guard True/False would be
        # converted to SmartInt(1)/SmartInt(0) and lose their boolean identity.
        if isinstance(value, bool):
            return value
        if isinstance(value, int):
            return SmartInt(value)
        if isinstance(value, float):
            return SmartFloat(value)
        if isinstance(value, str):
            return SmartStr(value)
        return value
@@ -0,0 +1,12 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, List
3
+
4
+
5
@dataclass
class TableData:
    """Container for the pieces of a table handed to a renderer."""

    # Column titles, one per column.
    headers: List[str]
    # Table body: one inner list per row.
    data: List[List[Any]]
    # Optional rendering parameters; defaults to None when not supplied.
    parameters: dict = None
    # Optional reference to the object the table was built from.
    raw_data_set: Any = None
@@ -0,0 +1,118 @@
1
+ from abc import ABC, abstractmethod
2
+ from edsl.results.table_data_class import TableData
3
+
4
+
5
class DataTablesRendererABC(ABC):
    """Abstract base for renderers that turn a ``TableData`` into HTML."""

    def __init__(self, table_data: TableData):
        """Store the table data that ``render_html`` will draw from."""
        self.table_data = table_data

    @abstractmethod
    def render_html(self) -> str:
        """Return the HTML representation of ``self.table_data``."""
        ...
12
+
13
+
14
class DataTablesRenderer(DataTablesRendererABC):
    """Interactive DataTables renderer implementation"""

    def render_html(self) -> str:
        """Return a standalone HTML page showing the table with DataTables.

        Headers and cells are interpolated into the markup verbatim.
        NOTE(review): values are not HTML-escaped; confirm upstream data is
        trusted or escape it before rendering.
        """
        # Literal JavaScript braces are doubled because the template goes
        # through str.format below.
        html_template = """
        <!DOCTYPE html>
        <html>
        <head>
            <link href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/5.3.0/css/bootstrap.min.css" rel="stylesheet">
            <link href="https://cdnjs.cloudflare.com/ajax/libs/datatables.net-bs5/1.13.6/dataTables.bootstrap5.min.css" rel="stylesheet">
            <link href="https://cdnjs.cloudflare.com/ajax/libs/datatables.net-buttons-bs5/2.4.1/buttons.bootstrap5.min.css" rel="stylesheet">
            <link href="https://cdnjs.cloudflare.com/ajax/libs/datatables.net-responsive-bs5/2.4.1/responsive.bootstrap5.min.css" rel="stylesheet">
            <style>
                {css}
            </style>
        </head>
        <body>
            <div class="container">
                <table id="interactive-table" class="table table-striped" style="width:100%">
                    <thead>
                        <tr>{header_cells}</tr>
                    </thead>
                    <tbody>{body_rows}</tbody>
                </table>
            </div>
            <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.0/jquery.min.js"></script>
            <script src="https://cdnjs.cloudflare.com/ajax/libs/datatables.net/1.13.6/jquery.dataTables.min.js"></script>
            <script src="https://cdnjs.cloudflare.com/ajax/libs/datatables.net-bs5/1.13.6/dataTables.bootstrap5.min.js"></script>
            <script>
                $(document).ready(function() {{
                    $('#interactive-table').DataTable({{
                        pageLength: 10,
                        lengthMenu: [[5, 10, 25, -1], [5, 10, 25, "All"]],
                        scrollX: true,
                        responsive: true,
                        dom: 'Bfrtip',
                        buttons: [
                            {{
                                extend: 'colvis',
                                text: 'Show/Hide Columns'
                            }}
                        ]
                    }});
                }});
            </script>
        </body>
        </html>
        """

        header_cells = "".join(
            f"<th>{header}</th>" for header in self.table_data.headers
        )
        body_rows = "".join(
            "<tr>" + "".join(f"<td>{cell}</td>" for cell in row) + "</tr>"
            for row in self.table_data.data
        )

        parameters = self.table_data.parameters or {}
        css = self.get_css()
        # A css_parameterizer attribute is optional; it is applied only when
        # the instance (or a subclass) provides one.
        if hasattr(self, "css_parameterizer"):
            css = self.css_parameterizer(css).apply_parameters(parameters)

        return html_template.format(
            css=css, header_cells=header_cells, body_rows=body_rows
        )

    @classmethod
    def get_css(cls) -> str:
        """Load CSS content from the file next to this module"""
        # Imported locally: the module never imports Path, so the previous
        # bare reference raised NameError.
        from pathlib import Path

        css_path = Path(__file__).parent / "table_display.css"
        return css_path.read_text()
86
+
87
+
88
class PandasStyleRenderer(DataTablesRendererABC):
    """Pandas-based styled renderer implementation"""

    def render_html(self) -> str:
        """Return the table as a pandas-styled HTML fragment in a scrollable div.

        The DataFrame comes from ``raw_data_set.to_pandas()`` when that is
        available, otherwise it is built from ``data`` and ``headers``.
        Anything written to stderr while building and styling is captured
        and discarded.
        """
        import io
        from contextlib import redirect_stderr

        import pandas as pd

        captured = io.StringIO()
        with redirect_stderr(captured):
            source = self.table_data.raw_data_set
            use_source = source is not None and hasattr(source, "to_pandas")
            if use_source:
                frame = source.to_pandas()
            else:
                frame = pd.DataFrame(
                    self.table_data.data, columns=self.table_data.headers
                )

            left_aligned = frame.style.set_properties(**{"text-align": "left"})
            styled_df = left_aligned.background_gradient()

            return f"""
            <div style="max-height: 500px; overflow-y: auto;">
            {styled_df.to_html()}
            </div>
            """

    @classmethod
    def get_css(cls) -> str:
        """Return an empty string: pandas styling supplies its own CSS."""
        return ""  # Pandas styling handles its own CSS
@@ -0,0 +1,109 @@
1
+ import os
2
+ import mimetypes
3
+
4
+
5
class ConstructDownloadLink:
    """
    A class to create HTML download links for FileStore objects.
    The links can be displayed in Jupyter notebooks or other web interfaces.
    """

    def __init__(self, filestore):
        """
        Initialize with a FileStore object.

        Args:
            filestore: A FileStore object containing the file to be made downloadable
        """
        self.filestore = filestore

    def create_link(self, custom_filename=None, style=None):
        """
        Create a displayable download link for the file.

        Args:
            custom_filename (str, optional): Custom name for the downloaded file.
            style (dict, optional): Custom CSS styles for the download button.

        Returns:
            IPython.display.HTML: HTML object containing the download link
        """
        from IPython.display import HTML

        html = self.html_create_link(custom_filename, style)
        return HTML(html)

    def html_create_link(self, custom_filename=None, style=None):
        """
        Create the HTML markup of a download link for the file.

        Args:
            custom_filename (str, optional): Custom name for the downloaded file.
                If None, uses original filename.
            style (dict, optional): Custom CSS styles for the download button.
                If None, uses default styling.

        Returns:
            str: HTML markup of an ``<a download=...>`` element whose href is a
            base64 ``data:`` URI.
        """
        from html import escape

        # Get filename from path or use custom filename
        original_filename = os.path.basename(self.filestore.path)
        filename = custom_filename or original_filename

        # Use the base64 string already stored in FileStore
        b64_data = self.filestore.base64_string

        # Use mime type from FileStore, or guess it from the file name when the
        # FileStore does not provide one.
        mime_type = (
            self.filestore.mime_type
            or mimetypes.guess_type(original_filename)[0]
            or mimetypes.guess_type(filename)[0]
            or "application/octet-stream"
        )

        # Default style if none provided
        default_style = {
            "background-color": "#4CAF50",
            "color": "white",
            "padding": "10px 20px",
            "text-decoration": "none",
            "border-radius": "4px",
            "display": "inline-block",
            "margin": "10px 0",
            "font-family": "sans-serif",
            "cursor": "pointer",
        }

        button_style = style or default_style
        style_str = "; ".join(f"{k}: {v}" for k, v in button_style.items())

        # Escape the filename so quotes or angle brackets in it cannot break
        # out of the attribute or the link text.
        safe_filename = escape(filename, quote=True)

        link_markup = f"""
        <a download="{safe_filename}"
        href="data:{mime_type};base64,{b64_data}"
        style="{style_str}">
        Download {safe_filename}
        </a>
        """
        return link_markup

    def create_multiple_links(self, files, custom_filenames=None, style=None):
        """
        Create multiple download links at once.
        Useful when you want to provide different versions of the same file
        or related files together.

        Args:
            files (list): List of FileStore objects
            custom_filenames (list, optional): List of custom filenames for downloads
            style (dict, optional): Custom CSS styles for the download buttons

        Returns:
            IPython.display.HTML: HTML object containing all download links
        """
        # Imported locally, as in create_link; the name was previously used
        # here without being imported, which raised NameError.
        from IPython.display import HTML

        if custom_filenames is None:
            custom_filenames = [None] * len(files)

        html_parts = [
            ConstructDownloadLink(file_obj).html_create_link(
                custom_filename=custom_name, style=style
            )
            for file_obj, custom_name in zip(files, custom_filenames)
        ]

        return HTML(
            '<div style="display: flex; gap: 10px;">' + "".join(html_parts) + "</div>"
        )


if __name__ == "__main__":
    import doctest

    doctest.testmod()
@@ -0,0 +1,102 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, Generator, TYPE_CHECKING
3
+ import copy
4
+
5
+ if TYPE_CHECKING:
6
+ from edsl.scenarios.Scenario import Scenario
7
+ from edsl.scenarios.ScenarioList import ScenarioList
8
+
9
+
10
class DocumentChunker:
    """Split one text field of a scenario into several chunked scenarios."""

    def __init__(self, scenario: "Scenario"):
        """Store the scenario whose field will be chunked."""
        self.scenario = scenario

    @staticmethod
    def _line_chunks(text, num_lines: int) -> Generator[str, None, None]:
        """Split a text into chunks of a given size.

        :param text: The text to split.
        :param num_lines: The number of lines in each chunk.

        Example:

        >>> list(DocumentChunker._line_chunks("This is a test.\\nThis is a test. This is a test.", 1))
        ['This is a test.', 'This is a test. This is a test.']
        """
        lines = text.split("\n")
        for i in range(0, len(lines), num_lines):
            yield "\n".join(lines[i : i + num_lines])

    @staticmethod
    def _word_chunks(text, num_words: int) -> Generator[str, None, None]:
        """Split a text into chunks of a given size.

        :param text: The text to split.
        :param num_words: The number of words in each chunk.

        Example:

        >>> list(DocumentChunker._word_chunks("This is a test.", 2))
        ['This is', 'a test.']
        """
        words = text.split()
        for i in range(0, len(words), num_words):
            yield " ".join(words[i : i + num_words])

    def chunk(
        self,
        field,
        num_words: Optional[int] = None,
        num_lines: Optional[int] = None,
        include_original=False,
        hash_original=False,
    ) -> ScenarioList:
        """Split a field into chunks of a given size.

        :param field: The field to split.
        :param num_words: The number of words in each chunk.
        :param num_lines: The number of lines in each chunk.
        :param include_original: Whether to include the original field in the new scenarios.
        :param hash_original: Whether to hash the original field in the new scenarios.
        :raises ValueError: If neither or both of ``num_words`` and ``num_lines``
            are given, or if the given size is not positive.

        Each new scenario is a deep copy of the original with ``field`` replaced
        by one chunk and ``field + "_chunk"`` set to the chunk index.

        If you specify `include_original=True`, the original field will be included in the new scenarios with an "_original" suffix.
        """
        # Validate the arguments before doing any work or importing anything.
        if num_words is None and num_lines is None:
            raise ValueError("You must specify either num_words or num_lines.")

        if num_words is not None and num_lines is not None:
            raise ValueError(
                "You must specify either num_words or num_lines, but not both."
            )

        size = num_words if num_words is not None else num_lines
        if size <= 0:
            # A zero step would fail inside range() with an unrelated message,
            # and a negative one would silently yield no chunks.
            raise ValueError("The chunk size must be a positive integer.")

        from edsl.scenarios.ScenarioList import ScenarioList
        import hashlib

        original_text = self.scenario[field]
        if num_words is not None:
            chunks = self._word_chunks(original_text, num_words)
        else:
            chunks = self._line_chunks(original_text, num_lines)

        # The "_original" value is the same for every chunk, so compute it once.
        original_value = None
        if include_original:
            if hash_original:
                original_value = hashlib.md5(original_text.encode()).hexdigest()
            else:
                original_value = original_text

        scenarios = []
        for i, chunk in enumerate(chunks):
            new_scenario = copy.deepcopy(self.scenario)
            new_scenario[field] = chunk
            new_scenario[field + "_chunk"] = i
            if include_original:
                new_scenario[field + "_original"] = original_value
            scenarios.append(new_scenario)
        return ScenarioList(scenarios)


if __name__ == "__main__":
    import doctest

    doctest.testmod()
@@ -0,0 +1,16 @@
1
class DocxScenario:
    """Read a .docx file and expose its paragraph text as a scenario dictionary."""

    def __init__(self, docx_path: str):
        """Open the document at ``docx_path`` with python-docx and remember the path."""
        from docx import Document

        self.doc = Document(docx_path)
        self.docx_path = docx_path

    def get_scenario_dict(self) -> dict:
        """Return ``{"file_path": ..., "text": ...}`` for the document.

        ``text`` is the text of every paragraph, joined with newlines.
        """
        paragraph_texts = [paragraph.text for paragraph in self.doc.paragraphs]
        return {
            "file_path": self.docx_path,
            "text": "\n".join(paragraph_texts),
        }