edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. edsl/Base.py +169 -116
  2. edsl/__init__.py +14 -6
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +358 -146
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +88 -36
  7. edsl/agents/InvigilatorBase.py +59 -70
  8. edsl/agents/PromptConstructor.py +117 -219
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionOptionProcessor.py +172 -0
  11. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  12. edsl/agents/__init__.py +0 -1
  13. edsl/agents/prompt_helpers.py +3 -3
  14. edsl/config.py +22 -2
  15. edsl/conversation/car_buying.py +2 -1
  16. edsl/coop/CoopFunctionsMixin.py +15 -0
  17. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  18. edsl/coop/PriceFetcher.py +1 -1
  19. edsl/coop/coop.py +104 -42
  20. edsl/coop/utils.py +14 -14
  21. edsl/data/Cache.py +21 -14
  22. edsl/data/CacheEntry.py +12 -15
  23. edsl/data/CacheHandler.py +33 -12
  24. edsl/data/__init__.py +4 -3
  25. edsl/data_transfer_models.py +2 -1
  26. edsl/enums.py +20 -0
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +12 -0
  29. edsl/exceptions/inference_services.py +5 -0
  30. edsl/exceptions/questions.py +24 -6
  31. edsl/exceptions/scenarios.py +7 -0
  32. edsl/inference_services/AnthropicService.py +0 -3
  33. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  34. edsl/inference_services/AvailableModelFetcher.py +209 -0
  35. edsl/inference_services/AwsBedrock.py +0 -2
  36. edsl/inference_services/AzureAI.py +0 -2
  37. edsl/inference_services/GoogleService.py +2 -11
  38. edsl/inference_services/InferenceServiceABC.py +18 -85
  39. edsl/inference_services/InferenceServicesCollection.py +105 -80
  40. edsl/inference_services/MistralAIService.py +0 -3
  41. edsl/inference_services/OpenAIService.py +1 -4
  42. edsl/inference_services/PerplexityService.py +0 -3
  43. edsl/inference_services/ServiceAvailability.py +135 -0
  44. edsl/inference_services/TestService.py +11 -8
  45. edsl/inference_services/data_structures.py +62 -0
  46. edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
  47. edsl/jobs/Answers.py +1 -14
  48. edsl/jobs/FetchInvigilator.py +40 -0
  49. edsl/jobs/InterviewTaskManager.py +98 -0
  50. edsl/jobs/InterviewsConstructor.py +48 -0
  51. edsl/jobs/Jobs.py +102 -243
  52. edsl/jobs/JobsChecks.py +35 -10
  53. edsl/jobs/JobsComponentConstructor.py +189 -0
  54. edsl/jobs/JobsPrompts.py +5 -3
  55. edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
  56. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  57. edsl/jobs/RequestTokenEstimator.py +30 -0
  58. edsl/jobs/buckets/BucketCollection.py +44 -3
  59. edsl/jobs/buckets/TokenBucket.py +53 -21
  60. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  61. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  62. edsl/jobs/decorators.py +35 -0
  63. edsl/jobs/interviews/Interview.py +77 -380
  64. edsl/jobs/jobs_status_enums.py +9 -0
  65. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  66. edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
  67. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  68. edsl/jobs/tasks/TaskHistory.py +14 -15
  69. edsl/jobs/tasks/task_status_enum.py +0 -2
  70. edsl/language_models/ComputeCost.py +63 -0
  71. edsl/language_models/LanguageModel.py +137 -234
  72. edsl/language_models/ModelList.py +11 -13
  73. edsl/language_models/PriceManager.py +127 -0
  74. edsl/language_models/RawResponseHandler.py +106 -0
  75. edsl/language_models/ServiceDataSources.py +0 -0
  76. edsl/language_models/__init__.py +0 -1
  77. edsl/language_models/key_management/KeyLookup.py +63 -0
  78. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  79. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  80. edsl/language_models/key_management/__init__.py +0 -0
  81. edsl/language_models/key_management/models.py +131 -0
  82. edsl/language_models/registry.py +49 -59
  83. edsl/language_models/repair.py +2 -2
  84. edsl/language_models/utilities.py +5 -4
  85. edsl/notebooks/Notebook.py +19 -14
  86. edsl/notebooks/NotebookToLaTeX.py +142 -0
  87. edsl/prompts/Prompt.py +29 -39
  88. edsl/questions/AnswerValidatorMixin.py +47 -2
  89. edsl/questions/ExceptionExplainer.py +77 -0
  90. edsl/questions/HTMLQuestion.py +103 -0
  91. edsl/questions/LoopProcessor.py +149 -0
  92. edsl/questions/QuestionBase.py +37 -192
  93. edsl/questions/QuestionBaseGenMixin.py +52 -48
  94. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  95. edsl/questions/QuestionCheckBox.py +1 -1
  96. edsl/questions/QuestionExtract.py +1 -1
  97. edsl/questions/QuestionFreeText.py +1 -2
  98. edsl/questions/QuestionList.py +3 -5
  99. edsl/questions/QuestionMatrix.py +265 -0
  100. edsl/questions/QuestionMultipleChoice.py +66 -22
  101. edsl/questions/QuestionNumerical.py +1 -3
  102. edsl/questions/QuestionRank.py +6 -16
  103. edsl/questions/ResponseValidatorABC.py +37 -11
  104. edsl/questions/ResponseValidatorFactory.py +28 -0
  105. edsl/questions/SimpleAskMixin.py +4 -3
  106. edsl/questions/__init__.py +1 -0
  107. edsl/questions/derived/QuestionLinearScale.py +6 -3
  108. edsl/questions/derived/QuestionTopK.py +1 -1
  109. edsl/questions/descriptors.py +17 -3
  110. edsl/questions/question_registry.py +1 -1
  111. edsl/questions/templates/matrix/__init__.py +1 -0
  112. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  113. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  114. edsl/results/CSSParameterizer.py +1 -1
  115. edsl/results/Dataset.py +170 -7
  116. edsl/results/DatasetExportMixin.py +224 -302
  117. edsl/results/DatasetTree.py +28 -8
  118. edsl/results/MarkdownToDocx.py +122 -0
  119. edsl/results/MarkdownToPDF.py +111 -0
  120. edsl/results/Result.py +192 -206
  121. edsl/results/Results.py +120 -113
  122. edsl/results/ResultsExportMixin.py +2 -0
  123. edsl/results/Selector.py +23 -13
  124. edsl/results/TableDisplay.py +98 -171
  125. edsl/results/TextEditor.py +50 -0
  126. edsl/results/__init__.py +1 -1
  127. edsl/results/smart_objects.py +96 -0
  128. edsl/results/table_data_class.py +12 -0
  129. edsl/results/table_renderers.py +118 -0
  130. edsl/scenarios/ConstructDownloadLink.py +109 -0
  131. edsl/scenarios/DirectoryScanner.py +96 -0
  132. edsl/scenarios/DocumentChunker.py +102 -0
  133. edsl/scenarios/DocxScenario.py +16 -0
  134. edsl/scenarios/FileStore.py +118 -239
  135. edsl/scenarios/PdfExtractor.py +40 -0
  136. edsl/scenarios/Scenario.py +90 -193
  137. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  138. edsl/scenarios/ScenarioJoin.py +10 -6
  139. edsl/scenarios/ScenarioList.py +383 -240
  140. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  141. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  142. edsl/scenarios/ScenarioSelector.py +156 -0
  143. edsl/scenarios/__init__.py +1 -2
  144. edsl/scenarios/file_methods.py +85 -0
  145. edsl/scenarios/handlers/__init__.py +13 -0
  146. edsl/scenarios/handlers/csv.py +38 -0
  147. edsl/scenarios/handlers/docx.py +76 -0
  148. edsl/scenarios/handlers/html.py +37 -0
  149. edsl/scenarios/handlers/json.py +111 -0
  150. edsl/scenarios/handlers/latex.py +5 -0
  151. edsl/scenarios/handlers/md.py +51 -0
  152. edsl/scenarios/handlers/pdf.py +68 -0
  153. edsl/scenarios/handlers/png.py +39 -0
  154. edsl/scenarios/handlers/pptx.py +105 -0
  155. edsl/scenarios/handlers/py.py +294 -0
  156. edsl/scenarios/handlers/sql.py +313 -0
  157. edsl/scenarios/handlers/sqlite.py +149 -0
  158. edsl/scenarios/handlers/txt.py +33 -0
  159. edsl/study/ObjectEntry.py +1 -1
  160. edsl/study/SnapShot.py +1 -1
  161. edsl/study/Study.py +5 -12
  162. edsl/surveys/ConstructDAG.py +92 -0
  163. edsl/surveys/EditSurvey.py +221 -0
  164. edsl/surveys/InstructionHandler.py +100 -0
  165. edsl/surveys/MemoryManagement.py +72 -0
  166. edsl/surveys/Rule.py +5 -4
  167. edsl/surveys/RuleCollection.py +25 -27
  168. edsl/surveys/RuleManager.py +172 -0
  169. edsl/surveys/Simulator.py +75 -0
  170. edsl/surveys/Survey.py +199 -771
  171. edsl/surveys/SurveyCSS.py +20 -8
  172. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  173. edsl/surveys/SurveyToApp.py +141 -0
  174. edsl/surveys/__init__.py +4 -2
  175. edsl/surveys/descriptors.py +6 -2
  176. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  177. edsl/surveys/instructions/Instruction.py +4 -13
  178. edsl/surveys/instructions/InstructionCollection.py +11 -6
  179. edsl/templates/error_reporting/interview_details.html +1 -1
  180. edsl/templates/error_reporting/report.html +1 -1
  181. edsl/tools/plotting.py +1 -1
  182. edsl/utilities/PrettyList.py +56 -0
  183. edsl/utilities/is_notebook.py +18 -0
  184. edsl/utilities/is_valid_variable_name.py +11 -0
  185. edsl/utilities/remove_edsl_version.py +24 -0
  186. edsl/utilities/utilities.py +35 -23
  187. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
  188. edsl-0.1.39.dev2.dist-info/RECORD +352 -0
  189. edsl/language_models/KeyLookup.py +0 -30
  190. edsl/language_models/unused/ReplicateBase.py +0 -83
  191. edsl/results/ResultsDBMixin.py +0 -238
  192. edsl-0.1.39.dev1.dist-info/RECORD +0 -277
  193. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
  194. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,96 @@
1
+ # directory_scanner.py
2
+ from dataclasses import dataclass
3
+ from typing import Optional, List, Iterator, TypeVar, Generic, Callable, Any
4
+ import os
5
+
6
T = TypeVar("T")


@dataclass
class DirectoryScanner:
    """
    Scanner for finding files in a directory based on various criteria.

    Attributes:
        directory_path: Directory whose files are scanned.
    """

    directory_path: str

    @staticmethod
    def _normalize_suffixes(suffixes: Optional[List[str]]) -> frozenset:
        """Return *suffixes* as a set, dropping one leading dot from each entry.

        ``None`` and empty input give an empty set, which callers treat as
        "no filter".  A bare string is treated as a single suffix rather than
        being iterated character by character.
        """
        if not suffixes:
            return frozenset()
        if isinstance(suffixes, str):
            suffixes = [suffixes]
        return frozenset(s[1:] if s.startswith(".") else s for s in suffixes)

    def scan(
        self,
        factory: Callable[[str], T],
        recursive: bool = False,
        suffix_allow_list: Optional[List[str]] = None,
        suffix_exclude_list: Optional[List[str]] = None,
        example_suffix: Optional[str] = None,
        include_no_extension: bool = True,
    ) -> List[T]:
        """
        Eagerly scan directory and return list of objects created by factory.

        Args:
            factory: Callable that creates objects from file paths
            recursive: If True, recursively traverse subdirectories
            suffix_allow_list: List of allowed file extensions (a leading dot is optional)
            suffix_exclude_list: List of excluded file extensions (takes precedence over allow list)
            example_suffix: If provided, only include files whose path ends with this string
            include_no_extension: Whether to include files without extensions

        Returns:
            The objects produced by ``factory``, one per matching file path.
        """
        return list(
            self.iter_scan(
                factory,
                recursive=recursive,
                suffix_allow_list=suffix_allow_list,
                suffix_exclude_list=suffix_exclude_list,
                example_suffix=example_suffix,
                include_no_extension=include_no_extension,
            )
        )

    def iter_scan(
        self,
        factory: Callable[[str], T],
        recursive: bool = False,
        suffix_allow_list: Optional[List[str]] = None,
        suffix_exclude_list: Optional[List[str]] = None,
        example_suffix: Optional[str] = None,
        include_no_extension: bool = True,
    ) -> Iterator[T]:
        """
        Lazily scan directory and yield objects created by factory.

        Takes the same arguments as :meth:`scan`.  Files without an extension
        are accepted or rejected purely by ``include_no_extension``; the
        allow list, exclude list and ``example_suffix`` are not applied to them.
        """
        # Normalise once per scan so "txt" and ".txt" behave the same and
        # each membership test is a set lookup.
        allowed = self._normalize_suffixes(suffix_allow_list)
        excluded = self._normalize_suffixes(suffix_exclude_list)

        def should_include_file(filepath: str) -> bool:
            # splitext keeps the dot on the extension; slice it off.
            ext = os.path.splitext(filepath)[1][1:]

            # Handle no extension case
            if not ext:
                return include_no_extension

            # Check exclusions first (they take precedence)
            if ext in excluded:
                return False

            # Check example suffix if specified
            if example_suffix and not filepath.endswith(example_suffix):
                return False

            # Check allowed suffixes if specified (empty set means "allow all")
            if allowed and ext not in allowed:
                return False

            return True

        def iter_files() -> Iterator[str]:
            if recursive:
                for root, _, files in os.walk(self.directory_path):
                    for file in files:
                        yield os.path.join(root, file)
            else:
                for file in os.listdir(self.directory_path):
                    file_path = os.path.join(self.directory_path, file)
                    # listdir also returns directories; keep regular files only
                    if os.path.isfile(file_path):
                        yield file_path

        for file_path in iter_files():
            if should_include_file(file_path):
                yield factory(file_path)
@@ -0,0 +1,102 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, Generator, TYPE_CHECKING
3
+ import copy
4
+
5
+ if TYPE_CHECKING:
6
+ from edsl.scenarios.Scenario import Scenario
7
+ from edsl.scenarios.ScenarioList import ScenarioList
8
+
9
+
10
class DocumentChunker:
    """Split one text field of a scenario into word- or line-based chunks."""

    def __init__(self, scenario: "Scenario"):
        """Store the scenario whose field will be chunked."""
        self.scenario = scenario

    @staticmethod
    def _line_chunks(text, num_lines: int) -> Generator[str, None, None]:
        """Split a text into chunks of a given size.

        :param text: The text to split.
        :param num_lines: The number of lines in each chunk.
        :raises ValueError: If ``num_lines`` is less than 1 (raised on first iteration).

        Example:

        >>> list(DocumentChunker._line_chunks("This is a test.\\nThis is a test. This is a test.", 1))
        ['This is a test.', 'This is a test. This is a test.']
        """
        # A negative step would make range() empty and silently drop the text.
        if num_lines < 1:
            raise ValueError("num_lines must be a positive integer.")
        lines = text.split("\n")
        for i in range(0, len(lines), num_lines):
            yield "\n".join(lines[i : i + num_lines])

    @staticmethod
    def _word_chunks(text, num_words: int) -> Generator[str, None, None]:
        """Split a text into chunks of a given size.

        :param text: The text to split.
        :param num_words: The number of words in each chunk.
        :raises ValueError: If ``num_words`` is less than 1 (raised on first iteration).

        Example:

        >>> list(DocumentChunker._word_chunks("This is a test.", 2))
        ['This is', 'a test.']
        """
        # A negative step would make range() empty and silently drop the text.
        if num_words < 1:
            raise ValueError("num_words must be a positive integer.")
        words = text.split()
        for i in range(0, len(words), num_words):
            yield " ".join(words[i : i + num_words])

    def chunk(
        self,
        field,
        num_words: Optional[int] = None,
        num_lines: Optional[int] = None,
        include_original=False,
        hash_original=False,
    ) -> "ScenarioList":
        """Split a field into chunks of a given size.

        :param field: The field to split.
        :param num_words: The number of words in each chunk.
        :param num_lines: The number of lines in each chunk.
        :param include_original: Whether to include the original field in the new scenarios.
        :param hash_original: Whether to hash the original field in the new scenarios.
        :raises ValueError: If neither or both of ``num_words`` and ``num_lines`` are given,
            or if the given size is not positive.

        Each returned scenario is a deep copy of the stored scenario with
        ``field`` replaced by one chunk and ``field + "_chunk"`` set to the
        chunk's index.

        If you specify `include_original=True`, the original field will be included in the new scenarios with an "_original" suffix.
        """
        # Validate the arguments before doing any work.
        if num_words is None and num_lines is None:
            raise ValueError("You must specify either num_words or num_lines.")

        if num_words is not None and num_lines is not None:
            raise ValueError(
                "You must specify either num_words or num_lines, but not both."
            )

        import hashlib
        from edsl.scenarios.ScenarioList import ScenarioList

        text = self.scenario[field]
        if num_words is not None:
            chunks = list(self._word_chunks(text, num_words))
        else:
            chunks = list(self._line_chunks(text, num_lines))

        # The "_original" value is identical for every chunk; compute it once.
        original_value = None
        if include_original:
            if hash_original:
                original_value = hashlib.md5(text.encode()).hexdigest()
            else:
                original_value = text

        scenarios = []
        for i, piece in enumerate(chunks):
            new_scenario = copy.deepcopy(self.scenario)
            new_scenario[field] = piece
            new_scenario[field + "_chunk"] = i
            if include_original:
                new_scenario[field + "_original"] = original_value
            scenarios.append(new_scenario)
        return ScenarioList(scenarios)
97
+
98
+
99
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings.
    from doctest import testmod

    testmod()
@@ -0,0 +1,16 @@
1
class DocxScenario:
    """Load a .docx file and expose its paragraph text as a scenario dict."""

    def __init__(self, docx_path: str):
        """Open the document at ``docx_path`` and remember the path."""
        # Imported inside the constructor, so the docx package is only
        # needed when an instance is actually created.
        from docx import Document

        self.doc = Document(docx_path)
        self.docx_path = docx_path

    def get_scenario_dict(self) -> dict:
        """Return ``{"file_path": ..., "text": ...}`` for the document.

        ``text`` is the text of every paragraph in ``self.doc.paragraphs``,
        in order, joined with newlines.
        """
        paragraph_texts = [para.text for para in self.doc.paragraphs]
        return {
            "file_path": self.docx_path,
            "text": "\n".join(paragraph_texts),
        }
@@ -4,111 +4,11 @@ import tempfile
4
4
  import mimetypes
5
5
  import os
6
6
  from typing import Dict, Any, IO, Optional
7
- import requests
8
- from urllib.parse import urlparse
9
7
 
10
- import google.generativeai as genai
8
+ from edsl.scenarios.Scenario import Scenario
9
+ from edsl.utilities.remove_edsl_version import remove_edsl_version
11
10
 
12
- from edsl import Scenario
13
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
14
- from edsl.utilities.utilities import is_notebook
15
-
16
-
17
- def view_csv(csv_path):
18
- import pandas as pd
19
-
20
- df = pd.read_csv(csv_path)
21
- return df
22
-
23
-
24
- def view_html(html_path):
25
- import os
26
- import subprocess
27
- from IPython.display import IFrame, display, HTML
28
-
29
- if os.path.exists(html_path):
30
- if is_notebook():
31
- # Display the HTML inline in Jupyter Notebook
32
- display(IFrame(src=html_path, width=700, height=600))
33
- display(
34
- HTML(
35
- f'<a href="{html_path}" target="_blank">Open HTML in a new tab</a>'
36
- )
37
- )
38
- else:
39
- try:
40
- if (os_name := os.name) == "posix":
41
- # Open with the default browser on macOS
42
- subprocess.run(["open", html_path], check=True)
43
- elif os_name == "nt":
44
- # Open with the default browser on Windows
45
- os.startfile(html_path)
46
- else:
47
- # Open with the default browser on Linux
48
- subprocess.run(["xdg-open", html_path], check=True)
49
- except Exception as e:
50
- print(f"Error opening HTML file: {e}")
51
- else:
52
- print("HTML file was not found.")
53
-
54
-
55
- def view_html(html_path):
56
- import os
57
- from IPython.display import display, HTML
58
-
59
- if is_notebook():
60
- with open(html_path, "r") as f:
61
- html_content = f.read()
62
- display(HTML(html_content))
63
- else:
64
- if os.path.exists(html_path):
65
- try:
66
- if (os_name := os.name) == "posix":
67
- subprocess.run(["open", html_path], check=True)
68
- elif os_name == "nt":
69
- os.startfile(html_path)
70
- else:
71
- subprocess.run(["xdg-open", html_path], check=True)
72
- except Exception as e:
73
- print(f"Error opening file: {e}")
74
- else:
75
- print("File was not created successfully.")
76
-
77
-
78
- def view_pdf(pdf_path):
79
- import os
80
- import subprocess
81
- import os
82
- from IPython.display import HTML, display
83
-
84
- if is_notebook():
85
- # Convert to absolute path if needed
86
- with open(pdf_path, "rb") as f:
87
- base64_pdf = base64.b64encode(f.read()).decode("utf-8")
88
-
89
- html = f"""
90
- <iframe
91
- src="data:application/pdf;base64,{base64_pdf}"
92
- width="800px"
93
- height="800px"
94
- type="application/pdf"
95
- ></iframe>
96
- """
97
- display(HTML(html))
98
-
99
- if os.path.exists(pdf_path):
100
- try:
101
- if (os_name := os.name) == "posix":
102
- # for cool kids
103
- subprocess.run(["open", pdf_path], check=True) # macOS
104
- elif os_name == "nt":
105
- os.startfile(pdf_path) # Windows
106
- else:
107
- subprocess.run(["xdg-open", pdf_path], check=True) # Linux
108
- except Exception as e:
109
- print(f"Error opening PDF: {e}")
110
- else:
111
- print("PDF file was not created successfully.")
11
+ from edsl.scenarios.file_methods import FileMethods
112
12
 
113
13
 
114
14
  class FileStore(Scenario):
@@ -122,6 +22,7 @@ class FileStore(Scenario):
122
22
  suffix: Optional[str] = None,
123
23
  base64_string: Optional[str] = None,
124
24
  external_locations: Optional[Dict[str, str]] = None,
25
+ extracted_text: Optional[str] = None,
125
26
  **kwargs,
126
27
  ):
127
28
  if path is None and "filename" in kwargs:
@@ -137,6 +38,11 @@ class FileStore(Scenario):
137
38
  )
138
39
  self.base64_string = base64_string or self.encode_file_to_base64_string(path)
139
40
  self.external_locations = external_locations or {}
41
+
42
+ self.extracted_text = (
43
+ self.extract_text() if extracted_text is None else extracted_text
44
+ )
45
+
140
46
  super().__init__(
141
47
  {
142
48
  "path": path,
@@ -145,6 +51,7 @@ class FileStore(Scenario):
145
51
  "suffix": self.suffix,
146
52
  "mime_type": self.mime_type,
147
53
  "external_locations": self.external_locations,
54
+ "extracted_text": self.extracted_text,
148
55
  }
149
56
  )
150
57
 
@@ -170,88 +77,12 @@ class FileStore(Scenario):
170
77
  return "FileStore: self.path"
171
78
 
172
79
  @classmethod
173
- def example(cls, example_type="text"):
174
- import textwrap
175
- import tempfile
176
-
177
- if example_type == "png" or example_type == "image":
178
- import importlib.resources
179
- from pathlib import Path
180
-
181
- # Get package root directory
182
- package_root = Path(__file__).parent.parent.parent
183
- logo_path = package_root / "static" / "logo.png"
184
- return cls(str(logo_path))
185
-
186
- if example_type == "text":
187
- with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
188
- f.write(b"Hello, World!")
189
-
190
- return cls(path=f.name)
191
-
192
- elif example_type == "csv":
193
- from edsl.results.Results import Results
194
-
195
- r = Results.example()
196
-
197
- with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
198
- r.to_csv(filename=f.name)
199
- return cls(f.name)
200
-
201
- elif example_type == "pdf":
202
- pdf_string = textwrap.dedent(
203
- """\
204
- %PDF-1.4
205
- 1 0 obj
206
- << /Type /Catalog /Pages 2 0 R >>
207
- endobj
208
- 2 0 obj
209
- << /Type /Pages /Kids [3 0 R] /Count 1 >>
210
- endobj
211
- 3 0 obj
212
- << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
213
- endobj
214
- 4 0 obj
215
- << /Length 44 >>
216
- stream
217
- BT
218
- /F1 24 Tf
219
- 100 700 Td
220
- (Hello, World!) Tj
221
- ET
222
- endstream
223
- endobj
224
- 5 0 obj
225
- << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
226
- endobj
227
- 6 0 obj
228
- << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
229
- endobj
230
- xref
231
- 0 7
232
- 0000000000 65535 f
233
- 0000000010 00000 n
234
- 0000000053 00000 n
235
- 0000000100 00000 n
236
- 0000000173 00000 n
237
- 0000000232 00000 n
238
- 0000000272 00000 n
239
- trailer
240
- << /Size 7 /Root 1 0 R >>
241
- startxref
242
- 318
243
- %%EOF"""
244
- )
245
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
246
- f.write(pdf_string.encode())
247
-
248
- return cls(f.name)
249
-
250
- elif example_type == "html":
251
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
252
- f.write("<html><body><h1>Test</h1></body></html>".encode())
253
-
254
- return cls(f.name)
80
+ def example(cls, example_type="txt"):
81
+ file_methods_class = FileMethods.get_handler(example_type)
82
+ if file_methods_class:
83
+ return cls(file_methods_class().example())
84
+ else:
85
+ print(f"Example for {example_type} is not supported.")
255
86
 
256
87
  @property
257
88
  def size(self) -> int:
@@ -260,6 +91,8 @@ class FileStore(Scenario):
260
91
  return os.path.getsize(self.path)
261
92
 
262
93
  def upload_google(self, refresh: bool = False) -> None:
94
+ import google.generativeai as genai
95
+
263
96
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
264
97
  google_info = genai.upload_file(self.path, mime_type=self.mime_type)
265
98
  self.external_locations["google"] = google_info.to_dict()
@@ -271,7 +104,21 @@ class FileStore(Scenario):
271
104
  return cls(**d)
272
105
 
273
106
  def __repr__(self):
274
- return f"FileStore(path='{self.path}')"
107
+ import reprlib
108
+
109
+ r = reprlib.Repr()
110
+ r.maxstring = 20 # Limit strings to 20 chars
111
+ r.maxother = 30 # Limit other types to 30 chars
112
+
113
+ params = ", ".join(f"{key}={r.repr(value)}" for key, value in self.data.items())
114
+ return f"{self.__class__.__name__}({params})"
115
+
116
def _repr_html_(self):
    """Return the parent class's HTML representation followed by a link.

    The link is whatever ``ConstructDownloadLink(self).html_create_link``
    returns for ``self.path``; the two parts are separated by ``<br>``.
    """
    base_markup = super()._repr_html_()

    from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink

    link_builder = ConstructDownloadLink(self)
    download_link = link_builder.html_create_link(self.path, style=None)
    return f"{base_markup}<br>{download_link}"
275
122
 
276
123
  def encode_file_to_base64_string(self, file_path: str):
277
124
  try:
@@ -296,9 +143,44 @@ class FileStore(Scenario):
296
143
 
297
144
  def open(self) -> "IO":
298
145
  if self.binary:
299
- return self.base64_to_file(self["base64_string"], is_binary=True)
146
+ return self.base64_to_file(self.base64_string, is_binary=True)
300
147
  else:
301
- return self.base64_to_text_file(self["base64_string"])
148
+ return self.base64_to_text_file(self.base64_string)
149
+
150
def write(self, filename: Optional[str] = None) -> str:
    """
    Write the file content to disk, either to a specified filename or a temporary file.

    Args:
        filename (Optional[str]): The destination filename. If None, creates a temporary file
            whose suffix is "." followed by ``self.suffix``.

    Returns:
        str: The path to the written file.

    Raises:
        Exception: Any error raised while reading the content or writing the
            file is reported with ``print`` and then re-raised unchanged.
    """
    # Determine the mode based on binary flag
    mode = "wb" if self.binary else "w"

    # If no filename provided, create a temporary file
    if filename is None:
        from tempfile import NamedTemporaryFile

        # delete=False keeps the file on disk after the handle closes, so
        # its name can be reopened below.
        with NamedTemporaryFile(delete=False, suffix="." + self.suffix) as f:
            filename = f.name

    # Write the content using the appropriate mode
    try:
        with open(filename, mode) as f:
            content = self.open().read()
            # For text mode, ensure we're writing a string
            if not self.binary and isinstance(content, bytes):
                content = content.decode("utf-8")
            f.write(content)
        print(f"File written to {filename}")
    except Exception as e:
        print(f"Error writing file: {e}")
        raise

    # Return the path, as the signature and docstring promise.
    return filename
302
184
 
303
185
  @staticmethod
304
186
  def base64_to_text_file(base64_string) -> "IO":
@@ -327,6 +209,15 @@ class FileStore(Scenario):
327
209
  # Create a StringIO object for text data
328
210
  return io.StringIO(text_data)
329
211
 
212
@property
def text(self):
    """Return the decoded text of a non-binary file.

    For a binary file a warning is issued instead and the property
    evaluates to ``None``.
    """
    if not self.binary:
        text_stream = self.base64_to_text_file(self.base64_string)
        return text_stream.read()

    import warnings

    warnings.warn("This is a binary file.")
+
330
221
  def to_tempfile(self, suffix=None):
331
222
  if suffix is None:
332
223
  suffix = self.suffix
@@ -335,7 +226,7 @@ class FileStore(Scenario):
335
226
  self["base64_string"], is_binary=True
336
227
  )
337
228
  else:
338
- file_like_object = self.base64_to_text_file(self["base64_string"])
229
+ file_like_object = self.base64_to_text_file(self.base64_string)
339
230
 
340
231
  # Create a named temporary file
341
232
  mode = "wb" if self.binary else "w"
@@ -352,40 +243,23 @@ class FileStore(Scenario):
352
243
 
353
244
  return temp_file.name
354
245
 
355
- def view(self, max_size: int = 300) -> None:
356
- # with self.open() as f:
357
- if self.suffix == "csv":
358
- return view_csv(self.path)
359
-
360
- if self.suffix == "pdf":
361
- view_pdf(self.path)
362
-
363
- if self.suffix == "html":
364
- view_html(self.path)
365
-
366
- if self.suffix == "png" or self.suffix == "jpg" or self.suffix == "jpeg":
367
- if is_notebook():
368
- from IPython.display import Image
369
- from PIL import Image as PILImage
370
-
371
- if max_size:
372
- # Open the image using Pillow
373
- with PILImage.open(self.path) as img:
374
- # Get original width and height
375
- original_width, original_height = img.size
246
+ def view(self) -> None:
247
+ handler = FileMethods.get_handler(self.suffix)
248
+ if handler:
249
+ handler(self.path).view()
250
+ else:
251
+ print(f"Viewing of {self.suffix} files is not supported.")
376
252
 
377
- # Calculate the scaling factor
378
- scale = min(
379
- max_size / original_width, max_size / original_height
380
- )
253
+ def extract_text(self) -> str:
254
+ handler = FileMethods.get_handler(self.suffix)
255
+ if handler and hasattr(handler, "extract_text"):
256
+ return handler(self.path).extract_text()
381
257
 
382
- # Calculate new dimensions
383
- new_width = int(original_width * scale)
384
- new_height = int(original_height * scale)
258
+ if not self.binary:
259
+ return self.text
385
260
 
386
- return Image(self.path, width=new_width, height=new_height)
387
- else:
388
- return Image(self.path)
261
+ return None
262
+ # raise TypeError("No text method found for this file type.")
389
263
 
390
264
  def push(
391
265
  self, description: Optional[str] = None, visibility: str = "unlisted"
@@ -423,6 +297,8 @@ class FileStore(Scenario):
423
297
  :param download_path: The path to save the downloaded file.
424
298
  :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
425
299
  """
300
+ import requests
301
+ from urllib.parse import urlparse
426
302
 
427
303
  response = requests.get(url, stream=True)
428
304
  response.raise_for_status() # Raises an HTTPError for bad responses
@@ -446,6 +322,11 @@ class FileStore(Scenario):
446
322
  # Create and return a new File instance
447
323
  return cls(download_path, mime_type=mime_type)
448
324
 
325
def create_link(self, custom_filename=None, style=None):
    """Delegate to ``ConstructDownloadLink(self).create_link``.

    ``custom_filename`` and ``style`` are passed through positionally and
    the helper's result is returned unchanged.
    """
    from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink

    link_builder = ConstructDownloadLink(self)
    return link_builder.create_link(custom_filename, style)
329
+
449
330
 
450
331
  class CSVFileStore(FileStore):
451
332
  @classmethod
@@ -606,27 +487,25 @@ class HTMLFileStore(FileStore):
606
487
 
607
488
 
608
489
  if __name__ == "__main__":
609
- # file_path = "../conjure/examples/Ex11-2.sav"
610
- # fs = FileStore(file_path)
611
- # info = fs.push()
612
- # print(info)
490
+ import doctest
613
491
 
614
- # fs = CSVFileStore.example()
615
- # fs.to_tempfile()
616
- # print(fs.view())
492
+ doctest.testmod()
617
493
 
618
- # fs = PDFFileStore.example()
494
+ # fs = FileStore.example("pdf")
619
495
  # fs.view()
620
496
 
621
- # fs = PDFFileStore("paper.pdf")
622
- # fs.view()
623
- # from edsl import Conjure
624
- pass
625
- # fs = PNGFileStore("logo.png")
626
- # fs.view()
627
- # fs.upload_google()
497
+ formats = FileMethods.supported_file_types()
498
+ for file_type in formats:
499
+ print("Now testinging", file_type)
500
+ fs = FileStore.example(file_type)
501
+ fs.view()
502
+ input("Press Enter to continue...")
503
+
504
+ # pdf_example.view()
505
+ # FileStore(pdf_example).view()
506
+
507
+ # pdf_methods = methods.get("pdf")
508
+ # file = pdf_methods().example()
509
+ # pdf_methods(file).view()
628
510
 
629
- # c = Conjure(datafile_name=fs.to_tempfile())
630
- # f = PDFFileStore("paper.pdf")
631
- # print(f.to_tempfile())
632
- # f.push()
511
+ # print(FileMethods._handlers)