ara-cli 0.1.9.69__py3-none-any.whl → 0.1.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ara-cli might be problematic. Click here for more details.

Files changed (150) hide show
  1. ara_cli/__init__.py +18 -2
  2. ara_cli/__main__.py +248 -62
  3. ara_cli/ara_command_action.py +155 -86
  4. ara_cli/ara_config.py +226 -80
  5. ara_cli/ara_subcommands/__init__.py +0 -0
  6. ara_cli/ara_subcommands/autofix.py +26 -0
  7. ara_cli/ara_subcommands/chat.py +27 -0
  8. ara_cli/ara_subcommands/classifier_directory.py +16 -0
  9. ara_cli/ara_subcommands/common.py +100 -0
  10. ara_cli/ara_subcommands/create.py +75 -0
  11. ara_cli/ara_subcommands/delete.py +22 -0
  12. ara_cli/ara_subcommands/extract.py +22 -0
  13. ara_cli/ara_subcommands/fetch_templates.py +14 -0
  14. ara_cli/ara_subcommands/list.py +65 -0
  15. ara_cli/ara_subcommands/list_tags.py +25 -0
  16. ara_cli/ara_subcommands/load.py +48 -0
  17. ara_cli/ara_subcommands/prompt.py +136 -0
  18. ara_cli/ara_subcommands/read.py +47 -0
  19. ara_cli/ara_subcommands/read_status.py +20 -0
  20. ara_cli/ara_subcommands/read_user.py +20 -0
  21. ara_cli/ara_subcommands/reconnect.py +27 -0
  22. ara_cli/ara_subcommands/rename.py +22 -0
  23. ara_cli/ara_subcommands/scan.py +14 -0
  24. ara_cli/ara_subcommands/set_status.py +22 -0
  25. ara_cli/ara_subcommands/set_user.py +22 -0
  26. ara_cli/ara_subcommands/template.py +16 -0
  27. ara_cli/artefact_autofix.py +649 -68
  28. ara_cli/artefact_creator.py +8 -11
  29. ara_cli/artefact_deleter.py +2 -4
  30. ara_cli/artefact_fuzzy_search.py +22 -10
  31. ara_cli/artefact_link_updater.py +4 -4
  32. ara_cli/artefact_lister.py +29 -55
  33. ara_cli/artefact_models/artefact_data_retrieval.py +23 -0
  34. ara_cli/artefact_models/artefact_load.py +11 -3
  35. ara_cli/artefact_models/artefact_model.py +146 -39
  36. ara_cli/artefact_models/artefact_templates.py +70 -44
  37. ara_cli/artefact_models/businessgoal_artefact_model.py +23 -25
  38. ara_cli/artefact_models/epic_artefact_model.py +34 -26
  39. ara_cli/artefact_models/feature_artefact_model.py +203 -64
  40. ara_cli/artefact_models/keyfeature_artefact_model.py +21 -24
  41. ara_cli/artefact_models/serialize_helper.py +1 -1
  42. ara_cli/artefact_models/task_artefact_model.py +83 -15
  43. ara_cli/artefact_models/userstory_artefact_model.py +37 -27
  44. ara_cli/artefact_models/vision_artefact_model.py +23 -42
  45. ara_cli/artefact_reader.py +92 -91
  46. ara_cli/artefact_renamer.py +8 -4
  47. ara_cli/artefact_scan.py +66 -3
  48. ara_cli/chat.py +622 -162
  49. ara_cli/chat_agent/__init__.py +0 -0
  50. ara_cli/chat_agent/agent_communicator.py +62 -0
  51. ara_cli/chat_agent/agent_process_manager.py +211 -0
  52. ara_cli/chat_agent/agent_status_manager.py +73 -0
  53. ara_cli/chat_agent/agent_workspace_manager.py +76 -0
  54. ara_cli/commands/__init__.py +0 -0
  55. ara_cli/commands/command.py +7 -0
  56. ara_cli/commands/extract_command.py +15 -0
  57. ara_cli/commands/load_command.py +65 -0
  58. ara_cli/commands/load_image_command.py +34 -0
  59. ara_cli/commands/read_command.py +117 -0
  60. ara_cli/completers.py +144 -0
  61. ara_cli/directory_navigator.py +37 -4
  62. ara_cli/error_handler.py +134 -0
  63. ara_cli/file_classifier.py +6 -5
  64. ara_cli/file_lister.py +1 -1
  65. ara_cli/file_loaders/__init__.py +0 -0
  66. ara_cli/file_loaders/binary_file_loader.py +33 -0
  67. ara_cli/file_loaders/document_file_loader.py +34 -0
  68. ara_cli/file_loaders/document_reader.py +245 -0
  69. ara_cli/file_loaders/document_readers.py +233 -0
  70. ara_cli/file_loaders/file_loader.py +50 -0
  71. ara_cli/file_loaders/file_loaders.py +123 -0
  72. ara_cli/file_loaders/image_processor.py +89 -0
  73. ara_cli/file_loaders/markdown_reader.py +75 -0
  74. ara_cli/file_loaders/text_file_loader.py +187 -0
  75. ara_cli/global_file_lister.py +51 -0
  76. ara_cli/list_filter.py +1 -1
  77. ara_cli/output_suppressor.py +1 -1
  78. ara_cli/prompt_extractor.py +215 -88
  79. ara_cli/prompt_handler.py +521 -134
  80. ara_cli/prompt_rag.py +2 -2
  81. ara_cli/tag_extractor.py +83 -38
  82. ara_cli/template_loader.py +245 -0
  83. ara_cli/template_manager.py +18 -13
  84. ara_cli/templates/prompt-modules/commands/empty.commands.md +2 -12
  85. ara_cli/templates/prompt-modules/commands/extract_general.commands.md +12 -0
  86. ara_cli/templates/prompt-modules/commands/extract_markdown.commands.md +11 -0
  87. ara_cli/templates/prompt-modules/commands/extract_python.commands.md +13 -0
  88. ara_cli/templates/prompt-modules/commands/feature_add_or_modifiy_specified_behavior.commands.md +36 -0
  89. ara_cli/templates/prompt-modules/commands/feature_generate_initial_specified_bevahior.commands.md +53 -0
  90. ara_cli/templates/prompt-modules/commands/prompt_template_tech_stack_transformer.commands.md +95 -0
  91. ara_cli/templates/prompt-modules/commands/python_bug_fixing_code.commands.md +34 -0
  92. ara_cli/templates/prompt-modules/commands/python_generate_code.commands.md +27 -0
  93. ara_cli/templates/prompt-modules/commands/python_refactoring_code.commands.md +39 -0
  94. ara_cli/templates/prompt-modules/commands/python_step_definitions_generation_and_fixing.commands.md +40 -0
  95. ara_cli/templates/prompt-modules/commands/python_unittest_generation_and_fixing.commands.md +48 -0
  96. ara_cli/update_config_prompt.py +9 -3
  97. ara_cli/version.py +1 -1
  98. ara_cli-0.1.10.8.dist-info/METADATA +241 -0
  99. ara_cli-0.1.10.8.dist-info/RECORD +193 -0
  100. tests/test_ara_command_action.py +73 -59
  101. tests/test_ara_config.py +341 -36
  102. tests/test_artefact_autofix.py +1060 -0
  103. tests/test_artefact_link_updater.py +3 -3
  104. tests/test_artefact_lister.py +52 -132
  105. tests/test_artefact_renamer.py +2 -2
  106. tests/test_artefact_scan.py +327 -33
  107. tests/test_chat.py +2063 -498
  108. tests/test_file_classifier.py +24 -1
  109. tests/test_file_creator.py +3 -5
  110. tests/test_file_lister.py +1 -1
  111. tests/test_global_file_lister.py +131 -0
  112. tests/test_list_filter.py +2 -2
  113. tests/test_prompt_handler.py +746 -0
  114. tests/test_tag_extractor.py +19 -13
  115. tests/test_template_loader.py +192 -0
  116. tests/test_template_manager.py +5 -4
  117. tests/test_update_config_prompt.py +2 -2
  118. ara_cli/ara_command_parser.py +0 -327
  119. ara_cli/templates/prompt-modules/blueprints/complete_pytest_unittest.blueprint.md +0 -27
  120. ara_cli/templates/prompt-modules/blueprints/task_todo_list_implement_feature_BDD_way.blueprint.md +0 -30
  121. ara_cli/templates/prompt-modules/commands/artefact_classification.commands.md +0 -9
  122. ara_cli/templates/prompt-modules/commands/artefact_extension.commands.md +0 -17
  123. ara_cli/templates/prompt-modules/commands/artefact_formulation.commands.md +0 -14
  124. ara_cli/templates/prompt-modules/commands/behave_step_generation.commands.md +0 -102
  125. ara_cli/templates/prompt-modules/commands/code_generation_complex.commands.md +0 -20
  126. ara_cli/templates/prompt-modules/commands/code_generation_simple.commands.md +0 -13
  127. ara_cli/templates/prompt-modules/commands/error_fixing.commands.md +0 -20
  128. ara_cli/templates/prompt-modules/commands/feature_file_update.commands.md +0 -18
  129. ara_cli/templates/prompt-modules/commands/feature_formulation.commands.md +0 -43
  130. ara_cli/templates/prompt-modules/commands/js_code_generation_simple.commands.md +0 -13
  131. ara_cli/templates/prompt-modules/commands/refactoring.commands.md +0 -15
  132. ara_cli/templates/prompt-modules/commands/refactoring_analysis.commands.md +0 -9
  133. ara_cli/templates/prompt-modules/commands/reverse_engineer_feature_file.commands.md +0 -15
  134. ara_cli/templates/prompt-modules/commands/reverse_engineer_program_flow.commands.md +0 -19
  135. ara_cli/templates/template.businessgoal +0 -10
  136. ara_cli/templates/template.capability +0 -10
  137. ara_cli/templates/template.epic +0 -15
  138. ara_cli/templates/template.example +0 -6
  139. ara_cli/templates/template.feature +0 -26
  140. ara_cli/templates/template.issue +0 -14
  141. ara_cli/templates/template.keyfeature +0 -15
  142. ara_cli/templates/template.task +0 -6
  143. ara_cli/templates/template.userstory +0 -17
  144. ara_cli/templates/template.vision +0 -14
  145. ara_cli-0.1.9.69.dist-info/METADATA +0 -16
  146. ara_cli-0.1.9.69.dist-info/RECORD +0 -158
  147. tests/test_ara_autofix.py +0 -219
  148. {ara_cli-0.1.9.69.dist-info → ara_cli-0.1.10.8.dist-info}/WHEEL +0 -0
  149. {ara_cli-0.1.9.69.dist-info → ara_cli-0.1.10.8.dist-info}/entry_points.txt +0 -0
  150. {ara_cli-0.1.9.69.dist-info → ara_cli-0.1.10.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,245 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from typing import Tuple, Optional
4
+
5
+
6
class DocumentReader(ABC):
    """Abstract base class for document readers.

    A reader is bound to a single file path. ``base_dir`` is the directory
    component of that path; extracted image data is stored beneath it and
    image paths are reported relative to it.
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.base_dir = os.path.dirname(file_path)

    @abstractmethod
    def read(self, extract_images: bool = False) -> str:
        """Read document and optionally extract images"""
        pass

    def create_image_data_dir(self, extension_suffix: str) -> str:
        """
        Create data directory for images with file extension suffix to avoid conflicts.

        The directory is ``<base_dir>/<file stem>_<extension_suffix>.data/images``.
        Calling this again for the same file is harmless.

        Returns:
            str: Path to images directory
        """
        stem = os.path.splitext(os.path.basename(self.file_path))[0]
        data_dir = os.path.join(
            self.base_dir, f"{stem}_{extension_suffix}.data")
        images_dir = os.path.join(data_dir, "images")
        # exist_ok replaces the former exists()-then-makedirs() pair, which
        # could fail if the directory appeared between the two calls.
        os.makedirs(images_dir, exist_ok=True)
        return images_dir

    def save_and_describe_image(
        self,
        image_data: bytes,
        image_format: str,
        save_dir: str,
        image_counter: int
    ) -> Tuple[str, str]:
        """
        Save image data and get its description from LLM.

        The image is written to ``<save_dir>/<image_counter>.<image_format>``.

        Returns:
            tuple: (relative_image_path, description), where the path is
            relative to ``base_dir``
        """
        # Imported here rather than at module level, as in the original.
        from ara_cli.prompt_handler import describe_image

        # Save image
        image_filename = f"{image_counter}.{image_format}"
        image_path = os.path.join(save_dir, image_filename)

        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        # Get image description from LLM
        description = describe_image(image_path)

        # Get relative path
        relative_image_path = os.path.relpath(image_path, self.base_dir)

        return relative_image_path, description
62
+
63
+
64
class DocxReader(DocumentReader):
    """Reader for DOCX files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the paragraph text of the document.

        With ``extract_images`` set, every embedded image is also saved and
        described, and an "Extracted Images" section is appended to the text.
        Images that are only linked, or that Pillow cannot identify, are
        skipped instead of aborting the whole read.
        """
        import docx

        doc = docx.Document(self.file_path)
        text_content = '\n'.join(para.text for para in doc.paragraphs)

        if not extract_images:
            return text_content

        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("docx")

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        for rel in doc.part.rels.values():
            if "image" not in rel.reltype:
                continue

            # Linked images have no embedded part; python-docx raises
            # ValueError from target_part for external relationships.
            if rel.is_external:
                continue

            image_data = rel.target_part.blob

            # Determine image format
            try:
                with Image.open(io.BytesIO(image_data)) as image:
                    image_format = image.format.lower()
            except OSError as e:
                # Pillow's UnidentifiedImageError is an OSError subclass.
                print(f"Warning: Could not extract image from DOCX: {e}")
                continue

            # Save and describe image
            relative_path, description = self.save_and_describe_image(
                image_data, image_format, images_dir, image_counter
            )

            # Add formatted description to list
            image_descriptions.append(
                f"\nImage: {relative_path}\n[{description}]\n")

            image_counter += 1

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
111
+
112
+
113
class PdfReader(DocumentReader):
    """Reader for PDF files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the PDF converted to markdown by ``pymupdf4llm``.

        With ``extract_images`` set, the images referenced by the pages are
        also saved and described, and an "Extracted Images" section is
        appended. An image referenced from several pages is processed once.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import fitz  # PyMuPDF

        # Create images directory
        images_dir = self.create_image_data_dir("pdf")

        # Extract text without images first
        text_content = pymupdf4llm.to_markdown(
            self.file_path, write_images=False)

        # Extract and process images
        doc = fitz.open(self.file_path)
        image_descriptions = []
        image_counter = 1
        # The same xref shows up on every page that uses the image; track
        # them so each image is saved and sent for description only once.
        seen_xrefs = set()

        try:
            for page in doc:
                for img in page.get_images():
                    # Extract image
                    xref = img[0]
                    if xref in seen_xrefs:
                        continue
                    seen_xrefs.add(xref)

                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_bytes, image_ext, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n")

                    image_counter += 1
        finally:
            # Close the document even when extraction or description fails.
            doc.close()

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
165
+
166
+
167
class OdtReader(DocumentReader):
    """Reader for ODT files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the document converted to markdown by ``pymupdf4llm``.

        With ``extract_images`` set, the files stored under ``Pictures/`` in
        the ODT archive are also saved and described, and an "Extracted
        Images" section is appended. Image extraction is best-effort: a
        failure prints a warning and the text is still returned.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import zipfile
        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("odt")

        # Get text content
        text_content = pymupdf4llm.to_markdown(
            self.file_path, write_images=False)

        # Extract and process images from ODT
        image_descriptions = []
        image_counter = 1

        try:
            with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
                # List all files in the Pictures directory. Names ending in
                # "/" are directory entries, not images, so leave them out.
                picture_files = [
                    f for f in odt_zip.namelist()
                    if f.startswith('Pictures/') and not f.endswith('/')
                ]

                for picture_file in picture_files:
                    # Extract image data
                    image_data = odt_zip.read(picture_file)

                    # Determine image format; one unreadable picture must
                    # not discard the remaining ones.
                    try:
                        with Image.open(io.BytesIO(image_data)) as image:
                            image_format = image.format.lower()
                    except OSError as e:
                        print(
                            f"Warning: Could not extract image {picture_file} from ODT: {e}")
                        continue

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_data, image_format, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n")

                    image_counter += 1
        except Exception as e:
            # Deliberately broad: image extraction is optional.
            print(f"Warning: Could not extract images from ODT: {e}")

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
224
+
225
+
226
class DocumentReaderFactory:
    """Factory that picks a document reader from a file's extension."""

    @staticmethod
    def create_reader(file_path: str) -> Optional[DocumentReader]:
        """Return a reader for ``file_path``, or ``None`` for an unknown extension.

        The extension is compared case-insensitively.
        """
        readers_by_extension = {
            '.docx': DocxReader,
            '.pdf': PdfReader,
            '.odt': OdtReader
        }

        extension = os.path.splitext(file_path)[1].lower()
        reader_class = readers_by_extension.get(extension)

        return reader_class(file_path) if reader_class else None
@@ -0,0 +1,233 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from typing import Tuple, Optional
4
+
5
+
6
class DocumentReader(ABC):
    """Abstract base class for document readers.

    A reader is bound to a single file path. ``base_dir`` is the directory
    component of that path; extracted image data is stored beneath it and
    image paths are reported relative to it.
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.base_dir = os.path.dirname(file_path)

    @abstractmethod
    def read(self, extract_images: bool = False) -> str:
        """Read document and optionally extract images"""
        pass

    def create_image_data_dir(self, extension_suffix: str) -> str:
        """
        Create data directory for images with file extension suffix to avoid conflicts.

        The directory is ``<base_dir>/<file stem>_<extension_suffix>.data/images``.
        Calling this again for the same file is harmless.

        Returns:
            str: Path to images directory
        """
        stem = os.path.splitext(os.path.basename(self.file_path))[0]
        data_dir = os.path.join(self.base_dir, f"{stem}_{extension_suffix}.data")
        images_dir = os.path.join(data_dir, "images")
        # exist_ok replaces the former exists()-then-makedirs() pair, which
        # could fail if the directory appeared between the two calls.
        os.makedirs(images_dir, exist_ok=True)
        return images_dir

    def save_and_describe_image(self, image_data: bytes, image_format: str,
                                save_dir: str, image_counter: int) -> Tuple[str, str]:
        """
        Save image data and get its description from LLM.

        The image is written to ``<save_dir>/<image_counter>.<image_format>``.

        Returns:
            tuple: (relative_image_path, description), where the path is
            relative to ``base_dir``
        """
        # Imported here rather than at module level, as in the original.
        from ara_cli.prompt_handler import describe_image

        # Save image
        image_filename = f"{image_counter}.{image_format}"
        image_path = os.path.join(save_dir, image_filename)

        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        # Get image description from LLM
        description = describe_image(image_path)

        # Get relative path
        relative_image_path = os.path.relpath(image_path, self.base_dir)

        return relative_image_path, description
56
+
57
+
58
class DocxReader(DocumentReader):
    """Reader for DOCX files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the paragraph text of the document.

        With ``extract_images`` set, every embedded image is also saved and
        described, and an "Extracted Images" section is appended to the text.
        Images that are only linked, or that Pillow cannot identify, are
        skipped instead of aborting the whole read.
        """
        import docx

        doc = docx.Document(self.file_path)
        text_content = '\n'.join(para.text for para in doc.paragraphs)

        if not extract_images:
            return text_content

        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("docx")

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        for rel in doc.part.rels.values():
            if "image" not in rel.reltype:
                continue

            # Linked images have no embedded part; python-docx raises
            # ValueError from target_part for external relationships.
            if rel.is_external:
                continue

            image_data = rel.target_part.blob

            # Determine image format
            try:
                with Image.open(io.BytesIO(image_data)) as image:
                    image_format = image.format.lower()
            except OSError as e:
                # Pillow's UnidentifiedImageError is an OSError subclass.
                print(f"Warning: Could not extract image from DOCX: {e}")
                continue

            # Save and describe image
            relative_path, description = self.save_and_describe_image(
                image_data, image_format, images_dir, image_counter
            )

            # Add formatted description to list
            image_descriptions.append(f"\nImage: {relative_path}\n[{description}]\n")

            image_counter += 1

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
104
+
105
+
106
class PdfReader(DocumentReader):
    """Reader for PDF files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the PDF converted to markdown by ``pymupdf4llm``.

        With ``extract_images`` set, the images referenced by the pages are
        also saved and described, and an "Extracted Images" section is
        appended. An image referenced from several pages is processed once.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import fitz  # PyMuPDF

        # Create images directory
        images_dir = self.create_image_data_dir("pdf")

        # Extract text without images first
        text_content = pymupdf4llm.to_markdown(self.file_path, write_images=False)

        # Extract and process images
        doc = fitz.open(self.file_path)
        image_descriptions = []
        image_counter = 1
        # The same xref shows up on every page that uses the image; track
        # them so each image is saved and sent for description only once.
        seen_xrefs = set()

        try:
            for page in doc:
                for img in page.get_images():
                    # Extract image
                    xref = img[0]
                    if xref in seen_xrefs:
                        continue
                    seen_xrefs.add(xref)

                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_bytes, image_ext, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(f"\nImage: {relative_path}\n[{description}]\n")

                    image_counter += 1
        finally:
            # Close the document even when extraction or description fails.
            doc.close()

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
156
+
157
+
158
class OdtReader(DocumentReader):
    """Reader for ODT files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the document converted to markdown by ``pymupdf4llm``.

        With ``extract_images`` set, the files stored under ``Pictures/`` in
        the ODT archive are also saved and described, and an "Extracted
        Images" section is appended. Image extraction is best-effort: a
        failure prints a warning and the text is still returned.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import zipfile
        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("odt")

        # Get text content
        text_content = pymupdf4llm.to_markdown(self.file_path, write_images=False)

        # Extract and process images from ODT
        image_descriptions = []
        image_counter = 1

        try:
            with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
                # List all files in the Pictures directory. Names ending in
                # "/" are directory entries, not images, so leave them out.
                picture_files = [
                    f for f in odt_zip.namelist()
                    if f.startswith('Pictures/') and not f.endswith('/')
                ]

                for picture_file in picture_files:
                    # Extract image data
                    image_data = odt_zip.read(picture_file)

                    # Determine image format; one unreadable picture must
                    # not discard the remaining ones.
                    try:
                        with Image.open(io.BytesIO(image_data)) as image:
                            image_format = image.format.lower()
                    except OSError as e:
                        print(f"Warning: Could not extract image {picture_file} from ODT: {e}")
                        continue

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_data, image_format, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(f"\nImage: {relative_path}\n[{description}]\n")

                    image_counter += 1
        except Exception as e:
            # Deliberately broad: image extraction is optional.
            print(f"Warning: Could not extract images from ODT: {e}")

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
212
+
213
+
214
class DocumentReaderFactory:
    """Maps a file extension to the reader class that handles it."""

    @staticmethod
    def create_reader(file_path: str) -> Optional[DocumentReader]:
        """Build the reader matching the extension of ``file_path``.

        Matching ignores case. ``None`` is returned when no reader is
        registered for the extension.
        """
        suffix = os.path.splitext(file_path)[1]
        suffix = suffix.lower()

        registry = {
            '.docx': DocxReader,
            '.pdf': PdfReader,
            '.odt': OdtReader
        }

        chosen = registry.get(suffix)
        if not chosen:
            return None

        return chosen(file_path)
@@ -0,0 +1,50 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+
5
class FileLoader(ABC):
    """Common base for the loaders that put file content into a chat.

    The chat object passed to the constructor is kept as ``self.chat``.
    """

    def __init__(self, chat_instance):
        self.chat = chat_instance

    @abstractmethod
    def load(self, file_path: str, **kwargs) -> bool:
        """Load ``file_path``; each subclass supplies the actual behaviour."""

    def add_prompt_tag_if_needed(self):
        """Delegate to the chat, passing the chat's own ``chat_name``."""
        chat = self.chat
        chat.add_prompt_tag_if_needed(chat.chat_name)
19
+
20
+
21
class FileLoaderFactory:
    """Chooses the loader class for a file from the end of its name."""
    BINARY_TYPE_MAPPING = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
    }

    DOCUMENT_TYPE_EXTENSIONS = [".docx", ".doc", ".odt", ".pdf"]

    @staticmethod
    def create_loader(file_name: str, chat_instance) -> Optional[FileLoader]:
        """Return a loader bound to ``chat_instance`` for ``file_name``.

        The name is matched case-insensitively: binary image extensions are
        checked first, then document extensions, and anything else gets the
        text loader.
        """
        from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
        from ara_cli.file_loaders.text_file_loader import TextFileLoader
        from ara_cli.file_loaders.document_file_loader import DocumentFileLoader

        lowered_name = file_name.lower()

        # str.endswith accepts a tuple of candidate suffixes.
        binary_suffixes = tuple(FileLoaderFactory.BINARY_TYPE_MAPPING)
        if lowered_name.endswith(binary_suffixes):
            return BinaryFileLoader(chat_instance)

        document_suffixes = tuple(FileLoaderFactory.DOCUMENT_TYPE_EXTENSIONS)
        if lowered_name.endswith(document_suffixes):
            return DocumentFileLoader(chat_instance)

        # Everything else is treated as text.
        return TextFileLoader(chat_instance)
@@ -0,0 +1,123 @@
1
+ import os
2
+ import base64
3
+ from abc import ABC, abstractmethod
4
+ from typing import Optional
5
+ from ara_cli.file_loaders.markdown_reader import MarkdownReader
6
+ from ara_cli.file_loaders.document_readers import DocumentReaderFactory
7
+
8
+
9
class FileLoader(ABC):
    """Common base for the loaders that put file content into a chat.

    The chat object passed to the constructor is kept as ``self.chat``.
    """

    def __init__(self, chat_instance):
        self.chat = chat_instance

    @abstractmethod
    def load(self, file_path: str, **kwargs) -> bool:
        """Load ``file_path``; each subclass supplies the actual behaviour."""

    def add_prompt_tag_if_needed(self):
        """Delegate to the chat, passing the chat's own ``chat_name``."""
        chat = self.chat
        chat.add_prompt_tag_if_needed(chat.chat_name)
23
+
24
+
25
class TextFileLoader(FileLoader):
    """Appends the content of a text file to the chat file."""

    def load(self, file_path: str, prefix: str = "", suffix: str = "",
             block_delimiter: str = "", extract_images: bool = False) -> bool:
        """Read ``file_path`` and append it to ``self.chat.chat_name``.

        A ``.md`` file is read through ``MarkdownReader`` when
        ``extract_images`` is set; every other case reads the file as UTF-8
        with undecodable bytes replaced. A non-empty ``block_delimiter`` is
        placed on its own line before and after the content, and ``prefix``
        and ``suffix`` surround the result. Always returns ``True``.
        """
        is_markdown = file_path.lower().endswith('.md')

        if not (is_markdown and extract_images):
            with open(file_path, 'r', encoding='utf-8', errors="replace") as source:
                file_content = source.read()
        else:
            file_content = MarkdownReader(file_path).read(extract_images=True)

        if block_delimiter:
            file_content = f"{block_delimiter}\n{file_content}\n{block_delimiter}"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{file_content}{suffix}\n")

        return True
50
+
51
+
52
class BinaryFileLoader(FileLoader):
    """Appends a binary file (an image) to the chat file as a data URI."""

    def load(self, file_path: str, mime_type: str, prefix: str = "", suffix: str = "") -> bool:
        """Embed ``file_path`` in ``self.chat.chat_name`` as base64.

        The file is written as a markdown image whose alt text is the file's
        base name and whose target is a ``data:`` URI using ``mime_type``.
        ``prefix`` and ``suffix`` surround the image. Always returns ``True``.
        """
        with open(file_path, 'rb') as source:
            base64_image = base64.b64encode(source.read()).decode("utf-8")

        write_content = f"{prefix}![{os.path.basename(file_path)}](data:{mime_type};base64,{base64_image}){suffix}\n"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(write_content)

        return True
68
+
69
+
70
class DocumentFileLoader(FileLoader):
    """Appends the text of a document file to the chat file."""

    def load(self, file_path: str, prefix: str = "", suffix: str = "",
             block_delimiter: str = "```", extract_images: bool = False) -> bool:
        """Read ``file_path`` with a document reader and append the text.

        The reader comes from ``DocumentReaderFactory``. When none is
        available a message is printed and ``False`` is returned without
        touching the chat file. Otherwise the text, wrapped in
        ``block_delimiter`` lines if that is non-empty and surrounded by
        ``prefix`` and ``suffix``, is appended to ``self.chat.chat_name`` and
        ``True`` is returned.
        """
        reader = DocumentReaderFactory.create_reader(file_path)

        if reader:
            text_content = reader.read(extract_images=extract_images)

            if block_delimiter:
                text_content = f"{block_delimiter}\n{text_content}\n{block_delimiter}"

            with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
                chat_file.write(f"{prefix}{text_content}{suffix}\n")

            return True

        print("Unsupported document type.")
        return False
94
+
95
+
96
class FileLoaderFactory:
    """Chooses the loader class for a file from the end of its name."""

    BINARY_TYPE_MAPPING = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
    }

    DOCUMENT_TYPE_EXTENSIONS = [".docx", ".doc", ".odt", ".pdf"]

    @staticmethod
    def create_loader(file_name: str, chat_instance) -> Optional[FileLoader]:
        """Return a loader bound to ``chat_instance`` for ``file_name``.

        The name is matched case-insensitively: binary image extensions are
        checked first, then document extensions, and anything else gets the
        text loader.
        """
        lowered_name = file_name.lower()

        # str.endswith accepts a tuple of candidate suffixes.
        if lowered_name.endswith(tuple(FileLoaderFactory.BINARY_TYPE_MAPPING)):
            loader_class = BinaryFileLoader
        elif lowered_name.endswith(tuple(FileLoaderFactory.DOCUMENT_TYPE_EXTENSIONS)):
            loader_class = DocumentFileLoader
        else:
            # Everything else is treated as text.
            loader_class = TextFileLoader

        return loader_class(chat_instance)