ara-cli 0.1.9.93__py3-none-any.whl → 0.1.9.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ara-cli might be problematic. Click here for more details.

Files changed (35)
  1. ara_cli/__init__.py +15 -1
  2. ara_cli/ara_command_action.py +23 -43
  3. ara_cli/ara_command_parser.py +16 -1
  4. ara_cli/ara_config.py +17 -2
  5. ara_cli/artefact_autofix.py +40 -21
  6. ara_cli/artefact_creator.py +3 -1
  7. ara_cli/artefact_lister.py +29 -55
  8. ara_cli/artefact_models/artefact_data_retrieval.py +23 -0
  9. ara_cli/artefact_renamer.py +6 -2
  10. ara_cli/chat.py +80 -34
  11. ara_cli/commands/extract_command.py +4 -3
  12. ara_cli/commands/read_command.py +104 -0
  13. ara_cli/file_loaders/document_readers.py +233 -0
  14. ara_cli/file_loaders/file_loaders.py +123 -0
  15. ara_cli/file_loaders/image_processor.py +89 -0
  16. ara_cli/file_loaders/markdown_reader.py +75 -0
  17. ara_cli/file_loaders/text_file_loader.py +9 -11
  18. ara_cli/global_file_lister.py +61 -0
  19. ara_cli/prompt_extractor.py +21 -6
  20. ara_cli/prompt_handler.py +24 -4
  21. ara_cli/tag_extractor.py +21 -11
  22. ara_cli/template_manager.py +14 -4
  23. ara_cli/update_config_prompt.py +7 -1
  24. ara_cli/version.py +1 -1
  25. {ara_cli-0.1.9.93.dist-info → ara_cli-0.1.9.95.dist-info}/METADATA +18 -17
  26. {ara_cli-0.1.9.93.dist-info → ara_cli-0.1.9.95.dist-info}/RECORD +35 -27
  27. tests/test_ara_config.py +28 -0
  28. tests/test_artefact_lister.py +52 -132
  29. tests/test_chat.py +28 -40
  30. tests/test_global_file_lister.py +131 -0
  31. tests/test_prompt_handler.py +26 -1
  32. tests/test_template_manager.py +5 -4
  33. {ara_cli-0.1.9.93.dist-info → ara_cli-0.1.9.95.dist-info}/WHEEL +0 -0
  34. {ara_cli-0.1.9.93.dist-info → ara_cli-0.1.9.95.dist-info}/entry_points.txt +0 -0
  35. {ara_cli-0.1.9.93.dist-info → ara_cli-0.1.9.95.dist-info}/top_level.txt +0 -0
ara_cli/chat.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  import argparse
3
3
  import cmd2
4
4
  from ara_cli.prompt_handler import send_prompt
5
+ from ara_cli.file_loaders.markdown_reader import MarkdownReader
5
6
 
6
7
  from ara_cli.file_loaders.document_file_loader import DocumentFileLoader
7
8
  from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
@@ -9,13 +10,29 @@ from ara_cli.file_loaders.text_file_loader import TextFileLoader
9
10
 
10
11
 
11
12
  extract_parser = argparse.ArgumentParser()
12
- extract_parser.add_argument('-s', '--skip-queries', action='store_true', help='Force extraction')
13
+ extract_parser.add_argument('-f', '--force', action='store_true', help='Force extraction')
14
+ extract_parser.add_argument('-w','--write', action='store_true', help='Overwrite existing files without using LLM for merging.')
13
15
 
14
16
  load_parser = argparse.ArgumentParser()
15
17
  load_parser.add_argument('file_name', nargs='?', default='', help='File to load')
16
18
  load_parser.add_argument('--load-images', action='store_true', help='Extract and describe images from documents')
17
19
 
18
20
 
21
+ from ara_cli.file_loaders.document_file_loader import DocumentFileLoader
22
+ from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
23
+ from ara_cli.file_loaders.text_file_loader import TextFileLoader
24
+
25
+
26
+ extract_parser = argparse.ArgumentParser()
27
+ extract_parser.add_argument('-f', '--force', action='store_true', help='Force extraction')
28
+ extract_parser.add_argument('-w','--write', action='store_true', help='Overwrite existing files without using LLM for merging.')
29
+
30
+ load_parser = argparse.ArgumentParser()
31
+ load_parser.add_argument('file_name', nargs='?', default='', help='File to load')
32
+ load_parser.add_argument('--load-images', action='store_true', help='Extract and describe images from documents')
33
+
34
+
35
+
19
36
  class Chat(cmd2.Cmd):
20
37
  CATEGORY_CHAT_CONTROL = "Chat control commands"
21
38
  CATEGORY_LLM_CONTROL = "Language model controls"
@@ -813,51 +830,80 @@ Start chatting (type 'HELP'/'h' for available commands, 'QUIT'/'q' to exit chat
813
830
 
814
831
  command = ExtractCommand(
815
832
  file_name=self.chat_name,
816
- skip_queries=args.skip_queries,
833
+ force=args.force,
834
+ write=args.write,
817
835
  output=self.poutput,
818
836
  error_output=self.perror
819
837
  )
820
838
  command.execute()
821
839
 
840
def _find_givens_files(self, file_name: str) -> list[str]:
    """
    Find the givens files to be processed.

    - If file_name is provided, that path is resolved.
    - Otherwise, the default givens files are looked up.
    - If no defaults are found, the user is prompted for a path.

    A path is tried relative to the chat file's directory first and then
    exactly as given. Every returned path is normalized with
    ``os.path.abspath`` so the result no longer depends on whether
    ``self.chat_name`` itself was relative.

    Args:
        file_name: Path of a givens file, or an empty string to fall back
            to the defaults.

    Returns:
        A list of absolute file paths, or an empty list if none are found.
    """
    base_directory = os.path.dirname(self.chat_name)

    def resolve_path(name):
        """Return the absolute path of the first existing candidate, else None."""
        for candidate in (os.path.join(base_directory, name), name):
            if os.path.exists(candidate):
                return os.path.abspath(candidate)
        return None

    if file_name:
        path = resolve_path(file_name)
        if path:
            return [path]
        relative_path_for_error = os.path.join(base_directory, file_name)
        self.perror(f"No givens file found at {relative_path_for_error} or {file_name}")
        return []

    # If no file_name, check for defaults
    default_files_to_check = [
        os.path.join(base_directory, "prompt.data", "config.prompt_givens.md"),
        os.path.join(base_directory, "prompt.data", "config.prompt_global_givens.md"),
    ]
    existing_defaults = [
        os.path.abspath(f) for f in default_files_to_check if os.path.exists(f)
    ]
    if existing_defaults:
        return existing_defaults

    # No defaults found, prompt user
    user_input = input("Please specify a givens file: ")
    if not user_input:
        self.poutput("Aborting.")
        return []

    path = resolve_path(user_input)
    if path:
        return [path]
    self.perror(f"No givens file found at {user_input}. Aborting.")
    return []
887
+
822
888
  @cmd2.with_category(CATEGORY_CHAT_CONTROL)
823
889
  def do_LOAD_GIVENS(self, file_name):
824
- """Load all files listed in a ./prompt.data/config.prompt_givens.md"""
825
- from ara_cli.directory_navigator import DirectoryNavigator
890
+ """Load all files listed in a ./prompt.data/config.prompt_givens.md and ./prompt.data/config.prompt_global_givens.md"""
826
891
  from ara_cli.prompt_handler import load_givens
827
892
 
828
- base_directory = os.path.dirname(self.chat_name)
829
-
830
- if file_name == "":
831
- file_name = f"{base_directory}/prompt.data/config.prompt_givens.md"
832
-
833
- # Check the relative path first
834
- relative_givens_path = os.path.join(base_directory, file_name)
835
- if os.path.exists(relative_givens_path):
836
- givens_path = relative_givens_path
837
- elif os.path.exists(file_name): # Check the absolute path
838
- givens_path = file_name
839
- else:
840
- print(f"No givens file found at {relative_givens_path} or {file_name}")
841
- user_input = input("Please specify a givens file: ")
842
- if os.path.exists(os.path.join(base_directory, user_input)):
843
- givens_path = os.path.join(base_directory, user_input)
844
- elif os.path.exists(user_input):
845
- givens_path = user_input
846
- else:
847
- print(f"No givens file found at {user_input}. Aborting.")
848
- return
893
+ givens_files_to_process = self._find_givens_files(file_name)
894
+ if not givens_files_to_process:
895
+ self.poutput("No givens files to load.")
896
+ return
849
897
 
850
- cwd = os.getcwd()
851
- navigator = DirectoryNavigator()
852
- navigator.navigate_to_target()
853
- os.chdir('..')
854
- content, image_data = load_givens(givens_path)
855
- os.chdir(cwd)
898
+ for givens_path in givens_files_to_process:
899
+ # The givens_path is absolute, and load_givens reconstructs absolute paths
900
+ # from the markdown file. No directory change is needed.
901
+ content, _ = load_givens(givens_path)
856
902
 
857
- with open(self.chat_name, 'a', encoding='utf-8') as chat_file:
858
- chat_file.write(content)
903
+ with open(self.chat_name, 'a', encoding='utf-8') as chat_file:
904
+ chat_file.write(content)
859
905
 
860
- print(f"Loaded files listed and marked in {givens_path}")
906
+ self.poutput(f"Loaded files listed and marked in {givens_path}")
861
907
 
862
908
  @cmd2.with_category(CATEGORY_CHAT_CONTROL)
863
909
  def do_SEND(self, _):
@@ -3,15 +3,16 @@ from ara_cli.prompt_extractor import extract_responses
3
3
  import os
4
4
 
5
5
  class ExtractCommand(Command):
6
- def __init__(self, file_name, skip_queries=False, output=None, error_output=None):
6
+ def __init__(self, file_name, force=False, write=False, output=None, error_output=None):
7
7
  self.file_name = file_name
8
- self.skip_queries = skip_queries
8
+ self.force = force
9
+ self.write = write
9
10
  self.output = output # Callable for standard output (optional)
10
11
  self.error_output = error_output # Callable for errors (optional)
11
12
 
12
13
  def execute(self, *args, **kwargs):
13
14
  try:
14
- extract_responses(self.file_name, True, skip_queries=self.skip_queries)
15
+ extract_responses(self.file_name, True, force=self.force, write=self.write)
15
16
  if self.output:
16
17
  self.output("End of extraction")
17
18
  except Exception as e:
@@ -0,0 +1,104 @@
1
+ from ara_cli.commands.command import Command
2
+ from ara_cli.artefact_reader import ArtefactReader
3
+ from ara_cli.file_classifier import FileClassifier
4
+ from ara_cli.list_filter import ListFilter, filter_list
5
+ from ara_cli.artefact_models.artefact_data_retrieval import (
6
+ artefact_content_retrieval,
7
+ artefact_path_retrieval,
8
+ artefact_tags_retrieval
9
+ )
10
+ from ara_cli.artefact_fuzzy_search import suggest_close_name_matches
11
+ import os
12
+
13
+
14
class ReadCommand(Command):
    """Command that prints a single artefact, its branch, or its children."""

    def __init__(
        self,
        classifier: str,
        artefact_name: str,
        read_mode: str = "default",
        list_filter: ListFilter = None,
        output=None
    ):
        """
        Store the read parameters.

        Args:
            classifier: Key used to look up artefacts of one kind.
            artefact_name: Title of the artefact to read.
            read_mode: "branch", "children", or anything else for the
                default single-artefact mode.
            list_filter: Filter applied before printing; a fresh
                ``ListFilter()`` is used when omitted.
            output: Callable used for error messages; defaults to ``print``.
        """
        self.classifier = classifier
        self.artefact_name = artefact_name
        self.read_mode = read_mode
        self.list_filter = list_filter or ListFilter()
        self.output = output or print

    def execute(self) -> bool:
        """Run the read command; return True on success, False otherwise."""
        file_classifier = FileClassifier(os)
        classified_artefacts = ArtefactReader.read_artefacts()
        candidates = classified_artefacts.get(self.classifier, [])
        known_names = [candidate.title for candidate in candidates]

        # Unknown name: offer close matches and report failure.
        if self.artefact_name not in known_names:
            suggest_close_name_matches(self.artefact_name, known_names)
            return False

        target_artefact = next(
            candidate for candidate in candidates
            if candidate.title == self.artefact_name
        )

        selection = {self.classifier: []}

        try:
            if self.read_mode == "branch":
                self._handle_branch_mode(classified_artefacts, selection)
            elif self.read_mode == "children":
                selection = self._handle_children_mode(classified_artefacts)
            else:
                self._handle_default_mode(target_artefact, selection)

            # Filter the collected artefacts, then print them with content.
            filtered_artefacts = self._apply_filtering(selection)
            file_classifier.print_classified_files(
                filtered_artefacts, print_content=True
            )
            return True
        except Exception as e:
            self.output(f"Error reading artefact: {e}")
            return False

    def _handle_branch_mode(self, classified_artefacts, artefacts_by_classifier):
        """Fill ``artefacts_by_classifier`` via ``step_through_value_chain``."""
        ArtefactReader.step_through_value_chain(
            artefact_name=self.artefact_name,
            classifier=self.classifier,
            artefacts_by_classifier=artefacts_by_classifier,
            classified_artefacts=classified_artefacts
        )

    def _handle_children_mode(self, classified_artefacts):
        """Return whatever ``ArtefactReader.find_children`` yields for the target."""
        children = ArtefactReader.find_children(
            artefact_name=self.artefact_name,
            classifier=self.classifier,
            classified_artefacts=classified_artefacts
        )
        return children

    def _handle_default_mode(self, target_artefact, artefacts_by_classifier):
        """Select only the target artefact under this command's classifier."""
        bucket = artefacts_by_classifier[self.classifier]
        bucket.append(target_artefact)

    def _apply_filtering(self, artefacts_by_classifier):
        """Run ``filter_list`` with the artefact retrieval strategies."""
        filtered = filter_list(
            list_to_filter=artefacts_by_classifier,
            list_filter=self.list_filter,
            content_retrieval_strategy=artefact_content_retrieval,
            file_path_retrieval=artefact_path_retrieval,
            tag_retrieval=artefact_tags_retrieval
        )
        return filtered
@@ -0,0 +1,233 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from typing import Tuple, Optional
4
+
5
+
6
class DocumentReader(ABC):
    """Abstract base class for document readers."""

    def __init__(self, file_path: str):
        """Remember the document path and the directory that contains it."""
        self.file_path = file_path
        self.base_dir = os.path.dirname(file_path)

    @abstractmethod
    def read(self, extract_images: bool = False) -> str:
        """Read document and optionally extract images"""
        pass

    def create_image_data_dir(self, extension_suffix: str) -> str:
        """
        Create data directory for images with file extension suffix to avoid conflicts.

        The directory is ``<base_dir>/<stem>_<extension_suffix>.data/images``.

        Args:
            extension_suffix: Suffix appended to the file stem (e.g. "docx").

        Returns:
            str: Path to images directory
        """
        stem = os.path.splitext(os.path.basename(self.file_path))[0]
        images_dir = os.path.join(
            self.base_dir, f"{stem}_{extension_suffix}.data", "images"
        )
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(images_dir, exist_ok=True)
        return images_dir

    def save_and_describe_image(self, image_data: bytes, image_format: str,
                                save_dir: str, image_counter: int) -> Tuple[str, str]:
        """
        Save image data and get its description from LLM.

        Args:
            image_data: Raw bytes of the image.
            image_format: File extension used for the saved image.
            save_dir: Directory the image is written to.
            image_counter: Number used as the image's file name.

        Returns:
            tuple: (relative_image_path, description)
        """
        # Imported here rather than at module import time.
        from ara_cli.prompt_handler import describe_image

        # Save image
        image_filename = f"{image_counter}.{image_format}"
        image_path = os.path.join(save_dir, image_filename)

        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        # Get image description from LLM
        description = describe_image(image_path)

        # Path relative to the document's directory
        relative_image_path = os.path.relpath(image_path, self.base_dir)

        return relative_image_path, description
56
+
57
+
58
class DocxReader(DocumentReader):
    """Reader for DOCX files"""

    def read(self, extract_images: bool = False) -> str:
        """
        Return the paragraph text of the document.

        When ``extract_images`` is true, each embedded image is saved and
        described, and the descriptions are appended under an
        "Extracted Images" heading. Images that are only linked or that
        Pillow cannot identify are skipped with a warning, so a single bad
        image no longer aborts the whole read.
        """
        import docx

        doc = docx.Document(self.file_path)
        text_content = '\n'.join(para.text for para in doc.paragraphs)

        if not extract_images:
            return text_content

        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("docx")

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        for rel in doc.part.rels.values():
            if "image" not in rel.reltype:
                continue

            # External (linked) relationships carry no embedded part, so
            # there are no bytes to extract.
            if rel.is_external:
                continue

            image_data = rel.target_part.blob

            # Determine image format; close the Pillow handle right away.
            try:
                with Image.open(io.BytesIO(image_data)) as image:
                    image_format = image.format.lower()
            except OSError as e:
                print(f"Warning: Could not extract image from DOCX: {e}")
                continue

            # Save and describe image
            relative_path, description = self.save_and_describe_image(
                image_data, image_format, images_dir, image_counter
            )

            # Add formatted description to list
            image_descriptions.append(
                f"\nImage: {relative_path}\n[{description}]\n"
            )

            image_counter += 1

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
104
+
105
+
106
class PdfReader(DocumentReader):
    """Reader for PDF files"""

    def read(self, extract_images: bool = False) -> str:
        """
        Return the PDF converted to markdown.

        When ``extract_images`` is true, every image listed on every page
        is saved and described, and the descriptions are appended under an
        "Extracted Images" heading.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import fitz  # PyMuPDF

        # Create images directory
        images_dir = self.create_image_data_dir("pdf")

        # Extract text without images first
        text_content = pymupdf4llm.to_markdown(self.file_path, write_images=False)

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        doc = fitz.open(self.file_path)
        try:
            for page in doc:
                for img in page.get_images():
                    # The first item of each image entry is its xref.
                    xref = img[0]
                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_bytes, image_ext, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )

                    image_counter += 1
        finally:
            # Release the document even if extraction or description fails.
            doc.close()

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
156
+
157
+
158
class OdtReader(DocumentReader):
    """Reader for ODT files"""

    def read(self, extract_images: bool = False) -> str:
        """
        Return the ODT converted to markdown.

        When ``extract_images`` is true, files stored under ``Pictures/``
        in the ODT archive are saved and described, and the descriptions
        are appended under an "Extracted Images" heading. Image extraction
        is best-effort: failures are reported as warnings and the text is
        still returned.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import zipfile
        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("odt")

        # Get text content
        text_content = pymupdf4llm.to_markdown(self.file_path, write_images=False)

        # Extract and process images from ODT
        image_descriptions = []
        image_counter = 1

        try:
            with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
                # Files in the Pictures directory; names ending in "/" are
                # directory entries and hold no image data.
                picture_files = [
                    f for f in odt_zip.namelist()
                    if f.startswith('Pictures/') and not f.endswith('/')
                ]

                for picture_file in picture_files:
                    # Extract image data
                    image_data = odt_zip.read(picture_file)

                    # Determine image format. An entry Pillow cannot read is
                    # skipped on its own so the remaining images are still
                    # processed.
                    try:
                        with Image.open(io.BytesIO(image_data)) as image:
                            image_format = image.format.lower()
                    except OSError as e:
                        print(f"Warning: Skipping unreadable ODT image {picture_file}: {e}")
                        continue

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_data, image_format, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )

                    image_counter += 1
        except Exception as e:
            # Deliberate best-effort: the text content is still returned.
            print(f"Warning: Could not extract images from ODT: {e}")

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
212
+
213
+
214
class DocumentReaderFactory:
    """Factory that picks a document reader from a file's extension."""

    @staticmethod
    def create_reader(file_path: str) -> Optional[DocumentReader]:
        """Return a reader for ``file_path``, or None for an unknown extension."""
        extension = os.path.splitext(file_path)[1].lower()

        # Extension comparison is case-insensitive because of lower() above.
        if extension == '.docx':
            return DocxReader(file_path)
        if extension == '.pdf':
            return PdfReader(file_path)
        if extension == '.odt':
            return OdtReader(file_path)
        return None
@@ -0,0 +1,123 @@
1
+ import os
2
+ import base64
3
+ from abc import ABC, abstractmethod
4
+ from typing import Optional
5
+ from ara_cli.file_loaders.markdown_reader import MarkdownReader
6
+ from ara_cli.file_loaders.document_readers import DocumentReaderFactory
7
+
8
+
9
class FileLoader(ABC):
    """Common base for loaders that work on behalf of a chat instance."""

    def __init__(self, chat_instance):
        """Keep a reference to the chat this loader works for."""
        self.chat = chat_instance

    @abstractmethod
    def load(self, file_path: str, **kwargs) -> bool:
        """Load ``file_path``; concrete loaders define how."""

    def add_prompt_tag_if_needed(self):
        """Delegate to the chat's own ``add_prompt_tag_if_needed`` for its chat file."""
        chat = self.chat
        chat.add_prompt_tag_if_needed(chat.chat_name)
23
+
24
+
25
class TextFileLoader(FileLoader):
    """Loader that appends a text file's content to the chat file."""

    def load(self, file_path: str, prefix: str = "", suffix: str = "",
             block_delimiter: str = "", extract_images: bool = False) -> bool:
        """
        Append the content of ``file_path`` to the chat file.

        ``.md`` files are read through ``MarkdownReader`` when
        ``extract_images`` is set; everything else is read as UTF-8 text
        with undecodable bytes replaced. A non-empty ``block_delimiter``
        is placed on its own line before and after the content. Always
        returns True.
        """
        wants_markdown_images = file_path.lower().endswith('.md') and extract_images

        if not wants_markdown_images:
            with open(file_path, 'r', encoding='utf-8', errors="replace") as source:
                file_content = source.read()
        else:
            file_content = MarkdownReader(file_path).read(extract_images=True)

        if block_delimiter:
            file_content = f"{block_delimiter}\n{file_content}\n{block_delimiter}"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{file_content}{suffix}\n")

        return True
50
+
51
+
52
class BinaryFileLoader(FileLoader):
    """Loader that embeds a binary file (an image) into the chat file."""

    def load(self, file_path: str, mime_type: str, prefix: str = "", suffix: str = "") -> bool:
        """
        Append ``file_path`` to the chat file as a markdown image.

        The file bytes are base64-encoded into a ``data:`` URI that uses
        ``mime_type``; the file's base name is the image label. Always
        returns True.
        """
        with open(file_path, 'rb') as source:
            base64_image = base64.b64encode(source.read()).decode("utf-8")

        image_label = os.path.basename(file_path)
        markdown_image = f"![{image_label}](data:{mime_type};base64,{base64_image})"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{markdown_image}{suffix}\n")

        return True
68
+
69
+
70
class DocumentFileLoader(FileLoader):
    """Loader that appends a document's text (PDF, DOCX, ODT) to the chat file."""

    def load(self, file_path: str, prefix: str = "", suffix: str = "",
             block_delimiter: str = "```", extract_images: bool = False) -> bool:
        """
        Append the text of the document at ``file_path`` to the chat file.

        Returns False, after printing a notice, when no reader exists for
        the file type; otherwise writes the text and returns True. A
        non-empty ``block_delimiter`` is placed on its own line before and
        after the text.
        """
        reader = DocumentReaderFactory.create_reader(file_path)
        if not reader:
            print("Unsupported document type.")
            return False

        document_text = reader.read(extract_images=extract_images)
        if block_delimiter:
            document_text = f"{block_delimiter}\n{document_text}\n{block_delimiter}"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{document_text}{suffix}\n")

        return True
94
+
95
+
96
class FileLoaderFactory:
    """Factory for creating appropriate file loaders"""

    # Extension -> MIME type for files loaded through BinaryFileLoader.
    BINARY_TYPE_MAPPING = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
    }

    # Extensions routed to DocumentFileLoader.
    DOCUMENT_TYPE_EXTENSIONS = [".docx", ".doc", ".odt", ".pdf"]

    @staticmethod
    def create_loader(file_name: str, chat_instance) -> Optional[FileLoader]:
        """
        Create appropriate loader based on file type.

        The extension check is case-insensitive. Binary extensions are
        checked first, then document extensions; any other file name gets
        a ``TextFileLoader``.

        Args:
            file_name: Name or path of the file to load.
            chat_instance: Chat object handed to the created loader.

        Returns:
            A loader bound to ``chat_instance``.
        """
        file_name_lower = file_name.lower()

        # str.endswith accepts a tuple of suffixes, so no explicit loop
        # over the extensions is needed.
        if file_name_lower.endswith(tuple(FileLoaderFactory.BINARY_TYPE_MAPPING)):
            return BinaryFileLoader(chat_instance)

        if file_name_lower.endswith(tuple(FileLoaderFactory.DOCUMENT_TYPE_EXTENSIONS)):
            return DocumentFileLoader(chat_instance)

        # Default to text file loader
        return TextFileLoader(chat_instance)