ara-cli 0.1.13.3__py3-none-any.whl → 0.1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. ara_cli/__init__.py +1 -1
  2. ara_cli/ara_command_action.py +162 -112
  3. ara_cli/ara_config.py +1 -1
  4. ara_cli/ara_subcommands/convert.py +66 -2
  5. ara_cli/ara_subcommands/prompt.py +266 -106
  6. ara_cli/artefact_autofix.py +2 -2
  7. ara_cli/artefact_converter.py +152 -53
  8. ara_cli/artefact_creator.py +41 -17
  9. ara_cli/artefact_lister.py +3 -3
  10. ara_cli/artefact_models/artefact_model.py +1 -1
  11. ara_cli/artefact_models/artefact_templates.py +0 -9
  12. ara_cli/artefact_models/feature_artefact_model.py +8 -8
  13. ara_cli/artefact_reader.py +62 -43
  14. ara_cli/artefact_scan.py +39 -17
  15. ara_cli/chat.py +23 -15
  16. ara_cli/children_contribution_updater.py +737 -0
  17. ara_cli/classifier.py +34 -0
  18. ara_cli/commands/load_command.py +4 -3
  19. ara_cli/commands/load_image_command.py +1 -1
  20. ara_cli/commands/read_command.py +23 -27
  21. ara_cli/completers.py +24 -0
  22. ara_cli/error_handler.py +26 -11
  23. ara_cli/file_loaders/document_reader.py +0 -178
  24. ara_cli/file_loaders/factories/__init__.py +0 -0
  25. ara_cli/file_loaders/factories/document_reader_factory.py +32 -0
  26. ara_cli/file_loaders/factories/file_loader_factory.py +27 -0
  27. ara_cli/file_loaders/file_loader.py +1 -30
  28. ara_cli/file_loaders/loaders/__init__.py +0 -0
  29. ara_cli/file_loaders/{document_file_loader.py → loaders/document_file_loader.py} +1 -1
  30. ara_cli/file_loaders/loaders/text_file_loader.py +47 -0
  31. ara_cli/file_loaders/readers/__init__.py +0 -0
  32. ara_cli/file_loaders/readers/docx_reader.py +49 -0
  33. ara_cli/file_loaders/readers/excel_reader.py +27 -0
  34. ara_cli/file_loaders/{markdown_reader.py → readers/markdown_reader.py} +1 -1
  35. ara_cli/file_loaders/readers/odt_reader.py +59 -0
  36. ara_cli/file_loaders/readers/pdf_reader.py +54 -0
  37. ara_cli/file_loaders/readers/pptx_reader.py +104 -0
  38. ara_cli/file_loaders/tools/__init__.py +0 -0
  39. ara_cli/output_suppressor.py +53 -0
  40. ara_cli/prompt_handler.py +123 -17
  41. ara_cli/tag_extractor.py +8 -7
  42. ara_cli/version.py +1 -1
  43. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/METADATA +18 -12
  44. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/RECORD +58 -45
  45. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/WHEEL +1 -1
  46. tests/test_artefact_converter.py +1 -46
  47. tests/test_artefact_lister.py +11 -8
  48. tests/test_chat.py +4 -4
  49. tests/test_chat_givens_images.py +1 -1
  50. tests/test_children_contribution_updater.py +98 -0
  51. tests/test_document_loader_office.py +267 -0
  52. tests/test_prompt_handler.py +416 -214
  53. tests/test_setup_default_chat_prompt_mode.py +198 -0
  54. tests/test_tag_extractor.py +95 -49
  55. ara_cli/file_loaders/document_readers.py +0 -233
  56. ara_cli/file_loaders/file_loaders.py +0 -123
  57. ara_cli/file_loaders/text_file_loader.py +0 -187
  58. /ara_cli/file_loaders/{binary_file_loader.py → loaders/binary_file_loader.py} +0 -0
  59. /ara_cli/file_loaders/{image_processor.py → tools/image_processor.py} +0 -0
  60. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/entry_points.txt +0 -0
  61. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/top_level.txt +0 -0
ara_cli/classifier.py CHANGED
@@ -72,3 +72,37 @@ class Classifier:
72
72
  @lru_cache(maxsize=None)
73
73
  def artefact_titles():
74
74
  return Classifier.artefact_title.values()
75
+
76
+ # Leaf-node classifiers that cannot have children
77
+ _leaf_classifiers = {"task", "issue"}
78
+
79
+ # Contribution hierarchy: child -> valid parent classifiers
80
+ contribution_hierarchy = {
81
+ "businessgoal": ["vision"],
82
+ "capability": ["vision", "businessgoal"],
83
+ "keyfeature": ["vision", "businessgoal", "capability"],
84
+ "epic": ["vision", "businessgoal", "capability", "keyfeature"],
85
+ "userstory": ["vision", "businessgoal", "capability", "keyfeature", "epic"],
86
+ "example": ["vision", "businessgoal", "capability", "keyfeature", "epic", "userstory"],
87
+ "feature": ["vision", "businessgoal", "capability", "keyfeature", "epic", "userstory"],
88
+ "task": ["vision", "businessgoal", "capability", "keyfeature", "epic", "userstory", "feature"],
89
+ "issue": ["vision", "businessgoal", "capability", "keyfeature", "epic", "userstory", "feature"],
90
+ }
91
+
92
@staticmethod
@lru_cache(maxsize=None)
def can_have_children(classifier: str) -> bool:
    """
    Tell whether artefacts of the given classifier may own child artefacts.

    A classifier is refused only when it is listed in
    ``Classifier._leaf_classifiers``; every other value is accepted.
    """
    is_leaf = classifier in Classifier._leaf_classifiers
    return not is_leaf
100
+
101
@staticmethod
def get_valid_parent_classifiers(child_classifier: str) -> list:
    """
    Get list of valid parent classifiers for a given child classifier.

    The lookup is made in ``Classifier.contribution_hierarchy``.

    Returns:
        A new list on every call, so callers may sort or modify it freely.
        The list is empty if the classifier has no entry in the hierarchy
        (e.g., vision).
    """
    # Deliberately not memoised and always copied: handing out the list stored
    # in contribution_hierarchy (or one cached empty list) would give every
    # caller the same mutable object, so a single in-place edit would silently
    # change the hierarchy for the rest of the process. The lookup itself is a
    # plain dict access, so a cache would not save any work.
    return list(Classifier.contribution_hierarchy.get(child_classifier, ()))
@@ -1,6 +1,7 @@
1
1
  from ara_cli.commands.command import Command
2
- from ara_cli.file_loaders.file_loader import FileLoaderFactory
3
- from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
2
+ from ara_cli.file_loaders.factories.file_loader_factory import FileLoaderFactory
3
+ from ara_cli.file_loaders.loaders.binary_file_loader import BinaryFileLoader
4
+ from ara_cli import BINARY_TYPE_MAPPING
4
5
 
5
6
 
6
7
  class LoadCommand(Command):
@@ -29,7 +30,7 @@ class LoadCommand(Command):
29
30
  # Determine mime type for binary files
30
31
  file_name_lower = self.file_path.lower()
31
32
  mime_type = None
32
- for extension, mt in FileLoaderFactory.BINARY_TYPE_MAPPING.items():
33
+ for extension, mt in BINARY_TYPE_MAPPING.items():
33
34
  if file_name_lower.endswith(extension):
34
35
  mime_type = mt
35
36
  break
@@ -1,5 +1,5 @@
1
1
  from ara_cli.commands.command import Command
2
- from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
2
+ from ara_cli.file_loaders.loaders.binary_file_loader import BinaryFileLoader
3
3
 
4
4
 
5
5
  class LoadImageCommand(Command):
@@ -3,9 +3,9 @@ from ara_cli.artefact_reader import ArtefactReader
3
3
  from ara_cli.file_classifier import FileClassifier
4
4
  from ara_cli.list_filter import ListFilter, filter_list
5
5
  from ara_cli.artefact_models.artefact_data_retrieval import (
6
- artefact_content_retrieval,
7
- artefact_path_retrieval,
8
- artefact_tags_retrieval
6
+ artefact_content_retrieval,
7
+ artefact_path_retrieval,
8
+ artefact_tags_retrieval,
9
9
  )
10
10
  from ara_cli.artefact_fuzzy_search import suggest_close_name_matches
11
11
  import os
@@ -18,7 +18,7 @@ class ReadCommand(Command):
18
18
  artefact_name: str,
19
19
  read_mode: str = "default",
20
20
  list_filter: ListFilter = None,
21
- output=None
21
+ output=None,
22
22
  ):
23
23
  self.classifier = classifier
24
24
  self.artefact_name = artefact_name
@@ -29,7 +29,8 @@ class ReadCommand(Command):
29
29
  def execute(self) -> bool:
30
30
  """Execute the read command and return success status."""
31
31
  file_classifier = FileClassifier(os)
32
- classified_artefacts = ArtefactReader.read_artefacts()
32
+ reader = ArtefactReader()
33
+ classified_artefacts = reader.read_artefacts()
33
34
 
34
35
  if not self.classifier or not self.artefact_name:
35
36
  self._filter_and_print(classified_artefacts, file_classifier)
@@ -39,15 +40,12 @@ class ReadCommand(Command):
39
40
  all_artefact_names = [a.title for a in artefacts]
40
41
 
41
42
  if self.artefact_name not in all_artefact_names:
42
- suggest_close_name_matches(
43
- self.artefact_name,
44
- all_artefact_names
45
- )
43
+ suggest_close_name_matches(self.artefact_name, all_artefact_names)
46
44
  return False
47
45
 
48
- target_artefact = next(filter(
49
- lambda x: x.title == self.artefact_name, artefacts
50
- ))
46
+ target_artefact = next(
47
+ filter(lambda x: x.title == self.artefact_name, artefacts)
48
+ )
51
49
 
52
50
  artefacts_by_classifier = {self.classifier: []}
53
51
 
@@ -55,16 +53,14 @@ class ReadCommand(Command):
55
53
  match self.read_mode:
56
54
  case "branch":
57
55
  self._handle_branch_mode(
58
- classified_artefacts, artefacts_by_classifier
56
+ classified_artefacts, artefacts_by_classifier, reader
59
57
  )
60
58
  case "children":
61
59
  artefacts_by_classifier = self._handle_children_mode(
62
- classified_artefacts
60
+ classified_artefacts, reader
63
61
  )
64
62
  case _:
65
- self._handle_default_mode(
66
- target_artefact, artefacts_by_classifier
67
- )
63
+ self._handle_default_mode(target_artefact, artefacts_by_classifier)
68
64
 
69
65
  # Apply filtering and print results
70
66
  self._filter_and_print(artefacts_by_classifier, file_classifier)
@@ -78,21 +74,23 @@ class ReadCommand(Command):
78
74
  self.output(f"Error reading artefact: {e}")
79
75
  return False
80
76
 
81
- def _handle_branch_mode(self, classified_artefacts, artefacts_by_classifier):
77
+ def _handle_branch_mode(
78
+ self, classified_artefacts, artefacts_by_classifier, reader
79
+ ):
82
80
  """Handle branch read mode."""
83
- ArtefactReader.step_through_value_chain(
81
+ reader.step_through_value_chain(
84
82
  artefact_name=self.artefact_name,
85
83
  classifier=self.classifier,
86
84
  artefacts_by_classifier=artefacts_by_classifier,
87
- classified_artefacts=classified_artefacts
85
+ classified_artefacts=classified_artefacts,
88
86
  )
89
87
 
90
- def _handle_children_mode(self, classified_artefacts):
88
+ def _handle_children_mode(self, classified_artefacts, reader):
91
89
  """Handle children read mode."""
92
- return ArtefactReader.find_children(
90
+ return reader.find_children(
93
91
  artefact_name=self.artefact_name,
94
92
  classifier=self.classifier,
95
- classified_artefacts=classified_artefacts
93
+ classified_artefacts=classified_artefacts,
96
94
  )
97
95
 
98
96
  def _handle_default_mode(self, target_artefact, artefacts_by_classifier):
@@ -106,12 +104,10 @@ class ReadCommand(Command):
106
104
  list_filter=self.list_filter,
107
105
  content_retrieval_strategy=artefact_content_retrieval,
108
106
  file_path_retrieval=artefact_path_retrieval,
109
- tag_retrieval=artefact_tags_retrieval
107
+ tag_retrieval=artefact_tags_retrieval,
110
108
  )
111
109
 
112
110
  def _filter_and_print(self, artefacts_by_classifier, file_classifier):
113
111
  """Apply list filtering and print results"""
114
112
  filtered_artefacts = self._apply_filtering(artefacts_by_classifier)
115
- file_classifier.print_classified_files(
116
- filtered_artefacts, print_content=True
117
- )
113
+ file_classifier.print_classified_files(filtered_artefacts, print_content=True)
ara_cli/completers.py CHANGED
@@ -87,6 +87,21 @@ def complete_chat_files(incomplete: str) -> List[str]:
87
87
  return []
88
88
 
89
89
 
90
def complete_prompt_step(incomplete: str) -> List[str]:
    """Return the prompt step/subcommand names that start with ``incomplete``.

    Candidates keep their declaration order; an empty ``incomplete`` matches
    every step.
    """
    known_steps = (
        "init",
        "load",
        "send",
        "load-and-send",
        "extract",
        "update",
        "chat",
        "init-rag",
    )
    matches: List[str] = []
    for step_name in known_steps:
        if step_name.startswith(incomplete):
            matches.append(step_name)
    return matches
103
+
104
+
90
105
  # Dynamic completers that need context
91
106
  class DynamicCompleters:
92
107
  @staticmethod
@@ -164,6 +179,15 @@ class DynamicCompleters:
164
179
 
165
180
  return completer
166
181
 
182
@staticmethod
def create_prompt_step_completer():
    """Build the completion callback for prompt step/subcommand names."""

    def completer(ctx: typer.Context, incomplete: str) -> List[str]:
        # The context is part of the callback signature but is not consulted;
        # the candidates come solely from complete_prompt_step.
        suggestions = complete_prompt_step(incomplete)
        return suggestions

    return completer
190
+
167
191
  @staticmethod
168
192
  def create_convert_source_artefact_name_completer():
169
193
  """Create a completer for convert command source artefact names based on old_classifier context."""
ara_cli/error_handler.py CHANGED
@@ -5,8 +5,9 @@ from enum import Enum
5
5
  from functools import wraps
6
6
 
7
7
 
8
- RED = '\033[91m'
9
- RESET = '\033[0m'
8
+ RED = "\033[91m"
9
+ YELLOW = "\033[93m"
10
+ RESET = "\033[0m"
10
11
 
11
12
 
12
13
  class ErrorLevel(Enum):
@@ -69,7 +70,6 @@ class ErrorHandler:
69
70
 
70
71
  sys.exit(1)
71
72
 
72
-
73
73
  def report_error(self, error: Exception, context: Optional[str] = None) -> None:
74
74
  """Report error with standardized formatting but don't exit"""
75
75
  if isinstance(error, AraError):
@@ -77,33 +77,44 @@ class ErrorHandler:
77
77
  else:
78
78
  self._report_generic_error(error, context)
79
79
 
80
-
81
80
  def _report_ara_error(self, error: AraError, context: Optional[str] = None) -> None:
82
81
  """Report ARA-specific errors without exiting"""
83
82
  error_prefix = f"[{error.level.value}]"
84
83
 
84
+ # Choose color based on error level
85
+ if error.level in (ErrorLevel.INFO, ErrorLevel.WARNING):
86
+ color = YELLOW
87
+ else:
88
+ color = RED
89
+
85
90
  if context:
86
- print(f"{RED}{error_prefix} {context}: {error.message}{RESET}", file=sys.stderr)
91
+ print(
92
+ f"{color}{error_prefix} {context}: {error.message}{RESET}",
93
+ file=sys.stderr,
94
+ )
87
95
  else:
88
- print(f"{RED}{error_prefix} {error.message}{RESET}", file=sys.stderr)
96
+ print(f"{color}{error_prefix} {error.message}{RESET}", file=sys.stderr)
89
97
 
90
98
  if self.debug_mode:
91
99
  traceback.print_exc()
92
100
 
93
-
94
- def _report_generic_error(self, error: Exception, context: Optional[str] = None) -> None:
101
+ def _report_generic_error(
102
+ self, error: Exception, context: Optional[str] = None
103
+ ) -> None:
95
104
  """Report generic Python errors without exiting"""
96
105
  error_type = type(error).__name__
97
106
 
98
107
  if context:
99
- print(f"{RED}[ERROR] {context}: {error_type}: {str(error)}{RESET}", file=sys.stderr)
108
+ print(
109
+ f"{RED}[ERROR] {context}: {error_type}: {str(error)}{RESET}",
110
+ file=sys.stderr,
111
+ )
100
112
  else:
101
113
  print(f"{RED}[ERROR] {error_type}: {str(error)}{RESET}", file=sys.stderr)
102
114
 
103
115
  if self.debug_mode:
104
116
  traceback.print_exc()
105
117
 
106
-
107
118
  def validate_and_exit(
108
119
  self, condition: bool, message: str, error_code: int = 1
109
120
  ) -> None:
@@ -112,7 +123,11 @@ class ErrorHandler:
112
123
  raise AraValidationError(message)
113
124
 
114
125
 
115
- def handle_errors(_func=None, context: Optional[str] = None, error_handler: Optional[ErrorHandler] = None):
126
+ def handle_errors(
127
+ _func=None,
128
+ context: Optional[str] = None,
129
+ error_handler: Optional[ErrorHandler] = None,
130
+ ):
116
131
  """Decorator to handle errors in action functions"""
117
132
 
118
133
  def decorator(func):
@@ -61,185 +61,7 @@ class DocumentReader(ABC):
61
61
  return relative_image_path, description
62
62
 
63
63
 
64
- class DocxReader(DocumentReader):
65
- """Reader for DOCX files"""
66
64
 
67
- def read(self, extract_images: bool = False) -> str:
68
- import docx
69
-
70
- doc = docx.Document(self.file_path)
71
- text_content = '\n'.join(para.text for para in doc.paragraphs)
72
-
73
- if not extract_images:
74
- return text_content
75
-
76
- from PIL import Image
77
- import io
78
-
79
- # Create data directory for images
80
- images_dir = self.create_image_data_dir("docx")
81
-
82
- # Extract and process images
83
- image_descriptions = []
84
- image_counter = 1
85
-
86
- for rel in doc.part.rels.values():
87
- if "image" in rel.reltype:
88
- image_data = rel.target_part.blob
89
-
90
- # Determine image format
91
- image = Image.open(io.BytesIO(image_data))
92
- image_format = image.format.lower()
93
-
94
- # Save and describe image
95
- relative_path, description = self.save_and_describe_image(
96
- image_data, image_format, images_dir, image_counter
97
- )
98
-
99
- # Add formatted description to list
100
- image_description = f"\nImage: {relative_path}\n[{description}]\n"
101
- image_descriptions.append(image_description)
102
-
103
- image_counter += 1
104
-
105
- # Combine text content with image descriptions
106
- if image_descriptions:
107
- text_content += "\n\n### Extracted Images\n" + \
108
- "\n".join(image_descriptions)
109
-
110
- return text_content
111
-
112
-
113
- class PdfReader(DocumentReader):
114
- """Reader for PDF files"""
115
-
116
- def read(self, extract_images: bool = False) -> str:
117
- import pymupdf4llm
118
-
119
- if not extract_images:
120
- return pymupdf4llm.to_markdown(self.file_path, write_images=False)
121
-
122
- import fitz # PyMuPDF
123
-
124
- # Create images directory
125
- images_dir = self.create_image_data_dir("pdf")
126
-
127
- # Extract text without images first
128
- text_content = pymupdf4llm.to_markdown(
129
- self.file_path, write_images=False)
130
-
131
- # Extract and process images
132
- doc = fitz.open(self.file_path)
133
- image_descriptions = []
134
- image_counter = 1
135
-
136
- for page_num, page in enumerate(doc):
137
- image_list = page.get_images()
138
-
139
- for img_index, img in enumerate(image_list):
140
- # Extract image
141
- xref = img[0]
142
- base_image = doc.extract_image(xref)
143
- image_bytes = base_image["image"]
144
- image_ext = base_image["ext"]
145
-
146
- # Save and describe image
147
- relative_path, description = self.save_and_describe_image(
148
- image_bytes, image_ext, images_dir, image_counter
149
- )
150
-
151
- # Add formatted description to list
152
- image_description = f"\nImage: {relative_path}\n[{description}]\n"
153
- image_descriptions.append(image_description)
154
-
155
- image_counter += 1
156
-
157
- doc.close()
158
-
159
- # Combine text content with image descriptions
160
- if image_descriptions:
161
- text_content += "\n\n### Extracted Images\n" + \
162
- "\n".join(image_descriptions)
163
-
164
- return text_content
165
-
166
-
167
- class OdtReader(DocumentReader):
168
- """Reader for ODT files"""
169
-
170
- def read(self, extract_images: bool = False) -> str:
171
- import pymupdf4llm
172
-
173
- if not extract_images:
174
- return pymupdf4llm.to_markdown(self.file_path, write_images=False)
175
-
176
- import zipfile
177
- from PIL import Image
178
- import io
179
-
180
- # Create data directory for images
181
- images_dir = self.create_image_data_dir("odt")
182
-
183
- # Get text content
184
- text_content = pymupdf4llm.to_markdown(
185
- self.file_path, write_images=False)
186
-
187
- # Extract and process images from ODT
188
- image_descriptions = []
189
- image_counter = 1
190
-
191
- try:
192
- with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
193
- # List all files in the Pictures directory
194
- picture_files = [
195
- f for f in odt_zip.namelist() if f.startswith('Pictures/')]
196
-
197
- for picture_file in picture_files:
198
- # Extract image data
199
- image_data = odt_zip.read(picture_file)
200
-
201
- # Determine image format
202
- image = Image.open(io.BytesIO(image_data))
203
- image_format = image.format.lower()
204
-
205
- # Save and describe image
206
- relative_path, description = self.save_and_describe_image(
207
- image_data, image_format, images_dir, image_counter
208
- )
209
-
210
- # Add formatted description to list
211
- image_description = f"\nImage: {relative_path}\n[{description}]\n"
212
- image_descriptions.append(image_description)
213
-
214
- image_counter += 1
215
- except Exception as e:
216
- print(f"Warning: Could not extract images from ODT: {e}")
217
-
218
- # Combine text content with image descriptions
219
- if image_descriptions:
220
- text_content += "\n\n### Extracted Images\n" + \
221
- "\n".join(image_descriptions)
222
-
223
- return text_content
224
-
225
-
226
- class DocumentReaderFactory:
227
- """Factory for creating appropriate document readers"""
228
-
229
- @staticmethod
230
- def create_reader(file_path: str) -> Optional[DocumentReader]:
231
- """Create appropriate reader based on file extension"""
232
- _, ext = os.path.splitext(file_path)
233
- ext = ext.lower()
234
65
 
235
- readers = {
236
- '.docx': DocxReader,
237
- '.pdf': PdfReader,
238
- '.odt': OdtReader
239
- }
240
66
 
241
- reader_class = readers.get(ext)
242
- if reader_class:
243
- return reader_class(file_path)
244
67
 
245
- return None
File without changes
@@ -0,0 +1,32 @@
1
+ import os
2
+ from typing import Optional
3
+ from ara_cli.file_loaders.document_reader import DocumentReader
4
+ from ara_cli.file_loaders.readers.docx_reader import DocxReader
5
+ from ara_cli.file_loaders.readers.pdf_reader import PdfReader
6
+ from ara_cli.file_loaders.readers.odt_reader import OdtReader
7
+ from ara_cli.file_loaders.readers.excel_reader import ExcelReader
8
+ from ara_cli.file_loaders.readers.pptx_reader import PptxReader
9
+
10
class DocumentReaderFactory:
    """Factory that picks a document reader from a file's extension."""

    @staticmethod
    def create_reader(file_path: str) -> Optional[DocumentReader]:
        """Return a reader instance for ``file_path`` or ``None``.

        The extension is compared case-insensitively. ``None`` is returned
        when the extension is not one of the registered ones below.
        """
        reader_by_extension = {
            '.docx': DocxReader,
            '.pdf': PdfReader,
            '.odt': OdtReader,
            '.xlsx': ExcelReader,
            '.xls': ExcelReader,
            '.pptx': PptxReader,
        }

        suffix = os.path.splitext(file_path)[1].lower()
        reader_type = reader_by_extension.get(suffix)
        if not reader_type:
            return None

        return reader_type(file_path)
@@ -0,0 +1,27 @@
1
+ from typing import Optional
2
+ from ara_cli import BINARY_TYPE_MAPPING, DOCUMENT_TYPE_EXTENSIONS
3
+ from ara_cli.file_loaders.file_loader import FileLoader
4
+
5
class FileLoaderFactory:
    """Factory for creating appropriate file loaders"""

    @staticmethod
    def create_loader(file_name: str, chat_instance) -> Optional[FileLoader]:
        """Create appropriate loader based on file type.

        The file name is lower-cased before its ending is tested, so matching
        is case-insensitive. Binary extensions (the keys of
        ``BINARY_TYPE_MAPPING``) are checked first, then
        ``DOCUMENT_TYPE_EXTENSIONS``; any other name gets a text loader, so a
        loader is always returned.

        Args:
            file_name: Name or path of the file to load.
            chat_instance: Object handed unchanged to the loader constructor.

        Returns:
            A ``BinaryFileLoader``, ``DocumentFileLoader`` or ``TextFileLoader``.
        """
        # NOTE(review): the loaders are imported inside the function rather
        # than at module level - presumably to avoid a circular import; confirm
        # before moving them.
        from ara_cli.file_loaders.loaders.binary_file_loader import BinaryFileLoader
        from ara_cli.file_loaders.loaders.text_file_loader import TextFileLoader
        from ara_cli.file_loaders.loaders.document_file_loader import DocumentFileLoader

        file_name_lower = file_name.lower()

        # str.endswith accepts a tuple of suffixes, which replaces the manual
        # loop over .items() (whose mime-type value was never used).
        if file_name_lower.endswith(tuple(BINARY_TYPE_MAPPING)):
            return BinaryFileLoader(chat_instance)

        if file_name_lower.endswith(tuple(DOCUMENT_TYPE_EXTENSIONS)):
            return DocumentFileLoader(chat_instance)

        # Default to text file loader
        return TextFileLoader(chat_instance)
@@ -18,33 +18,4 @@ class FileLoader(ABC):
18
18
  self.chat.add_prompt_tag_if_needed(self.chat.chat_name)
19
19
 
20
20
 
21
- class FileLoaderFactory:
22
- """Factory for creating appropriate file loaders"""
23
- BINARY_TYPE_MAPPING = {
24
- ".png": "image/png",
25
- ".jpg": "image/jpeg",
26
- ".jpeg": "image/jpeg",
27
- }
28
-
29
- DOCUMENT_TYPE_EXTENSIONS = [".docx", ".doc", ".odt", ".pdf"]
30
-
31
- @staticmethod
32
- def create_loader(file_name: str, chat_instance) -> Optional[FileLoader]:
33
- """Create appropriate loader based on file type"""
34
- from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
35
- from ara_cli.file_loaders.text_file_loader import TextFileLoader
36
- from ara_cli.file_loaders.document_file_loader import DocumentFileLoader
37
-
38
- file_name_lower = file_name.lower()
39
-
40
- # Check if it's a binary file
41
- for extension, mime_type in FileLoaderFactory.BINARY_TYPE_MAPPING.items():
42
- if file_name_lower.endswith(extension):
43
- return BinaryFileLoader(chat_instance)
44
-
45
- # Check if it's a document
46
- if any(file_name_lower.endswith(ext) for ext in FileLoaderFactory.DOCUMENT_TYPE_EXTENSIONS):
47
- return DocumentFileLoader(chat_instance)
48
-
49
- # Default to text file loader
50
- return TextFileLoader(chat_instance)
21
+
File without changes
@@ -1,4 +1,4 @@
1
- from ara_cli.file_loaders.document_reader import DocumentReaderFactory
1
+ from ara_cli.file_loaders.factories.document_reader_factory import DocumentReaderFactory
2
2
  from ara_cli.file_loaders.file_loader import FileLoader
3
3
 
4
4
 
@@ -0,0 +1,47 @@
1
+ import os
2
+ import re
3
+ import base64
4
+ import tempfile
5
+ from typing import Optional, Tuple
6
+ import requests
7
+ from charset_normalizer import from_path
8
+ from ara_cli.file_loaders.file_loader import FileLoader
9
+ from ara_cli.file_loaders.readers.markdown_reader import MarkdownReader
10
+
11
+
12
class TextFileLoader(FileLoader):
    """Loader that appends the content of a text file to the chat file."""

    def load(
        self,
        file_path: str,
        prefix: str = "",
        suffix: str = "",
        block_delimiter: str = "",
        extract_images: bool = False,
        **kwargs,
    ) -> bool:
        """Read ``file_path`` and append it to ``self.chat.chat_name``.

        Args:
            file_path: File to read.
            prefix: Text written directly before the content.
            suffix: Text written directly after the content.
            block_delimiter: When non-empty, placed on its own line before and
                after the content.
            extract_images: Only has an effect for ``.md`` files, which are
                then read through ``MarkdownReader``.
            **kwargs: Accepted but not used here.

        Returns:
            ``True`` after the content was written, ``False`` when no encoding
            could be detected for the file.
        """
        is_markdown = file_path.lower().endswith(".md")

        if extract_images and is_markdown:
            raw_text = MarkdownReader(file_path).read(extract_images=True)
        else:
            # Let charset-normalizer pick the most plausible encoding.
            detected = from_path(file_path).best()
            if not detected:
                print(f"Failed to detect encoding for {file_path}")
                return False
            raw_text = str(detected)

        # Windows line endings are normalised on both paths.
        body = raw_text.replace("\r\n", "\n")
        if block_delimiter:
            body = f"{block_delimiter}\n{body}\n{block_delimiter}"

        with open(self.chat.chat_name, "a", encoding="utf-8") as chat_file:
            chat_file.write(f"{prefix}{body}{suffix}\n")

        return True
File without changes