ara-cli 0.1.9.77__py3-none-any.whl → 0.1.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ara-cli might be problematic. Click here for more details.

Files changed (122) hide show
  1. ara_cli/__init__.py +18 -2
  2. ara_cli/__main__.py +245 -66
  3. ara_cli/ara_command_action.py +128 -63
  4. ara_cli/ara_config.py +201 -177
  5. ara_cli/ara_subcommands/__init__.py +0 -0
  6. ara_cli/ara_subcommands/autofix.py +26 -0
  7. ara_cli/ara_subcommands/chat.py +27 -0
  8. ara_cli/ara_subcommands/classifier_directory.py +16 -0
  9. ara_cli/ara_subcommands/common.py +100 -0
  10. ara_cli/ara_subcommands/create.py +75 -0
  11. ara_cli/ara_subcommands/delete.py +22 -0
  12. ara_cli/ara_subcommands/extract.py +22 -0
  13. ara_cli/ara_subcommands/fetch_templates.py +14 -0
  14. ara_cli/ara_subcommands/list.py +65 -0
  15. ara_cli/ara_subcommands/list_tags.py +25 -0
  16. ara_cli/ara_subcommands/load.py +48 -0
  17. ara_cli/ara_subcommands/prompt.py +136 -0
  18. ara_cli/ara_subcommands/read.py +47 -0
  19. ara_cli/ara_subcommands/read_status.py +20 -0
  20. ara_cli/ara_subcommands/read_user.py +20 -0
  21. ara_cli/ara_subcommands/reconnect.py +27 -0
  22. ara_cli/ara_subcommands/rename.py +22 -0
  23. ara_cli/ara_subcommands/scan.py +14 -0
  24. ara_cli/ara_subcommands/set_status.py +22 -0
  25. ara_cli/ara_subcommands/set_user.py +22 -0
  26. ara_cli/ara_subcommands/template.py +16 -0
  27. ara_cli/artefact_autofix.py +214 -28
  28. ara_cli/artefact_creator.py +5 -8
  29. ara_cli/artefact_deleter.py +2 -4
  30. ara_cli/artefact_fuzzy_search.py +13 -6
  31. ara_cli/artefact_lister.py +29 -55
  32. ara_cli/artefact_models/artefact_data_retrieval.py +23 -0
  33. ara_cli/artefact_models/artefact_model.py +106 -25
  34. ara_cli/artefact_models/artefact_templates.py +23 -13
  35. ara_cli/artefact_models/epic_artefact_model.py +11 -2
  36. ara_cli/artefact_models/feature_artefact_model.py +56 -1
  37. ara_cli/artefact_models/userstory_artefact_model.py +15 -3
  38. ara_cli/artefact_reader.py +4 -5
  39. ara_cli/artefact_renamer.py +6 -2
  40. ara_cli/artefact_scan.py +2 -2
  41. ara_cli/chat.py +594 -219
  42. ara_cli/chat_agent/__init__.py +0 -0
  43. ara_cli/chat_agent/agent_communicator.py +62 -0
  44. ara_cli/chat_agent/agent_process_manager.py +211 -0
  45. ara_cli/chat_agent/agent_status_manager.py +73 -0
  46. ara_cli/chat_agent/agent_workspace_manager.py +76 -0
  47. ara_cli/commands/__init__.py +0 -0
  48. ara_cli/commands/command.py +7 -0
  49. ara_cli/commands/extract_command.py +15 -0
  50. ara_cli/commands/load_command.py +65 -0
  51. ara_cli/commands/load_image_command.py +34 -0
  52. ara_cli/commands/read_command.py +117 -0
  53. ara_cli/completers.py +144 -0
  54. ara_cli/directory_navigator.py +37 -4
  55. ara_cli/error_handler.py +134 -0
  56. ara_cli/file_classifier.py +3 -2
  57. ara_cli/file_loaders/__init__.py +0 -0
  58. ara_cli/file_loaders/binary_file_loader.py +33 -0
  59. ara_cli/file_loaders/document_file_loader.py +34 -0
  60. ara_cli/file_loaders/document_reader.py +245 -0
  61. ara_cli/file_loaders/document_readers.py +233 -0
  62. ara_cli/file_loaders/file_loader.py +50 -0
  63. ara_cli/file_loaders/file_loaders.py +123 -0
  64. ara_cli/file_loaders/image_processor.py +89 -0
  65. ara_cli/file_loaders/markdown_reader.py +75 -0
  66. ara_cli/file_loaders/text_file_loader.py +187 -0
  67. ara_cli/global_file_lister.py +51 -0
  68. ara_cli/prompt_extractor.py +214 -87
  69. ara_cli/prompt_handler.py +508 -146
  70. ara_cli/tag_extractor.py +54 -24
  71. ara_cli/template_loader.py +245 -0
  72. ara_cli/template_manager.py +14 -4
  73. ara_cli/templates/prompt-modules/commands/empty.commands.md +2 -12
  74. ara_cli/templates/prompt-modules/commands/extract_general.commands.md +12 -0
  75. ara_cli/templates/prompt-modules/commands/extract_markdown.commands.md +11 -0
  76. ara_cli/templates/prompt-modules/commands/extract_python.commands.md +13 -0
  77. ara_cli/templates/prompt-modules/commands/feature_add_or_modifiy_specified_behavior.commands.md +36 -0
  78. ara_cli/templates/prompt-modules/commands/feature_generate_initial_specified_bevahior.commands.md +53 -0
  79. ara_cli/templates/prompt-modules/commands/prompt_template_tech_stack_transformer.commands.md +95 -0
  80. ara_cli/templates/prompt-modules/commands/python_bug_fixing_code.commands.md +34 -0
  81. ara_cli/templates/prompt-modules/commands/python_generate_code.commands.md +27 -0
  82. ara_cli/templates/prompt-modules/commands/python_refactoring_code.commands.md +39 -0
  83. ara_cli/templates/prompt-modules/commands/python_step_definitions_generation_and_fixing.commands.md +40 -0
  84. ara_cli/templates/prompt-modules/commands/python_unittest_generation_and_fixing.commands.md +48 -0
  85. ara_cli/update_config_prompt.py +7 -1
  86. ara_cli/version.py +1 -1
  87. ara_cli-0.1.10.8.dist-info/METADATA +241 -0
  88. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/RECORD +104 -59
  89. tests/test_ara_command_action.py +66 -52
  90. tests/test_ara_config.py +200 -279
  91. tests/test_artefact_autofix.py +361 -5
  92. tests/test_artefact_lister.py +52 -132
  93. tests/test_artefact_scan.py +1 -1
  94. tests/test_chat.py +2009 -603
  95. tests/test_file_classifier.py +23 -0
  96. tests/test_file_creator.py +3 -5
  97. tests/test_global_file_lister.py +131 -0
  98. tests/test_prompt_handler.py +746 -0
  99. tests/test_tag_extractor.py +19 -13
  100. tests/test_template_loader.py +192 -0
  101. tests/test_template_manager.py +5 -4
  102. ara_cli/ara_command_parser.py +0 -536
  103. ara_cli/templates/prompt-modules/blueprints/complete_pytest_unittest.blueprint.md +0 -27
  104. ara_cli/templates/prompt-modules/blueprints/task_todo_list_implement_feature_BDD_way.blueprint.md +0 -30
  105. ara_cli/templates/prompt-modules/commands/artefact_classification.commands.md +0 -9
  106. ara_cli/templates/prompt-modules/commands/artefact_extension.commands.md +0 -17
  107. ara_cli/templates/prompt-modules/commands/artefact_formulation.commands.md +0 -14
  108. ara_cli/templates/prompt-modules/commands/behave_step_generation.commands.md +0 -102
  109. ara_cli/templates/prompt-modules/commands/code_generation_complex.commands.md +0 -20
  110. ara_cli/templates/prompt-modules/commands/code_generation_simple.commands.md +0 -13
  111. ara_cli/templates/prompt-modules/commands/error_fixing.commands.md +0 -20
  112. ara_cli/templates/prompt-modules/commands/feature_file_update.commands.md +0 -18
  113. ara_cli/templates/prompt-modules/commands/feature_formulation.commands.md +0 -43
  114. ara_cli/templates/prompt-modules/commands/js_code_generation_simple.commands.md +0 -13
  115. ara_cli/templates/prompt-modules/commands/refactoring.commands.md +0 -15
  116. ara_cli/templates/prompt-modules/commands/refactoring_analysis.commands.md +0 -9
  117. ara_cli/templates/prompt-modules/commands/reverse_engineer_feature_file.commands.md +0 -15
  118. ara_cli/templates/prompt-modules/commands/reverse_engineer_program_flow.commands.md +0 -19
  119. ara_cli-0.1.9.77.dist-info/METADATA +0 -18
  120. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/WHEEL +0 -0
  121. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/entry_points.txt +0 -0
  122. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/top_level.txt +0 -0
ara_cli/completers.py ADDED
@@ -0,0 +1,144 @@
1
+ import os
2
+ from typing import List, Optional
3
+ from pathlib import Path
4
+ import typer
5
+
6
+ from ara_cli.classifier import Classifier
7
+ from ara_cli.template_manager import SpecificationBreakdownAspects
8
+
9
+
10
def complete_classifier(incomplete: str) -> List[str]:
    """Return the classifier names that begin with ``incomplete``.

    Candidates are taken from ``Classifier.ordered_classifiers()`` and are
    returned in the order that call provides them.
    """
    return list(
        filter(
            lambda name: name.startswith(incomplete),
            Classifier.ordered_classifiers(),
        )
    )
14
+
15
+
16
def complete_aspect(incomplete: str) -> List[str]:
    """Return the aspect names that begin with ``incomplete``.

    Candidates are read from ``SpecificationBreakdownAspects.VALID_ASPECTS``
    and keep that collection's iteration order.
    """
    return list(
        filter(
            lambda name: name.startswith(incomplete),
            SpecificationBreakdownAspects.VALID_ASPECTS,
        )
    )
20
+
21
+
22
def complete_status(incomplete: str) -> List[str]:
    """Return the task status values that begin with ``incomplete``.

    The result keeps the fixed order in which the statuses are listed
    below; an empty prefix returns all of them.
    """
    known_statuses = ("to-do", "in-progress", "review", "done", "closed")
    matches: List[str] = []
    for status in known_statuses:
        if status.startswith(incomplete):
            matches.append(status)
    return matches
26
+
27
+
28
def complete_template_type(incomplete: str) -> List[str]:
    """Return the template type values that begin with ``incomplete``.

    The result keeps the fixed order in which the types are listed below;
    an empty prefix returns all of them.
    """
    known_types = ("rules", "intention", "commands", "blueprint")
    matches: List[str] = []
    for template_type in known_types:
        if template_type.startswith(incomplete):
            matches.append(template_type)
    return matches
32
+
33
+
34
def complete_artefact_name(classifier: str) -> List[str]:
    """List the artefact names stored for ``classifier``, sorted.

    The directory searched is ``ara/<sub-directory>`` relative to the
    current working directory, where the sub-directory comes from
    ``Classifier.get_sub_directory(classifier)``. A file counts as an
    artefact when its name ends in ``.<classifier>``; that suffix is
    stripped to obtain the artefact name.

    Returns an empty list when the directory is missing or when anything
    goes wrong, so a completion request never raises.
    """
    try:
        artefact_dir = f"ara/{Classifier.get_sub_directory(classifier)}"
        if not os.path.exists(artefact_dir):
            return []

        extension = f'.{classifier}'
        return sorted(
            entry[:-len(extension)]
            for entry in os.listdir(artefact_dir)
            if entry.endswith(extension)
        )
    except Exception:
        # Best effort: completion must not fail because of a lookup error.
        return []
54
+
55
+
56
def complete_artefact_name_for_classifier(classifier: str):
    """Build a completer bound to a single classifier.

    The returned function takes the partially typed text and returns the
    artefact names of ``classifier`` that start with it. Artefacts are
    looked up on every call, not when the completer is created.
    """
    def completer(incomplete: str) -> List[str]:
        candidates = complete_artefact_name(classifier)
        return list(filter(lambda name: name.startswith(incomplete), candidates))

    return completer
62
+
63
+
64
def complete_chat_files(incomplete: str) -> List[str]:
    """Return sorted chat file names (without ``.md``) matching a prefix.

    Only ``*.md`` files directly inside the current working directory are
    considered. Any error yields an empty list so completion never raises.
    """
    try:
        stems = (md_path.stem for md_path in Path.cwd().glob("*.md"))
        return sorted(stem for stem in stems if stem.startswith(incomplete))
    except Exception:
        # Best effort: completion must not fail because of a lookup error.
        return []
79
+
80
+
81
+ # Dynamic completers that need context
82
class DynamicCompleters:
    """Factories for completion callbacks with a ``(ctx, incomplete)`` signature.

    Each ``create_*`` method returns a new function that receives the
    ``typer.Context`` and the partially typed text and returns the list of
    matching candidates.
    """

    @staticmethod
    def _artefacts_from_context(ctx, param_name: str, incomplete: str) -> List[str]:
        """Complete artefact names for the classifier held in ``ctx.params[param_name]``.

        Returns an empty list when the context has no ``params`` attribute
        or the parameter is not present in it.
        """
        if not hasattr(ctx, 'params') or param_name not in ctx.params:
            return []

        selected = ctx.params[param_name]
        # Unwrap objects exposing ``.value`` (presumably Enum members) to
        # the plain value expected by complete_artefact_name.
        selected = getattr(selected, 'value', selected)
        return [
            name
            for name in complete_artefact_name(selected)
            if name.startswith(incomplete)
        ]

    @staticmethod
    def create_classifier_completer():
        """Create a completer for classifiers."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            # The context is accepted for signature compatibility only.
            return complete_classifier(incomplete)

        return completer

    @staticmethod
    def create_aspect_completer():
        """Create a completer for aspects."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            # The context is accepted for signature compatibility only.
            return complete_aspect(incomplete)

        return completer

    @staticmethod
    def create_status_completer():
        """Create a completer for status values."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            # The context is accepted for signature compatibility only.
            return complete_status(incomplete)

        return completer

    @staticmethod
    def create_template_type_completer():
        """Create a completer for template types."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            # The context is accepted for signature compatibility only.
            return complete_template_type(incomplete)

        return completer

    @staticmethod
    def create_artefact_name_completer():
        """Create a completer for artefact names of the ``classifier`` parameter."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            return DynamicCompleters._artefacts_from_context(
                ctx, 'classifier', incomplete
            )

        return completer

    @staticmethod
    def create_parent_name_completer():
        """Create a completer for artefact names of the ``parent_classifier`` parameter."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            return DynamicCompleters._artefacts_from_context(
                ctx, 'parent_classifier', incomplete
            )

        return completer

    @staticmethod
    def create_chat_file_completer():
        """Create a completer for chat files."""
        def completer(ctx: typer.Context, incomplete: str) -> List[str]:
            # The context is accepted for signature compatibility only.
            return complete_chat_files(incomplete)

        return completer
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import sys
2
3
  from os.path import join, exists, isdir, dirname, basename
3
4
  # from ara_cli.directory_searcher import DirectorySearcher
4
5
 
@@ -23,7 +24,8 @@ class DirectoryNavigator:
23
24
  return original_directory
24
25
 
25
26
  current_directory = original_directory
26
- while current_directory != dirname(current_directory): # Ensure loop breaks at root
27
+ # Ensure loop breaks at root
28
+ while current_directory != dirname(current_directory):
27
29
  potential_path = join(current_directory, self.target_directory)
28
30
  if self.exists(potential_path):
29
31
  os.chdir(potential_path)
@@ -31,7 +33,8 @@ class DirectoryNavigator:
31
33
  current_directory = dirname(current_directory)
32
34
 
33
35
  # If the loop completes, the target directory was not found
34
- user_input = input(f"Unable to locate the '{self.target_directory}' directory. Do you want to create an 'ara' folder in the working directory? (y/N): ").strip().lower()
36
+ user_input = input(
37
+ f"Unable to locate the '{self.target_directory}' directory. Do you want to create an 'ara' folder in the working directory? (y/N): ").strip().lower()
35
38
 
36
39
  if user_input == '' or user_input == 'y':
37
40
  ara_folder_path = join(original_directory, 'ara')
@@ -40,7 +43,8 @@ class DirectoryNavigator:
40
43
  os.chdir(ara_folder_path)
41
44
  return original_directory
42
45
  else:
43
- print(f"Unable to locate the '{self.target_directory}' directory and user declined to create 'ara' folder.")
46
+ print(
47
+ f"Unable to locate the '{self.target_directory}' directory and user declined to create 'ara' folder.")
44
48
  sys.exit(0)
45
49
 
46
50
  def navigate_to_relative(self, relative_path):
@@ -56,7 +60,36 @@ class DirectoryNavigator:
56
60
  if self.exists(path):
57
61
  os.chdir(path)
58
62
  else:
59
- raise Exception(f"Unable to navigate to '{relative_path}' relative to the target directory.")
63
+ raise Exception(
64
+ f"Unable to navigate to '{relative_path}' relative to the target directory.")
65
+
66
+ @staticmethod
67
+ def find_ara_directory_root():
68
+ """Find the root ara directory by traversing up the directory tree."""
69
+ current_dir = os.getcwd()
70
+
71
+ # Check if we're already inside an ara directory structure
72
+ path_parts = current_dir.split(os.sep)
73
+
74
+ # Look for 'ara' in the path parts
75
+ if 'ara' in path_parts:
76
+ ara_index = path_parts.index('ara')
77
+ # Reconstruct path up to and including 'ara'
78
+ ara_root_parts = path_parts[:ara_index + 1]
79
+ potential_ara_root = os.sep.join(ara_root_parts)
80
+ if os.path.exists(potential_ara_root) and os.path.isdir(potential_ara_root):
81
+ return potential_ara_root
82
+
83
+ # If not inside ara directory, check current directory and parents
84
+ check_dir = current_dir
85
+ # Stop at filesystem root
86
+ while check_dir != os.path.dirname(check_dir):
87
+ ara_path = os.path.join(check_dir, 'ara')
88
+ if os.path.exists(ara_path) and os.path.isdir(ara_path):
89
+ return ara_path
90
+ check_dir = os.path.dirname(check_dir)
91
+
92
+ return None
60
93
 
61
94
  # debug version
62
95
  # def get_ara_directory(self):
@@ -0,0 +1,134 @@
1
+ import sys
2
+ import traceback
3
+ from typing import Optional
4
+ from enum import Enum
5
+ from functools import wraps
6
+
7
+
8
+ RED = '\033[91m'
9
+ RESET = '\033[0m'
10
+
11
+
12
class ErrorLevel(Enum):
    """Severity labels for ARA CLI errors; each member's value is its own name."""

    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
17
+
18
+
19
class AraError(Exception):
    """Base exception class for ARA CLI errors.

    Attributes:
        message: Human-readable description; also passed to ``Exception``.
        error_code: Integer code carried by the error (defaults to 1).
        level: ``ErrorLevel`` severity (defaults to ``ErrorLevel.ERROR``).
    """

    def __init__(
        self,
        message: str,
        error_code: int = 1,
        level: ErrorLevel = ErrorLevel.ERROR,
    ):
        super().__init__(message)
        self.message = message
        self.error_code = error_code
        self.level = level
29
+
30
+
31
class AraValidationError(AraError):
    """Raised when validation fails; always carries error code 2 at ERROR level."""

    def __init__(self, message: str):
        super().__init__(
            message,
            error_code=2,
            level=ErrorLevel.ERROR,
        )
36
+
37
+
38
class AraConfigurationError(AraError):
    """Raised when configuration is invalid; always carries error code 4 at ERROR level."""

    def __init__(self, message: str):
        super().__init__(
            message,
            error_code=4,
            level=ErrorLevel.ERROR,
        )
43
+
44
+
45
class ErrorHandler:
    """Centralized error handler for ARA CLI.

    ``handle_error`` reports an error on stderr and then terminates the
    process via ``sys.exit``; ``report_error`` produces the same output
    without exiting. When ``debug_mode`` is true the traceback of the
    reported error is printed as well.
    """

    def __init__(self, debug_mode: bool = False):
        self.debug_mode = debug_mode

    def handle_error(self, error: Exception, context: Optional[str] = None) -> None:
        """Report ``error`` and exit the process.

        ``AraError`` instances exit with their own ``error_code``; every
        other exception exits with status 1.
        """
        if isinstance(error, AraError):
            self._handle_ara_error(error, context)
        else:
            self._handle_generic_error(error, context)

    def _handle_ara_error(self, error: AraError, context: Optional[str] = None) -> None:
        """Report an ARA-specific error, then exit with ``error.error_code``."""
        self._report_ara_error(error, context)

        sys.exit(error.error_code)

    def _handle_generic_error(
        self, error: Exception, context: Optional[str] = None
    ) -> None:
        """Report a generic Python error, then exit with status 1."""
        self._report_generic_error(error, context)

        sys.exit(1)

    def report_error(self, error: Exception, context: Optional[str] = None) -> None:
        """Report error with standardized formatting but don't exit."""
        if isinstance(error, AraError):
            self._report_ara_error(error, context)
        else:
            self._report_generic_error(error, context)

    def _print_traceback(self, error: BaseException) -> None:
        """Print the traceback attached to ``error`` when debug mode is on.

        The traceback is taken from the error object itself rather than
        from the exception currently being handled, so the output is
        correct even when reporting happens outside an ``except`` block.
        """
        if self.debug_mode:
            traceback.print_exception(type(error), error, error.__traceback__)

    def _report_ara_error(self, error: AraError, context: Optional[str] = None) -> None:
        """Report ARA-specific errors without exiting.

        Output goes to stderr as ``[<LEVEL>] <context>: <message>`` (the
        context part is omitted when no context is given), wrapped in the
        RED/RESET colour codes.
        """
        error_prefix = f"[{error.level.value}]"

        if context:
            print(f"{RED}{error_prefix} {context}: {error.message}{RESET}", file=sys.stderr)
        else:
            print(f"{RED}{error_prefix} {error.message}{RESET}", file=sys.stderr)

        self._print_traceback(error)

    def _report_generic_error(self, error: Exception, context: Optional[str] = None) -> None:
        """Report generic Python errors without exiting.

        Output goes to stderr as ``[ERROR] <context>: <Type>: <text>`` (the
        context part is omitted when no context is given), wrapped in the
        RED/RESET colour codes.
        """
        error_type = type(error).__name__

        if context:
            print(f"{RED}[ERROR] {context}: {error_type}: {str(error)}{RESET}", file=sys.stderr)
        else:
            print(f"{RED}[ERROR] {error_type}: {str(error)}{RESET}", file=sys.stderr)

        self._print_traceback(error)

    def validate_and_exit(
        self, condition: bool, message: str, error_code: int = 1
    ) -> None:
        """Raise ``AraValidationError(message)`` when ``condition`` is false.

        This method does not exit by itself; the raised error has to reach
        ``handle_error`` (for example through ``handle_errors``) for the
        process to terminate. ``error_code`` is accepted but not used: the
        raised ``AraValidationError`` always carries its own code.
        """
        if not condition:
            raise AraValidationError(message)
113
+
114
+
115
def handle_errors(_func=None, context: Optional[str] = None, error_handler: Optional[ErrorHandler] = None):
    """Decorator routing exceptions of an action function to an ErrorHandler.

    Usable bare (``@handle_errors``) or with keyword arguments
    (``@handle_errors(context=..., error_handler=...)``).

    Args:
        _func: The decorated function when the decorator is used bare.
        context: Label passed to the handler; defaults to the function name.
        error_handler: Handler to use; a default ``ErrorHandler()`` is
            created on the first call when none is supplied.

    The wrapper returns the wrapped function's result. When the function
    raises, the exception is passed to ``error_handler.handle_error`` and
    the wrapper returns None if that call returns.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            nonlocal error_handler
            # Create the fallback handler once and reuse it on later calls.
            error_handler = ErrorHandler() if error_handler is None else error_handler

            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                error_handler.handle_error(exc, context or func.__name__)
            else:
                return result

        return wrapper

    return decorator(_func) if callable(_func) else decorator
@@ -1,3 +1,4 @@
1
+ from . import error_handler
1
2
  from ara_cli.classifier import Classifier
2
3
  from ara_cli.artefact_models.artefact_model import Artefact
3
4
  from ara_cli.artefact_fuzzy_search import find_closest_name_matches
@@ -33,8 +34,8 @@ class FileClassifier:
33
34
  if byte > 127:
34
35
  return True
35
36
  except Exception as e:
36
- # Handle unexpected errors while reading the file in binary mode
37
- print(f"Error while checking if file is binary: {e}")
37
+ error_handler.report_error(e, "checking if file is binary")
38
+ # print(f"Error while checking if file is binary: {e}")
38
39
  return False
39
40
 
40
41
  def read_file_with_fallback(self, file_path):
File without changes
@@ -0,0 +1,33 @@
1
+ import base64
2
+ import os
3
+ from ara_cli.file_loaders.file_loader import FileLoader
4
+
5
+
6
class BinaryFileLoader(FileLoader):
    """Loads binary files (images)"""

    def load(
        self,
        file_path: str,
        mime_type: str,
        prefix: str = "",
        suffix: str = "",
        block_delimiter: str = "",
        extract_images: bool = False
    ) -> bool:
        """Append the file to the chat as a base64 data-URI markdown image.

        Args:
            file_path: Path of the binary file to embed.
            mime_type: MIME type written into the data URI.
            prefix: Text written immediately before the image markup.
            suffix: Text written immediately after the image markup.
            block_delimiter: Accepted for signature compatibility; it has
                no effect on what is written.
            extract_images: Accepted for signature compatibility; unused.

        Returns:
            True once the content has been appended to
            ``self.chat.chat_name``.
        """
        with open(file_path, 'rb') as file:
            file_content = file.read()

        base64_image = base64.b64encode(file_content).decode("utf-8")

        # Markdown image whose target is an inline data URI.
        write_content = f"{prefix}![{os.path.basename(file_path)}](data:{mime_type};base64,{base64_image}){suffix}\n"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(write_content)

        return True
@@ -0,0 +1,34 @@
1
+ from ara_cli.file_loaders.document_reader import DocumentReaderFactory
2
+ from ara_cli.file_loaders.file_loader import FileLoader
3
+
4
+
5
class DocumentFileLoader(FileLoader):
    """Loads document files (PDF, DOCX, ODT)"""

    def load(
        self,
        file_path: str,
        prefix: str = "",
        suffix: str = "",
        block_delimiter: str = "```",
        extract_images: bool = False
    ) -> bool:
        """Append a document's text to the chat file.

        A reader is obtained from ``DocumentReaderFactory``; when none is
        available a message is printed and False is returned. Otherwise the
        text is read (image extraction is forwarded to the reader), wrapped
        in ``block_delimiter`` lines when that is non-empty, surrounded by
        ``prefix``/``suffix`` and appended to ``self.chat.chat_name``.

        Returns:
            True when content was written, False for unsupported files.
        """
        document_reader = DocumentReaderFactory.create_reader(file_path)
        if not document_reader:
            print("Unsupported document type.")
            return False

        body = document_reader.read(extract_images=extract_images)
        wrapped = (
            f"{block_delimiter}\n{body}\n{block_delimiter}"
            if block_delimiter
            else body
        )

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{wrapped}{suffix}\n")

        return True
@@ -0,0 +1,245 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from typing import Tuple, Optional
4
+
5
+
6
class DocumentReader(ABC):
    """Abstract base class for document readers.

    Attributes:
        file_path: Path of the document as given by the caller.
        base_dir: Directory part of ``file_path``; image data directories
            are created inside it and image paths are reported relative
            to it.
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.base_dir = os.path.dirname(file_path)

    @abstractmethod
    def read(self, extract_images: bool = False) -> str:
        """Read document and optionally extract images"""
        pass

    def create_image_data_dir(self, extension_suffix: str) -> str:
        """
        Create data directory for images with file extension suffix to avoid conflicts.

        The directory is ``<base_dir>/<stem>_<extension_suffix>.data/images``
        where ``<stem>`` is the document's file name without extension.
        Calling this again for an existing directory is a no-op.

        Returns:
            str: Path to images directory
        """
        stem = os.path.splitext(os.path.basename(self.file_path))[0]
        data_dir = os.path.join(self.base_dir, f"{stem}_{extension_suffix}.data")
        images_dir = os.path.join(data_dir, "images")
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(images_dir, exist_ok=True)
        return images_dir

    def save_and_describe_image(
        self,
        image_data: bytes,
        image_format: str,
        save_dir: str,
        image_counter: int
    ) -> Tuple[str, str]:
        """
        Save image data and get its description from ``describe_image``.

        The image is written to ``<save_dir>/<image_counter>.<image_format>``.

        Returns:
            tuple: (image path relative to ``base_dir``, description)
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular or costly import; confirm before moving it).
        from ara_cli.prompt_handler import describe_image

        image_filename = f"{image_counter}.{image_format}"
        image_path = os.path.join(save_dir, image_filename)

        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        description = describe_image(image_path)

        relative_image_path = os.path.relpath(image_path, self.base_dir)

        return relative_image_path, description
62
+
63
+
64
class DocxReader(DocumentReader):
    """Reader for DOCX files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the document's paragraph text, optionally with image notes.

        Without ``extract_images`` only the paragraph texts joined by
        newlines are returned. With it, every embedded image found in the
        document part's relationships is saved and described, and the
        descriptions are appended under an "Extracted Images" heading.
        Linked (external) images and images whose format cannot be
        determined are skipped; the latter with a printed warning.
        """
        import docx

        doc = docx.Document(self.file_path)
        text_content = '\n'.join(para.text for para in doc.paragraphs)

        if not extract_images:
            return text_content

        from PIL import Image
        import io

        images_dir = self.create_image_data_dir("docx")

        image_descriptions = []
        image_counter = 1

        for rel in doc.part.rels.values():
            if "image" not in rel.reltype:
                continue
            # External relationships point at a URL/path and have no
            # embedded part whose bytes could be read.
            if getattr(rel, "is_external", False):
                continue

            image_data = rel.target_part.blob

            # Determine image format; close the image object right away,
            # only the format name is needed.
            try:
                with Image.open(io.BytesIO(image_data)) as image:
                    image_format = image.format.lower()
            except OSError as e:
                print(f"Warning: Could not read image from DOCX: {e}")
                continue

            relative_path, description = self.save_and_describe_image(
                image_data, image_format, images_dir, image_counter
            )

            image_descriptions.append(
                f"\nImage: {relative_path}\n[{description}]\n"
            )
            image_counter += 1

        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
111
+
112
+
113
class PdfReader(DocumentReader):
    """Reader for PDF files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the PDF converted to markdown, optionally with image notes.

        The text always comes from ``pymupdf4llm.to_markdown`` with image
        writing disabled. With ``extract_images`` every image listed on
        every page is saved and described, and the descriptions are
        appended under an "Extracted Images" heading.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import fitz  # PyMuPDF

        images_dir = self.create_image_data_dir("pdf")

        # Extract text without images first
        text_content = pymupdf4llm.to_markdown(
            self.file_path, write_images=False)

        image_descriptions = []
        image_counter = 1

        doc = fitz.open(self.file_path)
        try:
            for page in doc:
                for img in page.get_images():
                    # The first field of an image entry is its xref number.
                    xref = img[0]
                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    relative_path, description = self.save_and_describe_image(
                        image_bytes, image_ext, images_dir, image_counter
                    )

                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )
                    image_counter += 1
        finally:
            # Release the document even when saving or describing fails.
            doc.close()

        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
165
+
166
+
167
class OdtReader(DocumentReader):
    """Reader for ODT files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the ODT converted to markdown, optionally with image notes.

        The text always comes from ``pymupdf4llm.to_markdown`` with image
        writing disabled. With ``extract_images`` the archive members under
        ``Pictures/`` are saved and described, and the descriptions are
        appended under an "Extracted Images" heading. A picture whose
        format cannot be determined is skipped with a warning; any other
        failure stops image extraction with a warning, and the text
        gathered so far is still returned.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import zipfile
        from PIL import Image
        import io

        images_dir = self.create_image_data_dir("odt")

        text_content = pymupdf4llm.to_markdown(
            self.file_path, write_images=False)

        image_descriptions = []
        image_counter = 1

        try:
            with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
                # Members of the Pictures directory; names ending in '/'
                # are directory entries and carry no image data.
                picture_files = [
                    f for f in odt_zip.namelist()
                    if f.startswith('Pictures/') and not f.endswith('/')]

                for picture_file in picture_files:
                    image_data = odt_zip.read(picture_file)

                    # Determine image format; one undecodable picture must
                    # not prevent the remaining ones from being extracted.
                    try:
                        with Image.open(io.BytesIO(image_data)) as image:
                            image_format = image.format.lower()
                    except OSError as e:
                        print(
                            f"Warning: Could not read image '{picture_file}' from ODT: {e}")
                        continue

                    relative_path, description = self.save_and_describe_image(
                        image_data, image_format, images_dir, image_counter
                    )

                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )
                    image_counter += 1
        except Exception as e:
            # Best effort: the document text is still useful without images.
            print(f"Warning: Could not extract images from ODT: {e}")

        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
224
+
225
+
226
class DocumentReaderFactory:
    """Factory for creating appropriate document readers"""

    @staticmethod
    def create_reader(file_path: str) -> Optional[DocumentReader]:
        """Return a reader matching the file extension, or None.

        The extension is compared case-insensitively; ``.docx``, ``.pdf``
        and ``.odt`` are recognised.
        """
        extension = os.path.splitext(file_path)[1].lower()

        if extension == '.docx':
            return DocxReader(file_path)
        if extension == '.pdf':
            return PdfReader(file_path)
        if extension == '.odt':
            return OdtReader(file_path)

        return None