ara-cli 0.1.13.3__py3-none-any.whl → 0.1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. ara_cli/__init__.py +1 -1
  2. ara_cli/ara_command_action.py +162 -112
  3. ara_cli/ara_config.py +1 -1
  4. ara_cli/ara_subcommands/convert.py +66 -2
  5. ara_cli/ara_subcommands/prompt.py +266 -106
  6. ara_cli/artefact_autofix.py +2 -2
  7. ara_cli/artefact_converter.py +152 -53
  8. ara_cli/artefact_creator.py +41 -17
  9. ara_cli/artefact_lister.py +3 -3
  10. ara_cli/artefact_models/artefact_model.py +1 -1
  11. ara_cli/artefact_models/artefact_templates.py +0 -9
  12. ara_cli/artefact_models/feature_artefact_model.py +8 -8
  13. ara_cli/artefact_reader.py +62 -43
  14. ara_cli/artefact_scan.py +39 -17
  15. ara_cli/chat.py +23 -15
  16. ara_cli/children_contribution_updater.py +737 -0
  17. ara_cli/classifier.py +34 -0
  18. ara_cli/commands/load_command.py +4 -3
  19. ara_cli/commands/load_image_command.py +1 -1
  20. ara_cli/commands/read_command.py +23 -27
  21. ara_cli/completers.py +24 -0
  22. ara_cli/error_handler.py +26 -11
  23. ara_cli/file_loaders/document_reader.py +0 -178
  24. ara_cli/file_loaders/factories/__init__.py +0 -0
  25. ara_cli/file_loaders/factories/document_reader_factory.py +32 -0
  26. ara_cli/file_loaders/factories/file_loader_factory.py +27 -0
  27. ara_cli/file_loaders/file_loader.py +1 -30
  28. ara_cli/file_loaders/loaders/__init__.py +0 -0
  29. ara_cli/file_loaders/{document_file_loader.py → loaders/document_file_loader.py} +1 -1
  30. ara_cli/file_loaders/loaders/text_file_loader.py +47 -0
  31. ara_cli/file_loaders/readers/__init__.py +0 -0
  32. ara_cli/file_loaders/readers/docx_reader.py +49 -0
  33. ara_cli/file_loaders/readers/excel_reader.py +27 -0
  34. ara_cli/file_loaders/{markdown_reader.py → readers/markdown_reader.py} +1 -1
  35. ara_cli/file_loaders/readers/odt_reader.py +59 -0
  36. ara_cli/file_loaders/readers/pdf_reader.py +54 -0
  37. ara_cli/file_loaders/readers/pptx_reader.py +104 -0
  38. ara_cli/file_loaders/tools/__init__.py +0 -0
  39. ara_cli/output_suppressor.py +53 -0
  40. ara_cli/prompt_handler.py +123 -17
  41. ara_cli/tag_extractor.py +8 -7
  42. ara_cli/version.py +1 -1
  43. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/METADATA +18 -12
  44. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/RECORD +58 -45
  45. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/WHEEL +1 -1
  46. tests/test_artefact_converter.py +1 -46
  47. tests/test_artefact_lister.py +11 -8
  48. tests/test_chat.py +4 -4
  49. tests/test_chat_givens_images.py +1 -1
  50. tests/test_children_contribution_updater.py +98 -0
  51. tests/test_document_loader_office.py +267 -0
  52. tests/test_prompt_handler.py +416 -214
  53. tests/test_setup_default_chat_prompt_mode.py +198 -0
  54. tests/test_tag_extractor.py +95 -49
  55. ara_cli/file_loaders/document_readers.py +0 -233
  56. ara_cli/file_loaders/file_loaders.py +0 -123
  57. ara_cli/file_loaders/text_file_loader.py +0 -187
  58. /ara_cli/file_loaders/{binary_file_loader.py → loaders/binary_file_loader.py} +0 -0
  59. /ara_cli/file_loaders/{image_processor.py → tools/image_processor.py} +0 -0
  60. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/entry_points.txt +0 -0
  61. {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/top_level.txt +0 -0
@@ -1,123 +0,0 @@
1
- import os
2
- import base64
3
- from abc import ABC, abstractmethod
4
- from typing import Optional
5
- from ara_cli.file_loaders.markdown_reader import MarkdownReader
6
- from ara_cli.file_loaders.document_readers import DocumentReaderFactory
7
-
8
-
9
- class FileLoader(ABC):
10
- """Abstract base class for file loaders"""
11
-
12
- def __init__(self, chat_instance):
13
- self.chat = chat_instance
14
-
15
- @abstractmethod
16
- def load(self, file_path: str, **kwargs) -> bool:
17
- """Load file with specific implementation"""
18
- pass
19
-
20
- def add_prompt_tag_if_needed(self):
21
- """Add prompt tag to chat if needed"""
22
- self.chat.add_prompt_tag_if_needed(self.chat.chat_name)
23
-
24
-
25
- class TextFileLoader(FileLoader):
26
- """Loads text files"""
27
-
28
- def load(self, file_path: str, prefix: str = "", suffix: str = "",
29
- block_delimiter: str = "", extract_images: bool = False) -> bool:
30
- """Load text file with optional markdown image extraction"""
31
-
32
- is_md_file = file_path.lower().endswith('.md')
33
-
34
- if is_md_file and extract_images:
35
- reader = MarkdownReader(file_path)
36
- file_content = reader.read(extract_images=True)
37
- else:
38
- with open(file_path, 'r', encoding='utf-8', errors="replace") as file:
39
- file_content = file.read()
40
-
41
- if block_delimiter:
42
- file_content = f"{block_delimiter}\n{file_content}\n{block_delimiter}"
43
-
44
- write_content = f"{prefix}{file_content}{suffix}\n"
45
-
46
- with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
47
- chat_file.write(write_content)
48
-
49
- return True
50
-
51
-
52
- class BinaryFileLoader(FileLoader):
53
- """Loads binary files (images)"""
54
-
55
- def load(self, file_path: str, mime_type: str, prefix: str = "", suffix: str = "") -> bool:
56
- """Load binary file as base64"""
57
-
58
- with open(file_path, 'rb') as file:
59
- file_content = file.read()
60
-
61
- base64_image = base64.b64encode(file_content).decode("utf-8")
62
- write_content = f"{prefix}![{os.path.basename(file_path)}](data:{mime_type};base64,{base64_image}){suffix}\n"
63
-
64
- with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
65
- chat_file.write(write_content)
66
-
67
- return True
68
-
69
-
70
- class DocumentFileLoader(FileLoader):
71
- """Loads document files (PDF, DOCX, ODT)"""
72
-
73
- def load(self, file_path: str, prefix: str = "", suffix: str = "",
74
- block_delimiter: str = "```", extract_images: bool = False) -> bool:
75
- """Load document file with optional image extraction"""
76
-
77
- reader = DocumentReaderFactory.create_reader(file_path)
78
-
79
- if not reader:
80
- print("Unsupported document type.")
81
- return False
82
-
83
- text_content = reader.read(extract_images=extract_images)
84
-
85
- if block_delimiter:
86
- text_content = f"{block_delimiter}\n{text_content}\n{block_delimiter}"
87
-
88
- write_content = f"{prefix}{text_content}{suffix}\n"
89
-
90
- with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
91
- chat_file.write(write_content)
92
-
93
- return True
94
-
95
-
96
- class FileLoaderFactory:
97
- """Factory for creating appropriate file loaders"""
98
-
99
- BINARY_TYPE_MAPPING = {
100
- ".png": "image/png",
101
- ".jpg": "image/jpeg",
102
- ".jpeg": "image/jpeg",
103
- }
104
-
105
- DOCUMENT_TYPE_EXTENSIONS = [".docx", ".doc", ".odt", ".pdf"]
106
-
107
- @staticmethod
108
- def create_loader(file_name: str, chat_instance) -> Optional[FileLoader]:
109
- """Create appropriate loader based on file type"""
110
-
111
- file_name_lower = file_name.lower()
112
-
113
- # Check if it's a binary file
114
- for extension, mime_type in FileLoaderFactory.BINARY_TYPE_MAPPING.items():
115
- if file_name_lower.endswith(extension):
116
- return BinaryFileLoader(chat_instance)
117
-
118
- # Check if it's a document
119
- if any(file_name_lower.endswith(ext) for ext in FileLoaderFactory.DOCUMENT_TYPE_EXTENSIONS):
120
- return DocumentFileLoader(chat_instance)
121
-
122
- # Default to text file loader
123
- return TextFileLoader(chat_instance)
@@ -1,187 +0,0 @@
1
- import os
2
- import re
3
- import base64
4
- import tempfile
5
- from typing import Optional, Tuple
6
- import requests
7
- from charset_normalizer import from_path
8
- from ara_cli.prompt_handler import describe_image
9
- from ara_cli.file_loaders.file_loader import FileLoader
10
-
11
-
12
- class TextFileLoader(FileLoader):
13
- """Loads text files"""
14
- def load(self, file_path: str, prefix: str = "", suffix: str = "",
15
- block_delimiter: str = "", extract_images: bool = False, **kwargs) -> bool:
16
- """Load text file with optional markdown image extraction"""
17
-
18
- is_md_file = file_path.lower().endswith('.md')
19
-
20
- if is_md_file and extract_images:
21
- reader = MarkdownReader(file_path)
22
- file_content = reader.read(extract_images=True).replace('\r\n', '\n')
23
- else:
24
- # Use charset-normalizer to detect encoding
25
- encoded_content = from_path(file_path).best()
26
- if not encoded_content:
27
- print(f"Failed to detect encoding for {file_path}")
28
- return False
29
- file_content = str(encoded_content).replace('\r\n', '\n')
30
-
31
- if block_delimiter:
32
- file_content = f"{block_delimiter}\n{file_content}\n{block_delimiter}"
33
-
34
- write_content = f"{prefix}{file_content}{suffix}\n"
35
-
36
- with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
37
- chat_file.write(write_content)
38
-
39
- return True
40
-
41
-
42
- class MarkdownReader:
43
- """Handles markdown file reading with optional image extraction"""
44
-
45
- def __init__(self, file_path: str):
46
- self.file_path = file_path
47
- self.base_dir = os.path.dirname(file_path)
48
- self.image_processor = ImageProcessor()
49
-
50
- def read(self, extract_images: bool = False) -> str:
51
- """Read markdown file and optionally extract/describe images"""
52
- with open(self.file_path, 'r', encoding='utf-8') as file:
53
- content = file.read()
54
-
55
- if not extract_images:
56
- return content
57
-
58
- return self._process_images(content)
59
-
60
- def _process_images(self, content: str) -> str:
61
- """Process all images in markdown content"""
62
- # Pattern to match markdown images: ![alt text](url or path)
63
- image_pattern = re.compile(r'!\[([^\]]*)\]\(([^\)]+)\)')
64
- base64_pattern = re.compile(r'data:image/([^;]+);base64,([^)]+)')
65
-
66
- # Process each image reference
67
- for match in image_pattern.finditer(content):
68
- image_ref = match.group(2)
69
- replacement = self._process_single_image(image_ref, base64_pattern)
70
-
71
- if replacement:
72
- content = content.replace(match.group(0), replacement, 1)
73
-
74
- return content
75
-
76
- def _process_single_image(self, image_ref: str, base64_pattern: re.Pattern) -> Optional[str]:
77
- """Process a single image reference"""
78
- try:
79
- # Try base64 first
80
- result = self.image_processor.process_base64_image(
81
- image_ref, base64_pattern)
82
- if result:
83
- return result[0]
84
-
85
- # Try URL
86
- result, error = self.image_processor.process_url_image(image_ref)
87
- if result:
88
- if error:
89
- print(f"Warning: {error}")
90
- return result
91
-
92
- # Try local file
93
- result, error = self.image_processor.process_local_image(
94
- image_ref, self.base_dir)
95
- if error:
96
- print(f"Warning: {error}")
97
- return result
98
-
99
- except Exception as e:
100
- print(f"Warning: Could not process image {image_ref}: {e}")
101
- return None
102
-
103
-
104
- class ImageProcessor:
105
- """Handles image processing operations"""
106
-
107
- @staticmethod
108
- def process_base64_image(
109
- image_ref: str,
110
- base64_pattern: re.Pattern
111
- ) -> Optional[Tuple[str, str]]:
112
- """Process base64 encoded image and return description"""
113
- base64_match = base64_pattern.match(image_ref)
114
- if not base64_match:
115
- return None
116
-
117
- image_format = base64_match.group(1)
118
- base64_data = base64_match.group(2)
119
- image_data = base64.b64decode(base64_data)
120
-
121
- # Create a temporary file to send to LLM
122
- with tempfile.NamedTemporaryFile(suffix=f'.{image_format}', delete=False) as tmp_file:
123
- tmp_file.write(image_data)
124
- tmp_file_path = tmp_file.name
125
-
126
- try:
127
- description = describe_image(tmp_file_path)
128
- return f"Image: (base64 embedded {image_format} image)\n[{description}]", None
129
- finally:
130
- os.unlink(tmp_file_path)
131
-
132
- @staticmethod
133
- def process_url_image(image_ref: str) -> Tuple[str, Optional[str]]:
134
- """Process image from URL and return description"""
135
- if not image_ref.startswith(('http://', 'https://')):
136
- return "", None
137
-
138
- try:
139
- response = requests.get(image_ref, timeout=10)
140
- response.raise_for_status()
141
-
142
- # Determine file extension from content-type
143
- content_type = response.headers.get('content-type', '')
144
- ext = ImageProcessor._get_extension_from_content_type(
145
- content_type, image_ref)
146
-
147
- # Create temporary file
148
- with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_file:
149
- tmp_file.write(response.content)
150
- tmp_file_path = tmp_file.name
151
-
152
- try:
153
- description = describe_image(tmp_file_path)
154
- return f"Image: {image_ref}\n[{description}]", None
155
- finally:
156
- os.unlink(tmp_file_path)
157
-
158
- except Exception as e:
159
- error_msg = f"Could not download image: {str(e)}"
160
- return f"Image: {image_ref}\n[{error_msg}]", error_msg
161
-
162
- @staticmethod
163
- def process_local_image(image_ref: str, base_dir: str) -> Tuple[str, Optional[str]]:
164
- """Process local image file and return description"""
165
- if os.path.isabs(image_ref):
166
- local_image_path = image_ref
167
- else:
168
- local_image_path = os.path.join(base_dir, image_ref)
169
-
170
- if os.path.exists(local_image_path):
171
- description = describe_image(local_image_path)
172
- return f"Image: {image_ref}\n[{description}]", None
173
- else:
174
- error_msg = f"Image file not found"
175
- return f"Image: {image_ref}\n[{error_msg}]", f"Local image not found: {local_image_path}"
176
-
177
- @staticmethod
178
- def _get_extension_from_content_type(content_type: str, url: str) -> str:
179
- """Determine file extension from content type or URL"""
180
- if 'image/jpeg' in content_type:
181
- return '.jpg'
182
- elif 'image/png' in content_type:
183
- return '.png'
184
- elif 'image/gif' in content_type:
185
- return '.gif'
186
- else:
187
- return os.path.splitext(url)[1] or '.png'