ara-cli 0.1.13.3__py3-none-any.whl → 0.1.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ara_cli/__init__.py +1 -1
- ara_cli/ara_command_action.py +162 -112
- ara_cli/ara_config.py +1 -1
- ara_cli/ara_subcommands/convert.py +66 -2
- ara_cli/ara_subcommands/prompt.py +266 -106
- ara_cli/artefact_autofix.py +2 -2
- ara_cli/artefact_converter.py +152 -53
- ara_cli/artefact_creator.py +41 -17
- ara_cli/artefact_lister.py +3 -3
- ara_cli/artefact_models/artefact_model.py +1 -1
- ara_cli/artefact_models/artefact_templates.py +0 -9
- ara_cli/artefact_models/feature_artefact_model.py +8 -8
- ara_cli/artefact_reader.py +62 -43
- ara_cli/artefact_scan.py +39 -17
- ara_cli/chat.py +23 -15
- ara_cli/children_contribution_updater.py +737 -0
- ara_cli/classifier.py +34 -0
- ara_cli/commands/load_command.py +4 -3
- ara_cli/commands/load_image_command.py +1 -1
- ara_cli/commands/read_command.py +23 -27
- ara_cli/completers.py +24 -0
- ara_cli/error_handler.py +26 -11
- ara_cli/file_loaders/document_reader.py +0 -178
- ara_cli/file_loaders/factories/__init__.py +0 -0
- ara_cli/file_loaders/factories/document_reader_factory.py +32 -0
- ara_cli/file_loaders/factories/file_loader_factory.py +27 -0
- ara_cli/file_loaders/file_loader.py +1 -30
- ara_cli/file_loaders/loaders/__init__.py +0 -0
- ara_cli/file_loaders/{document_file_loader.py → loaders/document_file_loader.py} +1 -1
- ara_cli/file_loaders/loaders/text_file_loader.py +47 -0
- ara_cli/file_loaders/readers/__init__.py +0 -0
- ara_cli/file_loaders/readers/docx_reader.py +49 -0
- ara_cli/file_loaders/readers/excel_reader.py +27 -0
- ara_cli/file_loaders/{markdown_reader.py → readers/markdown_reader.py} +1 -1
- ara_cli/file_loaders/readers/odt_reader.py +59 -0
- ara_cli/file_loaders/readers/pdf_reader.py +54 -0
- ara_cli/file_loaders/readers/pptx_reader.py +104 -0
- ara_cli/file_loaders/tools/__init__.py +0 -0
- ara_cli/output_suppressor.py +53 -0
- ara_cli/prompt_handler.py +123 -17
- ara_cli/tag_extractor.py +8 -7
- ara_cli/version.py +1 -1
- {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/METADATA +18 -12
- {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/RECORD +58 -45
- {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/WHEEL +1 -1
- tests/test_artefact_converter.py +1 -46
- tests/test_artefact_lister.py +11 -8
- tests/test_chat.py +4 -4
- tests/test_chat_givens_images.py +1 -1
- tests/test_children_contribution_updater.py +98 -0
- tests/test_document_loader_office.py +267 -0
- tests/test_prompt_handler.py +416 -214
- tests/test_setup_default_chat_prompt_mode.py +198 -0
- tests/test_tag_extractor.py +95 -49
- ara_cli/file_loaders/document_readers.py +0 -233
- ara_cli/file_loaders/file_loaders.py +0 -123
- ara_cli/file_loaders/text_file_loader.py +0 -187
- /ara_cli/file_loaders/{binary_file_loader.py → loaders/binary_file_loader.py} +0 -0
- /ara_cli/file_loaders/{image_processor.py → tools/image_processor.py} +0 -0
- {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/entry_points.txt +0 -0
- {ara_cli-0.1.13.3.dist-info → ara_cli-0.1.14.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from ara_cli.file_loaders.document_reader import DocumentReader
|
|
2
|
+
|
|
3
|
+
class DocxReader(DocumentReader):
    """Reader for DOCX files.

    Extracts the plain paragraph text of a ``.docx`` document and,
    optionally, saves its embedded images to a data directory and appends
    a generated description of each one to the returned text.
    """

    def read(self, extract_images: bool = False) -> str:
        """Return the document content as text.

        Args:
            extract_images: When True, embedded images are written to the
                directory returned by ``create_image_data_dir("docx")`` and
                an ``### Extracted Images`` section is appended.

        Returns:
            The joined paragraph text, optionally followed by the image
            description section.
        """
        import docx

        doc = docx.Document(self.file_path)
        text_content = '\n'.join(para.text for para in doc.paragraphs)

        if not extract_images:
            return text_content

        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("docx")

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        # Embedded pictures are stored as relationship targets of the
        # document part.
        for rel in doc.part.rels.values():
            if "image" not in rel.reltype:
                continue

            image_data = rel.target_part.blob

            # Determine image format. Close the PIL handle promptly and
            # fall back to "png" when PIL cannot identify the format
            # (Image.format may be None, which would break .lower()).
            with Image.open(io.BytesIO(image_data)) as image:
                image_format = (image.format or "png").lower()

            # Save and describe image
            relative_path, description = self.save_and_describe_image(
                image_data, image_format, images_dir, image_counter
            )

            # Add formatted description to list
            image_descriptions.append(
                f"\nImage: {relative_path}\n[{description}]\n"
            )
            image_counter += 1

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from ara_cli.file_loaders.document_reader import DocumentReader
|
|
2
|
+
|
|
3
|
+
class ExcelReader(DocumentReader):
    """Reader for Excel workbooks.

    Renders every sheet as a markdown table. Image extraction is not
    supported for spreadsheets; the flag is accepted for interface
    compatibility and ignored. Any failure is reported as a plain
    error string rather than raised.
    """

    def read(self, extract_images: bool = False) -> str:
        """Return all sheets rendered as markdown, one section per sheet."""
        import pandas as pd

        try:
            # sheet_name=None loads every sheet into a dict keyed by name
            workbook = pd.read_excel(self.file_path, sheet_name=None)

            rendered = []
            for name, frame in workbook.items():
                rendered.append(f"### Sheet: {name}")
                if frame.empty:
                    rendered.append("_Empty Sheet_")
                else:
                    # Blank out NaN cells before converting to markdown
                    rendered.append(frame.fillna("").to_markdown(index=False))
                rendered.append("")  # Add empty line between sheets

            return "\n".join(rendered)

        except Exception as e:
            return f"Error reading Excel file: {str(e)}"
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from ara_cli.file_loaders.document_reader import DocumentReader
|
|
2
|
+
|
|
3
|
+
class OdtReader(DocumentReader):
    """Reader for ODT files.

    Converts the document to markdown via pymupdf4llm and, optionally,
    extracts the images stored in the archive's ``Pictures/`` directory,
    appending a generated description of each one to the returned text.
    """

    def read(self, extract_images: bool = False) -> str:
        """Return the document content as markdown.

        Args:
            extract_images: When True, images from the ODT archive are
                written to the directory returned by
                ``create_image_data_dir("odt")`` and an
                ``### Extracted Images`` section is appended.

        Returns:
            The markdown text, optionally followed by the image
            description section.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import zipfile
        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("odt")

        # Get text content
        text_content = pymupdf4llm.to_markdown(
            self.file_path, write_images=False)

        # Extract and process images from ODT (a zip archive)
        image_descriptions = []
        image_counter = 1

        try:
            with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
                # List all files in the Pictures directory
                picture_files = [
                    f for f in odt_zip.namelist() if f.startswith('Pictures/')]

                for picture_file in picture_files:
                    # Handle each entry independently so one corrupt or
                    # non-image file does not abort the remaining images.
                    try:
                        image_data = odt_zip.read(picture_file)

                        # Determine image format; fall back to "png" when
                        # PIL cannot identify it (Image.format may be None)
                        with Image.open(io.BytesIO(image_data)) as image:
                            image_format = (image.format or "png").lower()

                        # Save and describe image
                        relative_path, description = self.save_and_describe_image(
                            image_data, image_format, images_dir, image_counter
                        )

                        image_descriptions.append(
                            f"\nImage: {relative_path}\n[{description}]\n"
                        )
                        image_counter += 1
                    except Exception as e:
                        print(f"Warning: Could not extract images from ODT: {e}")
        except Exception as e:
            print(f"Warning: Could not extract images from ODT: {e}")

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from ara_cli.file_loaders.document_reader import DocumentReader
|
|
2
|
+
|
|
3
|
+
class PdfReader(DocumentReader):
    """Reader for PDF files.

    Converts the document to markdown via pymupdf4llm and, optionally,
    extracts embedded images with PyMuPDF, appending a generated
    description of each one to the returned text.
    """

    def read(self, extract_images: bool = False) -> str:
        """Return the document content as markdown.

        Args:
            extract_images: When True, embedded images are written to the
                directory returned by ``create_image_data_dir("pdf")`` and
                an ``### Extracted Images`` section is appended.

        Returns:
            The markdown text, optionally followed by the image
            description section.
        """
        import pymupdf4llm

        if not extract_images:
            return pymupdf4llm.to_markdown(self.file_path, write_images=False)

        import fitz  # PyMuPDF

        # Create images directory
        images_dir = self.create_image_data_dir("pdf")

        # Extract text without images first
        text_content = pymupdf4llm.to_markdown(
            self.file_path, write_images=False)

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        doc = fitz.open(self.file_path)
        try:
            # try/finally guarantees the document handle is released even
            # if image extraction or description raises mid-loop.
            for page in doc:
                for img in page.get_images():
                    # First tuple element is the image's xref number
                    xref = img[0]
                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_bytes, image_ext, images_dir, image_counter
                    )

                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )
                    image_counter += 1
        finally:
            doc.close()

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + \
                "\n".join(image_descriptions)

        return text_content
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from ara_cli.file_loaders.document_reader import DocumentReader
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PptxReader(DocumentReader):
    """Reader for PowerPoint files.

    Renders each slide as markdown (slide heading, title, bullet lines)
    and, optionally, extracts embedded pictures with generated
    descriptions inlined at their position in the slide.
    """

    @staticmethod
    def _getActionImage(shape, MSO_SHAPE_TYPE):
        """Return ``(blob, ext)`` for a picture shape, or ``(None, None)``.

        Handles plain picture shapes as well as picture placeholders;
        any attribute/access error is treated as "no image here".
        """
        try:
            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                return shape.image.blob, shape.image.ext
            elif shape.is_placeholder and hasattr(shape, "image"):
                return shape.image.blob, shape.image.ext
        except Exception:
            pass
        return None, None

    @staticmethod
    def _get_shape_text(shape, slide):
        """Return markdown lines for a shape's text frame (may be empty).

        The slide title becomes a ``###`` heading; every other non-empty
        paragraph becomes a bullet line.
        """
        if not shape.has_text_frame:
            return []

        lines = []
        is_title = False
        try:
            # slide.shapes.title can raise AttributeError for some layouts
            if shape == slide.shapes.title:
                is_title = True
        except AttributeError:
            pass

        text_frame = shape.text_frame
        if is_title:
            lines.append(f"### {text_frame.text}")
        else:
            for paragraph in text_frame.paragraphs:
                text = paragraph.text.strip()
                if text:
                    lines.append(f"- {text}")
        return lines

    def read(self, extract_images: bool = False) -> str:
        """Return the presentation rendered as markdown.

        Args:
            extract_images: When True, pictures are written to the
                directory returned by ``create_image_data_dir("pptx")``
                and a description block is appended after each one.

        Returns:
            Markdown text, or an error string when the file cannot
            be read.
        """
        from pptx import Presentation
        from pptx.enum.shapes import MSO_SHAPE_TYPE

        try:
            prs = Presentation(self.file_path)
            md_lines = []

            # Prepare image extraction if requested
            images_dir = None
            image_counter = 1

            if extract_images:
                images_dir = self.create_image_data_dir("pptx")

            def process_shape(shape):
                # Recursively flatten grouped shapes, then emit text and
                # (optionally) an image description for the leaf shape.
                nonlocal image_counter

                if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
                    for sub_shape in shape.shapes:
                        process_shape(sub_shape)
                    return

                # Text extraction
                md_lines.extend(self._get_shape_text(shape, slide))

                # Image extraction
                if extract_images:
                    blob, ext = self._getActionImage(shape, MSO_SHAPE_TYPE)
                    if blob and ext:
                        try:
                            relative_path, description = self.save_and_describe_image(
                                blob, ext, images_dir, image_counter
                            )
                            md_lines.append(
                                f"\nImage: {relative_path}\n[{description}]\n"
                            )
                            image_counter += 1
                        except Exception as img_err:
                            print(
                                f"Warning: Failed to extract image from slide {index+1}: {img_err}"
                            )

            for index, slide in enumerate(prs.slides):
                md_lines.append(f"## Slide {index + 1}")

                # Read shapes in visual order: top-to-bottom, left-to-right
                shapes = sorted(
                    slide.shapes, key=lambda x: (x.top or 0, x.left or 0)
                )

                for shape in shapes:
                    process_shape(shape)

                md_lines.append("\n---\n")

            return "\n".join(md_lines)

        except Exception as e:
            return f"Error reading PowerPoint file: {str(e)}"
|
|
File without changes
|
ara_cli/output_suppressor.py
CHANGED
|
@@ -15,3 +15,56 @@ def suppress_stdout(suppress=False):
|
|
|
15
15
|
sys.stdout = old_stdout
|
|
16
16
|
else:
|
|
17
17
|
yield
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@contextmanager
def suppress_stderr():
    """Temporarily redirect stderr to os.devnull.

    Handy for hiding debug/error chatter printed by third-party
    libraries. stderr is always restored on exit, even when the
    managed body raises.
    """
    devnull = open(os.devnull, "w", encoding="utf-8")
    saved_stderr = sys.stderr
    sys.stderr = devnull
    try:
        yield
    finally:
        sys.stderr = saved_stderr
        devnull.close()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FilteredStdout:
    """stdout proxy that drops writes containing known noise messages.

    Everything not explicitly overridden is delegated to the wrapped
    stream, so the proxy can stand in wherever a file-like object is
    expected.
    """

    # Substrings whose presence causes the whole write to be dropped.
    FILTERED_PATTERNS = [
        "Provider List: https://docs.litellm.ai/docs/providers",
    ]

    def __init__(self, original_stdout):
        self.original_stdout = original_stdout

    def write(self, text):
        # Drop the write entirely when any noise pattern occurs in it.
        if any(pattern in text for pattern in self.FILTERED_PATTERNS):
            return
        self.original_stdout.write(text)

    def flush(self):
        self.original_stdout.flush()

    def __getattr__(self, name):
        # Delegate everything else (encoding, isatty, ...) to the stream.
        return getattr(self.original_stdout, name)


@contextmanager
def filter_unwanted_output():
    """Filter noisy stdout messages and silence stderr for the duration.

    stdout is wrapped in FilteredStdout; stderr is redirected to
    os.devnull. Both streams are restored on exit, even on exceptions.
    """
    saved_stdout = sys.stdout
    saved_stderr = sys.stderr

    sys.stdout = FilteredStdout(saved_stdout)

    with open(os.devnull, "w", encoding="utf-8") as devnull:
        sys.stderr = devnull
        try:
            yield
        finally:
            sys.stdout = saved_stdout
            sys.stderr = saved_stderr
|
ara_cli/prompt_handler.py
CHANGED
|
@@ -10,15 +10,79 @@ import logging
|
|
|
10
10
|
import warnings
|
|
11
11
|
from io import StringIO
|
|
12
12
|
from contextlib import redirect_stderr
|
|
13
|
-
from langfuse import Langfuse
|
|
14
|
-
from langfuse.api.resources.commons.errors import Error as LangfuseError, NotFoundError
|
|
15
|
-
import litellm
|
|
16
13
|
from ara_cli.classifier import Classifier
|
|
17
14
|
from ara_cli.artefact_creator import ArtefactCreator
|
|
18
15
|
from ara_cli.template_manager import TemplatePathManager
|
|
19
16
|
from ara_cli.ara_config import ConfigManager
|
|
20
17
|
from ara_cli.file_lister import generate_markdown_listing
|
|
21
18
|
|
|
19
|
+
# Heavy third-party modules are imported lazily on first LLM use.
# These module-level names stay patchable for tests that monkeypatch them.
litellm = None  # populated by _get_litellm()
Langfuse = None  # populated by _get_langfuse()
_logging_configured = False  # guard so logging setup runs only once


def _configure_logging():
    """Silence noisy litellm/langfuse/telemetry loggers (idempotent)."""
    global _logging_configured
    if _logging_configured:
        return
    _logging_configured = True

    noisy_loggers = (
        "LiteLLM",
        "litellm",
        "LiteLLM Proxy",
        "LiteLLM Router",
        "langfuse",
        "opentelemetry",
        "opentelemetry.exporter.otlp.proto.http.trace_exporter",
        "httpx",
        "httpcore",
    )
    for logger_name in noisy_loggers:
        logging.getLogger(logger_name).setLevel(logging.CRITICAL)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _get_litellm():
    """Import and configure litellm on first use; return the module.

    Keeps CLI startup fast by deferring the heavy import. On first load
    the module's debug output is disabled and a FilteredStdout wrapper is
    installed so litellm's banner noise never reaches the terminal.
    """
    global litellm
    if litellm is not None:
        return litellm

    _configure_logging()
    import litellm as _litellm

    _litellm.suppress_debug_info = True
    _litellm.set_verbose = False
    litellm = _litellm

    # Apply output filtering only once litellm is actually loaded
    from ara_cli.output_suppressor import FilteredStdout
    import sys

    if not isinstance(sys.stdout, FilteredStdout):
        sys.stdout = FilteredStdout(sys.stdout)
    return litellm
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_langfuse():
    """Import the Langfuse client class on first use and cache it."""
    global Langfuse
    if Langfuse is not None:
        return Langfuse

    _configure_logging()
    from langfuse import Langfuse as _Langfuse

    Langfuse = _Langfuse
    return Langfuse
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _get_langfuse_errors():
    """Lazily import and return ``(LangfuseError, NotFoundError)``.

    Imported on demand so that merely loading this module does not pull
    in the langfuse package.
    """
    from langfuse.api.resources.commons.errors import Error, NotFoundError

    return Error, NotFoundError
|
|
85
|
+
|
|
22
86
|
|
|
23
87
|
class LLMSingleton:
|
|
24
88
|
_instance = None
|
|
@@ -51,6 +115,7 @@ class LLMSingleton:
|
|
|
51
115
|
|
|
52
116
|
captured_stderr = StringIO()
|
|
53
117
|
with redirect_stderr(captured_stderr):
|
|
118
|
+
Langfuse = _get_langfuse()
|
|
54
119
|
self.langfuse = Langfuse(
|
|
55
120
|
public_key=langfuse_public_key,
|
|
56
121
|
secret_key=langfuse_secret_key,
|
|
@@ -218,14 +283,43 @@ def send_prompt(prompt, purpose="default"):
|
|
|
218
283
|
with LLMSingleton.get_instance().langfuse.start_as_current_span(
|
|
219
284
|
name="send_prompt"
|
|
220
285
|
) as span:
|
|
286
|
+
# Sanitize prompt for logging (remove base64 image data)
|
|
287
|
+
def sanitize_message(msg):
|
|
288
|
+
import copy
|
|
289
|
+
|
|
290
|
+
if not isinstance(msg, dict):
|
|
291
|
+
return msg
|
|
292
|
+
|
|
293
|
+
clean_msg = copy.deepcopy(msg)
|
|
294
|
+
content = clean_msg.get("content")
|
|
295
|
+
|
|
296
|
+
if isinstance(content, list):
|
|
297
|
+
new_content = []
|
|
298
|
+
for item in content:
|
|
299
|
+
if item.get("type") == "image_url":
|
|
300
|
+
# Replace image_url with text placeholder to avoid Langfuse parsing errors on invalid base64
|
|
301
|
+
new_content.append(
|
|
302
|
+
{
|
|
303
|
+
"type": "text",
|
|
304
|
+
"text": "[IMAGE DATA TRUNCATED FOR LOGGING]",
|
|
305
|
+
}
|
|
306
|
+
)
|
|
307
|
+
else:
|
|
308
|
+
new_content.append(item)
|
|
309
|
+
clean_msg["content"] = new_content
|
|
310
|
+
return clean_msg
|
|
311
|
+
|
|
312
|
+
sanitized_prompt = [sanitize_message(msg) for msg in prompt]
|
|
313
|
+
|
|
221
314
|
span.update_trace(
|
|
222
|
-
input={"prompt":
|
|
315
|
+
input={"prompt": sanitized_prompt, "purpose": purpose, "model": model_info}
|
|
223
316
|
)
|
|
224
317
|
|
|
225
318
|
config_parameters.pop("provider", None)
|
|
226
319
|
|
|
227
320
|
filtered_prompt = [msg for msg in prompt if _is_valid_message(msg)]
|
|
228
321
|
|
|
322
|
+
litellm = _get_litellm()
|
|
229
323
|
completion = litellm.completion(
|
|
230
324
|
**config_parameters, messages=filtered_prompt, stream=True
|
|
231
325
|
)
|
|
@@ -274,13 +368,12 @@ def describe_image(image_path: str) -> str:
|
|
|
274
368
|
describe_image_prompt = (
|
|
275
369
|
langfuse_prompt.prompt if langfuse_prompt.prompt else None
|
|
276
370
|
)
|
|
277
|
-
except
|
|
278
|
-
|
|
371
|
+
except Exception as e:
|
|
372
|
+
# Silently fallback - no need to show error for describe-image prompt
|
|
279
373
|
describe_image_prompt = None
|
|
280
374
|
|
|
281
375
|
# Fallback to default prompt if Langfuse prompt is not available
|
|
282
376
|
if not describe_image_prompt:
|
|
283
|
-
logging.info("Using default describe-image prompt.")
|
|
284
377
|
describe_image_prompt = (
|
|
285
378
|
"Please describe this image in detail. If it contains text, transcribe it exactly. "
|
|
286
379
|
"If it's a diagram or chart, explain its structure and content. If it's a photo or illustration, "
|
|
@@ -538,7 +631,11 @@ def extract_and_load_markdown_files(md_prompt_file_path):
|
|
|
538
631
|
elif "[x]" in line:
|
|
539
632
|
relative_path = line.split("]")[-1].strip()
|
|
540
633
|
# Use os.path.join for OS-safe joining, then normalize
|
|
541
|
-
full_rel_path =
|
|
634
|
+
full_rel_path = (
|
|
635
|
+
os.path.join(*header_stack, relative_path)
|
|
636
|
+
if header_stack
|
|
637
|
+
else relative_path
|
|
638
|
+
)
|
|
542
639
|
path_accumulator.append(_norm(full_rel_path))
|
|
543
640
|
return path_accumulator
|
|
544
641
|
|
|
@@ -650,19 +747,28 @@ def collect_file_content_by_extension(prompt_data_path, extensions):
|
|
|
650
747
|
|
|
651
748
|
|
|
652
749
|
def prepend_system_prompt(message_list):
|
|
750
|
+
from ara_cli.error_handler import AraError, ErrorLevel, ErrorHandler
|
|
751
|
+
|
|
653
752
|
try:
|
|
654
753
|
langfuse_prompt = LLMSingleton.get_instance().langfuse.get_prompt(
|
|
655
754
|
"ara-cli/system-prompt"
|
|
656
755
|
)
|
|
657
756
|
system_prompt = langfuse_prompt.prompt if langfuse_prompt.prompt else None
|
|
658
|
-
except
|
|
659
|
-
|
|
757
|
+
except Exception as e:
|
|
758
|
+
# Show user-friendly info message about Langfuse connection issue
|
|
759
|
+
info_error = AraError(
|
|
760
|
+
message="Langfuse connection failed. Using default system prompt.",
|
|
761
|
+
error_code=0,
|
|
762
|
+
level=ErrorLevel.INFO,
|
|
763
|
+
)
|
|
764
|
+
ErrorHandler().report_error(info_error)
|
|
660
765
|
system_prompt = None
|
|
661
766
|
|
|
662
767
|
# Fallback to default prompt if Langfuse prompt is not available
|
|
663
768
|
if not system_prompt:
|
|
664
|
-
|
|
665
|
-
|
|
769
|
+
system_prompt = (
|
|
770
|
+
"You are a helpful assistant that can process both text and images."
|
|
771
|
+
)
|
|
666
772
|
|
|
667
773
|
# Prepend the system prompt
|
|
668
774
|
system_prompt_message = {"role": "system", "content": system_prompt}
|
|
@@ -695,7 +801,9 @@ def append_images_to_message(message, image_data_list):
|
|
|
695
801
|
message["content"].extend(image_data_list)
|
|
696
802
|
else:
|
|
697
803
|
# If somehow content is not list or str, coerce to list
|
|
698
|
-
message["content"] = [
|
|
804
|
+
message["content"] = [
|
|
805
|
+
{"type": "text", "text": str(message_content)}
|
|
806
|
+
] + image_data_list
|
|
699
807
|
|
|
700
808
|
logger.debug(f"Updated message content with {len(image_data_list)} images")
|
|
701
809
|
|
|
@@ -818,9 +926,7 @@ def generate_config_prompt_global_givens_file(
|
|
|
818
926
|
return
|
|
819
927
|
|
|
820
928
|
dir_list = [path for d in config.global_dirs for path in d.values()]
|
|
821
|
-
print(
|
|
822
|
-
f"used {dir_list} for global prompt givens file listing with absolute paths"
|
|
823
|
-
)
|
|
929
|
+
print(f"used {dir_list} for global prompt givens file listing with absolute paths")
|
|
824
930
|
generate_global_markdown_listing(
|
|
825
931
|
dir_list, config.ara_prompt_given_list_includes, config_prompt_givens_path
|
|
826
|
-
)
|
|
932
|
+
)
|
ara_cli/tag_extractor.py
CHANGED
|
@@ -6,6 +6,7 @@ from ara_cli.artefact_models.artefact_data_retrieval import (
|
|
|
6
6
|
artefact_tags_retrieval,
|
|
7
7
|
)
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
class TagExtractor:
|
|
10
11
|
def __init__(self, file_system=None):
|
|
11
12
|
self.file_system = file_system or os
|
|
@@ -53,16 +54,16 @@ class TagExtractor:
|
|
|
53
54
|
"""Collect all tags from an artefact including user tags and author."""
|
|
54
55
|
all_tags = []
|
|
55
56
|
all_tags.extend(artefact.tags)
|
|
56
|
-
|
|
57
|
+
|
|
57
58
|
if artefact.status:
|
|
58
59
|
all_tags.append(artefact.status)
|
|
59
|
-
|
|
60
|
+
|
|
60
61
|
user_tags = [f"user_{tag}" for tag in artefact.users]
|
|
61
62
|
all_tags.extend(user_tags)
|
|
62
|
-
|
|
63
|
-
if hasattr(artefact,
|
|
63
|
+
|
|
64
|
+
if hasattr(artefact, "author") and artefact.author:
|
|
64
65
|
all_tags.append(artefact.author)
|
|
65
|
-
|
|
66
|
+
|
|
66
67
|
return [tag for tag in all_tags if tag is not None]
|
|
67
68
|
|
|
68
69
|
def _add_tags_to_groups(self, tag_groups, tags):
|
|
@@ -92,7 +93,7 @@ class TagExtractor:
|
|
|
92
93
|
if navigate_to_target:
|
|
93
94
|
navigator.navigate_to_target()
|
|
94
95
|
|
|
95
|
-
artefacts = ArtefactReader.read_artefacts()
|
|
96
|
+
artefacts = ArtefactReader(self.file_system).read_artefacts()
|
|
96
97
|
|
|
97
98
|
filtered_artefacts = filter_list(
|
|
98
99
|
list_to_filter=artefacts,
|
|
@@ -109,4 +110,4 @@ class TagExtractor:
|
|
|
109
110
|
else:
|
|
110
111
|
self.add_to_tags_set(tag_groups, filtered_artefacts)
|
|
111
112
|
|
|
112
|
-
return tag_groups
|
|
113
|
+
return tag_groups
|
ara_cli/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# version.py
|
|
2
|
-
__version__ = "0.1.
|
|
2
|
+
__version__ = "0.1.14.0"  # fifth component like .0 is for local install test purposes only; official versions should use 4-digit numbers
|
|
@@ -1,28 +1,34 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ara_cli
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.14.0
|
|
4
4
|
Summary: Powerful, open source command-line tool for managing, structuring and automating software development artifacts in line with Business-Driven Development (BDD) and AI-assisted processes
|
|
5
5
|
Description-Content-Type: text/markdown
|
|
6
|
+
Requires-Dist: argcomplete
|
|
7
|
+
Requires-Dist: argparse
|
|
8
|
+
Requires-Dist: cmd2>=2.5
|
|
9
|
+
Requires-Dist: rich
|
|
10
|
+
Requires-Dist: typer
|
|
6
11
|
Requires-Dist: langfuse
|
|
7
|
-
Requires-Dist: litellm
|
|
12
|
+
Requires-Dist: litellm>=1.81.0
|
|
8
13
|
Requires-Dist: llama-index
|
|
9
14
|
Requires-Dist: llama-index-llms-openai
|
|
10
15
|
Requires-Dist: llama-index-retrievers-bm25
|
|
11
16
|
Requires-Dist: openai
|
|
12
|
-
Requires-Dist: markdown-it-py
|
|
13
|
-
Requires-Dist: json-repair
|
|
14
|
-
Requires-Dist: argparse
|
|
15
|
-
Requires-Dist: argcomplete
|
|
16
|
-
Requires-Dist: cmd2>=2.5
|
|
17
|
-
Requires-Dist: charset-normalizer
|
|
18
|
-
Requires-Dist: pydantic
|
|
19
17
|
Requires-Dist: pydantic_ai
|
|
20
|
-
Requires-Dist:
|
|
18
|
+
Requires-Dist: charset-normalizer
|
|
19
|
+
Requires-Dist: json-repair
|
|
20
|
+
Requires-Dist: markdown-it-py
|
|
21
|
+
Requires-Dist: openpyxl
|
|
22
|
+
Requires-Dist: xlrd
|
|
23
|
+
Requires-Dist: xlwt
|
|
24
|
+
Requires-Dist: pandas
|
|
21
25
|
Requires-Dist: pymupdf4llm
|
|
22
|
-
Requires-Dist:
|
|
26
|
+
Requires-Dist: python-docx
|
|
27
|
+
Requires-Dist: python-pptx
|
|
28
|
+
Requires-Dist: tabulate
|
|
23
29
|
Requires-Dist: psutil
|
|
30
|
+
Requires-Dist: pydantic
|
|
24
31
|
Requires-Dist: requests
|
|
25
|
-
Requires-Dist: rich
|
|
26
32
|
Dynamic: description
|
|
27
33
|
Dynamic: description-content-type
|
|
28
34
|
Dynamic: requires-dist
|