ara-cli 0.1.9.95-py3-none-any.whl → 0.1.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ara_cli/__init__.py +5 -2
- ara_cli/__main__.py +61 -13
- ara_cli/ara_command_action.py +85 -20
- ara_cli/ara_command_parser.py +42 -2
- ara_cli/ara_config.py +118 -94
- ara_cli/artefact_autofix.py +131 -2
- ara_cli/artefact_creator.py +2 -7
- ara_cli/artefact_deleter.py +2 -4
- ara_cli/artefact_fuzzy_search.py +13 -6
- ara_cli/artefact_models/artefact_templates.py +3 -3
- ara_cli/artefact_models/feature_artefact_model.py +25 -0
- ara_cli/artefact_reader.py +4 -5
- ara_cli/chat.py +210 -150
- ara_cli/commands/extract_command.py +4 -11
- ara_cli/error_handler.py +134 -0
- ara_cli/file_classifier.py +3 -2
- ara_cli/prompt_extractor.py +1 -1
- ara_cli/prompt_handler.py +268 -127
- ara_cli/template_loader.py +245 -0
- ara_cli/version.py +1 -1
- {ara_cli-0.1.9.95.dist-info → ara_cli-0.1.10.0.dist-info}/METADATA +2 -1
- {ara_cli-0.1.9.95.dist-info → ara_cli-0.1.10.0.dist-info}/RECORD +32 -29
- tests/test_ara_command_action.py +66 -52
- tests/test_artefact_autofix.py +361 -5
- tests/test_chat.py +1894 -546
- tests/test_file_classifier.py +23 -0
- tests/test_file_creator.py +3 -5
- tests/test_prompt_handler.py +40 -4
- tests/test_template_loader.py +192 -0
- {ara_cli-0.1.9.95.dist-info → ara_cli-0.1.10.0.dist-info}/WHEEL +0 -0
- {ara_cli-0.1.9.95.dist-info → ara_cli-0.1.10.0.dist-info}/entry_points.txt +0 -0
- {ara_cli-0.1.9.95.dist-info → ara_cli-0.1.10.0.dist-info}/top_level.txt +0 -0
ara_cli/prompt_handler.py
CHANGED

@@ -1,10 +1,4 @@
 import base64
-import litellm
-from ara_cli.classifier import Classifier
-from ara_cli.artefact_creator import ArtefactCreator
-from ara_cli.template_manager import TemplatePathManager
-from ara_cli.ara_config import ConfigManager, LLMConfigItem
-from ara_cli.file_lister import generate_markdown_listing
 from os.path import exists, join
 import os
 from os import makedirs
@@ -13,25 +7,60 @@ import re
 import shutil
 import glob
 import logging
+import warnings
+from io import StringIO
+from contextlib import redirect_stderr
+from langfuse import Langfuse
+from langfuse.api.resources.commons.errors import Error as LangfuseError, NotFoundError
+import litellm
+from ara_cli.classifier import Classifier
+from ara_cli.artefact_creator import ArtefactCreator
+from ara_cli.template_manager import TemplatePathManager
+from ara_cli.ara_config import ConfigManager
+from ara_cli.file_lister import generate_markdown_listing
 
 
 class LLMSingleton:
     _instance = None
     _default_model = None
     _extraction_model = None
+    langfuse = None
 
     def __init__(self, default_model_id, extraction_model_id):
         config = ConfigManager().get_config()
         default_config_data = config.llm_config.get(str(default_model_id))
 
         if not default_config_data:
-            raise ValueError(f"No configuration found for the default model: {default_model_id}")
+            raise ValueError(
+                f"No configuration found for the default model: {default_model_id}"
+            )
         self.default_config_params = default_config_data.model_dump(exclude_none=True)
 
         extraction_config_data = config.llm_config.get(str(extraction_model_id))
         if not extraction_config_data:
-            raise ValueError(f"No configuration found for the extraction model: {extraction_model_id}")
-        self.extraction_config_params = extraction_config_data.model_dump(exclude_none=True)
+            raise ValueError(
+                f"No configuration found for the extraction model: {extraction_model_id}"
+            )
+        self.extraction_config_params = extraction_config_data.model_dump(
+            exclude_none=True
+        )
+
+        langfuse_public_key = os.getenv("ARA_CLI_LANGFUSE_PUBLIC_KEY")
+        langfuse_secret_key = os.getenv("ARA_CLI_LANGFUSE_SECRET_KEY")
+        langfuse_host = os.getenv("LANGFUSE_HOST")
+
+        captured_stderr = StringIO()
+        with redirect_stderr(captured_stderr):
+            self.langfuse = Langfuse(
+                public_key=langfuse_public_key,
+                secret_key=langfuse_secret_key,
+                host=langfuse_host,
+            )
+
+        # Check if there was an authentication error
+        stderr_output = captured_stderr.getvalue()
+        if "Authentication error" in stderr_output:
+            warnings.warn("Invalid Langfuse credentials - prompt tracing disabled and using default prompts. Set environment variables 'ARA_CLI_LANGFUSE_PUBLIC_KEY', 'ARA_CLI_LANGFUSE_SECRET_KEY', 'LANGFUSE_HOST' and restart application to use Langfuse capabilities", UserWarning)
 
         LLMSingleton._default_model = default_model_id
         LLMSingleton._extraction_model = extraction_model_id
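The rewritten constructor reads its Langfuse credentials from the environment and degrades gracefully when they are missing or invalid. A minimal setup sketch, assuming an existing Langfuse project (the key values and host below are placeholders, not defaults shipped with ara-cli):

    import os

    # Variable names come from the diff above; the values are illustrative.
    os.environ["ARA_CLI_LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
    os.environ["ARA_CLI_LANGFUSE_SECRET_KEY"] = "sk-lf-..."
    os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"  # assumed host

With invalid keys the constructor still succeeds: it captures the client's stderr, detects the reported authentication error, and emits a UserWarning while the CLI falls back to its bundled default prompts.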
@@ -44,10 +73,12 @@ class LLMSingleton:
         default_model = config.default_llm
         if not default_model:
             if not config.llm_config:
-                raise ValueError("No LLM configurations are defined in the configuration file.")
+                raise ValueError(
+                    "No LLM configurations are defined in the configuration file."
+                )
             default_model = next(iter(config.llm_config))
 
-        extraction_model = getattr(config, 'extraction_llm', default_model)
+        extraction_model = getattr(config, "extraction_llm", default_model)
         if not extraction_model:
             extraction_model = default_model
 
@@ -55,12 +86,12 @@ class LLMSingleton:
         return cls._instance
 
     @classmethod
-    def get_config_by_purpose(cls, purpose='default'):
+    def get_config_by_purpose(cls, purpose="default"):
         """
         purpose= 'default' or 'extraction'
         """
         instance = cls.get_instance()
-        if purpose == 'extraction':
+        if purpose == "extraction":
             return instance.extraction_config_params.copy()
         return instance.default_config_params.copy()
 
@@ -98,13 +129,13 @@ class LLMSingleton:
 
 
 def write_string_to_file(filename, string, mode):
-    with open(filename, mode, encoding='utf-8') as file:
+    with open(filename, mode, encoding="utf-8") as file:
         file.write(f"\n{string}\n")
     return file
 
 
 def read_string_from_file(path):
-    with open(path, 'r', encoding='utf-8') as file:
+    with open(path, "r", encoding="utf-8") as file:
         text = file.read()
     return text
 
@@ -114,90 +145,144 @@ def _is_valid_message(message: dict) -> bool:
     Checks if a message in a prompt is valid (i.e., not empty).
     It handles both string content and list content (for multimodal inputs).
     """
-    content = message.get('content')
+    content = message.get("content")
 
     if isinstance(content, str):
-        return content.strip() != ''
-
+        return content.strip() != ""
+
     if isinstance(content, list):
         # For multimodal content, check if there's at least one non-empty text part.
         return any(
-            item.get('type') == 'text' and item.get('text', '').strip() != ''
+            item.get("type") == "text" and item.get("text", "").strip() != ""
             for item in content
         )
-
+
     return False
 
 
-def send_prompt(prompt, purpose='default'):
+def send_prompt(prompt, purpose="default"):
     """Prepares and sends a prompt to the LLM, streaming the response."""
     chat_instance = LLMSingleton.get_instance()
     config_parameters = chat_instance.get_config_by_purpose(purpose)
+    model_info = config_parameters.get("model", "unknown_model")
 
-
+    with LLMSingleton.get_instance().langfuse.start_as_current_span(
+        name="send_prompt"
+    ) as span:
+        span.update_trace(
+            input={"prompt": prompt, "purpose": purpose, "model": model_info}
+        )
 
-
+        config_parameters.pop("provider", None)
 
-
-
-
-
-
-
-
+        filtered_prompt = [msg for msg in prompt if _is_valid_message(msg)]
+
+        completion = litellm.completion(
+            **config_parameters, messages=filtered_prompt, stream=True
+        )
+        response_text = ""
+        try:
+            for chunk in completion:
+                chunk_content = chunk.choices[0].delta.content
+                if chunk_content:
+                    response_text += chunk_content
+                yield chunk
+
+            # Update Langfuse span with success output
+            span.update(
+                output={
+                    "success": True,
+                    "response_length": len(response_text),
+                    "response": response_text,
+                }
+            )
+
+        except Exception as e:
+            # Update Langfuse span with error details
+            span.update(output={"error": str(e)}, level="ERROR")
+            raise
 
 
 def describe_image(image_path: str) -> str:
     """
     Send an image to the LLM and get a text description.
-
+
     Args:
         image_path: Path to the image file
-
+
     Returns:
         Text description of the image
     """
     import base64
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    with LLMSingleton.get_instance().langfuse.start_as_current_span(
+        name="ara-cli/describe-image"
+    ) as span:
+        span.update_trace(input={"image_path": image_path})
+
+        try:
+            langfuse_prompt = LLMSingleton.get_instance().langfuse.get_prompt(
+                "ara-cli/describe-image"
+            )
+            describe_image_prompt = (
+                langfuse_prompt.prompt if langfuse_prompt.prompt else None
+            )
+        except (LangfuseError, NotFoundError, Exception) as e:
+            logging.info(f"Could not fetch Langfuse prompt: {e}")
+            describe_image_prompt = None
+
+        # Fallback to default prompt if Langfuse prompt is not available
+        if not describe_image_prompt:
+            logging.info("Using default describe-image prompt.")
+            describe_image_prompt = "Please describe this image in detail. If it contains text, transcribe it exactly. If it's a diagram or chart, explain its structure and content. If it's a photo or illustration, describe what you see."
+
+        # Read and encode the image
+        with open(image_path, "rb") as image_file:
+            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
+
+        # Determine image type
+        image_extension = os.path.splitext(image_path)[1].lower()
+        mime_type = {
+            ".png": "image/png",
+            ".jpg": "image/jpeg",
+            ".jpeg": "image/jpeg",
+            ".gif": "image/gif",
+            ".bmp": "image/bmp",
+        }.get(image_extension, "image/png")
+
+        # Create message with image
+        message = {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Please describe this image in detail. If it contains text, transcribe it exactly. If it's a diagram or chart, explain its structure and content. If it's a photo or illustration, describe what you see.",
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
+                },
+            ],
+        }
+
+        # Get response from LLM using the extraction model purpose
+        response_text = ""
+        for chunk in send_prompt([message], purpose="extraction"):
+            chunk_content = chunk.choices[0].delta.content
+            if chunk_content:
+                response_text += chunk_content
+
+        response_text = response_text.strip()
+
+        span.update(
+            output={
+                "success": True,
+                "description_length": len(response_text),
+                "response": response_text,
             }
-
-
-
-    # Get response from LLM using the extraction model purpose
-    response_text = ""
-    for chunk in send_prompt([message], purpose='extraction'):
-        chunk_content = chunk.choices[0].delta.content
-        if chunk_content:
-            response_text += chunk_content
-
-    return response_text.strip()
+        )
+
+    return response_text
 
 
 def append_headings(classifier, param, heading_name):
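send_prompt keeps its generator contract while gaining tracing: callers still iterate over litellm streaming chunks, and the concatenated response is recorded on a Langfuse span as a side effect. A minimal consumption sketch, assuming a workspace with a configured default LLM (the message text is illustrative):

    from ara_cli.prompt_handler import send_prompt

    # send_prompt filters out empty messages, streams the completion,
    # and records the full response in a Langfuse span.
    messages = [{"role": "user", "content": "Summarize the open tasks."}]

    response_text = ""
    for chunk in send_prompt(messages, purpose="default"):
        delta = chunk.choices[0].delta.content
        if delta:
            response_text += delta
    print(response_text)

This is the same loop describe_image uses internally with purpose="extraction".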
@@ -207,11 +292,11 @@ def append_headings(classifier, param, heading_name):
 
     # Check if the file exists, and if not, create an empty file
     if not os.path.exists(artefact_data_path):
-        with open(artefact_data_path, 'w', encoding='utf-8') as file:
+        with open(artefact_data_path, "w", encoding="utf-8") as file:
             file.write("")
 
     content = read_string_from_file(artefact_data_path)
-    pattern = r'## {}_(\d+)'.format(heading_name)
+    pattern = r"## {}_(\d+)".format(heading_name)
     matches = findall(pattern, content)
 
     max_number = 1
@@ -219,7 +304,7 @@ def append_headings(classifier, param, heading_name):
         max_number = max(map(int, matches)) + 1
     heading = f"## {heading_name}_{max_number}"
 
-    write_string_to_file(artefact_data_path, heading, 'a')
+    write_string_to_file(artefact_data_path, heading, "a")
 
 
 def write_prompt_result(classifier, param, text):
@@ -227,7 +312,7 @@ def write_prompt_result(classifier, param, text):
 
     # TODO change absolute path to relative path with directory navigator
     artefact_data_path = f"ara/{sub_directory}/{param}.data/{classifier}.prompt_log.md"
-    write_string_to_file(artefact_data_path, text, 'a')
+    write_string_to_file(artefact_data_path, text, "a")
 
 
 def prompt_data_directory_creation(classifier, parameter):
@@ -239,7 +324,7 @@ def prompt_data_directory_creation(classifier, parameter):
 
 
 def get_file_content(path):
-    with open(path, 'r', encoding='utf-8') as file:
+    with open(path, "r", encoding="utf-8") as file:
         return file.read()
 
 
@@ -247,14 +332,22 @@ def initialize_prompt_templates(classifier, parameter):
     prompt_data_path = prompt_data_directory_creation(classifier, parameter)
     prompt_log_path = os.path.dirname(prompt_data_path)
 
-    template_path = os.path.join(os.path.dirname(__file__), 'templates')
+    template_path = os.path.join(os.path.dirname(__file__), "templates")
     artefact_creator = ArtefactCreator()
-    artefact_creator.create_artefact_prompt_files(prompt_log_path, template_path, classifier)
+    artefact_creator.create_artefact_prompt_files(
+        prompt_log_path, template_path, classifier
+    )
 
     generate_config_prompt_template_file(prompt_data_path, "config.prompt_templates.md")
 
     # Mark the relevant artefact in the givens list
-    generate_config_prompt_givens_file(prompt_data_path, "config.prompt_givens.md", artefact_to_mark=f"{parameter}.{classifier}")
+    generate_config_prompt_givens_file(
+        prompt_data_path,
+        "config.prompt_givens.md",
+        artefact_to_mark=f"{parameter}.{classifier}",
+    )
+
+    generate_config_prompt_global_givens_file(prompt_data_path, "config.prompt_global_givens.md")
 
     generate_config_prompt_global_givens_file(prompt_data_path, "config.prompt_global_givens.md")
 
@@ -275,7 +368,7 @@ def load_selected_prompt_templates(classifier, parameter):
         print("WARNING: config.prompt_templates.md does not exist.")
         return
 
-    with open(config_file_path, 'r', encoding='utf-8') as config_file:
+    with open(config_file_path, "r", encoding="utf-8") as config_file:
         content = config_file.read()
 
     global_base_template_path = TemplatePathManager.get_template_base_path()
@@ -311,7 +404,9 @@ def find_files_with_endings(directory, endings):
     # Create an empty dictionary to store files according to their endings
     files_by_ending = {ending: [] for ending in endings}
 
-    files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
+    files = [
+        f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))
+    ]
     # Walk through the files list
     for file in files:
         # Check each file to see if it ends with one of the specified endings
@@ -346,17 +441,23 @@ def move_and_copy_files(source_path, prompt_data_path, prompt_archive_path):
 
             # Move all existing files with the same ending to the prompt_archive_path
             for existing_file in glob.glob(glob_pattern):
-                archived_file_path = os.path.join(prompt_archive_path, os.path.basename(existing_file))
+                archived_file_path = os.path.join(
+                    prompt_archive_path, os.path.basename(existing_file)
+                )
                 shutil.move(existing_file, archived_file_path)
-                print(f"Moved existing prompt-module: {os.path.basename(existing_file)} to prompt.archive")
-
+                print(
+                    f"Moved existing prompt-module: {os.path.basename(existing_file)} to prompt.archive"
+                )
+
             # Copy the source_path file to the prompt_data_path directory
             target_path = os.path.join(prompt_data_path, file_name)
             shutil.copy(source_path, target_path)
             print(f"Loaded new prompt-module: {os.path.basename(target_path)}")
 
         else:
-            print(f"File name {file_name} does not end with one of the specified patterns, skipping move and copy.")
+            print(
+                f"File name {file_name} does not end with one of the specified patterns, skipping move and copy."
+            )
     else:
         print(f"WARNING: template {source_path} does not exist.")
 
@@ -367,19 +468,19 @@ def extract_and_load_markdown_files(md_prompt_file_path):
     """
     header_stack = []
     path_accumulator = []
-    with open(md_prompt_file_path, 'r', encoding='utf-8') as file:
+    with open(md_prompt_file_path, "r", encoding="utf-8") as file:
         for line in file:
-            if line.strip().startswith('#'):
-                level = line.count('#')
-                header = line.strip().strip('#').strip()
+            if line.strip().startswith("#"):
+                level = line.count("#")
+                header = line.strip().strip("#").strip()
                 # Adjust the stack based on the current header level
                 current_depth = len(header_stack)
                 if level <= current_depth:
-                    header_stack = header_stack[:level-1]
+                    header_stack = header_stack[: level - 1]
                 header_stack.append(header)
-            elif '[x]' in line:
-                relative_path = line.split(']')[-1].strip()
-                full_path = os.path.join('/'.join(header_stack), relative_path)
+            elif "[x]" in line:
+                relative_path = line.split("]")[-1].strip()
+                full_path = os.path.join("/".join(header_stack), relative_path)
                 path_accumulator.append(full_path)
     return path_accumulator
 
@@ -396,15 +497,20 @@ def load_givens(file_path):
     content = "### GIVENS\n\n"
 
     for item in markdown_items:
-        if item.lower().endswith(('.png', '.jpeg', '.jpg')):
+        if item.lower().endswith((".png", ".jpeg", ".jpg")):
             with open(item, "rb") as image_file:
                 base64_image = base64.b64encode(image_file.read()).decode("utf-8")
-            image_data_list.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}})
+            image_data_list.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{base64_image}"},
+                }
+            )
             content += item + "\n"
-            content += f'' + '\n'
+            content += f"" + "\n"
         else:
             # Check if the item specifies line ranges
-            # TODO item has currently no trailing [] see extraction and handover method in extract and load
+            # TODO item has currently no trailing [] see extraction and handover method in extract and load
             # item = f"[10:29] {item}"
             # print(f"found {item}, check for subsection")
             # TODO re.match can not split the item with [] correctly and extract the line numbers
@@ -435,19 +541,19 @@ def get_partial_file_content(file_name, line_ranges):
     Returns:
         str: The content of the specified lines.
     """
-    line_ranges = line_ranges.strip('[]').split(',')
+    line_ranges = line_ranges.strip("[]").split(",")
     lines_to_read = []
     for line_range in line_ranges:
-        start, end = map(int, line_range.split(':'))
+        start, end = map(int, line_range.split(":"))
         lines_to_read.extend(range(start, end + 1))
 
     partial_content = []
-    with open(file_name, 'r', encoding='utf-8') as file:
+    with open(file_name, "r", encoding="utf-8") as file:
         for i, line in enumerate(file, 1):
             if i in lines_to_read:
                 partial_content.append(line)
 
-    return ''.join(partial_content)
+    return "".join(partial_content)
 
 
 def collect_file_content_by_extension(prompt_data_path, extensions):
@@ -467,18 +573,35 @@ def collect_file_content_by_extension(prompt_data_path, extensions):
 
 
 def prepend_system_prompt(message_list):
-
-
-
-
-
+    try:
+        langfuse_prompt = LLMSingleton.get_instance().langfuse.get_prompt(
+            "ara-cli/system-prompt"
+        )
+        system_prompt = langfuse_prompt.prompt if langfuse_prompt.prompt else None
+    except (LangfuseError, NotFoundError, Exception) as e:
+        logging.info(f"Could not fetch Langfuse system prompt: {e}")
+        system_prompt = None
+
+    # Fallback to default prompt if Langfuse prompt is not available
+    if not system_prompt:
+        logging.info("Using default system prompt.")
+        system_prompt = (
+            "You are a helpful assistant that can process both text and images."
+        )
+
+    # Prepend the system prompt
+    system_prompt_message = {"role": "system", "content": system_prompt}
+
+    message_list.insert(0, system_prompt_message)
     return message_list
 
 
 def append_images_to_message(message, image_data_list):
     logger = logging.getLogger(__name__)
 
-    logger.debug(f"append_images_to_message called with image_data_list length: {len(image_data_list) if image_data_list else 0}")
+    logger.debug(
+        f"append_images_to_message called with image_data_list length: {len(image_data_list) if image_data_list else 0}"
+    )
 
     if not image_data_list:
         logger.debug("No images to append, returning original message")
@@ -489,7 +612,7 @@ def append_images_to_message(message, image_data_list):
 
     if isinstance(message_content, str):
         message["content"] = [{"type": "text", "text": message_content}]
-
+
     message["content"].extend(image_data_list)
 
     logger.debug(f"Updated message content with {len(image_data_list)} images")
@@ -505,25 +628,24 @@ def create_and_send_custom_prompt(classifier, parameter):
     extensions = [".blueprint.md", ".rules.md", ".prompt_givens.md", ".prompt_global_givens.md", ".intention.md", ".commands.md"]
     combined_content_markdown, image_data_list = collect_file_content_by_extension(prompt_data_path, extensions)
 
-    with open(prompt_file_path_markdown, 'w', encoding='utf-8') as file:
+    with open(prompt_file_path_markdown, "w", encoding="utf-8") as file:
         file.write(combined_content_markdown)
 
     prompt = read_string_from_file(prompt_file_path_markdown)
     append_headings(classifier, parameter, "prompt")
     write_prompt_result(classifier, parameter, prompt)
 
-    message = {
-        "role": "user",
-        "content": combined_content_markdown
-    }
+    message = {"role": "user", "content": combined_content_markdown}
 
     message_list = [message]
 
     message_list = append_images_to_message(message_list, image_data_list)
     append_headings(classifier, parameter, "result")
 
-    artefact_data_path = f"ara/{sub_directory}/{parameter}.data/{classifier}.prompt_log.md"
-    with open(artefact_data_path, 'a', encoding='utf-8') as file:
+    artefact_data_path = (
+        f"ara/{sub_directory}/{parameter}.data/{classifier}.prompt_log.md"
+    )
+    with open(artefact_data_path, "a", encoding="utf-8") as file:
         for chunk in send_prompt(message_list):
             chunk_content = chunk.choices[0].delta.content
             if not chunk_content:
@@ -533,42 +655,61 @@ def create_and_send_custom_prompt(classifier, parameter):
     # write_prompt_result(classifier, parameter, response)
 
 
-def generate_config_prompt_template_file(prompt_data_path, config_prompt_templates_name):
-    config_prompt_templates_path = os.path.join(prompt_data_path, config_prompt_templates_name)
+def generate_config_prompt_template_file(
+    prompt_data_path, config_prompt_templates_name
+):
+    config_prompt_templates_path = os.path.join(
+        prompt_data_path, config_prompt_templates_name
+    )
     config = ConfigManager.get_config()
     global_prompt_template_path = TemplatePathManager.get_template_base_path()
-    dir_list = ["ara/.araconfig/custom-prompt-modules"] + [f"{os.path.join(global_prompt_template_path,'prompt-modules')}"]
-
+    dir_list = ["ara/.araconfig/custom-prompt-modules"] + [
+        f"{os.path.join(global_prompt_template_path,'prompt-modules')}"
+    ]
+    file_list = ["*.blueprint.md", "*.rules.md", "*.intention.md", "*.commands.md"]
 
     print(f"used {dir_list} for prompt templates file listing")
     generate_markdown_listing(dir_list, file_list, config_prompt_templates_path)
 
 
-def generate_config_prompt_givens_file(prompt_data_path, config_prompt_givens_name, artefact_to_mark=None):
-    config_prompt_givens_path = os.path.join(prompt_data_path, config_prompt_givens_name)
+def generate_config_prompt_givens_file(
+    prompt_data_path, config_prompt_givens_name, artefact_to_mark=None
+):
+    config_prompt_givens_path = os.path.join(
+        prompt_data_path, config_prompt_givens_name
+    )
     config = ConfigManager.get_config()
-    dir_list = ["ara"] + [path for d in config.ext_code_dirs for path in d.values()] + [config.doc_dir] + [config.glossary_dir]
+    dir_list = (
+        ["ara"]
+        + [path for d in config.ext_code_dirs for path in d.values()]
+        + [config.doc_dir]
+        + [config.glossary_dir]
+    )
 
     print(f"used {dir_list} for prompt givens file listing")
-    generate_markdown_listing(dir_list, config.ara_prompt_given_list_includes, config_prompt_givens_path)
+    generate_markdown_listing(
+        dir_list, config.ara_prompt_given_list_includes, config_prompt_givens_path
+    )
 
     # If an artefact is specified, mark it with [x]
     if artefact_to_mark:
-        print(f"artefact {artefact_to_mark} marked in related config.prompt_givens.md per default")
+        print(
+            f"artefact {artefact_to_mark} marked in related config.prompt_givens.md per default"
+        )
 
         # Read the generated file content
-        with open(config_prompt_givens_path, 'r', encoding='utf-8') as file:
+        with open(config_prompt_givens_path, "r", encoding="utf-8") as file:
             markdown_listing = file.readlines()
 
         updated_listing = []
         for line in markdown_listing:
            # Use a regular expression to match the exact string
-            if re.search(r'\b' + re.escape(artefact_to_mark) + r'\b', line):
+            if re.search(r"\b" + re.escape(artefact_to_mark) + r"\b", line):
                 line = line.replace("[]", "[x]")
             updated_listing.append(line)
 
         # Write the updated listing back to the file
-        with open(config_prompt_givens_path, 'w', encoding='utf-8') as file:
+        with open(config_prompt_givens_path, "w", encoding="utf-8") as file:
             file.write("".join(updated_listing))
 
 def generate_config_prompt_global_givens_file(prompt_data_path, config_prompt_givens_name, artefact_to_mark=None):
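describe_image and prepend_system_prompt now share one fetch-with-fallback shape: try to pull a named prompt from Langfuse, and on any error or empty result log at INFO level and use a bundled default. A condensed sketch of that pattern (the helper name is ours, not part of the package):

    import logging

    from langfuse import Langfuse

    def get_prompt_or_default(client: Langfuse, name: str, default: str) -> str:
        # Mirrors the diff: any fetch failure or empty prompt falls back.
        try:
            langfuse_prompt = client.get_prompt(name)
            if langfuse_prompt.prompt:
                return langfuse_prompt.prompt
        except Exception as e:
            logging.info(f"Could not fetch Langfuse prompt {name}: {e}")
        return default

    system_prompt = get_prompt_or_default(
        Langfuse(),  # assumes LANGFUSE_* credentials in the environment
        "ara-cli/system-prompt",
        "You are a helpful assistant that can process both text and images.",
    )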
|