ara-cli 0.1.9.77__py3-none-any.whl → 0.1.10.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ara-cli might be problematic. Click here for more details.
- ara_cli/__init__.py +18 -2
- ara_cli/__main__.py +245 -66
- ara_cli/ara_command_action.py +128 -63
- ara_cli/ara_config.py +201 -177
- ara_cli/ara_subcommands/__init__.py +0 -0
- ara_cli/ara_subcommands/autofix.py +26 -0
- ara_cli/ara_subcommands/chat.py +27 -0
- ara_cli/ara_subcommands/classifier_directory.py +16 -0
- ara_cli/ara_subcommands/common.py +100 -0
- ara_cli/ara_subcommands/create.py +75 -0
- ara_cli/ara_subcommands/delete.py +22 -0
- ara_cli/ara_subcommands/extract.py +22 -0
- ara_cli/ara_subcommands/fetch_templates.py +14 -0
- ara_cli/ara_subcommands/list.py +65 -0
- ara_cli/ara_subcommands/list_tags.py +25 -0
- ara_cli/ara_subcommands/load.py +48 -0
- ara_cli/ara_subcommands/prompt.py +136 -0
- ara_cli/ara_subcommands/read.py +47 -0
- ara_cli/ara_subcommands/read_status.py +20 -0
- ara_cli/ara_subcommands/read_user.py +20 -0
- ara_cli/ara_subcommands/reconnect.py +27 -0
- ara_cli/ara_subcommands/rename.py +22 -0
- ara_cli/ara_subcommands/scan.py +14 -0
- ara_cli/ara_subcommands/set_status.py +22 -0
- ara_cli/ara_subcommands/set_user.py +22 -0
- ara_cli/ara_subcommands/template.py +16 -0
- ara_cli/artefact_autofix.py +214 -28
- ara_cli/artefact_creator.py +5 -8
- ara_cli/artefact_deleter.py +2 -4
- ara_cli/artefact_fuzzy_search.py +13 -6
- ara_cli/artefact_lister.py +29 -55
- ara_cli/artefact_models/artefact_data_retrieval.py +23 -0
- ara_cli/artefact_models/artefact_model.py +106 -25
- ara_cli/artefact_models/artefact_templates.py +23 -13
- ara_cli/artefact_models/epic_artefact_model.py +11 -2
- ara_cli/artefact_models/feature_artefact_model.py +56 -1
- ara_cli/artefact_models/userstory_artefact_model.py +15 -3
- ara_cli/artefact_reader.py +4 -5
- ara_cli/artefact_renamer.py +6 -2
- ara_cli/artefact_scan.py +2 -2
- ara_cli/chat.py +594 -219
- ara_cli/chat_agent/__init__.py +0 -0
- ara_cli/chat_agent/agent_communicator.py +62 -0
- ara_cli/chat_agent/agent_process_manager.py +211 -0
- ara_cli/chat_agent/agent_status_manager.py +73 -0
- ara_cli/chat_agent/agent_workspace_manager.py +76 -0
- ara_cli/commands/__init__.py +0 -0
- ara_cli/commands/command.py +7 -0
- ara_cli/commands/extract_command.py +15 -0
- ara_cli/commands/load_command.py +65 -0
- ara_cli/commands/load_image_command.py +34 -0
- ara_cli/commands/read_command.py +117 -0
- ara_cli/completers.py +144 -0
- ara_cli/directory_navigator.py +37 -4
- ara_cli/error_handler.py +134 -0
- ara_cli/file_classifier.py +3 -2
- ara_cli/file_loaders/__init__.py +0 -0
- ara_cli/file_loaders/binary_file_loader.py +33 -0
- ara_cli/file_loaders/document_file_loader.py +34 -0
- ara_cli/file_loaders/document_reader.py +245 -0
- ara_cli/file_loaders/document_readers.py +233 -0
- ara_cli/file_loaders/file_loader.py +50 -0
- ara_cli/file_loaders/file_loaders.py +123 -0
- ara_cli/file_loaders/image_processor.py +89 -0
- ara_cli/file_loaders/markdown_reader.py +75 -0
- ara_cli/file_loaders/text_file_loader.py +187 -0
- ara_cli/global_file_lister.py +51 -0
- ara_cli/prompt_extractor.py +214 -87
- ara_cli/prompt_handler.py +508 -146
- ara_cli/tag_extractor.py +54 -24
- ara_cli/template_loader.py +245 -0
- ara_cli/template_manager.py +14 -4
- ara_cli/templates/prompt-modules/commands/empty.commands.md +2 -12
- ara_cli/templates/prompt-modules/commands/extract_general.commands.md +12 -0
- ara_cli/templates/prompt-modules/commands/extract_markdown.commands.md +11 -0
- ara_cli/templates/prompt-modules/commands/extract_python.commands.md +13 -0
- ara_cli/templates/prompt-modules/commands/feature_add_or_modifiy_specified_behavior.commands.md +36 -0
- ara_cli/templates/prompt-modules/commands/feature_generate_initial_specified_bevahior.commands.md +53 -0
- ara_cli/templates/prompt-modules/commands/prompt_template_tech_stack_transformer.commands.md +95 -0
- ara_cli/templates/prompt-modules/commands/python_bug_fixing_code.commands.md +34 -0
- ara_cli/templates/prompt-modules/commands/python_generate_code.commands.md +27 -0
- ara_cli/templates/prompt-modules/commands/python_refactoring_code.commands.md +39 -0
- ara_cli/templates/prompt-modules/commands/python_step_definitions_generation_and_fixing.commands.md +40 -0
- ara_cli/templates/prompt-modules/commands/python_unittest_generation_and_fixing.commands.md +48 -0
- ara_cli/update_config_prompt.py +7 -1
- ara_cli/version.py +1 -1
- ara_cli-0.1.10.8.dist-info/METADATA +241 -0
- {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/RECORD +104 -59
- tests/test_ara_command_action.py +66 -52
- tests/test_ara_config.py +200 -279
- tests/test_artefact_autofix.py +361 -5
- tests/test_artefact_lister.py +52 -132
- tests/test_artefact_scan.py +1 -1
- tests/test_chat.py +2009 -603
- tests/test_file_classifier.py +23 -0
- tests/test_file_creator.py +3 -5
- tests/test_global_file_lister.py +131 -0
- tests/test_prompt_handler.py +746 -0
- tests/test_tag_extractor.py +19 -13
- tests/test_template_loader.py +192 -0
- tests/test_template_manager.py +5 -4
- ara_cli/ara_command_parser.py +0 -536
- ara_cli/templates/prompt-modules/blueprints/complete_pytest_unittest.blueprint.md +0 -27
- ara_cli/templates/prompt-modules/blueprints/task_todo_list_implement_feature_BDD_way.blueprint.md +0 -30
- ara_cli/templates/prompt-modules/commands/artefact_classification.commands.md +0 -9
- ara_cli/templates/prompt-modules/commands/artefact_extension.commands.md +0 -17
- ara_cli/templates/prompt-modules/commands/artefact_formulation.commands.md +0 -14
- ara_cli/templates/prompt-modules/commands/behave_step_generation.commands.md +0 -102
- ara_cli/templates/prompt-modules/commands/code_generation_complex.commands.md +0 -20
- ara_cli/templates/prompt-modules/commands/code_generation_simple.commands.md +0 -13
- ara_cli/templates/prompt-modules/commands/error_fixing.commands.md +0 -20
- ara_cli/templates/prompt-modules/commands/feature_file_update.commands.md +0 -18
- ara_cli/templates/prompt-modules/commands/feature_formulation.commands.md +0 -43
- ara_cli/templates/prompt-modules/commands/js_code_generation_simple.commands.md +0 -13
- ara_cli/templates/prompt-modules/commands/refactoring.commands.md +0 -15
- ara_cli/templates/prompt-modules/commands/refactoring_analysis.commands.md +0 -9
- ara_cli/templates/prompt-modules/commands/reverse_engineer_feature_file.commands.md +0 -15
- ara_cli/templates/prompt-modules/commands/reverse_engineer_program_flow.commands.md +0 -19
- ara_cli-0.1.9.77.dist-info/METADATA +0 -18
- {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/WHEEL +0 -0
- {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/entry_points.txt +0 -0
- {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import base64
|
|
4
|
+
import tempfile
|
|
5
|
+
from typing import Optional, Tuple
|
|
6
|
+
import requests
|
|
7
|
+
from charset_normalizer import from_path
|
|
8
|
+
from ara_cli.prompt_handler import describe_image
|
|
9
|
+
from ara_cli.file_loaders.file_loader import FileLoader
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TextFileLoader(FileLoader):
    """File loader that appends the text of a file to the chat file."""

    def load(self, file_path: str, prefix: str = "", suffix: str = "",
             block_delimiter: str = "", extract_images: bool = False, **kwargs) -> bool:
        """Append the content of ``file_path`` to ``self.chat.chat_name``.

        Markdown files (``.md``, case-insensitive) are read through
        ``MarkdownReader`` when ``extract_images`` is true; every other file
        is decoded with charset-normalizer. Windows line endings are
        normalised to ``\\n`` in both cases.

        Args:
            file_path: File whose text is loaded.
            prefix: Text written immediately before the content.
            suffix: Text written immediately after the content.
            block_delimiter: When non-empty, placed on its own line before
                and after the content.
            extract_images: Enable image processing for markdown files.
            **kwargs: Accepted and ignored.

        Returns:
            True once the content was appended, False when no encoding could
            be detected for the file.
        """
        use_markdown_reader = file_path.lower().endswith('.md') and extract_images

        if use_markdown_reader:
            raw_text = MarkdownReader(file_path).read(extract_images=True)
        else:
            # charset-normalizer picks the most plausible decoding of the file.
            best_guess = from_path(file_path).best()
            if not best_guess:
                print(f"Failed to detect encoding for {file_path}")
                return False
            raw_text = str(best_guess)

        body = raw_text.replace('\r\n', '\n')
        if block_delimiter:
            body = f"{block_delimiter}\n{body}\n{block_delimiter}"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{body}{suffix}\n")

        return True
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MarkdownReader:
    """Reads a markdown file and optionally replaces image references with text."""

    # Markdown image syntax ![alt](target); group 2 captures the target.
    _IMAGE_PATTERN = re.compile(r'!\[([^\]]*)\]\(([^\)]+)\)')
    # data-URI form of an embedded image; group 1 is the format, group 2 the payload.
    _BASE64_PATTERN = re.compile(r'data:image/([^;]+);base64,([^)]+)')

    def __init__(self, file_path: str):
        """Remember the file and its directory (used to resolve relative image paths)."""
        self.file_path = file_path
        self.base_dir = os.path.dirname(file_path)
        self.image_processor = ImageProcessor()

    def read(self, extract_images: bool = False) -> str:
        """Return the file's text, decoded as UTF-8.

        Args:
            extract_images: When true, every markdown image reference is
                passed through ``_process_images`` before returning.
        """
        with open(self.file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        if not extract_images:
            return content

        return self._process_images(content)

    def _process_images(self, content: str) -> str:
        """Replace each markdown image in ``content`` with its processed text.

        The substitution is done in one ``re.sub`` pass over the original
        text, so text inserted for one image can never be mistaken for
        (and replaced instead of) a later image reference. References for
        which processing yields nothing are left untouched.
        """
        def _substitute(match: re.Match) -> str:
            replacement = self._process_single_image(
                match.group(2), self._BASE64_PATTERN)
            return replacement if replacement else match.group(0)

        return self._IMAGE_PATTERN.sub(_substitute, content)

    def _process_single_image(self, image_ref: str, base64_pattern: re.Pattern) -> Optional[str]:
        """Resolve one image reference to replacement text.

        The reference is tried as a base64 data URI, then as a URL, then as
        a local path relative to the markdown file's directory. Errors
        reported by the processor are printed as warnings.

        Returns:
            The replacement text, or None when processing raised.
        """
        try:
            # Try base64 first
            result = self.image_processor.process_base64_image(
                image_ref, base64_pattern)
            if result:
                return result[0]

            # Try URL; an empty result means the reference is not a URL
            result, error = self.image_processor.process_url_image(image_ref)
            if result:
                if error:
                    print(f"Warning: {error}")
                return result

            # Fall back to a local file
            result, error = self.image_processor.process_local_image(
                image_ref, self.base_dir)
            if error:
                print(f"Warning: {error}")
            return result

        except Exception as e:
            # Best effort: a single bad image must not abort the whole file.
            print(f"Warning: Could not process image {image_ref}: {e}")
            return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class ImageProcessor:
    """Turns image references (base64 data URIs, URLs, local paths) into text.

    Each ``process_*`` method returns text starting with ``Image:`` followed
    by a bracketed line that holds either the value returned by
    ``describe_image`` or an error note.
    """

    @staticmethod
    def process_base64_image(
        image_ref: str,
        base64_pattern: re.Pattern
    ) -> Optional[Tuple[str, Optional[str]]]:
        """Describe a base64-embedded image.

        Args:
            image_ref: The image target taken from the markdown reference.
            base64_pattern: Pattern whose group 1 is the image format and
                group 2 the base64 payload.

        Returns:
            None when ``image_ref`` does not match ``base64_pattern``,
            otherwise ``(text, None)``.
        """
        base64_match = base64_pattern.match(image_ref)
        if not base64_match:
            return None

        image_format = base64_match.group(1)
        image_data = base64.b64decode(base64_match.group(2))

        # The format comes from document text; keep only characters that are
        # safe in a file name so it cannot inject path separators.
        safe_suffix = re.sub(r'[^A-Za-z0-9+-]', '', image_format) or 'png'

        # describe_image takes a file path, so the bytes go through a temp file.
        tmp_file = tempfile.NamedTemporaryFile(suffix=f'.{safe_suffix}', delete=False)
        try:
            with tmp_file:
                tmp_file.write(image_data)
            description = describe_image(tmp_file.name)
            return f"Image: (base64 embedded {image_format} image)\n[{description}]", None
        finally:
            os.unlink(tmp_file.name)

    @staticmethod
    def process_url_image(image_ref: str) -> Tuple[str, Optional[str]]:
        """Download and describe an image referenced by an http(s) URL.

        Returns:
            ``("", None)`` when ``image_ref`` is not an http(s) URL;
            ``(text, None)`` on success; ``(text, error_message)`` when the
            download or description raised.
        """
        if not image_ref.startswith(('http://', 'https://')):
            return "", None

        try:
            response = requests.get(image_ref, timeout=10)
            response.raise_for_status()

            # Determine file extension from content-type
            content_type = response.headers.get('content-type', '')
            ext = ImageProcessor._get_extension_from_content_type(
                content_type, image_ref)

            tmp_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
            try:
                with tmp_file:
                    tmp_file.write(response.content)
                description = describe_image(tmp_file.name)
                return f"Image: {image_ref}\n[{description}]", None
            finally:
                os.unlink(tmp_file.name)

        except Exception as e:
            error_msg = f"Could not download image: {str(e)}"
            return f"Image: {image_ref}\n[{error_msg}]", error_msg

    @staticmethod
    def process_local_image(image_ref: str, base_dir: str) -> Tuple[str, Optional[str]]:
        """Describe an image stored on disk.

        Args:
            image_ref: Absolute path, or path relative to ``base_dir``.
            base_dir: Directory against which relative references resolve.

        Returns:
            ``(text, None)`` when the path exists, otherwise
            ``(text, error_message)``.
        """
        if os.path.isabs(image_ref):
            local_image_path = image_ref
        else:
            local_image_path = os.path.join(base_dir, image_ref)

        if os.path.exists(local_image_path):
            description = describe_image(local_image_path)
            return f"Image: {image_ref}\n[{description}]", None

        error_msg = "Image file not found"
        return f"Image: {image_ref}\n[{error_msg}]", f"Local image not found: {local_image_path}"

    @staticmethod
    def _get_extension_from_content_type(content_type: str, url: str) -> str:
        """Pick a file extension from the content type, else from the URL path.

        The content type is compared case-insensitively. When it is not one
        of JPEG/PNG/GIF, the extension of the URL's *path* component is used
        (query string and fragment are ignored); ``.png`` is the last resort.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlparse

        normalized = (content_type or '').lower()
        if 'image/jpeg' in normalized:
            return '.jpg'
        if 'image/png' in normalized:
            return '.png'
        if 'image/gif' in normalized:
            return '.gif'
        return os.path.splitext(urlparse(url).path)[1] or '.png'
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import fnmatch
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
|
|
5
|
+
DirTree = Dict[str, Any]
|
|
6
|
+
|
|
7
|
+
def _build_tree(root_path: str, patterns: List[str]) -> DirTree:
|
|
8
|
+
"""Creates a nested dictionary representing the directory structure in the specified path."""
|
|
9
|
+
tree: DirTree = {'files': [], 'dirs': {}}
|
|
10
|
+
try:
|
|
11
|
+
for item in os.listdir(root_path):
|
|
12
|
+
item_path = os.path.join(root_path, item)
|
|
13
|
+
if os.path.isdir(item_path):
|
|
14
|
+
subtree = _build_tree(item_path, patterns)
|
|
15
|
+
if subtree['files'] or subtree['dirs']:
|
|
16
|
+
tree['dirs'][item] = subtree
|
|
17
|
+
elif os.path.isfile(item_path):
|
|
18
|
+
if any(fnmatch.fnmatch(item, pattern) for pattern in patterns):
|
|
19
|
+
tree['files'].append(item)
|
|
20
|
+
except OSError as e:
|
|
21
|
+
print(f"Warning: Could not access path {root_path}: {e}")
|
|
22
|
+
return tree
|
|
23
|
+
|
|
24
|
+
def _write_tree_to_markdown(md_file, tree: DirTree, level: int):
|
|
25
|
+
"""Writes the tree data structure to the file in markdown format."""
|
|
26
|
+
indent = ' ' * level
|
|
27
|
+
for filename in sorted(tree['files']):
|
|
28
|
+
md_file.write(f"{indent}- [] {filename}\n")
|
|
29
|
+
|
|
30
|
+
for dirname, subtree in sorted(tree['dirs'].items()):
|
|
31
|
+
md_file.write(f"{' ' * (level -1)}{'#' * (level + 1)} {dirname}\n")
|
|
32
|
+
_write_tree_to_markdown(md_file, subtree, level + 1)
|
|
33
|
+
|
|
34
|
+
def generate_global_markdown_listing(directories: List[str], file_patterns: List[str], output_file: str):
    """Write a markdown listing of matching files for each directory.

    ``output_file`` is (re)created. For every directory that exists and
    contains at least one file matching ``file_patterns``, a top-level
    heading with its absolute path is written, followed by the tree and a
    blank line. A directory that does not exist gets a heading with the
    name as given plus a warning entry, and a warning is printed.
    """
    with open(output_file, 'w', encoding='utf-8') as md_file:
        for directory in directories:
            abs_dir = os.path.abspath(directory)

            if os.path.isdir(abs_dir):
                tree = _build_tree(abs_dir, file_patterns)
                # Directories without any match are omitted entirely.
                if tree['files'] or tree['dirs']:
                    md_file.write(f"# {abs_dir}\n")
                    _write_tree_to_markdown(md_file, tree, 1)
                    md_file.write("\n")
                continue

            print(f"Warning: Global directory not found: {abs_dir}")
            md_file.write(f"# {directory}\n")
            md_file.write(f" - !! Warning: Global directory not found: {abs_dir}\n\n")
|
ara_cli/prompt_extractor.py
CHANGED
|
@@ -1,101 +1,207 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import json_repair
|
|
5
|
+
from markdown_it import MarkdownIt
|
|
1
6
|
from ara_cli.prompt_handler import send_prompt, get_file_content
|
|
2
7
|
from ara_cli.classifier import Classifier
|
|
3
8
|
from ara_cli.directory_navigator import DirectoryNavigator
|
|
4
9
|
from ara_cli.artefact_models.artefact_mapping import title_prefix_to_artefact_class
|
|
5
|
-
import re
|
|
6
|
-
import json
|
|
7
|
-
import json_repair
|
|
8
|
-
import os
|
|
9
10
|
|
|
10
|
-
from markdown_it import MarkdownIt
|
|
11
11
|
|
|
12
|
+
def _find_extract_token(tokens):
|
|
13
|
+
"""Find the first token that needs to be processed."""
|
|
14
|
+
for token in tokens:
|
|
15
|
+
if token.type == 'fence' and token.content.strip().startswith("# [x] extract"):
|
|
16
|
+
return token
|
|
17
|
+
return None
|
|
12
18
|
|
|
13
|
-
def extract_code_blocks_md(markdown_text):
|
|
14
|
-
md = MarkdownIt()
|
|
15
|
-
tokens = md.parse(markdown_text)
|
|
16
|
-
code_blocks = [token.content for token in tokens if token.type == 'fence']
|
|
17
|
-
return code_blocks
|
|
18
19
|
|
|
20
|
+
def _extract_file_path(content_lines):
|
|
21
|
+
"""Extract file path from content lines."""
|
|
22
|
+
if not content_lines:
|
|
23
|
+
return None
|
|
24
|
+
file_path_search = re.search(r"# filename: (.+)", content_lines[0])
|
|
25
|
+
return file_path_search.group(1).strip() if file_path_search else None
|
|
19
26
|
|
|
20
|
-
def extract_responses(document_path, relative_to_ara_root=False):
|
|
21
|
-
print(f"Debug: Starting extraction from {document_path}")
|
|
22
|
-
block_extraction_counter = 0
|
|
23
27
|
|
|
24
|
-
|
|
25
|
-
|
|
28
|
+
def _find_artefact_class(content_lines):
    """Look up an artefact class from the first word of the first two lines.

    Each of the first two lines is stripped and its text up to the first
    space is looked up in ``title_prefix_to_artefact_class``. The first hit
    wins; None is returned when neither line matches.
    """
    for candidate_line in content_lines[:2]:
        leading_word = candidate_line.strip().partition(' ')[0]
        if leading_word in title_prefix_to_artefact_class:
            return title_prefix_to_artefact_class[leading_word]
    return None
|
|
26
38
|
|
|
27
|
-
cwd = os.getcwd()
|
|
28
|
-
if relative_to_ara_root:
|
|
29
|
-
from ara_cli.directory_navigator import DirectoryNavigator
|
|
30
|
-
navigator = DirectoryNavigator()
|
|
31
|
-
navigator.navigate_to_target()
|
|
32
|
-
os.chdir('..')
|
|
33
39
|
|
|
34
|
-
|
|
35
|
-
|
|
40
|
+
def _process_file_extraction(file_path, code_content, force, write):
    """Report the extracted filename and hand the content to ``handle_existing_file``.

    ``force`` is forwarded as that function's ``skip_query`` argument and
    ``write`` as its ``write`` argument.
    """
    print(f"Filename extracted: {file_path}")
    handle_existing_file(file_path, code_content, skip_query=force, write=write)
|
|
36
44
|
|
|
37
|
-
for block in code_blocks_found:
|
|
38
|
-
block_lines = block.split('\n')
|
|
39
45
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
46
|
+
def _process_artefact_extraction(artefact_class, content_lines, force, write):
    """Deserialize an artefact from ``content_lines`` and write it to its file.

    The lines are joined and parsed with ``artefact_class.deserialize``; the
    re-serialized form is then passed to ``handle_existing_file`` for the
    artefact's ``file_path``, after ``DirectoryNavigator`` has changed the
    working directory. The working directory in effect at call time is
    always restored, even when writing fails.

    Args:
        artefact_class: Class providing ``deserialize``.
        content_lines: Lines of the block that describe the artefact.
        force: Forwarded to ``handle_existing_file`` (skip the create query).
        write: Forwarded to ``handle_existing_file`` (overwrite existing file).
    """
    artefact = artefact_class.deserialize('\n'.join(content_lines))
    serialized_artefact = artefact.serialize()

    original_directory = os.getcwd()
    try:
        directory_navigator = DirectoryNavigator()
        directory_navigator.navigate_to_target()

        artefact_path = artefact.file_path
        directory = os.path.dirname(artefact_path)
        # os.makedirs('') raises, so skip it for paths without a directory part.
        if directory:
            os.makedirs(directory, exist_ok=True)
        handle_existing_file(artefact_path, serialized_artefact, force, write)
    finally:
        # Never leave the process in the navigated directory.
        os.chdir(original_directory)
|
|
47
61
|
|
|
48
|
-
if file_path_search:
|
|
49
|
-
file_path = file_path_search.group(1).strip()
|
|
50
|
-
print(f"Filename extracted: {file_path}")
|
|
51
62
|
|
|
52
|
-
|
|
53
|
-
|
|
63
|
+
def _perform_extraction_for_block(source_lines, block_start, block_end, force, write):
    """Extract one fenced block and return its before/after text.

    ``block_start`` and ``block_end`` are the indices of the opening and
    closing fence lines. The first line inside the fence (the extract
    marker) is skipped; the remainder is treated either as a file (when it
    starts with a ``# filename:`` line) or as an artefact.

    Returns:
        ``(original_block_text, modified_block_text)`` where the modified
        text has the first ``# [x] extract`` turned into ``# [v] extract``,
        or ``(None, None)`` when the block is neither a file nor an artefact.
    """
    fenced_text = '\n'.join(source_lines[block_start:block_end + 1])
    inner_text = '\n'.join(source_lines[block_start + 1:block_end])

    # Everything after the marker line is the payload.
    payload_lines = inner_text.split('\n')[1:]

    target_path = _extract_file_path(payload_lines)
    if target_path:
        # The "# filename:" line itself is not part of the file content.
        _process_file_extraction(target_path, '\n'.join(payload_lines[1:]), force, write)
    else:
        artefact_class = _find_artefact_class(payload_lines)
        if not artefact_class:
            print("No filename or valid artefact found, skipping processing for this block.")
            return None, None
        _process_artefact_extraction(artefact_class, payload_lines, force, write)

    return fenced_text, fenced_text.replace("# [x] extract", "# [v] extract", 1)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class FenceDetector:
    """Locates fenced blocks marked for extraction in a list of lines."""

    def __init__(self, source_lines):
        """Keep a reference to the document's lines."""
        self.source_lines = source_lines

    def is_extract_fence(self, line_num):
        """Tell whether ``line_num`` opens a fence whose next line is the extract marker.

        A fence line starts (after stripping) with three backticks or three
        tildes; the line after it must start with ``# [x] extract``.
        """
        opener = self.source_lines[line_num].strip()
        if not opener.startswith(('```', '~~~')):
            return False

        following = line_num + 1
        if following >= len(self.source_lines):
            return False

        return self.source_lines[following].strip().startswith("# [x] extract")

    def find_matching_fence_end(self, start_line):
        """Return the index of the line closing the fence opened at ``start_line``.

        A closing line consists solely of the opening fence character,
        repeated exactly as often as in the opening run, and has the same
        leading indentation as the opening line. Returns -1 when no such
        line follows.
        """
        opening = self.source_lines[start_line]
        opening_body = opening.strip()
        marker = opening_body[0]
        opening_indent = len(opening) - len(opening.lstrip())
        marker_run = len(opening_body) - len(opening_body.lstrip(marker))
        closing_text = marker * marker_run

        for index, raw in enumerate(self.source_lines[start_line + 1:], start_line + 1):
            if raw.strip() != closing_text:
                continue
            if len(raw) - len(raw.lstrip()) == opening_indent:
                return index

        return -1
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _process_document_blocks(source_lines, force, write):
    """Run extraction for every marked fenced block in ``source_lines``.

    Lines are scanned top to bottom. A marked opening fence without a
    matching closing fence is skipped. After a block is handled, scanning
    resumes behind its closing fence.

    Returns:
        A list of ``(original_text, modified_text)`` pairs, one per block
        for which extraction produced both texts.
    """
    detector = FenceDetector(source_lines)
    collected = []
    cursor = 0

    while cursor < len(source_lines):
        closing = -1
        if detector.is_extract_fence(cursor):
            closing = detector.find_matching_fence_end(cursor)

        if closing == -1:
            cursor += 1
            continue

        print(f"Block found and processed starting on line {cursor + 1}.")
        original, modified = _perform_extraction_for_block(
            source_lines, cursor, closing, force, write
        )
        if original and modified:
            collected.append((original, modified))
        cursor = closing + 1

    return collected
|
|
86
162
|
|
|
87
|
-
os.chdir(original_directory)
|
|
88
163
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
164
|
+
def _apply_replacements(content, replacements):
|
|
165
|
+
"""Apply all replacements to the content."""
|
|
166
|
+
updated_content = content
|
|
167
|
+
for original, modified in replacements:
|
|
168
|
+
updated_content = updated_content.replace(original, modified, 1)
|
|
169
|
+
return updated_content
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _setup_working_directory(relative_to_ara_root):
|
|
173
|
+
"""Setup working directory and return original cwd."""
|
|
174
|
+
cwd = os.getcwd()
|
|
175
|
+
if relative_to_ara_root:
|
|
176
|
+
navigator = DirectoryNavigator()
|
|
177
|
+
navigator.navigate_to_target()
|
|
178
|
+
os.chdir('..')
|
|
179
|
+
return cwd
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
    """Extract all marked blocks of a document and mark them as done.

    The document is read as UTF-8 (undecodable bytes are replaced), every
    block marked ``# [x] extract`` is processed, and the document is written
    back with the processed blocks marked ``# [v] extract``.

    Args:
        document_path: Path of the markdown document.
        relative_to_ara_root: Passed to ``_setup_working_directory`` to
            select the directory in which extraction runs.
        force: Forwarded to block processing (skip the create query).
        write: Forwarded to block processing (overwrite existing files).

    Returns:
        None. A missing document is reported and skipped.
    """
    print(f"Starting extraction from '{document_path}'")

    try:
        with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
            content = file.read()
    except FileNotFoundError:
        print(f"Error: File not found at '{document_path}'. Skipping extraction.")
        return

    cwd = _setup_working_directory(relative_to_ara_root)
    try:
        source_lines = content.split('\n')
        replacements = _process_document_blocks(source_lines, force, write)
    finally:
        # Restore the caller's directory even when a block fails, and before
        # the document (possibly a relative path) is reopened below.
        os.chdir(cwd)

    updated_content = _apply_replacements(content, replacements)

    with open(document_path, 'w', encoding='utf-8') as file:
        file.write(updated_content)

    if replacements:
        print(f"End of extraction. Found and processed {len(replacements)} blocks in '{os.path.basename(document_path)}'.")
|
|
99
205
|
|
|
100
206
|
|
|
101
207
|
def modify_and_save_file(response, file_path):
|
|
@@ -116,7 +222,7 @@ def modify_and_save_file(response, file_path):
|
|
|
116
222
|
print("Skipping block.")
|
|
117
223
|
return
|
|
118
224
|
|
|
119
|
-
with open(file_path, 'w', encoding='utf-8') as file:
|
|
225
|
+
with open(file_path, 'w', encoding='utf-8', errors='replace') as file:
|
|
120
226
|
file.write(response_data['content'])
|
|
121
227
|
print(f"File {file_path} updated successfully.")
|
|
122
228
|
except json.JSONDecodeError as ex:
|
|
@@ -127,14 +233,23 @@ def prompt_user_decision(prompt):
|
|
|
127
233
|
return input(prompt)
|
|
128
234
|
|
|
129
235
|
|
|
130
|
-
def
|
|
236
|
+
def determine_should_create(skip_query=False):
    """Decide whether a missing file should be created.

    With ``skip_query`` set the answer is True without asking. Otherwise
    the user is prompted and only ``y`` or ``yes`` (any case) counts as
    consent.
    """
    if skip_query:
        return True

    answer = prompt_user_decision("File does not exist. Create? (y/n): ")
    return answer.lower() in ['y', 'yes']
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def create_file_if_not_exist(filename, content, skip_query=False):
|
|
131
246
|
try:
|
|
132
247
|
if not os.path.exists(filename):
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
if user_decision.lower() in ['y', 'yes']:
|
|
248
|
+
if determine_should_create(skip_query):
|
|
136
249
|
# Ensure the directory exists
|
|
137
|
-
os.
|
|
250
|
+
dir_name = os.path.dirname(filename)
|
|
251
|
+
if dir_name:
|
|
252
|
+
os.makedirs(dir_name, exist_ok=True)
|
|
138
253
|
|
|
139
254
|
with open(filename, 'w', encoding='utf-8') as file:
|
|
140
255
|
file.write(content)
|
|
@@ -170,7 +285,7 @@ def create_prompt_for_file_modification(content_str, filename):
|
|
|
170
285
|
{{
|
|
171
286
|
"filename": "path/filename.filextension",
|
|
172
287
|
"content": "full content of the modified file in valid json format"
|
|
173
|
-
}}
|
|
288
|
+
}}
|
|
174
289
|
"""
|
|
175
290
|
|
|
176
291
|
# print(f"Debug: modification prompt created: {prompt_text}")
|
|
@@ -178,34 +293,46 @@ def create_prompt_for_file_modification(content_str, filename):
|
|
|
178
293
|
return prompt_text
|
|
179
294
|
|
|
180
295
|
|
|
181
|
-
def handle_existing_file(filename, block_content):
|
|
296
|
+
def handle_existing_file(filename, block_content, skip_query=False, write=False):
    """Store ``block_content`` in ``filename`` using one of three strategies.

    * The file does not exist: its directory is created and
      ``create_file_if_not_exist`` is called (``skip_query`` is forwarded).
    * The file exists and ``write`` is true: it is overwritten directly;
      an ``OSError`` is reported, not raised.
    * Otherwise: a modification prompt is built, sent with
      ``send_prompt(..., purpose='extraction')``, and the concatenated
      streamed response is passed to ``modify_and_save_file``. Nothing
      happens when no prompt could be built.
    """
    if not os.path.isfile(filename):
        print(f"File {filename} does not exist, attempting to create")
        # Ensure directory exists before writing
        parent = os.path.dirname(filename)
        if parent:
            os.makedirs(parent, exist_ok=True)
        create_file_if_not_exist(filename, block_content, skip_query)
        return

    if write:
        print(f"File {filename} exists. Overwriting without LLM merge as requested.")
        try:
            parent = os.path.dirname(filename)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(filename, 'w', encoding='utf-8', errors='replace') as file:
                file.write(block_content)
            print(f"File {filename} overwritten successfully.")
        except OSError as e:
            print(f"Error: {e}")
            print(f"Failed to overwrite file {filename} due to an OS error")
        return

    print(f"File {filename} exists, creating modification prompt")
    prompt_text = create_prompt_for_file_modification(block_content, filename)
    if prompt_text is None:
        return

    messages = [{"role": "user", "content": prompt_text}]

    # Collect the streamed pieces; chunks without content are skipped.
    pieces = []
    for chunk in send_prompt(messages, purpose='extraction'):
        piece = chunk.choices[0].delta.content
        if piece:
            pieces.append(piece)
    modify_and_save_file(''.join(pieces), filename)
|
|
196
331
|
|
|
197
332
|
|
|
198
|
-
def extract_and_save_prompt_results(classifier, param):
|
|
333
|
+
def extract_and_save_prompt_results(classifier, param, write=False):
    """Extract the marked blocks from an artefact's prompt log.

    The log is expected at
    ``ara/<sub_directory>/<param>.data/<classifier>.prompt_log.md``, where
    the sub-directory comes from ``Classifier.get_sub_directory``. ``write``
    is forwarded to ``extract_responses``.
    """
    classifier_directory = Classifier.get_sub_directory(classifier)
    prompt_log_file = f"ara/{classifier_directory}/{param}.data/{classifier}.prompt_log.md"

    print(f"Extract marked sections from: {prompt_log_file}")
    extract_responses(prompt_log_file, write=write)
|