ara-cli 0.1.10.1-py3-none-any.whl → 0.1.11.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ara-cli might be problematic.
- ara_cli/__init__.py +0 -1
- ara_cli/__main__.py +95 -2
- ara_cli/artefact_autofix.py +44 -6
- ara_cli/artefact_models/artefact_model.py +18 -6
- ara_cli/artefact_models/artefact_templates.py +2 -1
- ara_cli/artefact_models/epic_artefact_model.py +11 -2
- ara_cli/artefact_models/feature_artefact_model.py +31 -1
- ara_cli/artefact_models/userstory_artefact_model.py +13 -1
- ara_cli/chat.py +142 -37
- ara_cli/chat_agent/__init__.py +0 -0
- ara_cli/chat_agent/agent_communicator.py +62 -0
- ara_cli/chat_agent/agent_process_manager.py +211 -0
- ara_cli/chat_agent/agent_status_manager.py +73 -0
- ara_cli/chat_agent/agent_workspace_manager.py +76 -0
- ara_cli/directory_navigator.py +37 -4
- ara_cli/file_loaders/text_file_loader.py +2 -2
- ara_cli/global_file_lister.py +5 -15
- ara_cli/prompt_extractor.py +179 -71
- ara_cli/prompt_handler.py +160 -59
- ara_cli/tag_extractor.py +26 -23
- ara_cli/template_loader.py +1 -1
- ara_cli/templates/prompt-modules/commands/empty.commands.md +2 -12
- ara_cli/templates/prompt-modules/commands/extract_general.commands.md +12 -0
- ara_cli/templates/prompt-modules/commands/extract_markdown.commands.md +11 -0
- ara_cli/templates/prompt-modules/commands/extract_python.commands.md +13 -0
- ara_cli/templates/prompt-modules/commands/feature_add_or_modifiy_specified_behavior.commands.md +36 -0
- ara_cli/templates/prompt-modules/commands/feature_generate_initial_specified_bevahior.commands.md +53 -0
- ara_cli/templates/prompt-modules/commands/prompt_template_tech_stack_transformer.commands.md +95 -0
- ara_cli/templates/prompt-modules/commands/python_bug_fixing_code.commands.md +34 -0
- ara_cli/templates/prompt-modules/commands/python_generate_code.commands.md +27 -0
- ara_cli/templates/prompt-modules/commands/python_refactoring_code.commands.md +39 -0
- ara_cli/templates/prompt-modules/commands/python_step_definitions_generation_and_fixing.commands.md +40 -0
- ara_cli/templates/prompt-modules/commands/python_unittest_generation_and_fixing.commands.md +48 -0
- ara_cli/version.py +1 -1
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.11.0.dist-info}/METADATA +31 -1
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.11.0.dist-info}/RECORD +41 -41
- tests/test_global_file_lister.py +1 -1
- tests/test_prompt_handler.py +12 -4
- ara_cli/templates/prompt-modules/blueprints/complete_pytest_unittest.blueprint.md +0 -27
- ara_cli/templates/prompt-modules/blueprints/task_todo_list_implement_feature_BDD_way.blueprint.md +0 -30
- ara_cli/templates/prompt-modules/commands/artefact_classification.commands.md +0 -9
- ara_cli/templates/prompt-modules/commands/artefact_extension.commands.md +0 -17
- ara_cli/templates/prompt-modules/commands/artefact_formulation.commands.md +0 -14
- ara_cli/templates/prompt-modules/commands/behave_step_generation.commands.md +0 -102
- ara_cli/templates/prompt-modules/commands/code_generation_complex.commands.md +0 -20
- ara_cli/templates/prompt-modules/commands/code_generation_simple.commands.md +0 -13
- ara_cli/templates/prompt-modules/commands/error_fixing.commands.md +0 -20
- ara_cli/templates/prompt-modules/commands/feature_file_update.commands.md +0 -18
- ara_cli/templates/prompt-modules/commands/feature_formulation.commands.md +0 -43
- ara_cli/templates/prompt-modules/commands/js_code_generation_simple.commands.md +0 -13
- ara_cli/templates/prompt-modules/commands/refactoring.commands.md +0 -15
- ara_cli/templates/prompt-modules/commands/refactoring_analysis.commands.md +0 -9
- ara_cli/templates/prompt-modules/commands/reverse_engineer_feature_file.commands.md +0 -15
- ara_cli/templates/prompt-modules/commands/reverse_engineer_program_flow.commands.md +0 -19
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.11.0.dist-info}/WHEEL +0 -0
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.11.0.dist-info}/entry_points.txt +0 -0
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.11.0.dist-info}/top_level.txt +0 -0
ara_cli/chat_agent/agent_workspace_manager.py
ADDED
@@ -0,0 +1,76 @@
+from ara_cli.error_handler import AraError
+
+import os
+
+
+class AgentWorkspaceManager:
+    """Manages workspace directories for agent execution."""
+
+    @staticmethod
+    def determine_agent_workspace(chat_instance, artefact_classifier=None, artefact_name=None):
+        """
+        Determines the appropriate workspace directory for agent logs and data.
+
+        Args:
+            chat_instance: The Chat instance
+            artefact_classifier: Optional artefact classifier (e.g., 'feature')
+            artefact_name: Optional artefact name
+
+        Returns:
+            str: Absolute path to the agent workspace directory (for logs)
+        """
+        base_directory = os.path.dirname(chat_instance.chat_name)
+
+        if artefact_classifier and artefact_name:
+            # Use artefact-specific workspace for logs
+            from ara_cli.classifier import Classifier
+            classifier_dir = Classifier.get_sub_directory(artefact_classifier)
+            if not classifier_dir:
+                raise AraError(f"Unknown classifier: {artefact_classifier}")
+
+            workspace_dir = os.path.join(
+                base_directory,
+                "ara",
+                classifier_dir,
+                f"{artefact_name}.data"
+            )
+        else:
+            # Use chat-specific workspace for logs
+            chat_name_without_ext = os.path.splitext(
+                os.path.basename(chat_instance.chat_name))[0]
+            workspace_dir = os.path.join(
+                base_directory,
+                "ara",
+                f"{chat_name_without_ext}"
+            )
+
+        # Ensure workspace directory exists
+        os.makedirs(workspace_dir, exist_ok=True)
+        return os.path.abspath(workspace_dir)
+
+    @staticmethod
+    def determine_base_work_dir(chat_instance):
+        """
+        Determines the base working directory (project root).
+
+        Args:
+            chat_instance: The Chat instance
+
+        Returns:
+            str: Absolute path to the project root directory
+        """
+
+        return os.path.dirname("./")
+        # The bwd should be the project root, not inside ara/
+        chat_dir = os.path.dirname(chat_instance.chat_name)
+
+        # Find project root by looking for 'ara' directory
+        current_dir = chat_dir
+        while True:
+            if os.path.isdir(os.path.join(current_dir, "ara")):
+                return os.path.abspath(current_dir)
+            parent_dir = os.path.dirname(current_dir)
+            if parent_dir == current_dir: # Reached filesystem root
+                # Fallback to chat directory if no 'ara' folder found
+                return os.path.abspath(chat_dir)
+            current_dir = parent_dir
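The new workspace helpers are plain path logic plus `os.makedirs`. A minimal usage sketch follows; the `chat_name` value and the `SimpleNamespace` stand-in for a Chat instance are hypothetical, and only the `chat_name` attribute is assumed to be read. Note that `determine_base_work_dir` returns `os.path.dirname("./")` (the relative current directory) before the project-root search below it, so that search is currently unreachable.

```python
# Hedged sketch of calling the new helpers; not taken from package docs.
from types import SimpleNamespace
from ara_cli.chat_agent.agent_workspace_manager import AgentWorkspaceManager

chat = SimpleNamespace(chat_name="project/chats/my_chat.md")  # hypothetical chat instance

# Chat-scoped logs: <chat dir>/ara/my_chat (created if missing)
log_dir = AgentWorkspaceManager.determine_agent_workspace(chat)

# Artefact-scoped logs: <chat dir>/ara/<classifier dir>/login.data
artefact_dir = AgentWorkspaceManager.determine_agent_workspace(
    chat, artefact_classifier="feature", artefact_name="login"
)
print(log_dir, artefact_dir)
```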
ara_cli/directory_navigator.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import sys
 from os.path import join, exists, isdir, dirname, basename
 # from ara_cli.directory_searcher import DirectorySearcher
 
@@ -23,7 +24,8 @@ class DirectoryNavigator:
             return original_directory
 
         current_directory = original_directory
-
+        # Ensure loop breaks at root
+        while current_directory != dirname(current_directory):
             potential_path = join(current_directory, self.target_directory)
             if self.exists(potential_path):
                 os.chdir(potential_path)
@@ -31,7 +33,8 @@
             current_directory = dirname(current_directory)
 
         # If the loop completes, the target directory was not found
-        user_input = input(
+        user_input = input(
+            f"Unable to locate the '{self.target_directory}' directory. Do you want to create an 'ara' folder in the working directory? (y/N): ").strip().lower()
 
         if user_input == '' or user_input == 'y':
             ara_folder_path = join(original_directory, 'ara')
@@ -40,7 +43,8 @@
             os.chdir(ara_folder_path)
             return original_directory
         else:
-            print(
+            print(
+                f"Unable to locate the '{self.target_directory}' directory and user declined to create 'ara' folder.")
             sys.exit(0)
 
     def navigate_to_relative(self, relative_path):
@@ -56,7 +60,36 @@
         if self.exists(path):
             os.chdir(path)
         else:
-            raise Exception(
+            raise Exception(
+                f"Unable to navigate to '{relative_path}' relative to the target directory.")
+
+    @staticmethod
+    def find_ara_directory_root():
+        """Find the root ara directory by traversing up the directory tree."""
+        current_dir = os.getcwd()
+
+        # Check if we're already inside an ara directory structure
+        path_parts = current_dir.split(os.sep)
+
+        # Look for 'ara' in the path parts
+        if 'ara' in path_parts:
+            ara_index = path_parts.index('ara')
+            # Reconstruct path up to and including 'ara'
+            ara_root_parts = path_parts[:ara_index + 1]
+            potential_ara_root = os.sep.join(ara_root_parts)
+            if os.path.exists(potential_ara_root) and os.path.isdir(potential_ara_root):
+                return potential_ara_root
+
+        # If not inside ara directory, check current directory and parents
+        check_dir = current_dir
+        # Stop at filesystem root
+        while check_dir != os.path.dirname(check_dir):
+            ara_path = os.path.join(check_dir, 'ara')
+            if os.path.exists(ara_path) and os.path.isdir(ara_path):
+                return ara_path
+            check_dir = os.path.dirname(check_dir)
+
+        return None
 
     # debug version
     # def get_ara_directory(self):
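`find_ara_directory_root` is a static method, so it can be called without constructing a navigator; a minimal sketch (the surrounding directory layout is hypothetical):

```python
import os
from ara_cli.directory_navigator import DirectoryNavigator

# Returns the enclosing 'ara' directory when the CWD is already inside one,
# otherwise the first 'ara' child found while walking up towards the
# filesystem root, or None if nothing is found.
ara_root = DirectoryNavigator.find_ara_directory_root()
if ara_root is None:
    print(f"No 'ara' directory reachable from {os.getcwd()}")
else:
    print(f"ara root: {ara_root}")
```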
ara_cli/file_loaders/text_file_loader.py
CHANGED
@@ -19,14 +19,14 @@ class TextFileLoader(FileLoader):
 
         if is_md_file and extract_images:
             reader = MarkdownReader(file_path)
-            file_content = reader.read(extract_images=True)
+            file_content = reader.read(extract_images=True).replace('\r\n', '\n')
         else:
             # Use charset-normalizer to detect encoding
             encoded_content = from_path(file_path).best()
             if not encoded_content:
                 print(f"Failed to detect encoding for {file_path}")
                 return False
-            file_content = str(encoded_content)
+            file_content = str(encoded_content).replace('\r\n', '\n')
 
         if block_delimiter:
             file_content = f"{block_delimiter}\n{file_content}\n{block_delimiter}"
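Both branches now normalize Windows line endings in the loaded content; a one-line illustration of the effect (not the loader itself):

```python
raw = "first line\r\nsecond line\r\n"  # CRLF content as read from disk
assert raw.replace('\r\n', '\n') == "first line\nsecond line\n"
```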
ara_cli/global_file_lister.py
CHANGED
@@ -2,22 +2,19 @@ import os
 import fnmatch
 from typing import List, Dict, Any
 
-# Ağaç yapımız için bir tip tanımı yapalım
 DirTree = Dict[str, Any]
 
 def _build_tree(root_path: str, patterns: List[str]) -> DirTree:
-    """
+    """Creates a nested dictionary representing the directory structure in the specified path."""
     tree: DirTree = {'files': [], 'dirs': {}}
     try:
         for item in os.listdir(root_path):
             item_path = os.path.join(root_path, item)
             if os.path.isdir(item_path):
                 subtree = _build_tree(item_path, patterns)
-                # Sadece içinde dosya olan veya dosyası olan alt klasörleri ekle
                 if subtree['files'] or subtree['dirs']:
                     tree['dirs'][item] = subtree
             elif os.path.isfile(item_path):
-                # Dosyanın verilen desenlerden herhangi biriyle eşleşip eşleşmediğini kontrol et
                 if any(fnmatch.fnmatch(item, pattern) for pattern in patterns):
                     tree['files'].append(item)
     except OSError as e:
@@ -25,23 +22,17 @@ def _build_tree(root_path: str, patterns: List[str]) -> DirTree:
     return tree
 
 def _write_tree_to_markdown(md_file, tree: DirTree, level: int):
-    """
-    # Dosyaları girintili olarak yaz
+    """Writes the tree data structure to the file in markdown format."""
     indent = ' ' * level
     for filename in sorted(tree['files']):
         md_file.write(f"{indent}- [] {filename}\n")
 
-    # Alt dizinler için başlık oluştur ve recursive olarak devam et
     for dirname, subtree in sorted(tree['dirs'].items()):
-        # Alt başlıklar için girinti yok, sadece başlık seviyesi artıyor
         md_file.write(f"{' ' * (level -1)}{'#' * (level + 1)} {dirname}\n")
         _write_tree_to_markdown(md_file, subtree, level + 1)
 
 def generate_global_markdown_listing(directories: List[str], file_patterns: List[str], output_file: str):
-    """
-    Global dizinler için hiyerarşik bir markdown dosya listesi oluşturur.
-    En üst başlık olarak mutlak yolu kullanır, alt öğeler için göreceli isimler kullanır.
-    """
+    """Creates a hierarchical list of markdown files for global directories. Uses the absolute path as the top heading and relative names for children."""
     with open(output_file, 'w', encoding='utf-8') as md_file:
         for directory in directories:
             abs_dir = os.path.abspath(directory)
@@ -49,12 +40,11 @@ def generate_global_markdown_listing(directories: List[str], file_patterns: List
             if not os.path.isdir(abs_dir):
                 print(f"Warning: Global directory not found: {abs_dir}")
                 md_file.write(f"# {directory}\n")
-                md_file.write(f" - !!
+                md_file.write(f" - !! Warning: Global directory not found: {abs_dir}\n\n")
                 continue
 
             tree = _build_tree(abs_dir, file_patterns)
-
-            # Sadece ağaç boş değilse yaz
+
             if tree['files'] or tree['dirs']:
                 md_file.write(f"# {abs_dir}\n")
                 _write_tree_to_markdown(md_file, tree, 1)
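For orientation, a hedged sketch of how the listing functions are driven and roughly what they emit; the directory, patterns, and output path below are placeholders:

```python
from ara_cli.global_file_lister import generate_global_markdown_listing

generate_global_markdown_listing(
    directories=["./docs"],          # hypothetical global directory
    file_patterns=["*.md"],
    output_file="global_listing.md",
)

# Rough shape of global_listing.md, following _write_tree_to_markdown:
# # /abs/path/to/docs
#  - [] index.md
# ## guides
#   - [] setup.md
```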
ara_cli/prompt_extractor.py
CHANGED
@@ -9,90 +9,199 @@ from ara_cli.directory_navigator import DirectoryNavigator
 from ara_cli.artefact_models.artefact_mapping import title_prefix_to_artefact_class
 
 
-def
-
-
-
-
+def _find_extract_token(tokens):
+    """Find the first token that needs to be processed."""
+    for token in tokens:
+        if token.type == 'fence' and token.content.strip().startswith("# [x] extract"):
+            return token
+    return None
+
+
+def _extract_file_path(content_lines):
+    """Extract file path from content lines."""
+    if not content_lines:
+        return None
+    file_path_search = re.search(r"# filename: (.+)", content_lines[0])
+    return file_path_search.group(1).strip() if file_path_search else None
+
+
+def _find_artefact_class(content_lines):
+    """Find the appropriate artefact class from content lines."""
+    for line in content_lines[:2]:
+        words = line.strip().split(' ')
+        if not words:
+            continue
+        first_word = words[0]
+        if first_word in title_prefix_to_artefact_class:
+            return title_prefix_to_artefact_class[first_word]
+    return None
 
 
-def
-
-
+def _process_file_extraction(file_path, code_content, force, write):
+    """Process file extraction logic."""
+    print(f"Filename extracted: {file_path}")
+    handle_existing_file(file_path, code_content, force, write)
 
-    with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
-        content = file.read()
-
-    cwd = os.getcwd()
-    if relative_to_ara_root:
-        navigator = DirectoryNavigator()
-        navigator.navigate_to_target()
-        os.chdir('..')
 
-
-
+def _process_artefact_extraction(artefact_class, content_lines, force, write):
+    """Process artefact extraction logic."""
+    artefact = artefact_class.deserialize('\n'.join(content_lines))
+    serialized_artefact = artefact.serialize()
 
-
-
+    original_directory = os.getcwd()
+    directory_navigator = DirectoryNavigator()
+    directory_navigator.navigate_to_target()
 
-
-
-
+    artefact_path = artefact.file_path
+    directory = os.path.dirname(artefact_path)
+    os.makedirs(directory, exist_ok=True)
+    handle_existing_file(artefact_path, serialized_artefact, force, write)
 
-
+    os.chdir(original_directory)
 
-    file_path_search = re.search(r"# filename: (.+)", block_lines[0])
 
-
-
-
+def _perform_extraction_for_block(source_lines, block_start, block_end, force, write):
+    """Helper function to process a single, identified block."""
+    original_block_text = '\n'.join(source_lines[block_start:block_end + 1])
+    block_content_lines = source_lines[block_start + 1:block_end]
+    block_content = '\n'.join(block_content_lines)
 
-
-
+    block_lines = block_content.split('\n')
+    content_lines_after_extract = block_lines[1:]
 
-
-        block_extraction_counter += 1
+    file_path = _extract_file_path(content_lines_after_extract)
 
-
-
+    if file_path:
+        code_content = '\n'.join(content_lines_after_extract[1:])
+        _process_file_extraction(file_path, code_content, force, write)
+    else:
+        artefact_class = _find_artefact_class(content_lines_after_extract)
+        if artefact_class:
+            _process_artefact_extraction(artefact_class, content_lines_after_extract, force, write)
        else:
-
-
-
-
-
-
-
-
-
-
-
-
+            print("No filename or valid artefact found, skipping processing for this block.")
+            return None, None
+
+    modified_block_text = original_block_text.replace("# [x] extract", "# [v] extract", 1)
+    return original_block_text, modified_block_text
+
+
+class FenceDetector:
+    """Helper class to detect and match fence blocks."""
+
+    def __init__(self, source_lines):
+        self.source_lines = source_lines
+
+    def is_extract_fence(self, line_num):
+        """Check if line is a fence with extract marker."""
+        line = self.source_lines[line_num]
+        stripped_line = line.strip()
+
+        is_fence = stripped_line.startswith('```') or stripped_line.startswith('~~~')
+        if not is_fence:
+            return False
+
+        if not (line_num + 1 < len(self.source_lines)):
+            return False
+
+        return self.source_lines[line_num + 1].strip().startswith("# [x] extract")
+
+    def find_matching_fence_end(self, start_line):
+        """Find the matching end fence for a given start fence."""
+        fence_line = self.source_lines[start_line]
+        indentation = len(fence_line) - len(fence_line.lstrip())
+        stripped_fence_line = fence_line.strip()
+        fence_char = stripped_fence_line[0]
+        fence_length = len(stripped_fence_line) - len(stripped_fence_line.lstrip(fence_char))
+
+        for i in range(start_line + 1, len(self.source_lines)):
+            scan_line = self.source_lines[i]
+            stripped_scan_line = scan_line.strip()
+
+            if not stripped_scan_line or stripped_scan_line[0] != fence_char:
+                continue
+
+            if not all(c == fence_char for c in stripped_scan_line):
                continue
-    artefact = artefact_class.deserialize('\n'.join(block_lines))
-    serialized_artefact = artefact.serialize()
 
-
-
-
+            candidate_indentation = len(scan_line) - len(scan_line.lstrip())
+            candidate_length = len(stripped_scan_line)
+
+            if candidate_length == fence_length and candidate_indentation == indentation:
+                return i
+
+        return -1
+
+
+def _process_document_blocks(source_lines, force, write):
+    """Process all extract blocks in the document."""
+    fence_detector = FenceDetector(source_lines)
+    replacements = []
+    line_num = 0
+
+    while line_num < len(source_lines):
+        if not fence_detector.is_extract_fence(line_num):
+            line_num += 1
+            continue
 
-
-
-
-
+        block_start_line = line_num
+        block_end_line = fence_detector.find_matching_fence_end(block_start_line)
+
+        if block_end_line != -1:
+            print(f"Block found and processed starting on line {block_start_line + 1}.")
+            original, modified = _perform_extraction_for_block(
+                source_lines, block_start_line, block_end_line, force, write
+            )
+            if original and modified:
+                replacements.append((original, modified))
+            line_num = block_end_line + 1
+        else:
+            line_num += 1
+
+    return replacements
 
-    os.chdir(original_directory)
 
-
-
-
+def _apply_replacements(content, replacements):
+    """Apply all replacements to the content."""
+    updated_content = content
+    for original, modified in replacements:
+        updated_content = updated_content.replace(original, modified, 1)
+    return updated_content
+
+
+def _setup_working_directory(relative_to_ara_root):
+    """Setup working directory and return original cwd."""
+    cwd = os.getcwd()
+    if relative_to_ara_root:
+        navigator = DirectoryNavigator()
+        navigator.navigate_to_target()
+        os.chdir('..')
+    return cwd
+
+
+def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
+    print(f"Starting extraction from '{document_path}'")
+
+    try:
+        with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
+            content = file.read()
+    except FileNotFoundError:
+        print(f"Error: File not found at '{document_path}'. Skipping extraction.")
+        return
+
+    cwd = _setup_working_directory(relative_to_ara_root)
+
+    source_lines = content.split('\n')
+    replacements = _process_document_blocks(source_lines, force, write)
+
+    updated_content = _apply_replacements(content, replacements)
 
     os.chdir(cwd)
-    # Save the updated markdown content
     with open(document_path, 'w', encoding='utf-8') as file:
         file.write(updated_content)
 
-
+    if replacements:
+        print(f"End of extraction. Found and processed {len(replacements)} blocks in '{os.path.basename(document_path)}'.")
 
 
 def modify_and_save_file(response, file_path):
@@ -138,7 +247,9 @@ def create_file_if_not_exist(filename, content, skip_query=False):
     if not os.path.exists(filename):
         if determine_should_create(skip_query):
             # Ensure the directory exists
-            os.
+            dir_name = os.path.dirname(filename)
+            if dir_name:
+                os.makedirs(dir_name, exist_ok=True)
 
             with open(filename, 'w', encoding='utf-8') as file:
                 file.write(content)
@@ -185,7 +296,12 @@ def create_prompt_for_file_modification(content_str, filename):
 def handle_existing_file(filename, block_content, skip_query=False, write=False):
     if not os.path.isfile(filename):
         print(f"File {filename} does not exist, attempting to create")
+        # Ensure directory exists before writing
+        directory = os.path.dirname(filename)
+        if directory:
+            os.makedirs(directory, exist_ok=True)
         create_file_if_not_exist(filename, block_content, skip_query)
+
     elif write:
         print(f"File {filename} exists. Overwriting without LLM merge as requested.")
         try:
@@ -219,12 +335,4 @@ def extract_and_save_prompt_results(classifier, param, write=False):
     prompt_log_file = f"ara/{sub_directory}/{param}.data/{classifier}.prompt_log.md"
     print(f"Extract marked sections from: {prompt_log_file}")
 
-    extract_responses(prompt_log_file, write=write)
-
-
-def update_markdown(original_content, block_content, filename):
-    """
-    Update the markdown content by changing the extract block from "# [x] extract" to "# [v] extract"
-    """
-    updated_content = original_content.replace("# [x] extract", "# [v] extract")
-    return updated_content
+    extract_responses(prompt_log_file, write=write)