ara-cli 0.1.9.77__py3-none-any.whl → 0.1.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ara-cli might be problematic. Click here for more details.

Files changed (122) hide show
  1. ara_cli/__init__.py +18 -2
  2. ara_cli/__main__.py +245 -66
  3. ara_cli/ara_command_action.py +128 -63
  4. ara_cli/ara_config.py +201 -177
  5. ara_cli/ara_subcommands/__init__.py +0 -0
  6. ara_cli/ara_subcommands/autofix.py +26 -0
  7. ara_cli/ara_subcommands/chat.py +27 -0
  8. ara_cli/ara_subcommands/classifier_directory.py +16 -0
  9. ara_cli/ara_subcommands/common.py +100 -0
  10. ara_cli/ara_subcommands/create.py +75 -0
  11. ara_cli/ara_subcommands/delete.py +22 -0
  12. ara_cli/ara_subcommands/extract.py +22 -0
  13. ara_cli/ara_subcommands/fetch_templates.py +14 -0
  14. ara_cli/ara_subcommands/list.py +65 -0
  15. ara_cli/ara_subcommands/list_tags.py +25 -0
  16. ara_cli/ara_subcommands/load.py +48 -0
  17. ara_cli/ara_subcommands/prompt.py +136 -0
  18. ara_cli/ara_subcommands/read.py +47 -0
  19. ara_cli/ara_subcommands/read_status.py +20 -0
  20. ara_cli/ara_subcommands/read_user.py +20 -0
  21. ara_cli/ara_subcommands/reconnect.py +27 -0
  22. ara_cli/ara_subcommands/rename.py +22 -0
  23. ara_cli/ara_subcommands/scan.py +14 -0
  24. ara_cli/ara_subcommands/set_status.py +22 -0
  25. ara_cli/ara_subcommands/set_user.py +22 -0
  26. ara_cli/ara_subcommands/template.py +16 -0
  27. ara_cli/artefact_autofix.py +214 -28
  28. ara_cli/artefact_creator.py +5 -8
  29. ara_cli/artefact_deleter.py +2 -4
  30. ara_cli/artefact_fuzzy_search.py +13 -6
  31. ara_cli/artefact_lister.py +29 -55
  32. ara_cli/artefact_models/artefact_data_retrieval.py +23 -0
  33. ara_cli/artefact_models/artefact_model.py +106 -25
  34. ara_cli/artefact_models/artefact_templates.py +23 -13
  35. ara_cli/artefact_models/epic_artefact_model.py +11 -2
  36. ara_cli/artefact_models/feature_artefact_model.py +56 -1
  37. ara_cli/artefact_models/userstory_artefact_model.py +15 -3
  38. ara_cli/artefact_reader.py +4 -5
  39. ara_cli/artefact_renamer.py +6 -2
  40. ara_cli/artefact_scan.py +2 -2
  41. ara_cli/chat.py +594 -219
  42. ara_cli/chat_agent/__init__.py +0 -0
  43. ara_cli/chat_agent/agent_communicator.py +62 -0
  44. ara_cli/chat_agent/agent_process_manager.py +211 -0
  45. ara_cli/chat_agent/agent_status_manager.py +73 -0
  46. ara_cli/chat_agent/agent_workspace_manager.py +76 -0
  47. ara_cli/commands/__init__.py +0 -0
  48. ara_cli/commands/command.py +7 -0
  49. ara_cli/commands/extract_command.py +15 -0
  50. ara_cli/commands/load_command.py +65 -0
  51. ara_cli/commands/load_image_command.py +34 -0
  52. ara_cli/commands/read_command.py +117 -0
  53. ara_cli/completers.py +144 -0
  54. ara_cli/directory_navigator.py +37 -4
  55. ara_cli/error_handler.py +134 -0
  56. ara_cli/file_classifier.py +3 -2
  57. ara_cli/file_loaders/__init__.py +0 -0
  58. ara_cli/file_loaders/binary_file_loader.py +33 -0
  59. ara_cli/file_loaders/document_file_loader.py +34 -0
  60. ara_cli/file_loaders/document_reader.py +245 -0
  61. ara_cli/file_loaders/document_readers.py +233 -0
  62. ara_cli/file_loaders/file_loader.py +50 -0
  63. ara_cli/file_loaders/file_loaders.py +123 -0
  64. ara_cli/file_loaders/image_processor.py +89 -0
  65. ara_cli/file_loaders/markdown_reader.py +75 -0
  66. ara_cli/file_loaders/text_file_loader.py +187 -0
  67. ara_cli/global_file_lister.py +51 -0
  68. ara_cli/prompt_extractor.py +214 -87
  69. ara_cli/prompt_handler.py +508 -146
  70. ara_cli/tag_extractor.py +54 -24
  71. ara_cli/template_loader.py +245 -0
  72. ara_cli/template_manager.py +14 -4
  73. ara_cli/templates/prompt-modules/commands/empty.commands.md +2 -12
  74. ara_cli/templates/prompt-modules/commands/extract_general.commands.md +12 -0
  75. ara_cli/templates/prompt-modules/commands/extract_markdown.commands.md +11 -0
  76. ara_cli/templates/prompt-modules/commands/extract_python.commands.md +13 -0
  77. ara_cli/templates/prompt-modules/commands/feature_add_or_modifiy_specified_behavior.commands.md +36 -0
  78. ara_cli/templates/prompt-modules/commands/feature_generate_initial_specified_bevahior.commands.md +53 -0
  79. ara_cli/templates/prompt-modules/commands/prompt_template_tech_stack_transformer.commands.md +95 -0
  80. ara_cli/templates/prompt-modules/commands/python_bug_fixing_code.commands.md +34 -0
  81. ara_cli/templates/prompt-modules/commands/python_generate_code.commands.md +27 -0
  82. ara_cli/templates/prompt-modules/commands/python_refactoring_code.commands.md +39 -0
  83. ara_cli/templates/prompt-modules/commands/python_step_definitions_generation_and_fixing.commands.md +40 -0
  84. ara_cli/templates/prompt-modules/commands/python_unittest_generation_and_fixing.commands.md +48 -0
  85. ara_cli/update_config_prompt.py +7 -1
  86. ara_cli/version.py +1 -1
  87. ara_cli-0.1.10.8.dist-info/METADATA +241 -0
  88. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/RECORD +104 -59
  89. tests/test_ara_command_action.py +66 -52
  90. tests/test_ara_config.py +200 -279
  91. tests/test_artefact_autofix.py +361 -5
  92. tests/test_artefact_lister.py +52 -132
  93. tests/test_artefact_scan.py +1 -1
  94. tests/test_chat.py +2009 -603
  95. tests/test_file_classifier.py +23 -0
  96. tests/test_file_creator.py +3 -5
  97. tests/test_global_file_lister.py +131 -0
  98. tests/test_prompt_handler.py +746 -0
  99. tests/test_tag_extractor.py +19 -13
  100. tests/test_template_loader.py +192 -0
  101. tests/test_template_manager.py +5 -4
  102. ara_cli/ara_command_parser.py +0 -536
  103. ara_cli/templates/prompt-modules/blueprints/complete_pytest_unittest.blueprint.md +0 -27
  104. ara_cli/templates/prompt-modules/blueprints/task_todo_list_implement_feature_BDD_way.blueprint.md +0 -30
  105. ara_cli/templates/prompt-modules/commands/artefact_classification.commands.md +0 -9
  106. ara_cli/templates/prompt-modules/commands/artefact_extension.commands.md +0 -17
  107. ara_cli/templates/prompt-modules/commands/artefact_formulation.commands.md +0 -14
  108. ara_cli/templates/prompt-modules/commands/behave_step_generation.commands.md +0 -102
  109. ara_cli/templates/prompt-modules/commands/code_generation_complex.commands.md +0 -20
  110. ara_cli/templates/prompt-modules/commands/code_generation_simple.commands.md +0 -13
  111. ara_cli/templates/prompt-modules/commands/error_fixing.commands.md +0 -20
  112. ara_cli/templates/prompt-modules/commands/feature_file_update.commands.md +0 -18
  113. ara_cli/templates/prompt-modules/commands/feature_formulation.commands.md +0 -43
  114. ara_cli/templates/prompt-modules/commands/js_code_generation_simple.commands.md +0 -13
  115. ara_cli/templates/prompt-modules/commands/refactoring.commands.md +0 -15
  116. ara_cli/templates/prompt-modules/commands/refactoring_analysis.commands.md +0 -9
  117. ara_cli/templates/prompt-modules/commands/reverse_engineer_feature_file.commands.md +0 -15
  118. ara_cli/templates/prompt-modules/commands/reverse_engineer_program_flow.commands.md +0 -19
  119. ara_cli-0.1.9.77.dist-info/METADATA +0 -18
  120. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/WHEEL +0 -0
  121. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/entry_points.txt +0 -0
  122. {ara_cli-0.1.9.77.dist-info → ara_cli-0.1.10.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,187 @@
1
+ import os
2
+ import re
3
+ import base64
4
+ import tempfile
5
+ from typing import Optional, Tuple
6
+ import requests
7
+ from charset_normalizer import from_path
8
+ from ara_cli.prompt_handler import describe_image
9
+ from ara_cli.file_loaders.file_loader import FileLoader
10
+
11
+
12
class TextFileLoader(FileLoader):
    """File loader that appends the text of a file to the chat file."""

    def load(self, file_path: str, prefix: str = "", suffix: str = "",
             block_delimiter: str = "", extract_images: bool = False, **kwargs) -> bool:
        """Append the content of ``file_path`` to ``self.chat.chat_name``.

        A ``.md`` file is read through ``MarkdownReader`` when
        ``extract_images`` is set; any other file is decoded with
        charset-normalizer. Windows line endings are normalised to ``\\n``.
        The text is optionally wrapped in ``block_delimiter`` lines and then
        surrounded by ``prefix`` and ``suffix``.

        Returns ``False`` when no encoding could be detected, otherwise
        ``True`` after the content has been written.
        """
        is_markdown = file_path.lower().endswith('.md')

        if is_markdown and extract_images:
            raw_text = MarkdownReader(file_path).read(extract_images=True)
        else:
            # Let charset-normalizer pick the most plausible encoding.
            best_match = from_path(file_path).best()
            if not best_match:
                print(f"Failed to detect encoding for {file_path}")
                return False
            raw_text = str(best_match)

        body = raw_text.replace('\r\n', '\n')
        if block_delimiter:
            body = f"{block_delimiter}\n{body}\n{block_delimiter}"

        with open(self.chat.chat_name, 'a', encoding='utf-8') as chat_file:
            chat_file.write(f"{prefix}{body}{suffix}\n")

        return True
40
+
41
+
42
class MarkdownReader:
    """Reads a markdown file and can replace image references with descriptions."""

    def __init__(self, file_path: str):
        self.file_path = file_path
        # Relative image references are resolved against the markdown file's folder.
        self.base_dir = os.path.dirname(file_path)
        self.image_processor = ImageProcessor()

    def read(self, extract_images: bool = False) -> str:
        """Return the file's text, with images described when ``extract_images`` is set.

        The file is opened as UTF-8; decoding and I/O errors propagate to the
        caller.
        """
        with open(self.file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        if not extract_images:
            return content

        return self._process_images(content)

    def _process_images(self, content: str) -> str:
        """Replace every ``![alt](ref)`` in ``content`` that can be described.

        References that cannot be processed are left untouched.
        """
        # Pattern to match markdown images: ![alt text](url or path)
        image_pattern = re.compile(r'!\[([^\]]*)\]\(([^\)]+)\)')
        base64_pattern = re.compile(r'data:image/([^;]+);base64,([^)]+)')

        def _substitute(match):
            replacement = self._process_single_image(match.group(2), base64_pattern)
            # Keep the original markup when no description could be produced.
            return replacement if replacement else match.group(0)

        # Substituting by match position (instead of str.replace on the
        # already-modified text) guarantees that each replacement lands on the
        # reference it was computed for, even when the same markup occurs
        # twice or a generated description itself contains image syntax.
        return image_pattern.sub(_substitute, content)

    def _process_single_image(self, image_ref: str, base64_pattern: re.Pattern) -> Optional[str]:
        """Return replacement text for one image reference, or ``None``.

        The reference is tried as a base64 data URI, then as an http(s) URL,
        then as a local path relative to ``self.base_dir``. Any exception is
        reported as a warning and results in ``None`` (best effort).
        """
        try:
            # Try base64 first
            result = self.image_processor.process_base64_image(
                image_ref, base64_pattern)
            if result:
                return result[0]

            # Try URL
            result, error = self.image_processor.process_url_image(image_ref)
            if result:
                if error:
                    print(f"Warning: {error}")
                return result

            # Try local file
            result, error = self.image_processor.process_local_image(
                image_ref, self.base_dir)
            if error:
                print(f"Warning: {error}")
            return result

        except Exception as e:
            print(f"Warning: Could not process image {image_ref}: {e}")
            return None
102
+
103
+
104
class ImageProcessor:
    """Turns image references (data URIs, URLs, local paths) into text descriptions."""

    @staticmethod
    def process_base64_image(
        image_ref: str,
        base64_pattern: re.Pattern
    ) -> Optional[Tuple[str, Optional[str]]]:
        """Describe a base64 data-URI image.

        Returns ``None`` when ``image_ref`` does not match ``base64_pattern``;
        otherwise a ``(text, None)`` tuple. Decoding errors propagate.
        """
        base64_match = base64_pattern.match(image_ref)
        if not base64_match:
            return None

        image_format = base64_match.group(1)
        base64_data = base64_match.group(2)
        image_data = base64.b64decode(base64_data)

        # describe_image takes a path, so the bytes go through a temporary file.
        with tempfile.NamedTemporaryFile(suffix=f'.{image_format}', delete=False) as tmp_file:
            tmp_file.write(image_data)
            tmp_file_path = tmp_file.name

        try:
            description = describe_image(tmp_file_path)
            return f"Image: (base64 embedded {image_format} image)\n[{description}]", None
        finally:
            os.unlink(tmp_file_path)

    @staticmethod
    def process_url_image(image_ref: str) -> Tuple[str, Optional[str]]:
        """Download and describe an http(s) image.

        Returns ``("", None)`` when ``image_ref`` is not an http(s) URL,
        ``(text, None)`` on success, and ``(text, error_message)`` when the
        download or description fails.
        """
        if not image_ref.startswith(('http://', 'https://')):
            return "", None

        try:
            response = requests.get(image_ref, timeout=10)
            response.raise_for_status()

            # Determine file extension from content-type
            content_type = response.headers.get('content-type', '')
            ext = ImageProcessor._get_extension_from_content_type(
                content_type, image_ref)

            # Create temporary file
            with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_file:
                tmp_file.write(response.content)
                tmp_file_path = tmp_file.name

            try:
                description = describe_image(tmp_file_path)
                return f"Image: {image_ref}\n[{description}]", None
            finally:
                os.unlink(tmp_file_path)

        except Exception as e:
            # Best effort: a failing image must not abort loading the document.
            error_msg = f"Could not download image: {str(e)}"
            return f"Image: {image_ref}\n[{error_msg}]", error_msg

    @staticmethod
    def process_local_image(image_ref: str, base_dir: str) -> Tuple[str, Optional[str]]:
        """Describe an image on disk.

        Relative references are resolved against ``base_dir``. Returns
        ``(text, None)`` when the path exists, otherwise a placeholder text
        together with an error message naming the missing path.
        """
        if os.path.isabs(image_ref):
            local_image_path = image_ref
        else:
            local_image_path = os.path.join(base_dir, image_ref)

        if os.path.exists(local_image_path):
            description = describe_image(local_image_path)
            return f"Image: {image_ref}\n[{description}]", None

        error_msg = "Image file not found"
        return f"Image: {image_ref}\n[{error_msg}]", f"Local image not found: {local_image_path}"

    @staticmethod
    def _get_extension_from_content_type(content_type: str, url: str) -> str:
        """Pick a file extension from the content type, falling back to the URL.

        The extension is taken from the URL's path component only, so query
        strings and fragments never leak into the temp-file suffix. ``.png``
        is returned when nothing else can be determined.
        """
        from urllib.parse import urlparse

        # Media types are case-insensitive.
        normalized_type = content_type.lower()
        if 'image/jpeg' in normalized_type:
            return '.jpg'
        if 'image/png' in normalized_type:
            return '.png'
        if 'image/gif' in normalized_type:
            return '.gif'

        url_path = urlparse(url).path
        return os.path.splitext(os.path.basename(url_path))[1] or '.png'
@@ -0,0 +1,51 @@
1
+ import os
2
+ import fnmatch
3
+ from typing import List, Dict, Any
4
+
5
# Nested directory description: {'files': [names], 'dirs': {name: DirTree}}
DirTree = Dict[str, Any]


def _build_tree(root_path: str, patterns: List[str],
                _ancestors: frozenset = frozenset()) -> DirTree:
    """Create a nested dictionary describing the directory structure under ``root_path``.

    Only files whose name matches at least one of ``patterns`` (fnmatch
    syntax) are recorded, and sub-directories without any recorded content
    are omitted. ``_ancestors`` is internal: it carries the resolved paths of
    the directories currently being descended so that a symlink pointing back
    to one of them is not followed again.
    """
    tree: DirTree = {'files': [], 'dirs': {}}

    real_root = os.path.realpath(root_path)
    if real_root in _ancestors:
        # Symlink cycle: this directory is already being listed further up.
        return tree
    lineage = _ancestors | {real_root}

    try:
        for item in os.listdir(root_path):
            item_path = os.path.join(root_path, item)
            if os.path.isdir(item_path):
                subtree = _build_tree(item_path, patterns, lineage)
                if subtree['files'] or subtree['dirs']:
                    tree['dirs'][item] = subtree
            elif os.path.isfile(item_path):
                if any(fnmatch.fnmatch(item, pattern) for pattern in patterns):
                    tree['files'].append(item)
    except OSError as e:
        print(f"Warning: Could not access path {root_path}: {e}")
    return tree


def _write_tree_to_markdown(md_file, tree: DirTree, level: int):
    """Write ``tree`` to ``md_file`` in markdown format.

    Files are written as sorted ``- [] name`` items; each sub-directory gets
    a heading with ``level + 1`` ``#`` characters followed by its own
    content one level deeper.
    """
    indent = ' ' * level
    for filename in sorted(tree['files']):
        md_file.write(f"{indent}- [] {filename}\n")

    for dirname, subtree in sorted(tree['dirs'].items()):
        md_file.write(f"{' ' * (level -1)}{'#' * (level + 1)} {dirname}\n")
        _write_tree_to_markdown(md_file, subtree, level + 1)


def generate_global_markdown_listing(directories: List[str], file_patterns: List[str], output_file: str):
    """Write a hierarchical markdown listing of matching files to ``output_file``.

    Each existing directory with matching content gets its absolute path as
    top heading and relative names for its children. A directory that does
    not exist is reported on stdout and recorded in the output as a warning
    entry under the directory name as given.
    """
    with open(output_file, 'w', encoding='utf-8') as md_file:
        for directory in directories:
            abs_dir = os.path.abspath(directory)

            if not os.path.isdir(abs_dir):
                print(f"Warning: Global directory not found: {abs_dir}")
                md_file.write(f"# {directory}\n")
                md_file.write(f" - !! Warning: Global directory not found: {abs_dir}\n\n")
                continue

            tree = _build_tree(abs_dir, file_patterns)

            if tree['files'] or tree['dirs']:
                md_file.write(f"# {abs_dir}\n")
                _write_tree_to_markdown(md_file, tree, 1)
                md_file.write("\n")
@@ -1,101 +1,207 @@
1
+ import re
2
+ import json
3
+ import os
4
+ import json_repair
5
+ from markdown_it import MarkdownIt
1
6
  from ara_cli.prompt_handler import send_prompt, get_file_content
2
7
  from ara_cli.classifier import Classifier
3
8
  from ara_cli.directory_navigator import DirectoryNavigator
4
9
  from ara_cli.artefact_models.artefact_mapping import title_prefix_to_artefact_class
5
- import re
6
- import json
7
- import json_repair
8
- import os
9
10
 
10
- from markdown_it import MarkdownIt
11
11
 
12
+ def _find_extract_token(tokens):
13
+ """Find the first token that needs to be processed."""
14
+ for token in tokens:
15
+ if token.type == 'fence' and token.content.strip().startswith("# [x] extract"):
16
+ return token
17
+ return None
12
18
 
13
- def extract_code_blocks_md(markdown_text):
14
- md = MarkdownIt()
15
- tokens = md.parse(markdown_text)
16
- code_blocks = [token.content for token in tokens if token.type == 'fence']
17
- return code_blocks
18
19
 
20
+ def _extract_file_path(content_lines):
21
+ """Extract file path from content lines."""
22
+ if not content_lines:
23
+ return None
24
+ file_path_search = re.search(r"# filename: (.+)", content_lines[0])
25
+ return file_path_search.group(1).strip() if file_path_search else None
19
26
 
20
- def extract_responses(document_path, relative_to_ara_root=False):
21
- print(f"Debug: Starting extraction from {document_path}")
22
- block_extraction_counter = 0
23
27
 
24
- with open(document_path, 'r', encoding='utf-8') as file:
25
- content = file.read()
28
def _find_artefact_class(content_lines):
    """Look up an artefact class from the first word of the first two lines.

    Returns the class mapped in ``title_prefix_to_artefact_class`` for the
    first line whose leading word is a known prefix, or None if neither of
    the first two lines matches.
    """
    for candidate_line in content_lines[:2]:
        pieces = candidate_line.strip().split(' ')
        if len(pieces) == 0:
            continue
        prefix = pieces[0]
        if prefix not in title_prefix_to_artefact_class:
            continue
        return title_prefix_to_artefact_class[prefix]
    return None
26
38
 
27
- cwd = os.getcwd()
28
- if relative_to_ara_root:
29
- from ara_cli.directory_navigator import DirectoryNavigator
30
- navigator = DirectoryNavigator()
31
- navigator.navigate_to_target()
32
- os.chdir('..')
33
39
 
34
- code_blocks_found = extract_code_blocks_md(content)
35
- updated_content = content
40
def _process_file_extraction(file_path, code_content, force, write):
    """Announce the extracted filename and hand the code over to handle_existing_file."""
    message = f"Filename extracted: {file_path}"
    print(message)
    handle_existing_file(file_path, code_content, force, write)
36
44
 
37
- for block in code_blocks_found:
38
- block_lines = block.split('\n')
39
45
 
40
- if "# [x] extract" not in block_lines[0]:
41
- continue
42
- print("Block found and processed.")
46
def _process_artefact_extraction(artefact_class, content_lines, force, write):
    """Deserialize an artefact from ``content_lines`` and store it at its own path.

    The artefact is re-serialized and passed to ``handle_existing_file``
    together with ``force`` and ``write``. The working directory is changed
    through ``DirectoryNavigator.navigate_to_target()`` for the duration of
    the write and is always restored afterwards, even when writing fails.
    """
    artefact = artefact_class.deserialize('\n'.join(content_lines))
    serialized_artefact = artefact.serialize()

    original_directory = os.getcwd()
    try:
        directory_navigator = DirectoryNavigator()
        directory_navigator.navigate_to_target()

        artefact_path = artefact.file_path
        directory = os.path.dirname(artefact_path)
        # os.makedirs('') raises, so skip it for a bare filename.
        if directory:
            os.makedirs(directory, exist_ok=True)
        handle_existing_file(artefact_path, serialized_artefact, force, write)
    finally:
        # Never leave the process in the navigated directory.
        os.chdir(original_directory)
47
61
 
48
- if file_path_search:
49
- file_path = file_path_search.group(1).strip()
50
- print(f"Filename extracted: {file_path}")
51
62
 
52
- block_lines = block_lines[1:] # Remove first line again after removing filename line
53
- block = '\n'.join(block_lines)
63
+ def _perform_extraction_for_block(source_lines, block_start, block_end, force, write):
64
+ """Helper function to process a single, identified block."""
65
+ original_block_text = '\n'.join(source_lines[block_start:block_end + 1])
66
+ block_content_lines = source_lines[block_start + 1:block_end]
67
+ block_content = '\n'.join(block_content_lines)
54
68
 
55
- handle_existing_file(file_path, block)
56
- block_extraction_counter += 1
69
+ block_lines = block_content.split('\n')
70
+ content_lines_after_extract = block_lines[1:]
57
71
 
58
- # Update the markdown content
59
- updated_content = update_markdown(content, block, file_path)
72
+ file_path = _extract_file_path(content_lines_after_extract)
73
+
74
+ if file_path:
75
+ code_content = '\n'.join(content_lines_after_extract[1:])
76
+ _process_file_extraction(file_path, code_content, force, write)
77
+ else:
78
+ artefact_class = _find_artefact_class(content_lines_after_extract)
79
+ if artefact_class:
80
+ _process_artefact_extraction(artefact_class, content_lines_after_extract, force, write)
60
81
  else:
61
- # Extract artefact
62
- artefact_class = None
63
- for line in block_lines[:2]:
64
- words = line.strip().split(' ')
65
- if not words:
66
- continue
67
- first_word = words[0]
68
- if first_word not in title_prefix_to_artefact_class:
69
- continue
70
- artefact_class = title_prefix_to_artefact_class[first_word]
71
- if not artefact_class:
72
- print("No filename found, skipping this block.")
82
+ print("No filename or valid artefact found, skipping processing for this block.")
83
+ return None, None
84
+
85
+ modified_block_text = original_block_text.replace("# [x] extract", "# [v] extract", 1)
86
+ return original_block_text, modified_block_text
87
+
88
+
89
+ class FenceDetector:
90
+ """Helper class to detect and match fence blocks."""
91
+
92
+ def __init__(self, source_lines):
93
+ self.source_lines = source_lines
94
+
95
+ def is_extract_fence(self, line_num):
96
+ """Check if line is a fence with extract marker."""
97
+ line = self.source_lines[line_num]
98
+ stripped_line = line.strip()
99
+
100
+ is_fence = stripped_line.startswith('```') or stripped_line.startswith('~~~')
101
+ if not is_fence:
102
+ return False
103
+
104
+ if not (line_num + 1 < len(self.source_lines)):
105
+ return False
106
+
107
+ return self.source_lines[line_num + 1].strip().startswith("# [x] extract")
108
+
109
+ def find_matching_fence_end(self, start_line):
110
+ """Find the matching end fence for a given start fence."""
111
+ fence_line = self.source_lines[start_line]
112
+ indentation = len(fence_line) - len(fence_line.lstrip())
113
+ stripped_fence_line = fence_line.strip()
114
+ fence_char = stripped_fence_line[0]
115
+ fence_length = len(stripped_fence_line) - len(stripped_fence_line.lstrip(fence_char))
116
+
117
+ for i in range(start_line + 1, len(self.source_lines)):
118
+ scan_line = self.source_lines[i]
119
+ stripped_scan_line = scan_line.strip()
120
+
121
+ if not stripped_scan_line or stripped_scan_line[0] != fence_char:
122
+ continue
123
+
124
+ if not all(c == fence_char for c in stripped_scan_line):
73
125
  continue
74
- artefact = artefact_class.deserialize('\n'.join(block_lines))
75
- serialized_artefact = artefact.serialize()
76
126
 
77
- original_directory = os.getcwd()
78
- directory_navigator = DirectoryNavigator()
79
- directory_navigator.navigate_to_target()
127
+ candidate_indentation = len(scan_line) - len(scan_line.lstrip())
128
+ candidate_length = len(stripped_scan_line)
129
+
130
+ if candidate_length == fence_length and candidate_indentation == indentation:
131
+ return i
132
+
133
+ return -1
134
+
135
+
136
+ def _process_document_blocks(source_lines, force, write):
137
+ """Process all extract blocks in the document."""
138
+ fence_detector = FenceDetector(source_lines)
139
+ replacements = []
140
+ line_num = 0
141
+
142
+ while line_num < len(source_lines):
143
+ if not fence_detector.is_extract_fence(line_num):
144
+ line_num += 1
145
+ continue
80
146
 
81
- artefact_path = artefact.file_path
82
- directory = os.path.dirname(artefact_path)
83
- os.makedirs(directory, exist_ok=True)
84
- with open(artefact_path, 'w', encoding='utf-8') as file:
85
- file.write(serialized_artefact)
147
+ block_start_line = line_num
148
+ block_end_line = fence_detector.find_matching_fence_end(block_start_line)
149
+
150
+ if block_end_line != -1:
151
+ print(f"Block found and processed starting on line {block_start_line + 1}.")
152
+ original, modified = _perform_extraction_for_block(
153
+ source_lines, block_start_line, block_end_line, force, write
154
+ )
155
+ if original and modified:
156
+ replacements.append((original, modified))
157
+ line_num = block_end_line + 1
158
+ else:
159
+ line_num += 1
160
+
161
+ return replacements
86
162
 
87
- os.chdir(original_directory)
88
163
 
89
- # TODO: make update_markdown work block by block instead of updating the whole document at once
90
- block_extraction_counter += 1
91
- updated_content = update_markdown(content, block, None)
164
+ def _apply_replacements(content, replacements):
165
+ """Apply all replacements to the content."""
166
+ updated_content = content
167
+ for original, modified in replacements:
168
+ updated_content = updated_content.replace(original, modified, 1)
169
+ return updated_content
170
+
171
+
172
+ def _setup_working_directory(relative_to_ara_root):
173
+ """Setup working directory and return original cwd."""
174
+ cwd = os.getcwd()
175
+ if relative_to_ara_root:
176
+ navigator = DirectoryNavigator()
177
+ navigator.navigate_to_target()
178
+ os.chdir('..')
179
+ return cwd
180
+
181
+
182
+ def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
183
+ print(f"Starting extraction from '{document_path}'")
184
+
185
+ try:
186
+ with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
187
+ content = file.read()
188
+ except FileNotFoundError:
189
+ print(f"Error: File not found at '{document_path}'. Skipping extraction.")
190
+ return
191
+
192
+ cwd = _setup_working_directory(relative_to_ara_root)
193
+
194
+ source_lines = content.split('\n')
195
+ replacements = _process_document_blocks(source_lines, force, write)
196
+
197
+ updated_content = _apply_replacements(content, replacements)
92
198
 
93
199
  os.chdir(cwd)
94
- # Save the updated markdown content
95
200
  with open(document_path, 'w', encoding='utf-8') as file:
96
201
  file.write(updated_content)
97
202
 
98
- print(f"End of extraction. Found {block_extraction_counter} blocks.")
203
+ if replacements:
204
+ print(f"End of extraction. Found and processed {len(replacements)} blocks in '{os.path.basename(document_path)}'.")
99
205
 
100
206
 
101
207
  def modify_and_save_file(response, file_path):
@@ -116,7 +222,7 @@ def modify_and_save_file(response, file_path):
116
222
  print("Skipping block.")
117
223
  return
118
224
 
119
- with open(file_path, 'w', encoding='utf-8') as file:
225
+ with open(file_path, 'w', encoding='utf-8', errors='replace') as file:
120
226
  file.write(response_data['content'])
121
227
  print(f"File {file_path} updated successfully.")
122
228
  except json.JSONDecodeError as ex:
@@ -127,14 +233,23 @@ def prompt_user_decision(prompt):
127
233
  return input(prompt)
128
234
 
129
235
 
130
- def create_file_if_not_exist(filename, content):
236
def determine_should_create(skip_query=False):
    """Decide whether a missing file should be created.

    Returns True immediately when ``skip_query`` is set; otherwise the user
    is asked and only "y" or "yes" (case-insensitive) counts as consent.
    """
    if skip_query:
        return True
    answer = prompt_user_decision("File does not exist. Create? (y/n): ")
    return answer.lower() in ['y', 'yes']
243
+
244
+
245
+ def create_file_if_not_exist(filename, content, skip_query=False):
131
246
  try:
132
247
  if not os.path.exists(filename):
133
- user_decision = prompt_user_decision("File does not exist. Create? (y/n): ")
134
-
135
- if user_decision.lower() in ['y', 'yes']:
248
+ if determine_should_create(skip_query):
136
249
  # Ensure the directory exists
137
- os.makedirs(os.path.dirname(filename), exist_ok=True)
250
+ dir_name = os.path.dirname(filename)
251
+ if dir_name:
252
+ os.makedirs(dir_name, exist_ok=True)
138
253
 
139
254
  with open(filename, 'w', encoding='utf-8') as file:
140
255
  file.write(content)
@@ -170,7 +285,7 @@ def create_prompt_for_file_modification(content_str, filename):
170
285
  {{
171
286
  "filename": "path/filename.filextension",
172
287
  "content": "full content of the modified file in valid json format"
173
- }}
288
+ }}
174
289
  """
175
290
 
176
291
  # print(f"Debug: modification prompt created: {prompt_text}")
@@ -178,34 +293,46 @@ def create_prompt_for_file_modification(content_str, filename):
178
293
  return prompt_text
179
294
 
180
295
 
181
- def handle_existing_file(filename, block_content):
296
+ def handle_existing_file(filename, block_content, skip_query=False, write=False):
182
297
  if not os.path.isfile(filename):
183
298
  print(f"File {filename} does not exist, attempting to create")
184
- create_file_if_not_exist(filename, block_content)
299
+ # Ensure directory exists before writing
300
+ directory = os.path.dirname(filename)
301
+ if directory:
302
+ os.makedirs(directory, exist_ok=True)
303
+ create_file_if_not_exist(filename, block_content, skip_query)
304
+
305
+ elif write:
306
+ print(f"File {filename} exists. Overwriting without LLM merge as requested.")
307
+ try:
308
+ directory = os.path.dirname(filename)
309
+ if directory:
310
+ os.makedirs(directory, exist_ok=True)
311
+ with open(filename, 'w', encoding='utf-8', errors='replace') as file:
312
+ file.write(block_content)
313
+ print(f"File {filename} overwritten successfully.")
314
+ except OSError as e:
315
+ print(f"Error: {e}")
316
+ print(f"Failed to overwrite file {filename} due to an OS error")
185
317
  else:
186
318
  print(f"File {filename} exists, creating modification prompt")
187
319
  prompt_text = create_prompt_for_file_modification(block_content, filename)
320
+ if prompt_text is None:
321
+ return
322
+
188
323
  messages = [{"role": "user", "content": prompt_text}]
189
324
  response = ""
190
325
 
191
- for chunk in send_prompt(messages):
326
+ for chunk in send_prompt(messages, purpose='extraction'):
192
327
  content = chunk.choices[0].delta.content
193
328
  if content:
194
329
  response += content
195
330
  modify_and_save_file(response, filename)
196
331
 
197
332
 
198
- def extract_and_save_prompt_results(classifier, param):
333
+ def extract_and_save_prompt_results(classifier, param, write=False):
199
334
  sub_directory = Classifier.get_sub_directory(classifier)
200
335
  prompt_log_file = f"ara/{sub_directory}/{param}.data/{classifier}.prompt_log.md"
201
336
  print(f"Extract marked sections from: {prompt_log_file}")
202
337
 
203
- extract_responses(prompt_log_file)
204
-
205
-
206
- def update_markdown(original_content, block_content, filename):
207
- """
208
- Update the markdown content by changing the extract block from "# [x] extract" to "# [v] extract"
209
- """
210
- updated_content = original_content.replace("# [x] extract", "# [v] extract")
211
- return updated_content
338
+ extract_responses(prompt_log_file, write=write)