ara-cli 0.1.10.1__py3-none-any.whl → 0.1.10.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,90 +9,199 @@ from ara_cli.directory_navigator import DirectoryNavigator
  from ara_cli.artefact_models.artefact_mapping import title_prefix_to_artefact_class


- def extract_code_blocks_md(markdown_text):
-     md = MarkdownIt()
-     tokens = md.parse(markdown_text)
-     code_blocks = [token.content for token in tokens if token.type == 'fence']
-     return code_blocks
+ def _find_extract_token(tokens):
+     """Find the first token that needs to be processed."""
+     for token in tokens:
+         if token.type == 'fence' and token.content.strip().startswith("# [x] extract"):
+             return token
+     return None
+
+
+ def _extract_file_path(content_lines):
+     """Extract file path from content lines."""
+     if not content_lines:
+         return None
+     file_path_search = re.search(r"# filename: (.+)", content_lines[0])
+     return file_path_search.group(1).strip() if file_path_search else None
+
+
+ def _find_artefact_class(content_lines):
+     """Find the appropriate artefact class from content lines."""
+     for line in content_lines[:2]:
+         words = line.strip().split(' ')
+         if not words:
+             continue
+         first_word = words[0]
+         if first_word in title_prefix_to_artefact_class:
+             return title_prefix_to_artefact_class[first_word]
+     return None
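
The two helpers above boil down to a regex match and a dictionary lookup on the first word of a line. A minimal sketch of both, outside the package: the regex is the one used by `_extract_file_path`, while the prefix mapping below is invented for illustration (the real one is imported from `ara_cli.artefact_models.artefact_mapping` and its keys are not shown in this diff).

```python
import re

# The filename line that _extract_file_path looks for on the first content line.
line = "# filename: src/generated/example.py"
match = re.search(r"# filename: (.+)", line)
assert match and match.group(1).strip() == "src/generated/example.py"

# Hypothetical stand-in for title_prefix_to_artefact_class, used only for this sketch.
title_prefix_to_artefact_class = {"Feature:": object}
first_word = "Feature: extracted artefact".strip().split(' ')[0]
assert title_prefix_to_artefact_class.get(first_word) is object
```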


- def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
-     print(f"Starting extraction from '{document_path}'")
-     block_extraction_counter = 0
+ def _process_file_extraction(file_path, code_content, force, write):
+     """Process file extraction logic."""
+     print(f"Filename extracted: {file_path}")
+     handle_existing_file(file_path, code_content, force, write)

-     with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
-         content = file.read()
-
-     cwd = os.getcwd()
-     if relative_to_ara_root:
-         navigator = DirectoryNavigator()
-         navigator.navigate_to_target()
-         os.chdir('..')

-     code_blocks_found = extract_code_blocks_md(content)
-     updated_content = content
+ def _process_artefact_extraction(artefact_class, content_lines, force, write):
+     """Process artefact extraction logic."""
+     artefact = artefact_class.deserialize('\n'.join(content_lines))
+     serialized_artefact = artefact.serialize()

-     for block in code_blocks_found:
-         block_lines = block.split('\n')
+     original_directory = os.getcwd()
+     directory_navigator = DirectoryNavigator()
+     directory_navigator.navigate_to_target()

-         if "# [x] extract" not in block_lines[0]:
-             continue
-         print("Block found and processed.")
+     artefact_path = artefact.file_path
+     directory = os.path.dirname(artefact_path)
+     os.makedirs(directory, exist_ok=True)
+     handle_existing_file(artefact_path, serialized_artefact, force, write)

-         block_lines = block_lines[1:]
+     os.chdir(original_directory)

-         file_path_search = re.search(r"# filename: (.+)", block_lines[0])

-         if file_path_search:
-             file_path = file_path_search.group(1).strip()
-             print(f"Filename extracted: {file_path}")
+ def _perform_extraction_for_block(source_lines, block_start, block_end, force, write):
+     """Helper function to process a single, identified block."""
+     original_block_text = '\n'.join(source_lines[block_start:block_end + 1])
+     block_content_lines = source_lines[block_start + 1:block_end]
+     block_content = '\n'.join(block_content_lines)

-             block_lines = block_lines[1:] # Remove first line again after removing filename line
-             block = '\n'.join(block_lines)
+     block_lines = block_content.split('\n')
+     content_lines_after_extract = block_lines[1:]

-             handle_existing_file(file_path, block, force, write)
-             block_extraction_counter += 1
+     file_path = _extract_file_path(content_lines_after_extract)

-             # Update the markdown content
-             updated_content = update_markdown(content, block, file_path)
+     if file_path:
+         code_content = '\n'.join(content_lines_after_extract[1:])
+         _process_file_extraction(file_path, code_content, force, write)
+     else:
+         artefact_class = _find_artefact_class(content_lines_after_extract)
+         if artefact_class:
+             _process_artefact_extraction(artefact_class, content_lines_after_extract, force, write)
          else:
-             # Extract artefact
-             artefact_class = None
-             for line in block_lines[:2]:
-                 words = line.strip().split(' ')
-                 if not words:
-                     continue
-                 first_word = words[0]
-                 if first_word not in title_prefix_to_artefact_class:
-                     continue
-                 artefact_class = title_prefix_to_artefact_class[first_word]
-             if not artefact_class:
-                 print("No filename found, skipping this block.")
+             print("No filename or valid artefact found, skipping processing for this block.")
+             return None, None
+
+     modified_block_text = original_block_text.replace("# [x] extract", "# [v] extract", 1)
+     return original_block_text, modified_block_text
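
For orientation, this is the block layout `_perform_extraction_for_block` expects between the fences, and the marker flip it performs on a processed block; the sample lines are invented.

```python
# Invented example of a marked block's interior: the first line carries the
# extract marker, an optional "# filename: ..." line follows, and the rest is
# the payload that ends up in handle_existing_file.
block_content_lines = [
    "# [x] extract",
    "# filename: src/hello.py",
    "print('hello')",
]
original_block_text = "```python\n" + "\n".join(block_content_lines) + "\n```"

# A processed block is marked done by flipping only its first marker,
# mirroring the final step of _perform_extraction_for_block.
modified_block_text = original_block_text.replace("# [x] extract", "# [v] extract", 1)
assert "# [v] extract" in modified_block_text
```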
+
+
+ class FenceDetector:
+     """Helper class to detect and match fence blocks."""
+
+     def __init__(self, source_lines):
+         self.source_lines = source_lines
+
+     def is_extract_fence(self, line_num):
+         """Check if line is a fence with extract marker."""
+         line = self.source_lines[line_num]
+         stripped_line = line.strip()
+
+         is_fence = stripped_line.startswith('```') or stripped_line.startswith('~~~')
+         if not is_fence:
+             return False
+
+         if not (line_num + 1 < len(self.source_lines)):
+             return False
+
+         return self.source_lines[line_num + 1].strip().startswith("# [x] extract")
+
+     def find_matching_fence_end(self, start_line):
+         """Find the matching end fence for a given start fence."""
+         fence_line = self.source_lines[start_line]
+         indentation = len(fence_line) - len(fence_line.lstrip())
+         stripped_fence_line = fence_line.strip()
+         fence_char = stripped_fence_line[0]
+         fence_length = len(stripped_fence_line) - len(stripped_fence_line.lstrip(fence_char))
+
+         for i in range(start_line + 1, len(self.source_lines)):
+             scan_line = self.source_lines[i]
+             stripped_scan_line = scan_line.strip()
+
+             if not stripped_scan_line or stripped_scan_line[0] != fence_char:
+                 continue
+
+             if not all(c == fence_char for c in stripped_scan_line):
                  continue
-             artefact = artefact_class.deserialize('\n'.join(block_lines))
-             serialized_artefact = artefact.serialize()

-             original_directory = os.getcwd()
-             directory_navigator = DirectoryNavigator()
-             directory_navigator.navigate_to_target()
+             candidate_indentation = len(scan_line) - len(scan_line.lstrip())
+             candidate_length = len(stripped_scan_line)
+
+             if candidate_length == fence_length and candidate_indentation == indentation:
+                 return i
+
+         return -1
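
A short sketch of how the detector behaves on a small, made-up document, assuming the FenceDetector class from the hunk above is in scope:

```python
# Made-up source lines; FenceDetector itself comes from the patched module.
source_lines = [
    "Some prose before the block.",
    "```python",
    "# [x] extract",
    "# filename: src/example.py",
    "print('hello')",
    "```",
    "Some prose after the block.",
]

detector = FenceDetector(source_lines)
assert detector.is_extract_fence(1)              # fence line followed by the extract marker
assert not detector.is_extract_fence(0)          # plain prose is ignored
assert detector.find_matching_fence_end(1) == 5  # closing fence: same char, length, indentation
```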
+
+
+ def _process_document_blocks(source_lines, force, write):
+     """Process all extract blocks in the document."""
+     fence_detector = FenceDetector(source_lines)
+     replacements = []
+     line_num = 0
+
+     while line_num < len(source_lines):
+         if not fence_detector.is_extract_fence(line_num):
+             line_num += 1
+             continue

-             artefact_path = artefact.file_path
-             directory = os.path.dirname(artefact_path)
-             os.makedirs(directory, exist_ok=True)
-             handle_existing_file(artefact_path, serialized_artefact, force, write)
+         block_start_line = line_num
+         block_end_line = fence_detector.find_matching_fence_end(block_start_line)
+
+         if block_end_line != -1:
+             print(f"Block found and processed starting on line {block_start_line + 1}.")
+             original, modified = _perform_extraction_for_block(
+                 source_lines, block_start_line, block_end_line, force, write
+             )
+             if original and modified:
+                 replacements.append((original, modified))
+             line_num = block_end_line + 1
+         else:
+             line_num += 1
+
+     return replacements

-             os.chdir(original_directory)

-             # TODO: make update_markdown work block by block instead of updating the whole document at once
-             block_extraction_counter += 1
-             updated_content = update_markdown(content, block, None)
+ def _apply_replacements(content, replacements):
+     """Apply all replacements to the content."""
+     updated_content = content
+     for original, modified in replacements:
+         updated_content = updated_content.replace(original, modified, 1)
+     return updated_content
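
The count argument of str.replace is the point of `_apply_replacements`: unlike the removed `update_markdown` in the last hunk below, which flipped every "# [x] extract" in the document at once, the new code only rewrites the blocks that were actually processed.

```python
# Contrast of the old global flip with the new single-occurrence flip.
content = "# [x] extract A\n# [x] extract B\n"
assert content.replace("# [x] extract", "# [v] extract").count("# [v]") == 2     # old behaviour
assert content.replace("# [x] extract", "# [v] extract", 1).count("# [v]") == 1  # new behaviour
```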
+
+
+ def _setup_working_directory(relative_to_ara_root):
+     """Setup working directory and return original cwd."""
+     cwd = os.getcwd()
+     if relative_to_ara_root:
+         navigator = DirectoryNavigator()
+         navigator.navigate_to_target()
+         os.chdir('..')
+     return cwd
+
+
+ def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
+     print(f"Starting extraction from '{document_path}'")
+
+     try:
+         with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
+             content = file.read()
+     except FileNotFoundError:
+         print(f"Error: File not found at '{document_path}'. Skipping extraction.")
+         return
+
+     cwd = _setup_working_directory(relative_to_ara_root)
+
+     source_lines = content.split('\n')
+     replacements = _process_document_blocks(source_lines, force, write)
+
+     updated_content = _apply_replacements(content, replacements)

      os.chdir(cwd)
-     # Save the updated markdown content
      with open(document_path, 'w', encoding='utf-8') as file:
          file.write(updated_content)

-     print(f"End of extraction. Found {block_extraction_counter} blocks.")
+     if replacements:
+         print(f"End of extraction. Found and processed {len(replacements)} blocks in '{os.path.basename(document_path)}'.")


  def modify_and_save_file(response, file_path):
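
Taken together, the rewritten `extract_responses` reads the document, collects per-block replacements, and writes the flipped markers back. A hedged end-to-end sketch (file names and content invented; it assumes the patched module's functions are importable and that force=True makes determine_should_create skip the interactive query):

```python
import os
import tempfile

doc = (
    "```python\n"
    "# [x] extract\n"
    "# filename: generated/hello.py\n"
    "print('hello from the prompt log')\n"
    "```\n"
)

previous_cwd = os.getcwd()
with tempfile.TemporaryDirectory() as tmp:
    os.chdir(tmp)
    log_path = os.path.join(tmp, "demo.prompt_log.md")
    with open(log_path, "w", encoding="utf-8") as handle:
        handle.write(doc)

    # Expected to create generated/hello.py and flip the marker in the log.
    extract_responses(log_path, force=True, write=True)

    with open(log_path, encoding="utf-8") as handle:
        assert "# [v] extract" in handle.read()

    os.chdir(previous_cwd)
```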
@@ -138,7 +247,9 @@ def create_file_if_not_exist(filename, content, skip_query=False):
      if not os.path.exists(filename):
          if determine_should_create(skip_query):
              # Ensure the directory exists
-             os.makedirs(os.path.dirname(filename), exist_ok=True)
+             dir_name = os.path.dirname(filename)
+             if dir_name:
+                 os.makedirs(dir_name, exist_ok=True)

              with open(filename, 'w', encoding='utf-8') as file:
                  file.write(content)
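
The dir_name guard covers the case where the target is a bare filename: os.path.dirname then returns an empty string, and os.makedirs('') raises even with exist_ok=True.

```python
import os

assert os.path.dirname("hello.py") == ""
try:
    os.makedirs("", exist_ok=True)
except FileNotFoundError:
    print("empty path rejected, hence the `if dir_name:` guard")
```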
@@ -185,7 +296,12 @@ def create_prompt_for_file_modification(content_str, filename):
  def handle_existing_file(filename, block_content, skip_query=False, write=False):
      if not os.path.isfile(filename):
          print(f"File {filename} does not exist, attempting to create")
+         # Ensure directory exists before writing
+         directory = os.path.dirname(filename)
+         if directory:
+             os.makedirs(directory, exist_ok=True)
          create_file_if_not_exist(filename, block_content, skip_query)
+
      elif write:
          print(f"File {filename} exists. Overwriting without LLM merge as requested.")
          try:
@@ -219,12 +335,4 @@ def extract_and_save_prompt_results(classifier, param, write=False):
      prompt_log_file = f"ara/{sub_directory}/{param}.data/{classifier}.prompt_log.md"
      print(f"Extract marked sections from: {prompt_log_file}")

-     extract_responses(prompt_log_file, write=write)
-
-
- def update_markdown(original_content, block_content, filename):
-     """
-     Update the markdown content by changing the extract block from "# [x] extract" to "# [v] extract"
-     """
-     updated_content = original_content.replace("# [x] extract", "# [v] extract")
-     return updated_content
+     extract_responses(prompt_log_file, write=write)