ara-cli 0.1.10.1-py3-none-any.whl → 0.1.10.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ara-cli might be problematic.
- ara_cli/__main__.py +120 -1
- ara_cli/artefact_autofix.py +44 -6
- ara_cli/artefact_models/artefact_model.py +18 -6
- ara_cli/artefact_models/epic_artefact_model.py +11 -2
- ara_cli/artefact_models/feature_artefact_model.py +31 -1
- ara_cli/artefact_models/userstory_artefact_model.py +13 -1
- ara_cli/chat.py +0 -19
- ara_cli/file_loaders/text_file_loader.py +2 -2
- ara_cli/prompt_extractor.py +97 -79
- ara_cli/prompt_handler.py +160 -59
- ara_cli/tag_extractor.py +26 -23
- ara_cli/template_loader.py +1 -1
- ara_cli/version.py +1 -1
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.10.4.dist-info}/METADATA +1 -1
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.10.4.dist-info}/RECORD +19 -19
- tests/test_prompt_handler.py +12 -4
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.10.4.dist-info}/WHEEL +0 -0
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.10.4.dist-info}/entry_points.txt +0 -0
- {ara_cli-0.1.10.1.dist-info → ara_cli-0.1.10.4.dist-info}/top_level.txt +0 -0
ara_cli/__main__.py
CHANGED
@@ -1,5 +1,6 @@
 import typer
 import sys
+import os
 from typing import Optional
 from os import getenv
 from ara_cli.error_handler import AraError
@@ -44,6 +45,113 @@ def configure_debug_mode(debug: bool, env_debug_mode: bool):
         error_handler.debug_mode = True


+def find_ara_directory_root():
+    """Find the root ara directory by traversing up the directory tree."""
+    current_dir = os.getcwd()
+
+    # Check if we're already inside an ara directory structure
+    path_parts = current_dir.split(os.sep)
+
+    # Look for 'ara' in the path parts
+    if 'ara' in path_parts:
+        ara_index = path_parts.index('ara')
+        # Reconstruct path up to and including 'ara'
+        ara_root_parts = path_parts[:ara_index + 1]
+        potential_ara_root = os.sep.join(ara_root_parts)
+        if os.path.exists(potential_ara_root) and os.path.isdir(potential_ara_root):
+            return potential_ara_root
+
+    # If not inside ara directory, check current directory and parents
+    check_dir = current_dir
+    while check_dir != os.path.dirname(check_dir):  # Stop at filesystem root
+        ara_path = os.path.join(check_dir, 'ara')
+        if os.path.exists(ara_path) and os.path.isdir(ara_path):
+            return ara_path
+        check_dir = os.path.dirname(check_dir)
+
+    return None
+
+
+def check_ara_directory_exists():
+    """Check if ara directory exists or if we're inside ara directory tree."""
+    return find_ara_directory_root() is not None
+
+
+def prompt_create_ara_directory():
+    """Prompt user to create ara directory and create it if confirmed."""
+    # Print the prompt message
+    print("No 'ara' directory found. Create one in the current directory? (Y/n)", end=" ", flush=True)
+
+    # Read user input
+    try:
+        response = input().strip()
+    except (EOFError, KeyboardInterrupt):
+        typer.echo("\nOperation cancelled.")
+        raise typer.Exit(1)
+
+    if response.lower() in ('y', 'yes', ''):
+        current_dir = os.getcwd()
+        ara_path = os.path.join(current_dir, 'ara')
+
+        # Create ara directory structure
+        subdirectories = [
+            'businessgoals',
+            'capabilities',
+            'epics',
+            'examples',
+            'features',
+            'keyfeatures',
+            'tasks',
+            'userstories',
+            'vision'
+        ]
+
+        try:
+            # Create main ara directory
+            os.makedirs(ara_path, exist_ok=True)
+
+            # Create subdirectories for artefact types
+            for subdir in subdirectories:
+                os.makedirs(os.path.join(ara_path, subdir), exist_ok=True)
+
+            # Create .araconfig directory
+            araconfig_path = os.path.join(ara_path, '.araconfig')
+            os.makedirs(araconfig_path, exist_ok=True)
+
+            # Create default ara_config.json using ConfigManager
+            from ara_cli.ara_config import ConfigManager, ARAconfig
+            config_file_path = os.path.join(araconfig_path, 'ara_config.json')
+
+            # Reset ConfigManager to ensure clean state
+            ConfigManager.reset()
+
+            # Create default config and save it
+            default_config = ARAconfig()
+            from ara_cli.ara_config import save_data
+            save_data(config_file_path, default_config)
+
+            typer.echo(f"Created ara directory structure at {ara_path}")
+            typer.echo(f"Created default configuration at {config_file_path}")
+            return True
+
+        except OSError as e:
+            typer.echo(f"Error creating ara directory: {e}", err=True)
+            raise typer.Exit(1)
+        except Exception as e:
+            typer.echo(f"Error creating configuration file: {e}", err=True)
+            raise typer.Exit(1)
+    else:
+        typer.echo("Ara directory creation cancelled.")
+        raise typer.Exit(0)
+
+
+def requires_ara_directory():
+    """Check if ara directory exists and prompt to create if not."""
+    if not check_ara_directory_exists():
+        return prompt_create_ara_directory()
+    return True
+
+
 def create_app():
     app = typer.Typer(
         help="""The ara cli terminal tool is a management tool for classified ara artefacts.
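Reviewer's note: a self-contained sketch of the parent-walk fallback in `find_ara_directory_root` above, rebuilt against a temporary directory tree (paths and layout are invented for the example):

```python
import os
import tempfile

# Recreate the fallback case: cwd is not inside an 'ara' tree, so the
# helper walks parent directories until '<dir>/ara' exists.
with tempfile.TemporaryDirectory() as root:
    os.makedirs(os.path.join(root, "project", "ara"))
    start = os.path.join(root, "project", "src")
    os.makedirs(start)

    check_dir = start
    found = None
    while check_dir != os.path.dirname(check_dir):  # stop at filesystem root
        candidate = os.path.join(check_dir, "ara")
        if os.path.isdir(candidate):
            found = candidate
            break
        check_dir = os.path.dirname(check_dir)

    print(found)  # .../project/ara
```

The first strategy (splitting cwd on `os.sep` and looking for an `ara` component) covers the case where the command runs from inside the ara tree itself, so the walk above only runs when that lookup misses.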
@@ -124,6 +232,17 @@ ara chat examples:
         if ctx.invoked_subcommand is None:
             ctx.get_help()
             ctx.exit()
+
+        # Check for ara directory before executing any command
+        # Skip check for commands that don't require ara directory
+        commands_requiring_ara = {
+            'create', 'delete', 'rename', 'list', 'list-tags', 'prompt',
+            'read', 'reconnect', 'read-status', 'read-user', 'set-status',
+            'set-user', 'classifier-directory', 'scan', 'autofix'
+        }
+
+        if ctx.invoked_subcommand in commands_requiring_ara:
+            requires_ara_directory()

     # Register all commands
     register_create_cli(app)
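The gating itself is a plain Typer callback pattern; a minimal standalone sketch (app and command names are placeholders, not the package's real CLI):

```python
import typer

app = typer.Typer()
GATED = {"create", "delete"}  # stand-in for commands_requiring_ara

@app.callback(invoke_without_command=True)
def main(ctx: typer.Context):
    if ctx.invoked_subcommand is None:
        ctx.get_help()
        ctx.exit()
    if ctx.invoked_subcommand in GATED:
        print("precondition runs here")  # stand-in for requires_ara_directory()

@app.command()
def create():
    print("creating")

@app.command()
def version():
    print("0.0.0")  # not in GATED, so no precondition check

if __name__ == "__main__":
    app()
```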
@@ -162,4 +281,4 @@ def cli():


 if __name__ == "__main__":
-    cli()
+    cli()
ara_cli/artefact_autofix.py
CHANGED
@@ -487,7 +487,7 @@ def _convert_to_scenario_outline(scenario_lines: list, placeholders: set, indent
 def _create_examples_table(placeholders: set, base_indentation: str) -> list:
     """Create the Examples table for the scenario outline."""
     examples_indentation = base_indentation + " "
-    table_indentation = examples_indentation + "
+    table_indentation = examples_indentation + " "

     sorted_placeholders = sorted(placeholders)
     header = "| " + " | ".join(sorted_placeholders) + " |"
@@ -576,6 +576,42 @@ def fix_rule(
     return artefact.serialize()


+def fix_misplaced_content(file_path: str, artefact_text: str, **kwargs) -> str:
+    """
+    Deterministically fixes content like 'Rule:' or 'Estimate:' misplaced in the description.
+    """
+    lines = artefact_text.splitlines()
+
+    desc_start_idx = -1
+    for i, line in enumerate(lines):
+        if line.strip().startswith("Description:"):
+            desc_start_idx = i
+            break
+
+    if desc_start_idx == -1:
+        return artefact_text  # No description, nothing to fix.
+
+    pre_desc_lines = lines[:desc_start_idx]
+    desc_line = lines[desc_start_idx]
+    post_desc_lines = lines[desc_start_idx+1:]
+
+    misplaced_content = []
+    new_post_desc_lines = []
+
+    for line in post_desc_lines:
+        if line.strip().startswith("Rule:") or line.strip().startswith("Estimate:"):
+            misplaced_content.append(line)
+        else:
+            new_post_desc_lines.append(line)
+
+    if not misplaced_content:
+        return artefact_text
+
+    # Rebuild the file content
+    final_lines = pre_desc_lines + misplaced_content + [""] + [desc_line] + new_post_desc_lines
+    return "\n".join(final_lines)
+
+
 def should_skip_issue(deterministic_issue, deterministic, non_deterministic, file_path) -> bool:
     if not non_deterministic and not deterministic_issue:
         print(f"Skipping non-deterministic fix for {file_path} as per request.")
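To make the behavior of `fix_misplaced_content` concrete, here is the transformation on an invented userstory body (strings only, no package imports):

```python
# Input: a 'Rule:' line stranded after the 'Description:' section.
before = "\n".join([
    "Userstory: pay invoices",
    "Description: As a user",
    "Rule: payments are idempotent",
    "I want to pay online",
])

# fix_misplaced_content(file_path, before) hoists the stranded lines in
# front of the description and inserts a separating blank line, yielding:
after = "\n".join([
    "Userstory: pay invoices",
    "Rule: payments are idempotent",
    "",
    "Description: As a user",
    "I want to pay online",
])
```

Note the fix is purely line-based: anything after `Description:` that starts with `Rule:` or `Estimate:` is moved regardless of indentation, matching the `line.strip().startswith(...)` checks above.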
@@ -622,7 +658,7 @@ def apply_non_deterministic_fix(
         corrected_artefact = run_agent(prompt, artefact_class)
         corrected_text = corrected_artefact.serialize()
     except Exception as e:
-        print(f"
+        print(f" ❌ LLM agent failed to fix artefact at {file_path}: {e}")
         return None
     return corrected_text

@@ -651,7 +687,7 @@ def attempt_autofix_loop(
         print(
             f"Attempting to fix {file_path} (Attempt {attempt + 1}/{max_attempts})..."
         )
-        print(f"
+        print(f" Reason: {current_reason}")

         artefact_text = read_artefact(file_path)
         if artefact_text is None:
@@ -683,13 +719,13 @@ def attempt_autofix_loop(

         if corrected_text is None or corrected_text.strip() == artefact_text.strip():
             print(
-                "
+                " Fixing attempt did not alter the file. Stopping to prevent infinite loop."
             )
             return False

         write_corrected_artefact(file_path, corrected_text)

-        print("
+        print(" File modified. Re-classifying artefact information for next check...")
         classified_artefact_info = populate_classified_artefact_info(classified_artefact_info, force=True)

     print(f"❌ Failed to fix {file_path} after {max_attempts} attempts.")
@@ -713,6 +749,8 @@ def apply_autofix(
         "Invalid Contribution Reference": fix_contribution,
         "Rule Mismatch": fix_rule,
         "Scenario Contains Placeholders": fix_scenario_placeholder_mismatch,
+        "Found 'Rule:' inside description": fix_misplaced_content,
+        "Found 'Estimate:' inside description": fix_misplaced_content,
     }

     artefact_type, artefact_class = determine_artefact_type_and_class(classifier)
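These two keys tie the deterministic fixer to the new validator messages in the artefact models (see the `model_validator` hunks below): the reason string acts as the dispatch key. A schematic sketch of the lookup, with hypothetical arguments:

```python
def fix_misplaced_content(file_path: str, artefact_text: str, **kwargs) -> str:
    ...  # deterministic fixer from the hunk above

# The scan reports a reason string; apply_autofix resolves it to a fixer.
# Reasons without an entry here presumably fall through to the
# non-deterministic (LLM) path.
issue_to_fixer = {
    "Found 'Rule:' inside description": fix_misplaced_content,
    "Found 'Estimate:' inside description": fix_misplaced_content,
}

reason = "Found 'Rule:' inside description"
fixer = issue_to_fixer.get(reason)
if fixer is not None:
    corrected = fixer("ara/epics/example.epic", "Epic: ...")  # hypothetical arguments
```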
@@ -731,4 +769,4 @@ def apply_autofix(
         deterministic=deterministic,
         non_deterministic=non_deterministic,
         classified_artefact_info=classified_artefact_info,
-    )
+    )
ara_cli/artefact_models/artefact_model.py
CHANGED
@@ -410,14 +410,26 @@ class Artefact(BaseModel, ABC):
         return contribution, lines

     @classmethod
-    def _deserialize_description(cls, lines) -> (Optional[str], List[str]):
+    def _deserialize_description(cls, lines: List[str]) -> (Optional[str], List[str]):
         description_start = cls._description_starts_with()
+        start_index = -1
         for i, line in enumerate(lines):
             if line.startswith(description_start):
-
-
-
-
+                start_index = i
+                break
+
+        if start_index == -1:
+            return None, lines
+
+        first_line_content = lines[start_index][len(description_start):].strip()
+
+        description_lines = ([first_line_content] if first_line_content else []) + lines[start_index + 1:]
+
+        description = "\n".join(description_lines)
+
+        remaining_lines = lines[:start_index]
+
+        return (description if description else None), remaining_lines

     @classmethod
     def _parse_common_fields(cls, text: str) -> dict:
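A standalone re-run of the new parsing logic, with `description_start` assumed to be `"Description:"` (the real prefix comes from `cls._description_starts_with()`) and invented input lines:

```python
lines = [
    "Contributes to: none",          # invented non-description line
    "Description: first line",
    "more detail",
]
description_start = "Description:"

# Locate the description header, as the rewritten method does.
start_index = next(
    (i for i, line in enumerate(lines) if line.startswith(description_start)), -1
)

# Text on the header line itself is kept as the first description line.
first_line_content = lines[start_index][len(description_start):].strip()
description_lines = ([first_line_content] if first_line_content else []) + lines[start_index + 1:]

print("\n".join(description_lines))  # first line\nmore detail
print(lines[:start_index])           # ['Contributes to: none'] stays for other parsers
```

One behavioral consequence: the base-class version treats everything after `Description:` as description, which is exactly why `FeatureArtefact` overrides it further down to stop at scenario markers.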
@@ -499,4 +511,4 @@ class Artefact(BaseModel, ABC):
             classifier=classifier,
             rule=rule
         )
-        self.contribution = contribution
+        self.contribution = contribution
ara_cli/artefact_models/epic_artefact_model.py
CHANGED
@@ -1,5 +1,5 @@
 from ara_cli.artefact_models.artefact_model import Artefact, ArtefactType, Intent
-from pydantic import Field, field_validator
+from pydantic import Field, field_validator, model_validator
 from typing import List, Tuple, Optional


@@ -91,6 +91,15 @@ class EpicArtefact(Artefact):
         description="Rules the epic defines. It is recommended to create rules to clarify the desired outcome"
     )

+    @model_validator(mode='after')
+    def check_for_misplaced_rules(self) -> 'EpicArtefact':
+        if self.description:
+            desc_lines = self.description.split('\n')
+            for line in desc_lines:
+                if line.strip().startswith("Rule:"):
+                    raise ValueError("Found 'Rule:' inside description. Rules must be defined before the 'Description:' section.")
+        return self
+
     @field_validator('artefact_type')
     def validate_artefact_type(cls, v):
         if v != ArtefactType.epic:
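Because the check is a pydantic `model_validator`, a misplaced rule now fails at construction time. A standalone mirror of the validator (the real `EpicArtefact` has more fields than shown here):

```python
from typing import Optional
from pydantic import BaseModel, ValidationError, model_validator

class DescribedArtefact(BaseModel):  # stand-in for EpicArtefact
    description: Optional[str] = None

    @model_validator(mode='after')
    def check_for_misplaced_rules(self) -> 'DescribedArtefact':
        if self.description:
            for line in self.description.split('\n'):
                if line.strip().startswith("Rule:"):
                    raise ValueError(
                        "Found 'Rule:' inside description. "
                        "Rules must be defined before the 'Description:' section."
                    )
        return self

DescribedArtefact(description="plain prose")  # validates

try:
    DescribedArtefact(description="prose\nRule: smuggled in")
except ValidationError as e:
    print(e)  # pydantic wraps the ValueError raised by the validator
```

The error text mirrors the new dispatch keys in `apply_autofix` ("Found 'Rule:' inside description"), which appears to be how the deterministic `fix_misplaced_content` fixer gets selected for this failure.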
@@ -166,4 +175,4 @@ class EpicArtefact(Artefact):
             lines.append("")
             lines.append(description)
         lines.append("")
-        return "\n".join(lines)
+        return "\n".join(lines)
ara_cli/artefact_models/feature_artefact_model.py
CHANGED
@@ -301,6 +301,36 @@ class FeatureArtefact(Artefact):
                 f"FeatureArtefact must have artefact_type of '{ArtefactType.feature}', not '{v}'")
         return v

+    @classmethod
+    def _deserialize_description(cls, lines: List[str]) -> (Optional[str], List[str]):
+        description_start = cls._description_starts_with()
+        scenario_markers = ["Scenario:", "Scenario Outline:"]
+
+        start_index = -1
+        for i, line in enumerate(lines):
+            if line.startswith(description_start):
+                start_index = i
+                break
+
+        if start_index == -1:
+            return None, lines
+
+        end_index = len(lines)
+        for i in range(start_index + 1, len(lines)):
+            if any(lines[i].startswith(marker) for marker in scenario_markers):
+                end_index = i
+                break
+
+        first_line_content = lines[start_index][len(description_start):].strip()
+
+        description_lines_list = [first_line_content] if first_line_content else []
+        description_lines_list.extend(lines[start_index+1:end_index])
+
+        description = "\n".join(description_lines_list).strip() or None
+
+        remaining_lines = lines[:start_index] + lines[end_index:]
+
+        return description, remaining_lines

     @classmethod
     def _title_prefix(cls) -> str:
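The feature override differs from the base-class parser in one way: it stops the description at the first scenario. A standalone re-run of that slicing on invented lines:

```python
lines = [
    "Description: checkout flow",     # invented sample content
    "covers guest carts",
    "Scenario: guest pays by card",
    "  Given an empty cart",
]
scenario_markers = ["Scenario:", "Scenario Outline:"]

start_index = 0         # "Description:" found on the first line here
end_index = len(lines)  # cut back to the first scenario marker, if any
for i in range(start_index + 1, len(lines)):
    if any(lines[i].startswith(marker) for marker in scenario_markers):
        end_index = i
        break

description = "\n".join(["checkout flow"] + lines[start_index + 1:end_index]).strip() or None
remaining_lines = lines[:start_index] + lines[end_index:]

print(description)      # checkout flow\ncovers guest carts
print(remaining_lines)  # both scenario lines survive for the scenario parser
```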
@@ -519,4 +549,4 @@ class FeatureArtefact(Artefact):
             # or the placeholder is at the end of a line (e.g., "Then I see... __PLACEHOLDER__").
             step = step.replace(key, value)
             rehydrated_steps.append(step)
-        return rehydrated_steps
+        return rehydrated_steps
ara_cli/artefact_models/userstory_artefact_model.py
CHANGED
@@ -1,5 +1,5 @@
 from ara_cli.artefact_models.artefact_model import Artefact, ArtefactType, Intent
-from pydantic import Field, field_validator
+from pydantic import Field, field_validator, model_validator
 from typing import List, Tuple


@@ -92,6 +92,18 @@ class UserstoryArtefact(Artefact):
         default_factory=list,
         description="Rules the userstory defines. It is recommended to create rules to clarify the desired outcome")

+    @model_validator(mode='after')
+    def check_for_misplaced_content(self) -> 'UserstoryArtefact':
+        if self.description:
+            desc_lines = self.description.split('\n')
+            for line in desc_lines:
+                stripped_line = line.strip()
+                if stripped_line.startswith("Rule:"):
+                    raise ValueError("Found 'Rule:' inside description. Rules must be defined before the 'Description:' section.")
+                if stripped_line.startswith("Estimate:"):
+                    raise ValueError("Found 'Estimate:' inside description. Estimate must be defined before the 'Description:' section.")
+        return self
+
     @field_validator('artefact_type')
     def validate_artefact_type(cls, v):
         if v != ArtefactType.userstory:
ara_cli/chat.py
CHANGED
@@ -13,25 +13,6 @@ from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
 from ara_cli.file_loaders.text_file_loader import TextFileLoader


-extract_parser = argparse.ArgumentParser()
-extract_parser.add_argument(
-    "-f", "--force", action="store_true", help="Force extraction"
-)
-extract_parser.add_argument(
-    "-w",
-    "--write",
-    action="store_true",
-    help="Overwrite existing files without using LLM for merging.",
-)
-
-load_parser = argparse.ArgumentParser()
-load_parser.add_argument("file_name", nargs="?", default="", help="File to load")
-load_parser.add_argument(
-    "--load-images",
-    action="store_true",
-    help="Extract and describe images from documents",
-)
-
 extract_parser = argparse.ArgumentParser()
 extract_parser.add_argument(
     "-f", "--force", action="store_true", help="Force extraction"
ara_cli/file_loaders/text_file_loader.py
CHANGED
@@ -19,14 +19,14 @@ class TextFileLoader(FileLoader):

         if is_md_file and extract_images:
             reader = MarkdownReader(file_path)
-            file_content = reader.read(extract_images=True)
+            file_content = reader.read(extract_images=True).replace('\r\n', '\n')
         else:
             # Use charset-normalizer to detect encoding
             encoded_content = from_path(file_path).best()
             if not encoded_content:
                 print(f"Failed to detect encoding for {file_path}")
                 return False
-            file_content = str(encoded_content)
+            file_content = str(encoded_content).replace('\r\n', '\n')

         if block_delimiter:
             file_content = f"{block_delimiter}\n{file_content}\n{block_delimiter}"
ara_cli/prompt_extractor.py
CHANGED
@@ -8,20 +8,89 @@ from ara_cli.classifier import Classifier
 from ara_cli.directory_navigator import DirectoryNavigator
 from ara_cli.artefact_models.artefact_mapping import title_prefix_to_artefact_class

+def _find_extract_token(tokens):
+    """Find the first token that needs to be processed."""
+    for token in tokens:
+        if token.type == 'fence' and token.content.strip().startswith("# [x] extract"):
+            return token
+    return None
+
+def _extract_file_path(content_lines):
+    """Extract file path from content lines."""
+    if not content_lines:
+        return None
+    file_path_search = re.search(r"# filename: (.+)", content_lines[0])
+    return file_path_search.group(1).strip() if file_path_search else None
+
+def _find_artefact_class(content_lines):
+    """Find the appropriate artefact class from content lines."""
+    for line in content_lines[:2]:
+        words = line.strip().split(' ')
+        if not words:
+            continue
+        first_word = words[0]
+        if first_word in title_prefix_to_artefact_class:
+            return title_prefix_to_artefact_class[first_word]
+    return None
+
+def _process_file_extraction(file_path, code_content, force, write):
+    """Process file extraction logic."""
+    print(f"Filename extracted: {file_path}")
+    handle_existing_file(file_path, code_content, force, write)
+
+def _process_artefact_extraction(artefact_class, content_lines, force, write):
+    """Process artefact extraction logic."""
+    artefact = artefact_class.deserialize('\n'.join(content_lines))
+    serialized_artefact = artefact.serialize()
+
+    original_directory = os.getcwd()
+    directory_navigator = DirectoryNavigator()
+    directory_navigator.navigate_to_target()
+
+    artefact_path = artefact.file_path
+    directory = os.path.dirname(artefact_path)
+    os.makedirs(directory, exist_ok=True)
+    handle_existing_file(artefact_path, serialized_artefact, force, write)
+
+    os.chdir(original_directory)
+
+def _process_extraction_block(token_to_process, updated_content, force, write):
+    """Process a single extraction block."""
+    # Get the original block text for later replacement
+    source_lines = updated_content.split('\n')
+    start_line, end_line = token_to_process.map
+    original_block_text = '\n'.join(source_lines[start_line:end_line])
+
+    block_content = token_to_process.content
+    block_lines = block_content.split('\n')
+    content_lines_after_extract = block_lines[1:]
+
+    file_path = _extract_file_path(content_lines_after_extract)
+
+    if file_path:
+        code_content = '\n'.join(content_lines_after_extract[1:])
+        _process_file_extraction(file_path, code_content, force, write)
+    else:
+        artefact_class = _find_artefact_class(content_lines_after_extract)
+        if artefact_class:
+            _process_artefact_extraction(artefact_class, content_lines_after_extract, force, write)
+        else:
+            print("No filename or valid artefact found, skipping processing for this block.")

-
-
-
-    code_blocks = [token.content for token in tokens if token.type == 'fence']
-    return code_blocks
-
+    # Update the main content by replacing the processed block text with a modified version
+    modified_block_text = original_block_text.replace("# [x] extract", "# [v] extract", 1)
+    return updated_content.replace(original_block_text, modified_block_text, 1)

 def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
     print(f"Starting extraction from '{document_path}'")
     block_extraction_counter = 0

-
-
+    try:
+        with open(document_path, 'r', encoding='utf-8', errors='replace') as file:
+            content = file.read()
+    except FileNotFoundError:
+        print(f"Error: File not found at '{document_path}'. Skipping extraction.")
+        return

     cwd = os.getcwd()
     if relative_to_ara_root:
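The rewrite leans on markdown-it-py's token stream instead of hand-rolled block splitting. A minimal standalone demonstration of the calls the new helpers rely on (the sample document is invented):

```python
from markdown_it import MarkdownIt

# One fenced block marked for extraction, built line by line to keep the
# nested fence out of this example's own code block.
sample = "\n".join([
    "some prose",
    "",
    "```",
    "# [x] extract",
    "# filename: hello.txt",
    "hello world",
    "```",
])

tokens = MarkdownIt().parse(sample)
for token in tokens:
    if token.type == 'fence' and token.content.strip().startswith("# [x] extract"):
        # token.map holds the [start, end) source line range, which is what
        # _process_extraction_block uses to splice '# [v] extract' back in.
        print(token.map, token.content.splitlines())
```

Re-parsing the whole document on every pass of the `while True` loop below keeps the token line maps consistent after each in-place replacement, at the cost of parsing the file once per extracted block.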
@@ -29,71 +98,27 @@ def extract_responses(document_path, relative_to_ara_root=False, force=False, write=False):
         navigator.navigate_to_target()
         os.chdir('..')

-    code_blocks_found = extract_code_blocks_md(content)
     updated_content = content

-
-
+    while True:
+        md = MarkdownIt()
+        tokens = md.parse(updated_content)

-
-
-
-
-        block_lines = block_lines[1:]
-
-        file_path_search = re.search(r"# filename: (.+)", block_lines[0])
-
-        if file_path_search:
-            file_path = file_path_search.group(1).strip()
-            print(f"Filename extracted: {file_path}")
-
-            block_lines = block_lines[1:]  # Remove first line again after removing filename line
-            block = '\n'.join(block_lines)
-
-            handle_existing_file(file_path, block, force, write)
-            block_extraction_counter += 1
-
-            # Update the markdown content
-            updated_content = update_markdown(content, block, file_path)
-        else:
-            # Extract artefact
-            artefact_class = None
-            for line in block_lines[:2]:
-                words = line.strip().split(' ')
-                if not words:
-                    continue
-                first_word = words[0]
-                if first_word not in title_prefix_to_artefact_class:
-                    continue
-                artefact_class = title_prefix_to_artefact_class[first_word]
-            if not artefact_class:
-                print("No filename found, skipping this block.")
-                continue
-            artefact = artefact_class.deserialize('\n'.join(block_lines))
-            serialized_artefact = artefact.serialize()
-
-            original_directory = os.getcwd()
-            directory_navigator = DirectoryNavigator()
-            directory_navigator.navigate_to_target()
-
-            artefact_path = artefact.file_path
-            directory = os.path.dirname(artefact_path)
-            os.makedirs(directory, exist_ok=True)
-            handle_existing_file(artefact_path, serialized_artefact, force, write)
+        token_to_process = _find_extract_token(tokens)
+        if not token_to_process:
+            break  # No more blocks to process

-
+        block_extraction_counter += 1
+        print("Block found and processed.")

-
-        block_extraction_counter += 1
-        updated_content = update_markdown(content, block, None)
+        updated_content = _process_extraction_block(token_to_process, updated_content, force, write)

     os.chdir(cwd)
-    # Save the updated markdown content
     with open(document_path, 'w', encoding='utf-8') as file:
         file.write(updated_content)

-
-
+    if block_extraction_counter > 0:
+        print(f"End of extraction. Found and processed {block_extraction_counter} blocks in '{os.path.basename(document_path)}'.")

 def modify_and_save_file(response, file_path):
     print(f"Debug: Modifying and saving file {file_path}")
@@ -119,11 +144,9 @@ def modify_and_save_file(response, file_path):
     except json.JSONDecodeError as ex:
         print(f"ERROR: Failed to decode JSON response: {ex}")

-
 def prompt_user_decision(prompt):
     return input(prompt)

-
 def determine_should_create(skip_query=False):
     if skip_query:
         return True
@@ -132,13 +155,14 @@ def determine_should_create(skip_query=False):
         return True
     return False

-
 def create_file_if_not_exist(filename, content, skip_query=False):
     try:
         if not os.path.exists(filename):
             if determine_should_create(skip_query):
                 # Ensure the directory exists
-                os.
+                dir_name = os.path.dirname(filename)
+                if dir_name:
+                    os.makedirs(dir_name, exist_ok=True)

                 with open(filename, 'w', encoding='utf-8') as file:
                     file.write(content)
@@ -150,7 +174,6 @@ def create_file_if_not_exist(filename, content, skip_query=False):
         print(f"Error: {e}")
         print(f"Failed to create file {filename} due to an OS error")

-
 def create_prompt_for_file_modification(content_str, filename):
     if not os.path.exists(filename):
         print(f"WARNING: {filename} for merge prompt creation does not exist.")
@@ -181,11 +204,15 @@ def create_prompt_for_file_modification(content_str, filename):

     return prompt_text

-
 def handle_existing_file(filename, block_content, skip_query=False, write=False):
     if not os.path.isfile(filename):
         print(f"File {filename} does not exist, attempting to create")
+        # Ensure directory exists before writing
+        directory = os.path.dirname(filename)
+        if directory:
+            os.makedirs(directory, exist_ok=True)
         create_file_if_not_exist(filename, block_content, skip_query)
+
     elif write:
         print(f"File {filename} exists. Overwriting without LLM merge as requested.")
         try:
@@ -213,18 +240,9 @@ def handle_existing_file(filename, block_content, skip_query=False, write=False):
         response += content
         modify_and_save_file(response, filename)

-
 def extract_and_save_prompt_results(classifier, param, write=False):
     sub_directory = Classifier.get_sub_directory(classifier)
     prompt_log_file = f"ara/{sub_directory}/{param}.data/{classifier}.prompt_log.md"
     print(f"Extract marked sections from: {prompt_log_file}")

-    extract_responses(prompt_log_file, write=write)
-
-
-def update_markdown(original_content, block_content, filename):
-    """
-    Update the markdown content by changing the extract block from "# [x] extract" to "# [v] extract"
-    """
-    updated_content = original_content.replace("# [x] extract", "# [v] extract")
-    return updated_content
+    extract_responses(prompt_log_file, write=write)
|