ara-cli 0.1.9.94__tar.gz → 0.1.9.95__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ara-cli might be problematic. Click here for more details.
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/PKG-INFO +2 -1
- ara_cli-0.1.9.95/ara_cli/__init__.py +17 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/ara_config.py +17 -2
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_autofix.py +40 -21
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_creator.py +3 -1
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/chat.py +75 -32
- ara_cli-0.1.9.95/ara_cli/file_loaders/document_readers.py +233 -0
- ara_cli-0.1.9.95/ara_cli/file_loaders/file_loaders.py +123 -0
- ara_cli-0.1.9.95/ara_cli/file_loaders/image_processor.py +89 -0
- ara_cli-0.1.9.95/ara_cli/file_loaders/markdown_reader.py +75 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_loaders/text_file_loader.py +9 -11
- ara_cli-0.1.9.95/ara_cli/global_file_lister.py +61 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/prompt_handler.py +24 -4
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/template_manager.py +14 -4
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/update_config_prompt.py +7 -1
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/version.py +1 -1
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli.egg-info/PKG-INFO +2 -1
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli.egg-info/SOURCES.txt +6 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli.egg-info/requires.txt +1 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/docker/base/requirements.txt +1 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_ara_config.py +28 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_chat.py +17 -30
- ara_cli-0.1.9.95/tests/test_global_file_lister.py +131 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_prompt_handler.py +26 -1
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_template_manager.py +5 -4
- ara_cli-0.1.9.94/ara_cli/__init__.py +0 -3
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/MANIFEST.in +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/README.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/__main__.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/ara_command_action.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/ara_command_parser.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_deleter.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_fuzzy_search.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_link_updater.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_lister.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/__init__.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/artefact_data_retrieval.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/artefact_load.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/artefact_mapping.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/artefact_templates.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/businessgoal_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/capability_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/epic_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/example_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/feature_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/issue_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/keyfeature_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/serialize_helper.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/task_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/userstory_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_models/vision_artefact_model.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_reader.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_renamer.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/artefact_scan.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/classifier.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/codefusionretriever.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/codehierachieretriever.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commandline_completer.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commands/__init__.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commands/command.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commands/extract_command.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commands/load_command.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commands/load_image_command.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/commands/read_command.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/directory_navigator.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_classifier.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_lister.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_loaders/__init__.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_loaders/binary_file_loader.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_loaders/document_file_loader.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_loaders/document_reader.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/file_loaders/file_loader.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/filename_validator.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/list_filter.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/output_suppressor.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/prompt_chat.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/prompt_extractor.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/prompt_rag.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/run_file_lister.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/tag_extractor.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/agile.artefacts +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/blueprints/complete_pytest_unittest.blueprint.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/blueprints/empty.blueprint.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/blueprints/task_todo_list_C4_architecture_analysis.blueprint.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/blueprints/task_todo_list_implement_feature_BDD_way.blueprint.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/architecture_C4_analysis.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/architecture_radon_cc_score.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/architecture_radon_halstead_v.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/architecture_radon_maintainability_score.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/artefact_classification.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/artefact_extension.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/artefact_formulation.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/behave_step_generation.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/code_generation_complex.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/code_generation_simple.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/empty.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/error_fixing.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/feature_file_update.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/feature_formulation.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/js_code_generation_simple.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/refactoring.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/refactoring_analysis.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/reverse_engineer_feature_file.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/commands/reverse_engineer_program_flow.commands.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/classify_task.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/empty.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/error_fixing.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_fix_steps_for_scenario.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_formulation.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_reverse_formulation_from_code.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_scenario_implementation.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_scenario_implementation_update.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_scenario_outline_extension.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/feature_update_formulation.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/fibonacci_example_implementation.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/js_implementation_from_task_description.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/js_steps_implementation.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/python_cli_implementation_with_test.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/python_code_understanding.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/task_implementation.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/task_prompt_control_by_status.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/task_stepwise_implementation_by_number.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/intentions/task_stepwise_implementation_by_status.intention.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/architecture_analyst.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/code_analyst.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/empty.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/error_analyst.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/gherkin_expert.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/js_expert_developer.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/product_owner.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/python_behave.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/prompt-modules/rules/python_developer.rules.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.concept.exploration.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.concept.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.customer.exploration.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.customer.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.persona.exploration.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.persona.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.step.exploration.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.step.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.technology.exploration.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/specification_breakdown_files/template.technology.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.businessgoal.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.capability.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.epic.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.example.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.feature.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.issue.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.keyfeature.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.steps.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.task.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.userstory.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli/templates/template.vision.prompt_log.md +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli.egg-info/dependency_links.txt +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli.egg-info/entry_points.txt +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/ara_cli.egg-info/top_level.txt +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/setup.cfg +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/setup.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/__init__.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_ara_command_action.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_autofix.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_fuzzy_search.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_link_updater.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_lister.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_reader.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_renamer.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_artefact_scan.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_classifier.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_directory_navigator.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_file_classifier.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_file_creator.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_file_lister.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_list_filter.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_tag_extractor.py +0 -0
- {ara_cli-0.1.9.94 → ara_cli-0.1.9.95}/tests/test_update_config_prompt.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ara_cli
|
|
3
|
-
Version: 0.1.9.94
|
|
3
|
+
Version: 0.1.9.95
|
|
4
4
|
Summary: Powerful, open source command-line tool for managing, structuring and automating software development artifacts in line with Business-Driven Development (BDD) and AI-assisted processes
|
|
5
5
|
Description-Content-Type: text/markdown
|
|
6
6
|
Requires-Dist: litellm
|
|
@@ -13,6 +13,7 @@ Requires-Dist: json-repair
|
|
|
13
13
|
Requires-Dist: argparse
|
|
14
14
|
Requires-Dist: argcomplete
|
|
15
15
|
Requires-Dist: cmd2>=2.5
|
|
16
|
+
Requires-Dist: charset-normalizer
|
|
16
17
|
Requires-Dist: pydantic
|
|
17
18
|
Requires-Dist: pydantic_ai
|
|
18
19
|
Requires-Dist: python-docx
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from .version import __version__
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
whitelisted_commands = ["RERUN", "SEND", "EXTRACT", "LOAD_IMAGE", "CHOOSE_MODEL", "CHOOSE_EXTRACTION_MODEL", "CURRENT_MODEL", "CURRENT_EXTRACTION_MODEL", "LIST_MODELS"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# ANSI escape codes for coloring
|
|
9
|
+
YELLOW = '\033[93m'
|
|
10
|
+
RESET = '\033[0m'
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def format_warning(message, category, *args, **kwargs):
|
|
14
|
+
return f'{YELLOW}{category.__name__}: {message}{RESET}\n'
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
warnings.formatwarning = format_warning
|
|
@@ -6,6 +6,7 @@ from os.path import exists, dirname
|
|
|
6
6
|
from os import makedirs
|
|
7
7
|
from functools import lru_cache
|
|
8
8
|
import sys
|
|
9
|
+
import warnings
|
|
9
10
|
|
|
10
11
|
DEFAULT_CONFIG_LOCATION = "./ara/.araconfig/ara_config.json"
|
|
11
12
|
|
|
@@ -21,6 +22,7 @@ class ARAconfig(BaseModel):
|
|
|
21
22
|
{"source_dir": "./src"},
|
|
22
23
|
{"source_dir": "./tests"}
|
|
23
24
|
])
|
|
25
|
+
global_dirs: Optional[List[Dict[str, str]]] = Field(default=[])
|
|
24
26
|
glossary_dir: str = "./glossary"
|
|
25
27
|
doc_dir: str = "./docs"
|
|
26
28
|
local_prompt_templates_dir: str = "./ara/.araconfig"
|
|
@@ -162,6 +164,7 @@ def handle_unrecognized_keys(data: dict) -> dict:
|
|
|
162
164
|
cleaned_data[key] = value
|
|
163
165
|
return cleaned_data
|
|
164
166
|
|
|
167
|
+
|
|
165
168
|
# Function to read the JSON file and return an ARAconfig model
|
|
166
169
|
@lru_cache(maxsize=1)
|
|
167
170
|
def read_data(filepath: str) -> ARAconfig:
|
|
@@ -170,6 +173,16 @@ def read_data(filepath: str) -> ARAconfig:
|
|
|
170
173
|
If the file doesn't exist, it creates a default one.
|
|
171
174
|
If the file is invalid, it corrects only the broken parts.
|
|
172
175
|
"""
|
|
176
|
+
|
|
177
|
+
def warn_on_duplicate_llm_dict_key(ordered_pairs):
|
|
178
|
+
"""Reject duplicate keys."""
|
|
179
|
+
d = {}
|
|
180
|
+
for k, v in ordered_pairs:
|
|
181
|
+
if k in d:
|
|
182
|
+
warnings.warn(f"Duplicate LLM configuration identifier '{k}'. The previous entry will be removed.", UserWarning)
|
|
183
|
+
d[k] = v
|
|
184
|
+
return d
|
|
185
|
+
|
|
173
186
|
ensure_directory_exists(dirname(filepath))
|
|
174
187
|
|
|
175
188
|
if not exists(filepath):
|
|
@@ -181,7 +194,8 @@ def read_data(filepath: str) -> ARAconfig:
|
|
|
181
194
|
|
|
182
195
|
try:
|
|
183
196
|
with open(filepath, "r", encoding="utf-8") as file:
|
|
184
|
-
|
|
197
|
+
content = file.read()
|
|
198
|
+
data = json.loads(content, object_pairs_hook=warn_on_duplicate_llm_dict_key)
|
|
185
199
|
except json.JSONDecodeError as e:
|
|
186
200
|
print(f"Error: Invalid JSON in configuration file: {e}")
|
|
187
201
|
print("Creating a new configuration with defaults...")
|
|
@@ -206,7 +220,8 @@ def read_data(filepath: str) -> ARAconfig:
|
|
|
206
220
|
|
|
207
221
|
for field_name in error_fields:
|
|
208
222
|
print(f"-> Field '{field_name}' is invalid and will be reverted to its default value.")
|
|
209
|
-
|
|
223
|
+
if field_name in corrected_data:
|
|
224
|
+
corrected_data[field_name] = defaults.get(field_name)
|
|
210
225
|
|
|
211
226
|
print("--- End of Error Report ---")
|
|
212
227
|
|
|
@@ -144,11 +144,11 @@ def run_agent(prompt, artefact_class):
|
|
|
144
144
|
# anthropic:claude-4-sonnet-20250514
|
|
145
145
|
agent = Agent(
|
|
146
146
|
model="anthropic:claude-4-sonnet-20250514",
|
|
147
|
-
|
|
147
|
+
output_type=artefact_class,
|
|
148
148
|
instrument=True,
|
|
149
149
|
)
|
|
150
150
|
result = agent.run_sync(prompt)
|
|
151
|
-
return result.
|
|
151
|
+
return result.output
|
|
152
152
|
|
|
153
153
|
|
|
154
154
|
def write_corrected_artefact(file_path, corrected_text):
|
|
@@ -196,36 +196,52 @@ def ask_for_correct_contribution(
|
|
|
196
196
|
return name, classifier
|
|
197
197
|
|
|
198
198
|
|
|
199
|
-
def ask_for_contribution_choice(
|
|
200
|
-
|
|
201
|
-
) -> Optional[str]:
|
|
202
|
-
artefact_name, artefact_classifier = (
|
|
203
|
-
artefact_info if artefact_info else (None, None)
|
|
204
|
-
)
|
|
199
|
+
def ask_for_contribution_choice(choices: List[str], artefact_info: Optional[tuple[str, str]] = None) -> Optional[str]:
|
|
200
|
+
artefact_name, artefact_classifier = artefact_info if artefact_info else (None, None)
|
|
205
201
|
message = "Found multiple close matches for the contribution"
|
|
206
202
|
if artefact_name and artefact_classifier:
|
|
207
203
|
message += f" of the {artefact_classifier} '{artefact_name}'"
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
204
|
+
message += "."
|
|
205
|
+
return get_user_choice(choices, message)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _has_valid_contribution(artefact: Artefact) -> bool:
|
|
209
|
+
contribution = artefact.contribution
|
|
210
|
+
return contribution and contribution.artefact_name and contribution.classifier
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def get_user_choice(choices: List[str], message: str) -> Optional[str]:
|
|
214
|
+
"""
|
|
215
|
+
Generic function to present user with a list of choices and return their selection.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
choices: A list of strings representing the choices to display.
|
|
219
|
+
message: A message to display before listing the choices.
|
|
220
|
+
|
|
221
|
+
Returns:
|
|
222
|
+
The chosen item from the list or None if the input was invalid.
|
|
223
|
+
"""
|
|
224
|
+
print(message)
|
|
225
|
+
for i, choice in enumerate(choices):
|
|
226
|
+
print(f"{i + 1}: {choice}")
|
|
227
|
+
|
|
228
|
+
choice_number = input("Please enter your choice (number): ")
|
|
229
|
+
|
|
214
230
|
try:
|
|
215
231
|
choice_index = int(choice_number) - 1
|
|
216
232
|
if choice_index < 0 or choice_index >= len(choices):
|
|
217
|
-
print("Invalid choice. Aborting
|
|
233
|
+
print("Invalid choice. Aborting operation.")
|
|
218
234
|
return None
|
|
219
|
-
|
|
235
|
+
return choices[choice_index]
|
|
220
236
|
except ValueError:
|
|
221
|
-
print("Invalid input. Aborting
|
|
237
|
+
print("Invalid input. Aborting operation.")
|
|
222
238
|
return None
|
|
223
|
-
return choice
|
|
224
239
|
|
|
225
240
|
|
|
226
|
-
def
|
|
227
|
-
|
|
228
|
-
|
|
241
|
+
def ask_for_rule_choice(matches: List[str]) -> Optional[str]:
|
|
242
|
+
"""Asks the user for a choice between multiple rule matches"""
|
|
243
|
+
message = "Multiple rule matches found:"
|
|
244
|
+
return get_user_choice(matches, message)
|
|
229
245
|
|
|
230
246
|
|
|
231
247
|
def _update_rule(
|
|
@@ -249,6 +265,9 @@ def _update_rule(
|
|
|
249
265
|
return
|
|
250
266
|
if not closest_rule_match:
|
|
251
267
|
return
|
|
268
|
+
if len(closest_rule_match) > 1:
|
|
269
|
+
artefact.contribution.rule = ask_for_rule_choice(closest_rule_match)
|
|
270
|
+
return
|
|
252
271
|
artefact.contribution.rule = closest_rule_match[0]
|
|
253
272
|
|
|
254
273
|
|
|
@@ -106,7 +106,7 @@ class ArtefactCreator:
|
|
|
106
106
|
if not self.handle_existing_files(file_exists):
|
|
107
107
|
return
|
|
108
108
|
|
|
109
|
-
artefact = template_artefact_of_type(classifier, filename,
|
|
109
|
+
artefact = template_artefact_of_type(classifier, filename, True)
|
|
110
110
|
|
|
111
111
|
if parent_classifier and parent_name:
|
|
112
112
|
artefact.set_contribution(
|
|
@@ -114,6 +114,8 @@ class ArtefactCreator:
|
|
|
114
114
|
classifier=parent_classifier,
|
|
115
115
|
rule=rule
|
|
116
116
|
)
|
|
117
|
+
else:
|
|
118
|
+
artefact.set_contribution(None, None, None)
|
|
117
119
|
|
|
118
120
|
artefact_content = artefact.serialize()
|
|
119
121
|
rmtree(dir_path, ignore_errors=True)
|
|
@@ -2,6 +2,21 @@ import os
|
|
|
2
2
|
import argparse
|
|
3
3
|
import cmd2
|
|
4
4
|
from ara_cli.prompt_handler import send_prompt
|
|
5
|
+
from ara_cli.file_loaders.markdown_reader import MarkdownReader
|
|
6
|
+
|
|
7
|
+
from ara_cli.file_loaders.document_file_loader import DocumentFileLoader
|
|
8
|
+
from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
|
|
9
|
+
from ara_cli.file_loaders.text_file_loader import TextFileLoader
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
extract_parser = argparse.ArgumentParser()
|
|
13
|
+
extract_parser.add_argument('-f', '--force', action='store_true', help='Force extraction')
|
|
14
|
+
extract_parser.add_argument('-w','--write', action='store_true', help='Overwrite existing files without using LLM for merging.')
|
|
15
|
+
|
|
16
|
+
load_parser = argparse.ArgumentParser()
|
|
17
|
+
load_parser.add_argument('file_name', nargs='?', default='', help='File to load')
|
|
18
|
+
load_parser.add_argument('--load-images', action='store_true', help='Extract and describe images from documents')
|
|
19
|
+
|
|
5
20
|
|
|
6
21
|
from ara_cli.file_loaders.document_file_loader import DocumentFileLoader
|
|
7
22
|
from ara_cli.file_loaders.binary_file_loader import BinaryFileLoader
|
|
@@ -822,45 +837,73 @@ Start chatting (type 'HELP'/'h' for available commands, 'QUIT'/'q' to exit chat
|
|
|
822
837
|
)
|
|
823
838
|
command.execute()
|
|
824
839
|
|
|
840
|
+
def _find_givens_files(self, file_name: str) -> list[str]:
|
|
841
|
+
"""
|
|
842
|
+
Finds the givens files to be processed.
|
|
843
|
+
- If file_name is provided, it resolves that path.
|
|
844
|
+
- Otherwise, it looks for default givens files.
|
|
845
|
+
- If no defaults are found, it prompts the user.
|
|
846
|
+
Returns a list of absolute file paths or an empty list if none are found.
|
|
847
|
+
"""
|
|
848
|
+
base_directory = os.path.dirname(self.chat_name)
|
|
849
|
+
|
|
850
|
+
def resolve_path(name):
|
|
851
|
+
"""Inner helper to resolve a path relative to chat, then absolute."""
|
|
852
|
+
relative_path = os.path.join(base_directory, name)
|
|
853
|
+
if os.path.exists(relative_path):
|
|
854
|
+
return relative_path
|
|
855
|
+
if os.path.exists(name):
|
|
856
|
+
return name
|
|
857
|
+
return None
|
|
858
|
+
|
|
859
|
+
if file_name:
|
|
860
|
+
path = resolve_path(file_name)
|
|
861
|
+
if path:
|
|
862
|
+
return [path]
|
|
863
|
+
relative_path_for_error = os.path.join(base_directory, file_name)
|
|
864
|
+
self.perror(f"No givens file found at {relative_path_for_error} or {file_name}")
|
|
865
|
+
return []
|
|
866
|
+
|
|
867
|
+
# If no file_name, check for defaults
|
|
868
|
+
default_files_to_check = [
|
|
869
|
+
os.path.join(base_directory, "prompt.data", "config.prompt_givens.md"),
|
|
870
|
+
os.path.join(base_directory, "prompt.data", "config.prompt_global_givens.md")
|
|
871
|
+
]
|
|
872
|
+
existing_defaults = [f for f in default_files_to_check if os.path.exists(f)]
|
|
873
|
+
if existing_defaults:
|
|
874
|
+
return existing_defaults
|
|
875
|
+
|
|
876
|
+
# No defaults found, prompt user
|
|
877
|
+
user_input = input("Please specify a givens file: ")
|
|
878
|
+
if not user_input:
|
|
879
|
+
self.poutput("Aborting.")
|
|
880
|
+
return []
|
|
881
|
+
|
|
882
|
+
path = resolve_path(user_input)
|
|
883
|
+
if path:
|
|
884
|
+
return [path]
|
|
885
|
+
self.perror(f"No givens file found at {user_input}. Aborting.")
|
|
886
|
+
return []
|
|
887
|
+
|
|
825
888
|
@cmd2.with_category(CATEGORY_CHAT_CONTROL)
|
|
826
889
|
def do_LOAD_GIVENS(self, file_name):
|
|
827
|
-
"""Load all files listed in a ./prompt.data/config.prompt_givens.md"""
|
|
828
|
-
from ara_cli.directory_navigator import DirectoryNavigator
|
|
890
|
+
"""Load all files listed in a ./prompt.data/config.prompt_givens.md and ./prompt.data/config.prompt_global_givens.md"""
|
|
829
891
|
from ara_cli.prompt_handler import load_givens
|
|
830
892
|
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
# Check the relative path first
|
|
837
|
-
relative_givens_path = os.path.join(base_directory, file_name)
|
|
838
|
-
if os.path.exists(relative_givens_path):
|
|
839
|
-
givens_path = relative_givens_path
|
|
840
|
-
elif os.path.exists(file_name): # Check the absolute path
|
|
841
|
-
givens_path = file_name
|
|
842
|
-
else:
|
|
843
|
-
print(f"No givens file found at {relative_givens_path} or {file_name}")
|
|
844
|
-
user_input = input("Please specify a givens file: ")
|
|
845
|
-
if os.path.exists(os.path.join(base_directory, user_input)):
|
|
846
|
-
givens_path = os.path.join(base_directory, user_input)
|
|
847
|
-
elif os.path.exists(user_input):
|
|
848
|
-
givens_path = user_input
|
|
849
|
-
else:
|
|
850
|
-
print(f"No givens file found at {user_input}. Aborting.")
|
|
851
|
-
return
|
|
893
|
+
givens_files_to_process = self._find_givens_files(file_name)
|
|
894
|
+
if not givens_files_to_process:
|
|
895
|
+
self.poutput("No givens files to load.")
|
|
896
|
+
return
|
|
852
897
|
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
content, image_data = load_givens(givens_path)
|
|
858
|
-
os.chdir(cwd)
|
|
898
|
+
for givens_path in givens_files_to_process:
|
|
899
|
+
# The givens_path is absolute, and load_givens reconstructs absolute paths
|
|
900
|
+
# from the markdown file. No directory change is needed.
|
|
901
|
+
content, _ = load_givens(givens_path)
|
|
859
902
|
|
|
860
|
-
|
|
861
|
-
|
|
903
|
+
with open(self.chat_name, 'a', encoding='utf-8') as chat_file:
|
|
904
|
+
chat_file.write(content)
|
|
862
905
|
|
|
863
|
-
|
|
906
|
+
self.poutput(f"Loaded files listed and marked in {givens_path}")
|
|
864
907
|
|
|
865
908
|
@cmd2.with_category(CATEGORY_CHAT_CONTROL)
|
|
866
909
|
def do_SEND(self, _):
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Tuple, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DocumentReader(ABC):
    """Abstract base class for document readers.

    Subclasses implement :meth:`read`. The helper methods create the
    per-document image directory and save/describe extracted images.
    """

    def __init__(self, file_path: str):
        """Remember the document path and the directory that contains it."""
        self.file_path = file_path
        # Image directories and relative image paths are anchored here.
        self.base_dir = os.path.dirname(file_path)

    @abstractmethod
    def read(self, extract_images: bool = False) -> str:
        """Read document and optionally extract images"""
        pass

    def create_image_data_dir(self, extension_suffix: str) -> str:
        """
        Create data directory for images with file extension suffix to avoid conflicts.

        For ``report.docx`` and suffix ``"docx"`` the directory is
        ``<base_dir>/report_docx.data/images``.

        Returns:
            str: Path to images directory
        """
        stem = os.path.splitext(os.path.basename(self.file_path))[0]
        data_dir = os.path.join(self.base_dir, f"{stem}_{extension_suffix}.data")
        images_dir = os.path.join(data_dir, "images")
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test followed by os.makedirs().
        os.makedirs(images_dir, exist_ok=True)
        return images_dir

    def save_and_describe_image(self, image_data: bytes, image_format: str,
                                save_dir: str, image_counter: int) -> Tuple[str, str]:
        """
        Save image data and get its description via ``describe_image``.

        The file is written to ``save_dir`` as ``<image_counter>.<image_format>``.

        Returns:
            tuple: (relative_image_path, description), where the path is
            relative to the directory containing the source document.
        """
        # Imported at call time rather than at module import.
        from ara_cli.prompt_handler import describe_image

        image_path = os.path.join(save_dir, f"{image_counter}.{image_format}")

        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        description = describe_image(image_path)

        relative_image_path = os.path.relpath(image_path, self.base_dir)

        return relative_image_path, description
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DocxReader(DocumentReader):
    """Reader for DOCX files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the paragraph text of the DOCX file.

        With ``extract_images=True`` every embedded image is also saved to
        the document's image data directory, described, and listed in an
        appended "Extracted Images" section.
        """
        import docx

        doc = docx.Document(self.file_path)
        text_content = '\n'.join(para.text for para in doc.paragraphs)

        if not extract_images:
            return text_content

        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("docx")

        # Extract and process images
        image_descriptions = []
        image_counter = 1

        for rel in doc.part.rels.values():
            if "image" not in rel.reltype:
                continue
            if rel.is_external:
                # Linked images live outside the package; there is no
                # target part (and therefore no blob) to read.
                continue

            image_data = rel.target_part.blob

            # Determine image format; the handle is only needed for that.
            try:
                with Image.open(io.BytesIO(image_data)) as image:
                    image_format = image.format.lower()
            except OSError as e:
                # Pillow cannot identify this image; skip it instead of
                # losing the text and the remaining images.
                print(f"Warning: Could not extract image from DOCX: {e}")
                continue

            # Save and describe image
            relative_path, description = self.save_and_describe_image(
                image_data, image_format, images_dir, image_counter
            )

            # Add formatted description to list
            image_descriptions.append(f"\nImage: {relative_path}\n[{description}]\n")
            image_counter += 1

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class PdfReader(DocumentReader):
    """Reader for PDF files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the PDF content as markdown text.

        With ``extract_images=True`` the images referenced by the pages are
        also saved to the document's image data directory, described, and
        listed in an appended "Extracted Images" section.
        """
        import pymupdf4llm

        # Text is extracted without images in both modes.
        text_content = pymupdf4llm.to_markdown(self.file_path, write_images=False)

        if not extract_images:
            return text_content

        import fitz  # PyMuPDF

        # Create images directory
        images_dir = self.create_image_data_dir("pdf")

        image_descriptions = []
        image_counter = 1
        # An image shared by several pages has a single xref; save and
        # describe it once rather than once per referencing page.
        seen_xrefs = set()

        doc = fitz.open(self.file_path)
        try:
            for page in doc:
                for img in page.get_images():
                    xref = img[0]
                    if xref in seen_xrefs:
                        continue
                    seen_xrefs.add(xref)

                    # Extract image
                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_bytes, image_ext, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )
                    image_counter += 1
        finally:
            # Close the document even when extraction or description fails.
            doc.close()

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class OdtReader(DocumentReader):
    """Reader for ODT files"""

    def read(self, extract_images: bool = False) -> str:
        """Return the ODT content as markdown text.

        With ``extract_images=True`` the files under ``Pictures/`` inside
        the ODT archive are also saved to the document's image data
        directory, described, and listed in an appended "Extracted Images"
        section. Image extraction is best-effort: failures are reported as
        warnings and the text is still returned.
        """
        import pymupdf4llm

        # Get text content (identical in both modes)
        text_content = pymupdf4llm.to_markdown(self.file_path, write_images=False)

        if not extract_images:
            return text_content

        import zipfile
        from PIL import Image
        import io

        # Create data directory for images
        images_dir = self.create_image_data_dir("odt")

        # Extract and process images from ODT
        image_descriptions = []
        image_counter = 1

        try:
            with zipfile.ZipFile(self.file_path, 'r') as odt_zip:
                # List files in the Pictures directory; archives may contain
                # an entry for the directory itself, which is not an image.
                picture_files = [
                    f for f in odt_zip.namelist()
                    if f.startswith('Pictures/') and not f.endswith('/')
                ]

                for picture_file in picture_files:
                    # Extract image data
                    image_data = odt_zip.read(picture_file)

                    # Determine image format; skip entries Pillow cannot
                    # identify so one bad file does not drop the rest.
                    try:
                        with Image.open(io.BytesIO(image_data)) as image:
                            image_format = image.format.lower()
                    except OSError as e:
                        print(f"Warning: Could not extract image {picture_file} from ODT: {e}")
                        continue

                    # Save and describe image
                    relative_path, description = self.save_and_describe_image(
                        image_data, image_format, images_dir, image_counter
                    )

                    # Add formatted description to list
                    image_descriptions.append(
                        f"\nImage: {relative_path}\n[{description}]\n"
                    )
                    image_counter += 1
        except Exception as e:
            # Deliberate best-effort: keep the text even if images fail.
            print(f"Warning: Could not extract images from ODT: {e}")

        # Combine text content with image descriptions
        if image_descriptions:
            text_content += "\n\n### Extracted Images\n" + "\n".join(image_descriptions)

        return text_content
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class DocumentReaderFactory:
    """Factory that picks a document reader from a file's extension."""

    @staticmethod
    def create_reader(file_path: str) -> Optional[DocumentReader]:
        """Return a reader for ``file_path``, or ``None`` if unsupported.

        The extension is compared case-insensitively; ``.docx``, ``.pdf``
        and ``.odt`` are recognised.
        """
        suffix = os.path.splitext(file_path)[1].lower()

        if suffix == '.docx':
            return DocxReader(file_path)
        if suffix == '.pdf':
            return PdfReader(file_path)
        if suffix == '.odt':
            return OdtReader(file_path)

        return None
|