@ngocsangairvds/vsaf 3.2.14 → 3.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/vsaf.js +13 -0
- package/package.json +1 -1
- package/src/config.js +167 -0
- package/src/global.js +1 -48
- package/src/utils.js +44 -1
- package/tools/vds-scripts/Makefile +9 -31
- package/tools/vds-scripts/docker/docker-compose.cli.yml +1 -117
- package/tools/vds-scripts/docker/docker-compose.services.yml +1 -40
- package/tools/vds-scripts/docker/infrastructure/init-schemas.sql +0 -34
- package/tools/vds-scripts/docker/infrastructure/pgbouncer/pgbouncer.ini +2 -6
- package/tools/vds-scripts/pyproject.toml +1 -33
- package/tools/vds-scripts/uv.lock +80 -1651
- package/tools/vds-scripts/vds_cli/pyproject.toml +3 -0
- package/tools/vds-scripts/vds_cli/src/vds_cli/cli.py +1 -127
- package/tools/vds-scripts/vds_cli/src/vds_cli/commands/lint_cli.py +1 -20
- package/tools/vds-scripts/vds_cli/src/vds_cli/router.py +0 -100
- package/tools/vds-scripts/vds_cli/tests/conftest.py +0 -2
- package/tools/vds-scripts/vds_cli/tests/unit/test_cli.py +0 -25
- package/tools/vds-scripts/vds_cli/tests/unit/test_lint_cli.py +2 -2
- package/tools/vds-scripts/vds_cli/tests/unit/test_router.py +0 -2
- package/tools/vds-scripts/CLOSURE.md +0 -340
- package/tools/vds-scripts/ECOSYSTEM-CHANGELOG.md +0 -52
- package/tools/vds-scripts/ECOSYSTEM-DOCS.md +0 -602
- package/tools/vds-scripts/ECOSYSTEM_ALIGNMENT.md +0 -133
- package/tools/vds-scripts/ENV-HYGIENE-OPS-NOTE.md +0 -65
- package/tools/vds-scripts/INVESTIGATION-cloud-401.md +0 -103
- package/tools/vds-scripts/MEM0_2.0_API_REFERENCE.md +0 -238
- package/tools/vds-scripts/PACKAGE_P125B_IMPLEMENTATION_SUMMARY.md +0 -131
- package/tools/vds-scripts/PHASE-MERGE-SUMMARY.md +0 -121
- package/tools/vds-scripts/PHASES-3-ARCHIVE.md +0 -59
- package/tools/vds-scripts/PROJECT_COMPLETION_SUMMARY.md +0 -45
- package/tools/vds-scripts/SEARCH-CRASH-REPRO.md +0 -51
- package/tools/vds-scripts/analyze_hexagonal.py +0 -217
- package/tools/vds-scripts/analyze_profiles.py +0 -60
- package/tools/vds-scripts/audit-checklist.xlsx +0 -0
- package/tools/vds-scripts/audit_orchestrator/.audit_approvals/approvals_index.json +0 -1
- package/tools/vds-scripts/audit_orchestrator/.env.example +0 -85
- package/tools/vds-scripts/audit_orchestrator/.github/workflows/audit.yml +0 -47
- package/tools/vds-scripts/audit_orchestrator/Dockerfile +0 -92
- package/tools/vds-scripts/audit_orchestrator/GOOGLE_SHEETS_IMPLEMENTATION_SUMMARY.md +0 -218
- package/tools/vds-scripts/audit_orchestrator/PHASE3_INTEGRATION_SUMMARY.md +0 -268
- package/tools/vds-scripts/audit_orchestrator/PHASE7-MERGE-SUMMARY.md +0 -174
- package/tools/vds-scripts/audit_orchestrator/README.md +0 -1573
- package/tools/vds-scripts/audit_orchestrator/TSK-168-IMPLEMENTATION-SUMMARY.md +0 -191
- package/tools/vds-scripts/audit_orchestrator/TSK-196-IMPLEMENTATION-SUMMARY.md +0 -201
- package/tools/vds-scripts/audit_orchestrator/alembic/env.py +0 -37
- package/tools/vds-scripts/audit_orchestrator/alembic/script.py.mako +0 -28
- package/tools/vds-scripts/audit_orchestrator/alembic/versions/0001_initial_audit_state_schema.py +0 -1260
- package/tools/vds-scripts/audit_orchestrator/alembic.ini +0 -68
- package/tools/vds-scripts/audit_orchestrator/config/category-mapping.json +0 -81
- package/tools/vds-scripts/audit_orchestrator/config/profile-timeouts.yaml +0 -17
- package/tools/vds-scripts/audit_orchestrator/create_sample.py +0 -55
- package/tools/vds-scripts/audit_orchestrator/data/corpus_accuracy_report.json +0 -17
- package/tools/vds-scripts/audit_orchestrator/data/exemplar_quality_report.json +0 -1606
- package/tools/vds-scripts/audit_orchestrator/data/instruction_plan_fixtures.json +0 -163
- package/tools/vds-scripts/audit_orchestrator/data/requirement_exemplars.json +0 -3443
- package/tools/vds-scripts/audit_orchestrator/data/requirement_scope_fixtures.json +0 -172
- package/tools/vds-scripts/audit_orchestrator/debug_rg.py +0 -46
- package/tools/vds-scripts/audit_orchestrator/demo_code_pack.py +0 -127
- package/tools/vds-scripts/audit_orchestrator/docs/AGENT_SDK_SELECTION_SPEC.md +0 -720
- package/tools/vds-scripts/audit_orchestrator/docs/API.md +0 -804
- package/tools/vds-scripts/audit_orchestrator/docs/CONTENT_ANALYSIS_APPROACH.md +0 -1041
- package/tools/vds-scripts/audit_orchestrator/docs/CONTENT_SCORING_EVOLUTION_SPEC.md +0 -868
- package/tools/vds-scripts/audit_orchestrator/docs/DEPLOYMENT.md +0 -778
- package/tools/vds-scripts/audit_orchestrator/docs/LLM_AGENT_AUDIT_SPEC.md +0 -721
- package/tools/vds-scripts/audit_orchestrator/docs/LLM_CONTENT_ANALYSIS_SPEC.md +0 -1143
- package/tools/vds-scripts/audit_orchestrator/docs/LSP_SETUP_GUIDE.md +0 -221
- package/tools/vds-scripts/audit_orchestrator/docs/MULTI_REPO_AUDIT_SPEC.md +0 -951
- package/tools/vds-scripts/audit_orchestrator/docs/OLLAMA_EMBEDDINGS_SETUP.md +0 -119
- package/tools/vds-scripts/audit_orchestrator/docs/PHASE32_REAL_BENCHMARK_2026-02-08.md +0 -66
- package/tools/vds-scripts/audit_orchestrator/docs/PHASE_64_TO_92_HISTORICAL_SPEC.md +0 -1772
- package/tools/vds-scripts/audit_orchestrator/docs/TSK-193-flow-trace.md +0 -201
- package/tools/vds-scripts/audit_orchestrator/docs/TSK-193-verification.md +0 -124
- package/tools/vds-scripts/audit_orchestrator/docs/phase152-hierarchical-query-surface.md +0 -46
- package/tools/vds-scripts/audit_orchestrator/examples/bitbucket_metadata_example.json +0 -50
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/README.md +0 -68
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase117_phase118_shared_state.sql +0 -64
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase154_published_pages.sql +0 -28
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase157_dispatch_tables.sql +0 -94
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase157_events.sql +0 -91
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase157_scope_snapshots.sql +0 -24
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase157_status_view.sql +0 -22
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/phase169_dispatch_observability.sql +0 -55
- package/tools/vds-scripts/audit_orchestrator/legacy/migrations/state_repair_hardening.sql +0 -24
- package/tools/vds-scripts/audit_orchestrator/pyproject.toml +0 -211
- package/tools/vds-scripts/audit_orchestrator/pyrightconfig.json +0 -51
- package/tools/vds-scripts/audit_orchestrator/pytest.ini +0 -37
- package/tools/vds-scripts/audit_orchestrator/reproduce_scanner.py +0 -40
- package/tools/vds-scripts/audit_orchestrator/scripts/README.md +0 -116
- package/tools/vds-scripts/audit_orchestrator/scripts/benchmark_crawl_modes.py +0 -455
- package/tools/vds-scripts/audit_orchestrator/scripts/benchmark_dspy.py +0 -513
- package/tools/vds-scripts/audit_orchestrator/scripts/benchmark_nlp_accuracy.py +0 -138
- package/tools/vds-scripts/audit_orchestrator/scripts/benchmark_retrieval_modes.py +0 -176
- package/tools/vds-scripts/audit_orchestrator/scripts/benchmark_upload_update_mode.py +0 -167
- package/tools/vds-scripts/audit_orchestrator/scripts/build_check.py +0 -76
- package/tools/vds-scripts/audit_orchestrator/scripts/check_live_progress.py +0 -61
- package/tools/vds-scripts/audit_orchestrator/scripts/cli_integration_test.py +0 -400
- package/tools/vds-scripts/audit_orchestrator/scripts/index_workspace.py +0 -178
- package/tools/vds-scripts/audit_orchestrator/scripts/inspect_route_conformance.py +0 -196
- package/tools/vds-scripts/audit_orchestrator/scripts/monitor_postgres.py +0 -145
- package/tools/vds-scripts/audit_orchestrator/scripts/optimize_audit.py +0 -462
- package/tools/vds-scripts/audit_orchestrator/scripts/verify.py +0 -673
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase111_requirement_analysis.py +0 -375
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase117_cross_repo_evidence.py +0 -77
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase121_short_circuit.py +0 -680
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase122_instruction_handling.py +0 -478
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase125_skill_integration.py +0 -832
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase_36.py +0 -394
- package/tools/vds-scripts/audit_orchestrator/scripts/verify_phase_37.py +0 -58
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/__init__.py +0 -17
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/__init__.py +0 -29
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/_langchain_warnings.py +0 -17
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/agentic_investigator.py +0 -4130
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/approval.py +0 -490
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/audit_loop_hooks.py +0 -107
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/audit_state.py +0 -50
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/base.py +0 -4035
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/code_agent.py +0 -667
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/code_analysis_helpers.py +0 -236
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/code_analysis_prompts.py +0 -146
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/docs_agent.py +0 -1234
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/langgraph_workflow.py +0 -2002
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/pydantic_base.py +0 -1227
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/requirement_analysis_agent.py +0 -593
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/security_agent.py +0 -1829
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/security_scanner.py +0 -686
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/skill_tools.py +0 -204
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/synthesis_agent.py +0 -1463
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/tool_efficiency_guard.py +0 -609
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/tool_registry.py +0 -3822
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/__init__.py +0 -52
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/evidence_corpus.py +0 -385
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/filesystem.py +0 -1134
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/lsp.py +0 -458
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/mcp_toolset.py +0 -491
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/skills_toolset.py +0 -997
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/toolsets/vector_evidence.py +0 -842
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/usage_tracker.py +0 -682
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/agents/visualization.py +0 -303
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/analyze_cmds.py +0 -892
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checklist_query/__init__.py +0 -15
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checklist_query/service.py +0 -171
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/__init__.py +0 -20
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/base.py +0 -60
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/bitbucket/__init__.py +0 -6
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/bitbucket/checks.py +0 -257
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/confluence/__init__.py +0 -10
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/confluence/checks.py +0 -78
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/git/__init__.py +0 -6
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/git/file_checks.py +0 -133
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/llm_checks/__init__.py +0 -17
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/llm_checks/api_docs_check.py +0 -80
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/llm_checks/readme_check.py +0 -76
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/llm_checks/security_docs_check.py +0 -78
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/registry.py +0 -402
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/sonarqube/__init__.py +0 -10
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/checks/sonarqube/checks.py +0 -276
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/cli.py +0 -12
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/cli_common.py +0 -128
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/cli_impl.py +0 -9826
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/clients/bitbucket_cli_client.py +0 -187
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/clients/confluence_cli_client.py +0 -977
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/clients/sonarqube_cli_client.py +0 -28
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/__init__.py +0 -21
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/base.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/bitbucket_downloader.py +0 -644
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/bitbucket_metadata.py +0 -133
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/checklist_parser.py +0 -180
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/completeness/__init__.py +0 -31
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/completeness/bitbucket_probe.py +0 -443
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/completeness/confluence_probe.py +0 -365
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/completeness/freshness_evaluator.py +0 -330
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/completeness/material_completeness_service.py +0 -1079
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/confluence_collector.py +0 -259
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/diagram_extractor.py +0 -280
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/enrichment_extractor.py +0 -200
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/evidence_cache.py +0 -35
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/git_collector.py +0 -148
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/graphify_collector.py +0 -171
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/image_extractor.py +0 -359
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/linked_page_tracker.py +0 -120
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/markdown_converter.py +0 -344
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/material_cache.py +0 -1252
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/material_downloader.py +0 -1165
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/orchestrator.py +0 -168
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/registry_parser.py +0 -3063
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/requirements.py +0 -70
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/runner.py +0 -119
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/collectors/sonarqube_collector.py +0 -113
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config.py +0 -1943
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/__init__.py +0 -23
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/discovery.py +0 -90
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/environment_resolver.py +0 -56
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/evidence.py +0 -78
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/models.py +0 -73
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/precedence.py +0 -10
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/config_resolution/redaction.py +0 -20
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/confluence_connectivity.py +0 -140
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/corpus_cmds.py +0 -278
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/db/__init__.py +0 -7
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/db/alembic_filters.py +0 -57
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/docs/__init__.py +0 -29
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/docs/diataxis_validator.py +0 -687
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/doctor_cmds.py +0 -3295
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/dspy_modules/__init__.py +0 -5
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/dspy_modules/evaluation.py +0 -301
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/dspy_modules/modules.py +0 -172
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/dspy_modules/runtime.py +0 -836
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/dspy_modules/signatures.py +0 -406
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/__init__.py +0 -192
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/ad_hoc_analyzer.py +0 -399
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/aggregator.py +0 -220
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/auditor.py +0 -504
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/batch_evidence_cache.py +0 -111
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/batch_processor.py +0 -4776
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/calibration.py +0 -217
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/checklist_generator.py +0 -1201
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/checklist_projection.py +0 -192
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/checklist_scoping.py +0 -221
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/checkpoint.py +0 -159
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/cl003_shared_lib_guard.py +0 -194
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/companion_context_service.py +0 -445
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/confluence_checklist_contract.py +0 -7425
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/cross_check_rules.py +0 -213
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/deterministic_evaluator.py +0 -237
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/drift_detector.py +0 -157
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/dspy_requirement_classifier.py +0 -640
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/evidence_assembler.py +0 -407
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/evidence_collector.py +0 -119
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/evidence_diversity.py +0 -101
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/gap_analyzer.py +0 -549
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/graduated.py +0 -185
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/grounding_validator.py +0 -287
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/instruction_analyzer.py +0 -882
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/instruction_compliance.py +0 -172
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/llm_row_evaluator.py +0 -9270
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/loader.py +0 -1070
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/manual_check_config.py +0 -136
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/mapping.py +0 -269
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/multi_judge.py +0 -65
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/phase120_checklist_update.py +0 -416
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/profile_scorer.py +0 -427
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/project_evidence_context.py +0 -449
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/project_knowledge_query_service.py +0 -155
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/project_knowledge_store.py +0 -383
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/project_topology.py +0 -1920
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/provider_failure_classifier.py +0 -778
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/readiness_cli_helpers.py +0 -341
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/readiness_extractor.py +0 -303
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/readiness_synthesizer.py +0 -730
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/regression_guard.py +0 -138
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/repo_type_classifier.py +0 -297
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/requirement_analysis.py +0 -1433
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/requirement_classification.py +0 -1725
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/result_merger.py +0 -814
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/route_matrix.py +0 -267
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/row_evaluator.py +0 -9437
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/row_evaluator_runtime.py +0 -1270
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/row_evaluator_types.py +0 -2102
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/rubric.py +0 -592
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/scorer.py +0 -1239
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/section_packs.py +0 -645
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/skill_recommendation.py +0 -1183
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/stability_harness.py +0 -207
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/target_selector.py +0 -841
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/telemetry.py +0 -347
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/template_analyzer.py +0 -469
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/token_tracker.py +0 -111
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/tool_first_planner.py +0 -7905
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/topology_query_service.py +0 -80
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/validator.py +0 -449
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/engine/weight_policy.py +0 -464
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/errors.py +0 -430
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/extract_cmds.py +0 -4887
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/identity.py +0 -146
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/incremental/__init__.py +0 -52
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/incremental/baseline.py +0 -378
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/incremental/change_analyzer.py +0 -407
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/incremental/delta_report.py +0 -189
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/incremental/diff_detector.py +0 -301
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/integrations/__init__.py +0 -3
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/__init__.py +0 -50
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/audit_schemas.py +0 -459
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/codex_oauth.py +0 -340
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/cost_tracker.py +0 -288
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/engine.py +0 -751
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/evaluator.py +0 -245
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/__init__.py +0 -32
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/api_docs_evaluation.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/gap_analysis.py +0 -31
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/instruction_templates.py +0 -634
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/readme_evaluation.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/row_evaluation.py +0 -247
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/security_docs_evaluation.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts/template_analysis.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/prompts.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/llm/provider.py +0 -626
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/logging_config.py +0 -577
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/mappings/__init__.py +0 -58
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/mappings/default_checklist_mapping.json +0 -18
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/mappings/vietnamese_checklist_mapping.json +0 -38
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/misc_cmds.py +0 -4689
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/__init__.py +0 -153
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/calibration.py +0 -98
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/checklist.py +0 -921
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/completeness.py +0 -309
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/enrichment.py +0 -58
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/enums.py +0 -97
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/evidence.py +0 -351
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/findings.py +0 -381
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/gaps.py +0 -299
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/graph.py +0 -42
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/multi_judge.py +0 -50
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/readiness.py +0 -309
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/registry.py +0 -386
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/reporting.py +0 -32
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/task.py +0 -549
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/models/template.py +0 -477
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/observability/__init__.py +0 -31
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/observability/metrics.py +0 -404
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/parse_cmds.py +0 -608
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/pdf_cmds.py +0 -208
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/performance_gates.py +0 -224
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/phase151_projection.py +0 -84
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/profiles/__init__.py +0 -65
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/profiles/detection.py +0 -842
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/profiles/models.py +0 -474
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/__init__.py +0 -1
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/_confluence_macros.py +0 -145
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/_field_sanitizer.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/_table_builder.py +0 -63
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/_vietnamese_templates.py +0 -103
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/bitbucket_link_resolver.py +0 -34
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/checklist_renderer.py +0 -483
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/confluence_publisher.py +0 -3048
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/hierarchy_publisher.py +0 -213
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/live_data_injector.py +0 -152
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/macro_builder.py +0 -101
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/markdown_converter.py +0 -154
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/priority_renderer.py +0 -133
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/project_aggregate_renderer.py +0 -423
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/readiness_renderer.py +0 -186
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/system_doc_hierarchy_renderer.py +0 -382
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/publishers/system_doc_renderer.py +0 -683
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/report_cmds.py +0 -788
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/__init__.py +0 -13
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/aggregation_report.py +0 -86
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/checklist_generator.py +0 -425
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/excel_generator.py +0 -599
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/gap_report.py +0 -131
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/json_generator.py +0 -188
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/markdown_generator.py +0 -595
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/__init__.py +0 -154
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/collector.py +0 -61
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/department_builder.py +0 -77
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/errors.py +0 -9
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/md_renderer.py +0 -386
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/pdf_models.py +0 -95
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/pdf_writer.py +0 -27
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/pdf/repo_project_builders.py +0 -274
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/readiness_report.py +0 -447
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/reporting.py +0 -94
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/reports/sarif_generator.py +0 -519
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/runtime_profiles.py +0 -98
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/seed/__init__.py +0 -29
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/seed/seed_loader.py +0 -561
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/skills/__init__.py +0 -5
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/skills/skill_routing.py +0 -312
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sources/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sources/base.py +0 -110
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sources/bitbucket.py +0 -129
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sources/git_url.py +0 -60
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sources/github.py +0 -75
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sources/local.py +0 -58
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/spec_sync_validator.py +0 -15
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/state/__init__.py +0 -6285
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/state/readiness_helpers.py +0 -74
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/state/skill_readiness.py +0 -487
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/state/store.py +0 -12927
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/state_cmds.py +0 -1868
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sync/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sync/repo_sync.py +0 -409
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/sync_cmds.py +0 -1247
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/utils/__init__.py +0 -3
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/utils/debug_bundle.py +0 -214
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/validators/checklist_validator.py +0 -342
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflow_cmds.py +0 -19147
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflows/__init__.py +0 -9
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflows/_test_audit_daily_batch.py +0 -192
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflows/audit_daily_batch.py +0 -308
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflows/audit_deep_monthly.py +0 -193
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflows/audit_drift_scan.py +0 -178
- package/tools/vds-scripts/audit_orchestrator/src/vds_audit_orchestrator/workflows/audit_security_daily.py +0 -183
- package/tools/vds-scripts/audit_orchestrator/templates/sample_audit_template.xlsx +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/_helpers.py +0 -32
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/completeness/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/completeness/test_bitbucket_probe.py +0 -403
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/completeness/test_confluence_probe.py +0 -423
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_bitbucket_downloader.py +0 -289
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_image_extractor.py +0 -260
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_markdown_converter.py +0 -57
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_material_cache.py +0 -197
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_material_downloader.py +0 -550
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_registry_parser.py +0 -3514
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_registry_parser_department_entry.py +0 -214
- package/tools/vds-scripts/audit_orchestrator/tests/collectors/test_registry_parser_flow.py +0 -200
- package/tools/vds-scripts/audit_orchestrator/tests/conftest.py +0 -988
- package/tools/vds-scripts/audit_orchestrator/tests/engine/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/engine/test_calibration.py +0 -48
- package/tools/vds-scripts/audit_orchestrator/tests/engine/test_confluence_checklist_phase22_helpers.py +0 -6065
- package/tools/vds-scripts/audit_orchestrator/tests/engine/test_multi_judge.py +0 -62
- package/tools/vds-scripts/audit_orchestrator/tests/engine/test_stability_harness.py +0 -61
- package/tools/vds-scripts/audit_orchestrator/tests/engine/test_structured_metadata.py +0 -419
- package/tools/vds-scripts/audit_orchestrator/tests/factories/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/factories/models.py +0 -534
- package/tools/vds-scripts/audit_orchestrator/tests/factories/templates.py +0 -241
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/diagrams/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/diagrams/compressed.drawio +0 -2
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/diagrams/mockup.bmpr +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/diagrams/simple.drawio +0 -26
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/bitbucket/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/bitbucket/branch_permissions_cli.json +0 -26
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/bitbucket/branch_permissions_direct.json +0 -24
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/bitbucket/repo_conditions_cli.json +0 -14
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/bitbucket/repo_conditions_direct.json +0 -12
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/confluence/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/confluence/page_cli.json +0 -7
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/confluence/page_direct.json +0 -7
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/confluence/search_cli.json +0 -11
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/confluence/search_direct.json +0 -7
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/sonarqube/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/sonarqube/quality_gate_cli.json +0 -12
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/golden/sonarqube/quality_gate_direct.json +0 -12
- package/tools/vds-scripts/audit_orchestrator/tests/fixtures/requirement_strategy_phase115.json +0 -118
- package/tools/vds-scripts/audit_orchestrator/tests/integration/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/integration/conftest.py +0 -107
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/expected_outcomes.md +0 -50
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/auth.py +0 -27
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/config.py +0 -16
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/db.py +0 -24
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/main.py +0 -18
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/src/__init__.py +0 -1
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_audit_repo/src/utils.py +0 -22
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_checklist_template.json +0 -110
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/code_evidence_pack.json +0 -40
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/manifest.json +0 -49
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/mock-audit-project/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/mock-audit-project/brd.md +0 -19
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/mock-audit-project/design.md +0 -32
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/mock-audit-project/security.md +0 -23
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/mock-audit-project/srs.md +0 -25
- package/tools/vds-scripts/audit_orchestrator/tests/integration/fixtures/mock_evidence/projects/mock-audit-project/test.md +0 -30
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_checkpoint_merge.py +0 -1371
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_decoupling_route_p149.py +0 -176
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_gap_analyzer_batch_p149.py +0 -151
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_hybrid_search.py +0 -799
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_mcp_integration.py +0 -741
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_merge_ranking_p149.py +0 -98
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_modality_mismatch_p149.py +0 -171
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_phase117_118_storage.py +0 -350
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_phase121_short_circuit.py +0 -732
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_phase18_workflow.py +0 -223
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_phase48_e2e_verification.py +0 -763
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_phase81_doc_anchor_regression.py +0 -252
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_provider_failure_finding_p149.py +0 -339
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_readiness_e2e.py +0 -430
- package/tools/vds-scripts/audit_orchestrator/tests/integration/test_refined_workflow.py +0 -1180
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/snapshots/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/snapshots/department_renderer.md +0 -24
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/snapshots/project_renderer.md +0 -8
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/snapshots/repo_renderer.md +0 -10
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_department_pdf.py +0 -112
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_e2e_pdf.py +0 -135
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_idempotency.py +0 -45
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_md_renderer.py +0 -46
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_pdf_cmds.py +0 -97
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_pdf_snapshot.py +0 -77
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_pdf_writer.py +0 -65
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_project_builder.py +0 -199
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_public_api.py +0 -135
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_repo_builder.py +0 -246
- package/tools/vds-scripts/audit_orchestrator/tests/pdf/test_workflow_pdf_flags.py +0 -36
- package/tools/vds-scripts/audit_orchestrator/tests/property/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/property/test_properties.py +0 -807
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_agent_error_compat.py +0 -38
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_agentic_skill_policy_skip.py +0 -234
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_base_event_stream_logging.py +0 -785
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_base_timeout_policy.py +0 -277
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_base_trace_payload_sanitization.py +0 -92
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_code_agent.py +0 -2311
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_code_agent_re_exports.py +0 -25
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_code_analysis_helpers.py +0 -94
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_create_audit_agent_reasoning_effort.py +0 -69
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_docs_agent.py +0 -2044
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_langgraph_workflow_efficiency_metrics.py +0 -71
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_output_validators.py +0 -317
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_phase41_toolsets.py +0 -6427
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_pydantic_ai_models.py +0 -1219
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_pydantic_base_url_resolution.py +0 -84
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_security_agent.py +0 -2069
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_skill_manager_focus.py +0 -439
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_synthesis_agent.py +0 -1195
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_tool_efficiency_guard_fr120.py +0 -683
- package/tools/vds-scripts/audit_orchestrator/tests/test_agents/test_toolsets.py +0 -716
- package/tools/vds-scripts/audit_orchestrator/tests/test_aggregator_p149.py +0 -171
- package/tools/vds-scripts/audit_orchestrator/tests/test_alembic_migrations.py +0 -287
- package/tools/vds-scripts/audit_orchestrator/tests/test_anchor_allowlist_p149.py +0 -273
- package/tools/vds-scripts/audit_orchestrator/tests/test_audit_otel.py +0 -283
- package/tools/vds-scripts/audit_orchestrator/tests/test_checklist_models.py +0 -583
- package/tools/vds-scripts/audit_orchestrator/tests/test_checks/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_checks/test_base_check.py +0 -211
- package/tools/vds-scripts/audit_orchestrator/tests/test_checks/test_llm_checks.py +0 -126
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_analyze_command.py +0 -400
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_archive_stale_page_cli.py +0 -217
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_bitbucket_metadata_cli.py +0 -354
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_cli_impl_profile_availability.py +0 -114
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_codex_profile.py +0 -174
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_compare_backends_cli.py +0 -449
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_confluence_parent_auto_resolve.py +0 -451
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_corpus_purge_cli.py +0 -290
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_credentials_preflight.py +0 -106
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_debug_bundle.py +0 -37
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_deprecation_phase157.py +0 -484
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_dispatch_concurrency_diagnostics.py +0 -758
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_check_confluence_cli.py +0 -320
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_codex.py +0 -187
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_corpus_status_cli.py +0 -236
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_correlation_cli.py +0 -128
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_crawl_status_cli.py +0 -192
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_credentials_cli.py +0 -86
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_dispatch_status_cli.py +0 -421
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_heartbeat_phase169.py +0 -173
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_hierarchy_status_cli.py +0 -199
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_locks_cli.py +0 -134
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_logs_follow_cli.py +0 -305
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_migration.py +0 -333
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_profile_availability_cli.py +0 -151
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_doctor_skills_policy_cli.py +0 -153
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_evidence_quality_cli.py +0 -307
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_export_debug_bundle_phase36.py +0 -60
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_export_git_manifest_cli.py +0 -172
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_file_removal_phase157e.py +0 -770
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_grounding_classifier.py +0 -226
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_logging.py +0 -49
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_materials_cli.py +0 -9127
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_metadata_completeness_phase92.py +0 -364
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_parent_dispatch_finalization_phase168f.py +0 -111
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_parse_cli.py +0 -590
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_phase117_118_feature_flags.py +0 -219
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_phase164_control_plane.py +0 -718
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_phase165_runner_scripts.py +0 -230
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_preparation_classifications.py +0 -146
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_prepare_cli.py +0 -398
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_publication_quality_gate.py +0 -126
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_publish_system_doc_cli.py +0 -158
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_query_checklist_cli.py +0 -219
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_readiness_cli.py +0 -673
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_readiness_cli_integration.py +0 -689
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_removed_flags_phase92.py +0 -36
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_report_cmds.py +0 -1317
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_run_history_index.py +0 -57
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_run_management.py +0 -1194
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_runtime_profiles_cli.py +0 -1658
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_smart_run_selection.py +0 -1562
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_state_cli.py +0 -2467
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_state_migration.py +0 -339
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_sync_repos_debug_artifacts.py +0 -1109
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_upload_results_cli.py +0 -809
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_validate_checklist.py +0 -178
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_validate_checklist_cli.py +0 -110
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_validate_spec_sync_cli.py +0 -519
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_default_parameters_baseline.py +0 -101
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_options.py +0 -7896
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_project_db_modes.py +0 -6516
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_project_project_scope.py +0 -831
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_project_target.py +0 -611
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_projects_phase131_lifecycle.py +0 -2488
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_projects_phase131_scaffolding.py +0 -96
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_row_key_guard.py +0 -78
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli/test_workflow_summary_artifacts.py +0 -1872
- package/tools/vds-scripts/audit_orchestrator/tests/test_cli_paths_phase2.py +0 -45
- package/tools/vds-scripts/audit_orchestrator/tests/test_clients/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_clients/test_bitbucket_cli_client.py +0 -124
- package/tools/vds-scripts/audit_orchestrator/tests/test_clients/test_cli_parity.py +0 -110
- package/tools/vds-scripts/audit_orchestrator/tests/test_clients/test_confluence_cli_client.py +0 -1149
- package/tools/vds-scripts/audit_orchestrator/tests/test_clients/test_sonarqube_cli_client.py +0 -19
- package/tools/vds-scripts/audit_orchestrator/tests/test_collectors/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_collectors/test_linked_page_tracker.py +0 -118
- package/tools/vds-scripts/audit_orchestrator/tests/test_companion_context_service.py +0 -230
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/conftest.py +0 -11
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/test_compile_artifact.py +0 -465
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/test_cross_provider_critique.py +0 -120
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/test_cross_provider_critique_e2e.py +0 -75
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/test_evaluation.py +0 -515
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/test_runtime_loader.py +0 -537
- package/tools/vds-scripts/audit_orchestrator/tests/test_dspy_modules/test_signatures_normalization.py +0 -172
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_auditor_applicability.py +0 -68
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_checklist_generator.py +0 -1252
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_checklist_projection.py +0 -54
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_confluence_checklist_projection_consistency.py +0 -1696
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_critique_merger_matrix.py +0 -120
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_cross_check_rules.py +0 -459
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_cross_provider_critique.py +0 -55
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_doc_loader.py +0 -73
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_drift_detector.py +0 -34
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_evidence_collectors.py +0 -93
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_lease_timeout.py +0 -114
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_loader.py +0 -350
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_loader_parity.py +0 -179
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_low_confidence_reeval.py +0 -691
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_phase145a_completion.py +0 -209
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_phase31_row_consistency_retry_benchmark.py +0 -150
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_profile_detector.py +0 -286
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_regression_guard.py +0 -53
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_result_merger.py +0 -619
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_row_evaluator.py +0 -15783
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_row_failover.py +0 -215
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_scorer.py +0 -597
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_skill_breakdown_telemetry_fr137.py +0 -421
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_targeted_auto_merge.py +0 -229
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_timeout_failover.py +0 -488
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_timeout_telemetry.py +0 -73
- package/tools/vds-scripts/audit_orchestrator/tests/test_engine/test_validator.py +0 -419
- package/tools/vds-scripts/audit_orchestrator/tests/test_incremental/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_incremental/test_diff_detector.py +0 -111
- package/tools/vds-scripts/audit_orchestrator/tests/test_infra_persistence.py +0 -291
- package/tools/vds-scripts/audit_orchestrator/tests/test_integration/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_integration/test_phase3_integration.py +0 -516
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_cache.py +0 -670
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_codex_model_builder.py +0 -281
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_codex_oauth.py +0 -330
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_codex_streaming.py +0 -433
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_cost_tracker.py +0 -27
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_engine.py +0 -876
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_evaluator.py +0 -212
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_instruction_templates.py +0 -639
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_prompt_metadata.py +0 -97
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_prompts.py +0 -660
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_provider.py +0 -330
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_provider_contract_sync.py +0 -18
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_reasoning_effort_validation.py +0 -565
- package/tools/vds-scripts/audit_orchestrator/tests/test_llm/test_schemas.py +0 -827
- package/tools/vds-scripts/audit_orchestrator/tests/test_logging_config.py +0 -297
- package/tools/vds-scripts/audit_orchestrator/tests/test_models/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_models/test_enums.py +0 -185
- package/tools/vds-scripts/audit_orchestrator/tests/test_models/test_findings.py +0 -1159
- package/tools/vds-scripts/audit_orchestrator/tests/test_models/test_project_profile.py +0 -307
- package/tools/vds-scripts/audit_orchestrator/tests/test_models/test_registry.py +0 -532
- package/tools/vds-scripts/audit_orchestrator/tests/test_models/test_template.py +0 -708
- package/tools/vds-scripts/audit_orchestrator/tests/test_observability/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_observability/test_metrics.py +0 -60
- package/tools/vds-scripts/audit_orchestrator/tests/test_paths_config_phase2.py +0 -21
- package/tools/vds-scripts/audit_orchestrator/tests/test_performance/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_performance/test_fr79_performance_guardrails.py +0 -199
- package/tools/vds-scripts/audit_orchestrator/tests/test_phase156_hardening.py +0 -498
- package/tools/vds-scripts/audit_orchestrator/tests/test_phase93_regression_guards.py +0 -123
- package/tools/vds-scripts/audit_orchestrator/tests/test_pipeline_integration.py +0 -517
- package/tools/vds-scripts/audit_orchestrator/tests/test_profiles/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_profiles/test_detection.py +0 -146
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_bitbucket_link_resolver.py +0 -55
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_checklist_renderer.py +0 -84
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_checklist_renderer_projection.py +0 -97
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_confluence_macros.py +0 -58
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_confluence_publisher.py +0 -2171
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_evidence_links.py +0 -129
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_field_sanitizer.py +0 -108
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_hierarchy_publisher.py +0 -134
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_incremental_plan_parser.py +0 -62
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_live_data_injector.py +0 -48
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_macro_builder.py +0 -22
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_p161_confluence_optimization.py +0 -168
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_priority_renderer.py +0 -96
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_project_aggregate_renderer.py +0 -364
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_storage_validation.py +0 -273
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_summary_refactor.py +0 -118
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_system_doc_hierarchy.py +0 -50
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_table_builder.py +0 -23
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_vietnamese_templates.py +0 -37
- package/tools/vds-scripts/audit_orchestrator/tests/test_publishers/test_wiring_integration.py +0 -290
- package/tools/vds-scripts/audit_orchestrator/tests/test_reports/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_reports/test_aggregation_report.py +0 -181
- package/tools/vds-scripts/audit_orchestrator/tests/test_reports/test_checklist_generator.py +0 -258
- package/tools/vds-scripts/audit_orchestrator/tests/test_reports/test_gap_report.py +0 -73
- package/tools/vds-scripts/audit_orchestrator/tests/test_reports/test_json_generator.py +0 -317
- package/tools/vds-scripts/audit_orchestrator/tests/test_result_merger_p149.py +0 -347
- package/tools/vds-scripts/audit_orchestrator/tests/test_route_mode_p149.py +0 -178
- package/tools/vds-scripts/audit_orchestrator/tests/test_rubric_parser.py +0 -179
- package/tools/vds-scripts/audit_orchestrator/tests/test_scorer.py +0 -110
- package/tools/vds-scripts/audit_orchestrator/tests/test_state/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_state/test_sparse_coverage.py +0 -117
- package/tools/vds-scripts/audit_orchestrator/tests/test_workflow/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/test_workflow/test_langgraph_workflow.py +0 -2072
- package/tools/vds-scripts/audit_orchestrator/tests/test_workflow/test_p161_runtime_hardening.py +0 -341
- package/tools/vds-scripts/audit_orchestrator/tests/test_workflow_cmds_p149.py +0 -112
- package/tools/vds-scripts/audit_orchestrator/tests/test_workflow_cmds_p172.py +0 -126
- package/tools/vds-scripts/audit_orchestrator/tests/test_workflow_guidance_p150.py +0 -95
- package/tools/vds-scripts/audit_orchestrator/tests/unit/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_agentic_investigator_phase115.py +0 -42
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_requirement_analysis_agent.py +0 -412
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_security_agent_updates.py +0 -131
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_security_scanner.py +0 -397
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_skill_executor.py +0 -316
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_skill_fallback.py +0 -299
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_skill_policy.py +0 -520
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_skill_telemetry.py +0 -306
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_synthesis_fixes.py +0 -761
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_tool_argument_robustness.py +0 -272
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_tool_registry.py +0 -2548
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_tool_registry_ast_grep.py +0 -87
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_tool_registry_phase123_scoping.py +0 -353
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_tool_registry_phase94_ff.py +0 -445
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_tool_registry_vector_search_phase115.py +0 -35
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_utils.py +0 -1007
- package/tools/vds-scripts/audit_orchestrator/tests/unit/agents/test_vector_evidence_toolset.py +0 -622
- package/tools/vds-scripts/audit_orchestrator/tests/unit/cli/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/cli/test_workflow_cli.py +0 -123
- package/tools/vds-scripts/audit_orchestrator/tests/unit/collectors/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/collectors/test_cache_guard.py +0 -479
- package/tools/vds-scripts/audit_orchestrator/tests/unit/collectors/test_checklist_parser_phase120.py +0 -55
- package/tools/vds-scripts/audit_orchestrator/tests/unit/collectors/test_diagram_extractor.py +0 -467
- package/tools/vds-scripts/audit_orchestrator/tests/unit/collectors/test_enrichment_extractor.py +0 -59
- package/tools/vds-scripts/audit_orchestrator/tests/unit/collectors/test_graphify_collector.py +0 -158
- package/tools/vds-scripts/audit_orchestrator/tests/unit/completeness/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/completeness/test_completeness.py +0 -563
- package/tools/vds-scripts/audit_orchestrator/tests/unit/completeness/test_freshness_evaluator.py +0 -493
- package/tools/vds-scripts/audit_orchestrator/tests/unit/completeness/test_material_cache_metrics.py +0 -365
- package/tools/vds-scripts/audit_orchestrator/tests/unit/completeness/test_material_completeness_service.py +0 -2736
- package/tools/vds-scripts/audit_orchestrator/tests/unit/config_resolution/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/config_resolution/test_discovery.py +0 -47
- package/tools/vds-scripts/audit_orchestrator/tests/unit/config_resolution/test_redaction.py +0 -15
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_ad_hoc_analyzer.py +0 -576
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_agent_loop.py +0 -1896
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_anchor_filter_cl003.py +0 -181
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_batch_evidence_cache.py +0 -155
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_batch_processor.py +0 -3608
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_checklist_contract.py +0 -55
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_checklist_scoping.py +0 -371
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_config_companion_phase123.py +0 -142
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_config_evidence_phase123.py +0 -249
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_confluence_checklist_contract_export_parity.py +0 -813
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_cross_repo_config_phase122.py +0 -613
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_dspy_requirement_classifier.py +0 -517
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_evidence_diversity.py +0 -144
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_evidence_truncation.py +0 -108
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_grounding_validator.py +0 -127
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_guidance_injection_phase120.py +0 -105
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_instruction_analysis_phase122.py +0 -761
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_instruction_pre_filter_phase167.py +0 -334
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_llm_row_evaluator_retries.py +0 -3684
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_loader_phase123.py +0 -345
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_manual_check_gating_phase122.py +0 -474
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_parallel_eval.py +0 -263
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_phase122_verifier_phase122.py +0 -169
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_phase166_route_failover.py +0 -437
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_post_eval_cl003_shared_lib.py +0 -267
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_postproc_streaming.py +0 -194
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_pre_eval_gating_phase122.py +0 -362
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_prepare_topology_coverage.py +0 -247
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_private_dns_sanitization_phase104.py +0 -397
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_project_evidence_context.py +0 -450
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_project_knowledge_store.py +0 -487
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_project_topology.py +0 -1142
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_provider_failure_classifier.py +0 -195
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_readiness_extractor.py +0 -496
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_readiness_synthesizer.py +0 -653
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_repo_type_classifier.py +0 -303
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_analysis.py +0 -508
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_analysis_execution_scope.py +0 -239
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_analysis_phase114.py +0 -919
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_analysis_phase115.py +0 -97
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_analysis_shared_lib.py +0 -340
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_classification_drift.py +0 -729
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_classification_nlp.py +0 -670
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_requirement_scope_phase122.py +0 -615
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_route_matrix.py +0 -258
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_route_override.py +0 -141
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_routing_precision.py +0 -650
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_row_evaluator_dual_evidence.py +0 -2987
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_row_evaluator_instruction_runtime_phase122.py +0 -365
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_row_evaluator_runtime.py +0 -830
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_runtime_hardening_phase122.py +0 -225
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_scoped_na_skip.py +0 -107
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_scoring_enhancements.py +0 -404
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_shared_library_retrieval_phase123.py +0 -441
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_shared_library_routing_phase123.py +0 -279
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_shared_resource_indexing_phase122.py +0 -188
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_skill_recommendation.py +0 -225
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_skill_routing_cl003_shared_lib.py +0 -338
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_skills_toolset.py +0 -319
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_stability_metric.py +0 -60
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_target_selector.py +0 -958
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_token_tracker.py +0 -121
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_token_wiring.py +0 -119
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_tool_first_planner.py +0 -7103
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_topology_knowledge_persistence.py +0 -332
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_topology_query_service.py +0 -55
- package/tools/vds-scripts/audit_orchestrator/tests/unit/engine/test_unverified_ref_retry.py +0 -909
- package/tools/vds-scripts/audit_orchestrator/tests/unit/models/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/models/test_evidence.py +0 -515
- package/tools/vds-scripts/audit_orchestrator/tests/unit/models/test_gaps.py +0 -422
- package/tools/vds-scripts/audit_orchestrator/tests/unit/models/test_readiness.py +0 -428
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_confluence_hierarchy.py +0 -227
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_project_title_generation.py +0 -335
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_publisher_registry_helpers.py +0 -290
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_publisher_registry_integration.py +0 -557
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_readiness_renderer.py +0 -381
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_repo_title_consistency.py +0 -266
- package/tools/vds-scripts/audit_orchestrator/tests/unit/publishers/test_upload_hierarchy_integration.py +0 -470
- package/tools/vds-scripts/audit_orchestrator/tests/unit/scripts/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/scripts/test_benchmark_dspy.py +0 -177
- package/tools/vds-scripts/audit_orchestrator/tests/unit/scripts/test_benchmark_nlp_accuracy.py +0 -72
- package/tools/vds-scripts/audit_orchestrator/tests/unit/scripts/test_benchmark_retrieval_modes.py +0 -123
- package/tools/vds-scripts/audit_orchestrator/tests/unit/scripts/test_verify_phase111_requirement_analysis.py +0 -409
- package/tools/vds-scripts/audit_orchestrator/tests/unit/seed/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/seed/test_seed_chain_cli.py +0 -277
- package/tools/vds-scripts/audit_orchestrator/tests/unit/seed/test_seed_loader.py +0 -502
- package/tools/vds-scripts/audit_orchestrator/tests/unit/skills/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/skills/test_skill_routing.py +0 -209
- package/tools/vds-scripts/audit_orchestrator/tests/unit/sources/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/sources/test_bitbucket_source.py +0 -66
- package/tools/vds-scripts/audit_orchestrator/tests/unit/sources/test_non_retryable_markers.py +0 -88
- package/tools/vds-scripts/audit_orchestrator/tests/unit/sources/test_repo_info.py +0 -212
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_completeness.py +0 -598
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_dispatch_events_contract_phase169.py +0 -100
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_dispatch_hardening_phase158.py +0 -392
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_dispatch_persistence_phase157.py +0 -914
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_embedding_client.py +0 -64
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_get_latest_completed_run.py +0 -313
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_heartbeat_phase169.py +0 -109
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_hybrid_search.py +0 -398
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_normalize_url.py +0 -262
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_phase152_query_surface.py +0 -59
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_phase98_confluence_document_model.py +0 -202
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_published_pages.py +0 -754
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_readiness_helpers.py +0 -193
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_run_ledger.py +0 -522
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_run_management.py +0 -378
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_schema_contract_phase170.py +0 -755
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_state_cmds.py +0 -231
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_state_loaders.py +0 -2151
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_state_run_api.py +0 -2226
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_store.py +0 -1435
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_store_dispatch.py +0 -646
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_store_dispatch_status_view.py +0 -181
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_store_scope.py +0 -213
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_utilization_persist_phase169.py +0 -77
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_vds_search.py +0 -263
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_vector_index_api.py +0 -319
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_vector_index_runtime.py +0 -175
- package/tools/vds-scripts/audit_orchestrator/tests/unit/state/test_vector_index_store.py +0 -1756
- package/tools/vds-scripts/audit_orchestrator/tests/unit/sync/__init__.py +0 -0
- package/tools/vds-scripts/audit_orchestrator/tests/unit/sync/test_repo_sync.py +0 -257
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_artifact_exclusion.py +0 -119
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_auto_promote_phase158.py +0 -337
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_carry_forward_artifact_filtering.py +0 -317
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_checklist_precache_p160a.py +0 -416
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_cli_decomposition_fr219.py +0 -269
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_code_chunk_carry_forward.py +0 -203
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_config_coherence.py +0 -180
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_config_secret_policy.py +0 -522
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_corpus_project_id_migration.py +0 -318
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_corpus_status_diagnostics.py +0 -239
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_department_priority_ordering.py +0 -131
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_dispatch_coordinator_phase158.py +0 -402
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_dispatch_job_identity_p167a.py +0 -238
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_dispatch_ramp_up_phase171.py +0 -434
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_dispatcher.py +0 -911
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_doc_type_en_inference.py +0 -246
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_early_exit_unchunked_repos.py +0 -111
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_errors.py +0 -237
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_errors_taxonomy.py +0 -83
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_extract_chunking_config_phase98.py +0 -73
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_extract_cmds_state_helpers.py +0 -33
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_extract_docs_code_chunking.py +0 -260
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_finalize_dispatch_run_phase168.py +0 -341
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_identity.py +0 -221
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_infrastructure_detection.py +0 -441
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_junction_table_phase95.py +0 -259
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_late_binding_assignment_p167c.py +0 -286
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_misc_cmds_fr224_225_hardening.py +0 -194
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_p172_integration.py +0 -306
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_parent_provider_preflight.py +0 -118
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_performance_gates_phase92.py +0 -141
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_performance_gates_phase93.py +0 -50
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase115_search_strategy.py +0 -106
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase154_title_consistency.py +0 -117
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase155_param_forwarding.py +0 -304
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase158_concurrency_defaults.py +0 -207
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase170_doctor_schema.py +0 -319
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase170_regression.py +0 -334
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase94_corpus_lifecycle.py +0 -307
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_phase96_repo_key_migration.py +0 -305
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_pipelined_scheduling.py +0 -130
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_profile_availability_probe.py +0 -616
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_profile_aware_row_timeout.py +0 -102
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_profile_timeout_stagger_p160cd.py +0 -205
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_progress_summary_phase169.py +0 -96
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_registry_checklist_diagnostics.py +0 -124
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_resume_manifest_p167b.py +0 -268
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_risk_mitigations_p160e1.py +0 -348
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_single_row_shards_p160b.py +0 -357
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_state_repo_discovery.py +0 -504
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_sync_metadata_entries.py +0 -57
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_task_models.py +0 -1796
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_utilization_telemetry_p167e.py +0 -259
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_vietnamese_fts_hardening.py +0 -160
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_workflow_phase98_enrichment.py +0 -92
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_workflow_project_merge_materialization.py +0 -322
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_workflow_row_key_migration_guard.py +0 -88
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_workflow_short_circuit_phase121.py +0 -564
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_workflow_single_target_row_context.py +0 -49
- package/tools/vds-scripts/audit_orchestrator/tests/unit/test_zero_result_messaging.py +0 -76
- package/tools/vds-scripts/bandit-report.json +0 -2974
- package/tools/vds-scripts/brd_orchestrator/README.md +0 -29
- package/tools/vds-scripts/brd_orchestrator/pyproject.toml +0 -63
- package/tools/vds-scripts/brd_orchestrator/src/vds_brd_orchestrator/__init__.py +0 -17
- package/tools/vds-scripts/brd_orchestrator/src/vds_brd_orchestrator/cli.py +0 -187
- package/tools/vds-scripts/brd_orchestrator/src/vds_brd_orchestrator/validator.py +0 -121
- package/tools/vds-scripts/brd_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/brd_orchestrator/tests/test_cli.py +0 -62
- package/tools/vds-scripts/brd_orchestrator/tests/test_validator.py +0 -33
- package/tools/vds-scripts/circular_dependency_orchestrator/README.md +0 -30
- package/tools/vds-scripts/circular_dependency_orchestrator/pyproject.toml +0 -43
- package/tools/vds-scripts/circular_dependency_orchestrator/src/vds_circular_dependency_orchestrator/__init__.py +0 -16
- package/tools/vds-scripts/circular_dependency_orchestrator/src/vds_circular_dependency_orchestrator/cli.py +0 -904
- package/tools/vds-scripts/circular_dependency_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/circular_dependency_orchestrator/tests/unit/__init__.py +0 -0
- package/tools/vds-scripts/circular_dependency_orchestrator/tests/unit/test_cli.py +0 -354
- package/tools/vds-scripts/coverage.json +0 -1
- package/tools/vds-scripts/create_pr.py +0 -57
- package/tools/vds-scripts/diagram_generator/README.md +0 -663
- package/tools/vds-scripts/diagram_generator/ci_validate.sh +0 -16
- package/tools/vds-scripts/diagram_generator/docs-nttc/projects/INSURANCE/analysis/current-state/insurance-claim-business/insurance-claim-business-component.png +0 -0
- package/tools/vds-scripts/diagram_generator/docs-nttc/projects/INSURANCE/analysis/current-state/insurance-claim-business/insurance-claim-business-component.puml +0 -23
- package/tools/vds-scripts/diagram_generator/docs-nttc/projects/INSURANCE/analysis/current-state/insurance-claim-business/insurance-claim-business-sequence.png +0 -0
- package/tools/vds-scripts/diagram_generator/docs-nttc/projects/INSURANCE/analysis/current-state/insurance-claim-business/insurance-claim-business-sequence.puml +0 -21
- package/tools/vds-scripts/diagram_generator/docs-nttc/projects/INSURANCE/analysis/current-state/insurance-claim-business/insurance-claim-business-usecase.png +0 -0
- package/tools/vds-scripts/diagram_generator/docs-nttc/projects/INSURANCE/analysis/current-state/insurance-claim-business/insurance-claim-business-usecase.puml +0 -14
- package/tools/vds-scripts/diagram_generator/examples/github-actions-validate.yml +0 -39
- package/tools/vds-scripts/diagram_generator/generate_all_diagrams.py +0 -827
- package/tools/vds-scripts/diagram_generator/generate_insurance_c4_diagrams.py +0 -261
- package/tools/vds-scripts/diagram_generator/generate_insurance_c4_quick.py +0 -486
- package/tools/vds-scripts/diagram_generator/pyproject.toml +0 -28
- package/tools/vds-scripts/diagram_generator/render_png.py +0 -59
- package/tools/vds-scripts/diagram_generator/src/vds_diagram_generator/__init__.py +0 -3
- package/tools/vds-scripts/diagram_generator/src/vds_diagram_generator/cli.py +0 -50
- package/tools/vds-scripts/diagram_generator/test_c4_hierarchical.py +0 -142
- package/tools/vds-scripts/diagram_generator/test_c4_quick.py +0 -131
- package/tools/vds-scripts/diagram_generator/tests/__init__.py +0 -0
- package/tools/vds-scripts/diagram_generator/tests/test_analyzer_completeness.py +0 -260
- package/tools/vds-scripts/diagram_generator/tests/test_c4_syntax_correctness.py +0 -138
- package/tools/vds-scripts/diagram_generator/tests/test_component_coverage.py +0 -182
- package/tools/vds-scripts/diagram_generator/tests/test_mermaid_output.py +0 -80
- package/tools/vds-scripts/diagram_generator/tests/test_png_generation.py +0 -112
- package/tools/vds-scripts/diagram_generator/tests/test_scenario_templates.py +0 -15
- package/tools/vds-scripts/diagram_generator/tests/test_sequence_accuracy.py +0 -93
- package/tools/vds-scripts/diagram_generator/tests/test_structurizr_export.py +0 -177
- package/tools/vds-scripts/diagram_generator/tests/test_style_consistency.py +0 -174
- package/tools/vds-scripts/diagram_generator/tests/test_usecase_generator.py +0 -201
- package/tools/vds-scripts/diagram_generator/tests/test_usecase_integration.py +0 -124
- package/tools/vds-scripts/docker/compose.phase2-verification.yml +0 -31
- package/tools/vds-scripts/docker-compose.openapi-validator.yml +0 -14
- package/tools/vds-scripts/excel_orchestrator/README.md +0 -288
- package/tools/vds-scripts/excel_orchestrator/RESEARCH_BASED_UPDATES_REPORT.md +0 -261
- package/tools/vds-scripts/excel_orchestrator/add_essential_missing_effort.py +0 -255
- package/tools/vds-scripts/excel_orchestrator/adjust_effort_complexity.py +0 -184
- package/tools/vds-scripts/excel_orchestrator/brd_analysis_and_task_breakdown.py +0 -632
- package/tools/vds-scripts/excel_orchestrator/brd_analysis_comprehensive.py +0 -1029
- package/tools/vds-scripts/excel_orchestrator/check_overlaps_and_brd_coverage.py +0 -570
- package/tools/vds-scripts/excel_orchestrator/clean_remarks_column.py +0 -127
- package/tools/vds-scripts/excel_orchestrator/comprehensive_brd_check.py +0 -322
- package/tools/vds-scripts/excel_orchestrator/create_buffered_summary.py +0 -119
- package/tools/vds-scripts/excel_orchestrator/create_service_totals_sheet.py +0 -118
- package/tools/vds-scripts/excel_orchestrator/examples/basic_operations.py +0 -85
- package/tools/vds-scripts/excel_orchestrator/expand_all_tasks.py +0 -341
- package/tools/vds-scripts/excel_orchestrator/expand_tasks.py +0 -304
- package/tools/vds-scripts/excel_orchestrator/fill_brd_references.py +0 -347
- package/tools/vds-scripts/excel_orchestrator/fill_remarks_and_colors.py +0 -132
- package/tools/vds-scripts/excel_orchestrator/finalize_brd_and_cleanup.py +0 -295
- package/tools/vds-scripts/excel_orchestrator/finalize_brd_coverage.py +0 -327
- package/tools/vds-scripts/excel_orchestrator/fix_all_formulas.py +0 -99
- package/tools/vds-scripts/excel_orchestrator/fix_detail_presentation.py +0 -113
- package/tools/vds-scripts/excel_orchestrator/fix_presentation_and_effort.py +0 -116
- package/tools/vds-scripts/excel_orchestrator/fix_presentation_consistency.py +0 -231
- package/tools/vds-scripts/excel_orchestrator/fix_remarks_matching.py +0 -179
- package/tools/vds-scripts/excel_orchestrator/group_tasks_by_service_id.py +0 -210
- package/tools/vds-scripts/excel_orchestrator/increase_brd_coverage.py +0 -497
- package/tools/vds-scripts/excel_orchestrator/increase_effort_complexity.py +0 -155
- package/tools/vds-scripts/excel_orchestrator/organize_and_deduplicate.py +0 -273
- package/tools/vds-scripts/excel_orchestrator/pyproject.toml +0 -64
- package/tools/vds-scripts/excel_orchestrator/rebuild_all_formulas.py +0 -146
- package/tools/vds-scripts/excel_orchestrator/remove_base_multiplier_and_check_duplicates.py +0 -310
- package/tools/vds-scripts/excel_orchestrator/remove_duplicate_brd_tasks.py +0 -137
- package/tools/vds-scripts/excel_orchestrator/research_based_updates.py +0 -457
- package/tools/vds-scripts/excel_orchestrator/restore_e_values.py +0 -172
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/__init__.py +0 -5
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/cli.py +0 -746
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/config.py +0 -74
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/converters.py +0 -226
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/errors.py +0 -88
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/excel_client.py +0 -443
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/formatters.py +0 -211
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/logging.py +0 -57
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/source_contract.py +0 -29
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/target_state_status.py +0 -837
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/ulnc_alignment.py +0 -1291
- package/tools/vds-scripts/excel_orchestrator/src/vds_excel_orchestrator/validators.py +0 -164
- package/tools/vds-scripts/excel_orchestrator/sync_detail_and_total_sheets.py +0 -211
- package/tools/vds-scripts/excel_orchestrator/tests/__init__.py +0 -1
- package/tools/vds-scripts/excel_orchestrator/tests/conftest.py +0 -36
- package/tools/vds-scripts/excel_orchestrator/tests/test_cli.py +0 -383
- package/tools/vds-scripts/excel_orchestrator/tests/test_excel_client.py +0 -129
- package/tools/vds-scripts/excel_orchestrator/tests/test_ulnc_alignment.py +0 -373
- package/tools/vds-scripts/excel_orchestrator/tests/test_validators.py +0 -64
- package/tools/vds-scripts/excel_orchestrator/update_api_database_effort.py +0 -261
- package/tools/vds-scripts/excel_orchestrator/update_buffers_inline.py +0 -115
- package/tools/vds-scripts/excel_orchestrator/update_complex_services_and_add_new.py +0 -336
- package/tools/vds-scripts/excel_orchestrator/update_responsibility_and_fix_rows.py +0 -208
- package/tools/vds-scripts/excel_orchestrator/update_task_breakdown_vietnamese.py +0 -309
- package/tools/vds-scripts/excel_orchestrator/update_vietnamese_and_responsibility.py +0 -415
- package/tools/vds-scripts/excel_orchestrator/verify_brd_coverage_comprehensive.py +0 -401
- package/tools/vds-scripts/hexagonal_orchestrator/README.md +0 -530
- package/tools/vds-scripts/hexagonal_orchestrator/pyproject.toml +0 -48
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/__init__.py +0 -39
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/analyzers/__init__.py +0 -19
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/analyzers/base.py +0 -95
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/analyzers/fallback.py +0 -614
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/analyzers/java.py +0 -372
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/analyzers/python.py +0 -437
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/cache.py +0 -331
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/classifier.py +0 -263
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/cli.py +0 -554
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/config.py +0 -577
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/models.py +0 -159
- package/tools/vds-scripts/hexagonal_orchestrator/src/vds_hexagonal_orchestrator/profiler.py +0 -451
- package/tools/vds-scripts/hexagonal_orchestrator/test-config.yaml +0 -38
- package/tools/vds-scripts/hexagonal_orchestrator/tests/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/adapter/driven/persistence/InMemoryUserRepository.java +0 -62
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/adapter/driving/api/UserController.java +0 -101
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/application/port/EmailService.java +0 -33
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/application/port/UserRepository.java +0 -45
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/application/usecase/CreateUser.java +0 -58
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/domain/entity/Email.java +0 -80
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-compliant/domain/entity/User.java +0 -98
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-noncompliant/domain/User.java +0 -64
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-with-frameworks/domain/Menu.java +0 -13
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/java-with-frameworks/domain/Product.java +0 -16
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/application/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/application/ports/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/application/ports/email_service.py +0 -60
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/application/ports/user_repository.py +0 -78
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/domain/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/domain/entities/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/domain/entities/user.py +0 -56
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/domain/value_objects/__init__.py +0 -1
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-compliant/domain/value_objects/email.py +0 -63
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-noncompliant/application/user_service.py +0 -1837
- package/tools/vds-scripts/hexagonal_orchestrator/tests/fixtures/python-noncompliant/domain/user.py +0 -43
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_cache.py +0 -458
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_cli_integration.py +0 -942
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_cli_unit.py +0 -557
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_cross_repo_pollution.py +0 -275
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_foundation.py +0 -129
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_integration.py +0 -1524
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_java_analyzer.py +0 -642
- package/tools/vds-scripts/hexagonal_orchestrator/tests/test_timing_unit.py +0 -60
- package/tools/vds-scripts/intellij_orchestrator/README.md +0 -55
- package/tools/vds-scripts/intellij_orchestrator/pyproject.toml +0 -64
- package/tools/vds-scripts/intellij_orchestrator/src/vds_intellij_orchestrator/__init__.py +0 -17
- package/tools/vds-scripts/intellij_orchestrator/src/vds_intellij_orchestrator/cli.py +0 -210
- package/tools/vds-scripts/intellij_orchestrator/src/vds_intellij_orchestrator/core.py +0 -260
- package/tools/vds-scripts/intellij_orchestrator/tests/__init__.py +0 -1
- package/tools/vds-scripts/intellij_orchestrator/tests/test_cli.py +0 -112
- package/tools/vds-scripts/intellij_orchestrator/tests/test_core.py +0 -83
- package/tools/vds-scripts/links_orchestrator/README.md +0 -63
- package/tools/vds-scripts/links_orchestrator/pyproject.toml +0 -64
- package/tools/vds-scripts/links_orchestrator/src/vds_links_orchestrator/__init__.py +0 -10
- package/tools/vds-scripts/links_orchestrator/src/vds_links_orchestrator/cli.py +0 -254
- package/tools/vds-scripts/links_orchestrator/src/vds_links_orchestrator/validator.py +0 -244
- package/tools/vds-scripts/links_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/links_orchestrator/tests/test_cli.py +0 -128
- package/tools/vds-scripts/links_orchestrator/tests/test_validator.py +0 -76
- package/tools/vds-scripts/lsp_orchestrator/.dockerignore +0 -69
- package/tools/vds-scripts/lsp_orchestrator/ARCHITECTURE.md +0 -383
- package/tools/vds-scripts/lsp_orchestrator/CODE_QUALITY_IMPROVEMENTS.md +0 -196
- package/tools/vds-scripts/lsp_orchestrator/COMMANDS.md +0 -870
- package/tools/vds-scripts/lsp_orchestrator/Dockerfile +0 -59
- package/tools/vds-scripts/lsp_orchestrator/IMPLEMENTATION_SUMMARY.md +0 -490
- package/tools/vds-scripts/lsp_orchestrator/LSP_ISSUES_AND_FINDINGS.md +0 -380
- package/tools/vds-scripts/lsp_orchestrator/README.md +0 -616
- package/tools/vds-scripts/lsp_orchestrator/SETUP.md +0 -143
- package/tools/vds-scripts/lsp_orchestrator/TEST_COVERAGE_SUMMARY.md +0 -255
- package/tools/vds-scripts/lsp_orchestrator/VERIFICATION_CHECKLIST.md +0 -814
- package/tools/vds-scripts/lsp_orchestrator/docker-compose.yml +0 -102
- package/tools/vds-scripts/lsp_orchestrator/docs/FOR_LLMS.md +0 -401
- package/tools/vds-scripts/lsp_orchestrator/docs/explanation/lsp-response-matching.md +0 -79
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/automate-with-json.md +0 -159
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/docker-mode.md +0 -256
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/navigate-code.md +0 -116
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/parallel-processing.md +0 -179
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/project-tool-detection.md +0 -320
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/type-check-code.md +0 -46
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/use-daemon-mode.md +0 -78
- package/tools/vds-scripts/lsp_orchestrator/docs/how-to-guides/wsl2-optimization.md +0 -227
- package/tools/vds-scripts/lsp_orchestrator/docs/index.md +0 -88
- package/tools/vds-scripts/lsp_orchestrator/docs/operator-hover-definition.md +0 -143
- package/tools/vds-scripts/lsp_orchestrator/docs/reference/commands.md +0 -581
- package/tools/vds-scripts/lsp_orchestrator/docs/reference/configuration.md +0 -422
- package/tools/vds-scripts/lsp_orchestrator/docs/tutorials/00-quick-start.md +0 -169
- package/tools/vds-scripts/lsp_orchestrator/pyproject.toml +0 -63
- package/tools/vds-scripts/lsp_orchestrator/src/test_file.py +0 -5
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/__init__.py +0 -3
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/aggregator.py +0 -340
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/basedpyright_runner.py +0 -167
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/cli.py +0 -3370
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/code_actions.py +0 -79
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/core.py +0 -3295
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/daemon_client.py +0 -672
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/daemon_manager.py +0 -577
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/daemon_server.py +0 -1040
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/detectors/__init__.py +0 -9
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/detectors/project_detector.py +0 -537
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/formatters.py +0 -141
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/ipc_protocol.py +0 -225
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/lsp_client.py +0 -957
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/lsp_router.py +0 -335
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/mcp_server.py +0 -181
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/models/__init__.py +0 -201
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/models/project_detector.py +0 -646
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/models/project_tools.py +0 -114
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/models.py +0 -399
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/mypy_runner.py +0 -209
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/protocols.py +0 -52
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/ruff_lsp_client.py +0 -109
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/ruff_runner.py +0 -44
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/utils.py +0 -959
- package/tools/vds-scripts/lsp_orchestrator/src/vds_lsp_orchestrator/workspace_indexer.py +0 -1037
- package/tools/vds-scripts/lsp_orchestrator/test_workspace_lsp.py +0 -6
- package/tools/vds-scripts/lsp_orchestrator/tests/__init__.py +0 -1
- package/tools/vds-scripts/lsp_orchestrator/tests/conftest.py +0 -6
- package/tools/vds-scripts/lsp_orchestrator/tests/test_aggregator.py +0 -59
- package/tools/vds-scripts/lsp_orchestrator/tests/test_cli.py +0 -111
- package/tools/vds-scripts/lsp_orchestrator/tests/test_detect_tools_command.py +0 -186
- package/tools/vds-scripts/lsp_orchestrator/tests/test_formatter_linter_detection.py +0 -519
- package/tools/vds-scripts/lsp_orchestrator/tests/test_integration_phase9_10_11.py +0 -367
- package/tools/vds-scripts/lsp_orchestrator/tests/test_mypy_runner.py +0 -482
- package/tools/vds-scripts/lsp_orchestrator/tests/test_package_manager_detection.py +0 -399
- package/tools/vds-scripts/lsp_orchestrator/tests/test_phase10.py +0 -389
- package/tools/vds-scripts/lsp_orchestrator/tests/test_phase11.py +0 -327
- package/tools/vds-scripts/lsp_orchestrator/tests/test_phase12_integration.py +0 -634
- package/tools/vds-scripts/lsp_orchestrator/tests/test_phase9.py +0 -196
- package/tools/vds-scripts/lsp_orchestrator/tests/test_project_detector.py +0 -377
- package/tools/vds-scripts/lsp_orchestrator/tests/test_test_runner_detection.py +0 -549
- package/tools/vds-scripts/lsp_orchestrator/tests/test_type_checker_routing.py +0 -362
- package/tools/vds-scripts/lsp_orchestrator/tests/test_workspace_indexer.py +0 -144
- package/tools/vds-scripts/markdown_orchestrator/README.md +0 -72
- package/tools/vds-scripts/markdown_orchestrator/pyproject.toml +0 -39
- package/tools/vds-scripts/markdown_orchestrator/src/vds_markdown_orchestrator/__init__.py +0 -5
- package/tools/vds-scripts/markdown_orchestrator/src/vds_markdown_orchestrator/cli.py +0 -102
- package/tools/vds-scripts/multi_agent_orchestrator/Dockerfile +0 -65
- package/tools/vds-scripts/multi_agent_orchestrator/README.md +0 -306
- package/tools/vds-scripts/multi_agent_orchestrator/postman/README.md +0 -264
- package/tools/vds-scripts/multi_agent_orchestrator/postman/TEST_RESULTS_SUMMARY.md +0 -197
- package/tools/vds-scripts/multi_agent_orchestrator/postman/VDS-Multi-Agent-Orchestrator-API.postman_collection.json +0 -1010
- package/tools/vds-scripts/multi_agent_orchestrator/postman/environments/local-development.postman_environment.json +0 -55
- package/tools/vds-scripts/multi_agent_orchestrator/postman/test-results.json +0 -24146
- package/tools/vds-scripts/multi_agent_orchestrator/pyproject.toml +0 -63
- package/tools/vds-scripts/multi_agent_orchestrator/run_api.py +0 -9
- package/tools/vds-scripts/multi_agent_orchestrator/run_mock_api.py +0 -9
- package/tools/vds-scripts/multi_agent_orchestrator/simple_test.py +0 -53
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/__init__.py +0 -25
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/agent_pool.py +0 -433
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/api/__init__.py +0 -5
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/api/main.py +0 -722
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/api/mock_main.py +0 -812
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/change_log.py +0 -515
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/cli.py +0 -424
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/config.py +0 -220
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/conflict_resolver.py +0 -462
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/coordinator.py +0 -627
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/models.py +0 -389
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/progress_dashboard.py +0 -380
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/redis_client.py +0 -245
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/scheduler_subscriber.py +0 -272
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/task_manager.py +0 -536
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/task_tracking.py +0 -550
- package/tools/vds-scripts/multi_agent_orchestrator/src/vds_multi_agent_orchestrator/vds_ai_memory_client.py +0 -352
- package/tools/vds-scripts/multi_agent_orchestrator/test_complete_system.py +0 -149
- package/tools/vds-scripts/multi_agent_orchestrator/test_infrastructure_only.py +0 -194
- package/tools/vds-scripts/multi_agent_orchestrator/test_integration.py +0 -108
- package/tools/vds-scripts/multi_agent_orchestrator/tests/__init__.py +0 -1
- package/tools/vds-scripts/multi_agent_orchestrator/tests/test_agent_registration_credential_validator.py +0 -223
- package/tools/vds-scripts/multi_agent_orchestrator/tests/test_config.py +0 -210
- package/tools/vds-scripts/multi_agent_orchestrator/tests/test_models.py +0 -195
- package/tools/vds-scripts/multi_agent_orchestrator/tests/test_w9_agent_routes.py +0 -321
- package/tools/vds-scripts/openapi_orchestrator/README.md +0 -197
- package/tools/vds-scripts/openapi_orchestrator/pyproject.toml +0 -106
- package/tools/vds-scripts/openapi_orchestrator/src/vds_openapi_orchestrator/__init__.py +0 -29
- package/tools/vds-scripts/openapi_orchestrator/src/vds_openapi_orchestrator/cli.py +0 -345
- package/tools/vds-scripts/openapi_orchestrator/src/vds_openapi_orchestrator/full_validator.py +0 -183
- package/tools/vds-scripts/openapi_orchestrator/src/vds_openapi_orchestrator/spec_validator.py +0 -197
- package/tools/vds-scripts/openapi_orchestrator/tests/__init__.py +0 -1
- package/tools/vds-scripts/openapi_orchestrator/tests/test_cli.py +0 -234
- package/tools/vds-scripts/openapi_orchestrator/tests/test_full_validator.py +0 -203
- package/tools/vds-scripts/openapi_orchestrator/tests/test_spec_validator.py +0 -295
- package/tools/vds-scripts/pdf_orchestrator/.dockerignore +0 -93
- package/tools/vds-scripts/pdf_orchestrator/.env.example +0 -40
- package/tools/vds-scripts/pdf_orchestrator/.ruff_rules.py +0 -350
- package/tools/vds-scripts/pdf_orchestrator/.yamllint.yml +0 -43
- package/tools/vds-scripts/pdf_orchestrator/DEVELOPMENT_PLAN.md +0 -80
- package/tools/vds-scripts/pdf_orchestrator/Dockerfile +0 -87
- package/tools/vds-scripts/pdf_orchestrator/README.md +0 -608
- package/tools/vds-scripts/pdf_orchestrator/cli_verification_test/test.md +0 -6
- package/tools/vds-scripts/pdf_orchestrator/cli_verification_test/test.pdf +0 -0
- package/tools/vds-scripts/pdf_orchestrator/config/alertmanager.yml +0 -83
- package/tools/vds-scripts/pdf_orchestrator/config/prometheus.prod.yml +0 -98
- package/tools/vds-scripts/pdf_orchestrator/config/prometheus.yml +0 -40
- package/tools/vds-scripts/pdf_orchestrator/config/redis.conf +0 -78
- package/tools/vds-scripts/pdf_orchestrator/docs/COMPETITIVE_ANALYSIS_REPORT.md +0 -309
- package/tools/vds-scripts/pdf_orchestrator/docs/FEATURES_GUIDE.md +0 -518
- package/tools/vds-scripts/pdf_orchestrator/docs/MULTI_USER_DEPLOYMENT_GUIDE.md +0 -615
- package/tools/vds-scripts/pdf_orchestrator/docs/USER_GUIDE.md +0 -829
- package/tools/vds-scripts/pdf_orchestrator/pyproject.toml +0 -87
- package/tools/vds-scripts/pdf_orchestrator/pytest.ini +0 -71
- package/tools/vds-scripts/pdf_orchestrator/ruff.toml +0 -6
- package/tools/vds-scripts/pdf_orchestrator/scripts/debug_security_report.py +0 -59
- package/tools/vds-scripts/pdf_orchestrator/scripts/demo_library_selector.py +0 -109
- package/tools/vds-scripts/pdf_orchestrator/scripts/generate_project_stats.py +0 -52
- package/tools/vds-scripts/pdf_orchestrator/scripts/generate_styled_pdf.py +0 -95
- package/tools/vds-scripts/pdf_orchestrator/scripts/migrate_render_pdfs.py +0 -285
- package/tools/vds-scripts/pdf_orchestrator/scripts/setup_team.bat +0 -283
- package/tools/vds-scripts/pdf_orchestrator/scripts/setup_team.sh +0 -324
- package/tools/vds-scripts/pdf_orchestrator/src/vds_pdf_orchestrator/__init__.py +0 -5
- package/tools/vds-scripts/pdf_orchestrator/src/vds_pdf_orchestrator/cli.py +0 -542
- package/tools/vds-scripts/pdf_orchestrator/src/vds_pdf_orchestrator/config.py +0 -33
- package/tools/vds-scripts/pdf_orchestrator/tests/README.md +0 -650
- package/tools/vds-scripts/pdf_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/pdf_orchestrator/tests/conftest.py +0 -520
- package/tools/vds-scripts/pdf_orchestrator/tests/requirements.txt +0 -51
- package/tools/vds-scripts/pdf_orchestrator/tests/run_tests.py +0 -659
- package/tools/vds-scripts/pdf_orchestrator/tests/test_config.py +0 -36
- package/tools/vds-scripts/progress_report_orchestrator/Dockerfile +0 -77
- package/tools/vds-scripts/progress_report_orchestrator/README.md +0 -39
- package/tools/vds-scripts/progress_report_orchestrator/alembic/env.py +0 -42
- package/tools/vds-scripts/progress_report_orchestrator/alembic/script.py.mako +0 -28
- package/tools/vds-scripts/progress_report_orchestrator/alembic/versions/0001_initial_progress_schema.py +0 -180
- package/tools/vds-scripts/progress_report_orchestrator/alembic.ini +0 -67
- package/tools/vds-scripts/progress_report_orchestrator/pyproject.toml +0 -67
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/__init__.py +0 -3
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/analyzers/__init__.py +0 -1
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/analyzers/endpoint_scanner.py +0 -238
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/analyzers/git_activity.py +0 -159
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/analyzers/hexagonal.py +0 -100
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/analyzers/test_scanner.py +0 -136
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/cli.py +0 -743
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/config.py +0 -50
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/db/__init__.py +0 -12
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/db/alembic_filters.py +0 -64
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/memory.py +0 -82
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/models/__init__.py +0 -1
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/models/analysis.py +0 -84
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/models/report.py +0 -117
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/models/topology.py +0 -101
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/parsers/__init__.py +0 -1
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/parsers/kg_parser.py +0 -252
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/parsers/uc_reader.py +0 -159
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/pipeline/__init__.py +0 -1
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/pipeline/concurrency.py +0 -39
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/pipeline/llm_eval.py +0 -570
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/pipeline/report.py +0 -1256
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/pipeline/structural.py +0 -384
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/pipeline/sync.py +0 -143
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/recommendations/__init__.py +0 -5
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/recommendations/engine.py +0 -105
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/recommendations/templates.py +0 -236
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/scheduler_subscriber.py +0 -238
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/skills/README.md +0 -56
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/skills/__init__.py +0 -1
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/skills/srs-architecture-reviewer/SKILL.md +0 -67
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/skills/srs-endpoint-matcher/SKILL.md +0 -67
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/state/__init__.py +0 -1
- package/tools/vds-scripts/progress_report_orchestrator/src/progress_report_orchestrator/state/schema.py +0 -625
- package/tools/vds-scripts/progress_report_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/__init__.py +0 -0
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/kg/.gitkeep +0 -0
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/kg/__init__.py +0 -0
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/kg/doc-dependencies.yaml +0 -79
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/kg/fr-to-docs.yaml +0 -478
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/kg/fr-to-services.yaml +0 -18
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/kg/registry.yaml +0 -346
- package/tools/vds-scripts/progress_report_orchestrator/tests/fixtures/phase3_baseline_standard.md +0 -564
- package/tools/vds-scripts/progress_report_orchestrator/tests/integration/__init__.py +0 -0
- package/tools/vds-scripts/progress_report_orchestrator/tests/integration/test_checkpoint.py +0 -276
- package/tools/vds-scripts/progress_report_orchestrator/tests/test_alembic_migrations.py +0 -265
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/__init__.py +0 -0
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_analyzers.py +0 -267
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_bounded_gather.py +0 -176
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_cli_phase_report.py +0 -119
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_delta.py +0 -169
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_error_handling.py +0 -150
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_gate_exit_codes.py +0 -230
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_git_activity.py +0 -215
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_kg_parser.py +0 -267
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_llm_autodetect.py +0 -183
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_llm_eval.py +0 -529
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_memory_integration.py +0 -151
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_migration_contract.py +0 -254
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_mode_rendering.py +0 -576
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_models.py +0 -251
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_progress_llm_config.py +0 -67
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_recommendations.py +0 -480
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_report_enhancements.py +0 -415
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_resume_reload.py +0 -343
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_trend_regression.py +0 -294
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_uc_reader.py +0 -169
- package/tools/vds-scripts/progress_report_orchestrator/tests/unit/test_valence_gap.py +0 -293
- package/tools/vds-scripts/project-cycle-report.json +0 -14
- package/tools/vds-scripts/project-dependency-graph.json +0 -11361
- package/tools/vds-scripts/project-topology.json +0 -99
- package/tools/vds-scripts/public_interface_boundary_orchestrator/pyproject.toml +0 -18
- package/tools/vds-scripts/public_interface_boundary_orchestrator/src/vds_public_interface_boundary_orchestrator/__init__.py +0 -0
- package/tools/vds-scripts/public_interface_boundary_orchestrator/src/vds_public_interface_boundary_orchestrator/cli.py +0 -232
- package/tools/vds-scripts/public_interface_boundary_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/public_interface_boundary_orchestrator/tests/test_cli.py +0 -108
- package/tools/vds-scripts/research_orchestrator/README.md +0 -68
- package/tools/vds-scripts/research_orchestrator/py.typed +0 -0
- package/tools/vds-scripts/research_orchestrator/pyproject.toml +0 -95
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/__init__.py +0 -3
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/_env.py +0 -11
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/cli.py +0 -335
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/config.py +0 -43
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/evidence/__init__.py +0 -0
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/evidence/models.py +0 -89
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/evidence/scoring.py +0 -102
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/exceptions.py +0 -78
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/http_client.py +0 -160
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/logging.py +0 -49
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/output/__init__.py +0 -0
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/output/formatters.py +0 -93
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/py.typed +0 -1
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/report/__init__.py +0 -0
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/report/build.py +0 -156
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/report/format.py +0 -147
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/tools/__init__.py +0 -0
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/tools/health.py +0 -66
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/tools/health_graph.py +0 -52
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/tools/registry.py +0 -127
- package/tools/vds-scripts/research_orchestrator/src/vds_research_orchestrator/tools/search.py +0 -230
- package/tools/vds-scripts/research_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/research_orchestrator/tests/conftest.py +0 -53
- package/tools/vds-scripts/research_orchestrator/tests/test_cli.py +0 -222
- package/tools/vds-scripts/research_orchestrator/tests/test_config.py +0 -23
- package/tools/vds-scripts/research_orchestrator/tests/test_exceptions.py +0 -62
- package/tools/vds-scripts/research_orchestrator/tests/test_formatters.py +0 -89
- package/tools/vds-scripts/research_orchestrator/tests/test_graph_integration.py +0 -149
- package/tools/vds-scripts/research_orchestrator/tests/test_http_client.py +0 -134
- package/tools/vds-scripts/research_orchestrator/tests/test_report_build.py +0 -128
- package/tools/vds-scripts/research_orchestrator/tests/test_report_format.py +0 -91
- package/tools/vds-scripts/research_orchestrator/tests/test_scoring.py +0 -95
- package/tools/vds-scripts/research_orchestrator/tests/vds_research_orchestrator/test_tools/__init__.py +0 -1
- package/tools/vds-scripts/research_orchestrator/tests/vds_research_orchestrator/test_tools/test_health.py +0 -139
- package/tools/vds-scripts/research_orchestrator/tests/vds_research_orchestrator/test_tools/test_registry.py +0 -135
- package/tools/vds-scripts/research_orchestrator/tests/vds_research_orchestrator/test_tools/test_search.py +0 -238
- package/tools/vds-scripts/run-history.json +0 -26
- package/tools/vds-scripts/schema_converter/README.md +0 -109
- package/tools/vds-scripts/schema_converter/pyproject.toml +0 -37
- package/tools/vds-scripts/schema_converter/src/vds_schema_converter/__init__.py +0 -3
- package/tools/vds-scripts/schema_converter/src/vds_schema_converter/cli.py +0 -50
- package/tools/vds-scripts/schema_converter/tests/__init__.py +0 -0
- package/tools/vds-scripts/schema_converter/tests/test_json_schema_generator.py +0 -115
- package/tools/vds-scripts/schema_converter/tests/test_mermaid_generator.py +0 -112
- package/tools/vds-scripts/schema_converter/tests/test_parser.py +0 -111
- package/tools/vds-scripts/schema_converter/tests/test_plantuml_generator.py +0 -112
- package/tools/vds-scripts/schema_converter/tests/test_plantuml_validator.py +0 -69
- package/tools/vds-scripts/schema_converter/tests/test_prisma_generator.py +0 -113
- package/tools/vds-scripts/schema_converter/tests/test_sql_generator.py +0 -138
- package/tools/vds-scripts/schema_converter/tests/test_typeorm_generator.py +0 -110
- package/tools/vds-scripts/schema_converter/tests/test_validators.py +0 -96
- package/tools/vds-scripts/spec_orchestrator/README.md +0 -13
- package/tools/vds-scripts/spec_orchestrator/pyproject.toml +0 -40
- package/tools/vds-scripts/spec_orchestrator/src/vds_spec_orchestrator/__init__.py +0 -5
- package/tools/vds-scripts/spec_orchestrator/src/vds_spec_orchestrator/cli.py +0 -162
- package/tools/vds-scripts/spec_orchestrator/src/vds_spec_orchestrator/core.py +0 -575
- package/tools/vds-scripts/spec_orchestrator/src/vds_spec_orchestrator/sync.py +0 -306
- package/tools/vds-scripts/spec_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/spec_orchestrator/tests/test_frontmatter_drift.py +0 -243
- package/tools/vds-scripts/spec_orchestrator/tests/test_sync.py +0 -342
- package/tools/vds-scripts/structure_orchestrator/README.md +0 -60
- package/tools/vds-scripts/structure_orchestrator/pyproject.toml +0 -103
- package/tools/vds-scripts/structure_orchestrator/src/vds_structure_orchestrator/__init__.py +0 -13
- package/tools/vds-scripts/structure_orchestrator/src/vds_structure_orchestrator/cli.py +0 -308
- package/tools/vds-scripts/structure_orchestrator/src/vds_structure_orchestrator/validator.py +0 -257
- package/tools/vds-scripts/structure_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/structure_orchestrator/tests/test_cli.py +0 -161
- package/tools/vds-scripts/structure_orchestrator/tests/test_helpers.py +0 -115
- package/tools/vds-scripts/structure_orchestrator/tests/test_validator.py +0 -104
- package/tools/vds-scripts/task_orchestrator/README.md +0 -50
- package/tools/vds-scripts/task_orchestrator/__init__.py +0 -18
- package/tools/vds-scripts/task_orchestrator/pyproject.toml +0 -43
- package/tools/vds-scripts/task_orchestrator/scripts/run_excel_sync.py +0 -36
- package/tools/vds-scripts/task_orchestrator/src/vds_task_orchestrator/__init__.py +0 -13
- package/tools/vds-scripts/task_orchestrator/src/vds_task_orchestrator/audit.py +0 -134
- package/tools/vds-scripts/task_orchestrator/src/vds_task_orchestrator/cli.py +0 -127
- package/tools/vds-scripts/task_orchestrator/src/vds_task_orchestrator/debug.py +0 -133
- package/tools/vds-scripts/task_orchestrator/src/vds_task_orchestrator/normalize.py +0 -113
- package/tools/vds-scripts/task_orchestrator/src/vds_task_orchestrator/refine.py +0 -201
- package/tools/vds-scripts/task_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/task_orchestrator/tests/test_task_orchestrator.py +0 -84
- package/tools/vds-scripts/temp_query_projects.py +0 -2
- package/tools/vds-scripts/test_small.md +0 -1
- package/tools/vds-scripts/text_utils_orchestrator/pyproject.toml +0 -20
- package/tools/vds-scripts/text_utils_orchestrator/src/vds_text_utils/__init__.py +0 -7
- package/tools/vds-scripts/text_utils_orchestrator/src/vds_text_utils/i18n.py +0 -143
- package/tools/vds-scripts/text_utils_orchestrator/tests/__init__.py +0 -0
- package/tools/vds-scripts/text_utils_orchestrator/tests/test_i18n.py +0 -53
- package/tools/vds-scripts/upgrade_major.py +0 -61
- package/tools/vds-scripts/upgrade_major_v2.py +0 -64
- package/tools/vds-scripts/verify_violations.py +0 -57
- package/tools/vds-scripts/workflow-summary.json +0 -325
- package/tools/vds-scripts/workflow-summary.md +0 -8
|
@@ -1,3063 +0,0 @@
|
|
|
1
|
-
"""Registry page parsing logic for Phase 14 (Material Extraction)."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
import json
|
|
7
|
-
import re
|
|
8
|
-
import time
|
|
9
|
-
from collections import deque
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from typing import Any, cast
|
|
12
|
-
from urllib.parse import parse_qs, unquote_plus, urlparse, urlsplit
|
|
13
|
-
|
|
14
|
-
from bs4 import BeautifulSoup
|
|
15
|
-
from structlog import get_logger
|
|
16
|
-
|
|
17
|
-
from vds_audit_orchestrator.clients.confluence_cli_client import ConfluenceCliClient
|
|
18
|
-
from vds_audit_orchestrator.errors import DataSourceError
|
|
19
|
-
from vds_audit_orchestrator.identity import normalize_storage_key
|
|
20
|
-
from vds_audit_orchestrator.models.registry import (
|
|
21
|
-
ChecklistProfile,
|
|
22
|
-
ChildPage,
|
|
23
|
-
ConfluenceAncestryNode,
|
|
24
|
-
DocumentLink,
|
|
25
|
-
DocumentRole,
|
|
26
|
-
LinkSource,
|
|
27
|
-
ParseDiagnostics,
|
|
28
|
-
ProjectMaterial,
|
|
29
|
-
ProjectRegistry,
|
|
30
|
-
RootChildType,
|
|
31
|
-
SkippedNode,
|
|
32
|
-
SkippedNodeReason,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
logger = get_logger()
|
|
36
|
-
|
|
37
|
-
# Phase 24: Mapping version for header heuristics (FR-13.1.12).
|
|
38
|
-
# Increment when header synonym mappings or link-column priority changes.
|
|
39
|
-
MAPPING_VERSION = "1.0.0"
|
|
40
|
-
_CACHE_MISS = object()
|
|
41
|
-
_PLAIN_TEXT_URL_PATTERN = re.compile(r"https?://[^\s\"'<>]+", re.IGNORECASE)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class RegistryPageParser:
|
|
45
|
-
"""Parser for Confluence registry pages."""
|
|
46
|
-
|
|
47
|
-
MAX_RETRIES = 8
|
|
48
|
-
MAX_BACKOFF_SECONDS = 60.0
|
|
49
|
-
MIN_TIMEOUT_SECONDS = 0.1
|
|
50
|
-
MAX_PARSE_CONCURRENCY = 16
|
|
51
|
-
MAX_PARENT_CHAIN_DEPTH = 12
|
|
52
|
-
_REPOSITORY_TITLE_PREFIX = "repository - "
|
|
53
|
-
|
|
54
|
-
def __init__(
|
|
55
|
-
self,
|
|
56
|
-
client: ConfluenceCliClient,
|
|
57
|
-
*,
|
|
58
|
-
recursive_depth: int = 2,
|
|
59
|
-
retries: int = 2,
|
|
60
|
-
backoff: float = 1.0,
|
|
61
|
-
parse_concurrency: int = 3,
|
|
62
|
-
checklist_page: str | None = None,
|
|
63
|
-
max_projects: int | None = None,
|
|
64
|
-
step_timeout: float | None = None,
|
|
65
|
-
prefetch_dir: Path | str | None = None,
|
|
66
|
-
adaptive_depth: bool = True,
|
|
67
|
-
adaptive_max_depth: int = 2,
|
|
68
|
-
):
|
|
69
|
-
self.client = client
|
|
70
|
-
self.recursive_depth = recursive_depth
|
|
71
|
-
normalized_retries = max(0, int(retries))
|
|
72
|
-
if normalized_retries > self.MAX_RETRIES:
|
|
73
|
-
logger.warning(
|
|
74
|
-
"registry_parser_retries_capped",
|
|
75
|
-
requested=normalized_retries,
|
|
76
|
-
capped=self.MAX_RETRIES,
|
|
77
|
-
)
|
|
78
|
-
self.retries = min(normalized_retries, self.MAX_RETRIES)
|
|
79
|
-
|
|
80
|
-
normalized_backoff = max(0.0, float(backoff))
|
|
81
|
-
if normalized_backoff > self.MAX_BACKOFF_SECONDS:
|
|
82
|
-
logger.warning(
|
|
83
|
-
"registry_parser_backoff_capped",
|
|
84
|
-
requested=normalized_backoff,
|
|
85
|
-
capped=self.MAX_BACKOFF_SECONDS,
|
|
86
|
-
)
|
|
87
|
-
self.backoff = min(normalized_backoff, self.MAX_BACKOFF_SECONDS)
|
|
88
|
-
# Keep this conservative because Confluence APIs can degrade under parallel page fetches.
|
|
89
|
-
normalized_parse_concurrency = max(1, int(parse_concurrency))
|
|
90
|
-
if normalized_parse_concurrency > self.MAX_PARSE_CONCURRENCY:
|
|
91
|
-
logger.warning(
|
|
92
|
-
"registry_parser_parse_concurrency_capped",
|
|
93
|
-
requested=normalized_parse_concurrency,
|
|
94
|
-
capped=self.MAX_PARSE_CONCURRENCY,
|
|
95
|
-
)
|
|
96
|
-
self.parse_concurrency = min(normalized_parse_concurrency, self.MAX_PARSE_CONCURRENCY)
|
|
97
|
-
# Phase 24: Explicit checklist page override (TSK-241.2.4).
|
|
98
|
-
self.checklist_page_override = checklist_page
|
|
99
|
-
# Phase 24 TSK-243: Budget controls for timeout resilience.
|
|
100
|
-
self.max_projects = max_projects # None means no limit
|
|
101
|
-
if step_timeout is None:
|
|
102
|
-
self.step_timeout = None
|
|
103
|
-
else:
|
|
104
|
-
normalized_timeout = max(self.MIN_TIMEOUT_SECONDS, float(step_timeout))
|
|
105
|
-
if normalized_timeout != float(step_timeout):
|
|
106
|
-
logger.warning(
|
|
107
|
-
"registry_parser_step_timeout_normalized",
|
|
108
|
-
requested=step_timeout,
|
|
109
|
-
normalized=normalized_timeout,
|
|
110
|
-
min_timeout=self.MIN_TIMEOUT_SECONDS,
|
|
111
|
-
)
|
|
112
|
-
self.step_timeout = normalized_timeout
|
|
113
|
-
# Phase 24 TSK-243.4.5: Prefetch directory for local artifact parsing.
|
|
114
|
-
self.prefetch_dir = Path(prefetch_dir) if prefetch_dir else None
|
|
115
|
-
# Phase 35: Adaptive deepening controls.
|
|
116
|
-
self.adaptive_depth = bool(adaptive_depth)
|
|
117
|
-
self.adaptive_max_depth = max(1, int(adaptive_max_depth))
|
|
118
|
-
self._prefetch_manifest: dict | None = None
|
|
119
|
-
self._prefetch_pages: dict[str, dict] = {} # page_id -> page data
|
|
120
|
-
self._prefetch_children_by_parent: dict[str, list[dict]] = {}
|
|
121
|
-
self._network_children_cache: dict[tuple[str, bool], list[dict]] = {}
|
|
122
|
-
self._network_page_cache: dict[str, dict | None] = {}
|
|
123
|
-
self._project_parse_cache: dict[str, ProjectMaterial | None] = {}
|
|
124
|
-
# Best-effort resolution cache: normalized Confluence URL -> page_id (or None when unresolved).
|
|
125
|
-
self._resolved_confluence_page_ids: dict[str, str | None] = {}
|
|
126
|
-
# Cross-server CQL resolution cache for /display links.
|
|
127
|
-
self._server_scoped_clients: dict[str, ConfluenceCliClient] = {}
|
|
128
|
-
self._server_scoped_timeout = int(getattr(client, "_timeout", 120) or 120)
|
|
129
|
-
self._diag_prefetch_hits = 0
|
|
130
|
-
self._diag_prefetch_misses = 0
|
|
131
|
-
self._diag_network_fetches = 0
|
|
132
|
-
self._diag_fallback_fetches = 0
|
|
133
|
-
self._diag_step_timings_ms: dict[str, int] = {}
|
|
134
|
-
self._diag_retry_count = 0
|
|
135
|
-
self._diag_rate_limit_events = 0
|
|
136
|
-
self._diag_retry_after_seconds: int | None = None
|
|
137
|
-
|
|
138
|
-
def _reset_parse_diagnostics_tracking(self) -> None:
|
|
139
|
-
"""Reset parser-level diagnostics counters for a new parse."""
|
|
140
|
-
self._diag_prefetch_hits = 0
|
|
141
|
-
self._diag_prefetch_misses = 0
|
|
142
|
-
self._diag_network_fetches = 0
|
|
143
|
-
self._diag_fallback_fetches = 0
|
|
144
|
-
self._diag_step_timings_ms = {}
|
|
145
|
-
self._diag_retry_count = 0
|
|
146
|
-
self._diag_rate_limit_events = 0
|
|
147
|
-
self._diag_retry_after_seconds = None
|
|
148
|
-
self._network_children_cache = {}
|
|
149
|
-
self._network_page_cache = {}
|
|
150
|
-
self._project_parse_cache = {}
|
|
151
|
-
self._resolved_confluence_page_ids = {}
|
|
152
|
-
|
|
153
|
-
def _record_step_timing(self, step: str, started_at: float) -> None:
|
|
154
|
-
"""Record elapsed timing for a parsing step."""
|
|
155
|
-
elapsed_ms = max(0, int((time.monotonic() - started_at) * 1000))
|
|
156
|
-
self._diag_step_timings_ms[step] = self._diag_step_timings_ms.get(step, 0) + elapsed_ms
|
|
157
|
-
|
|
158
|
-
@staticmethod
|
|
159
|
-
def _extract_retry_after_seconds(exc: DataSourceError) -> int | None:
|
|
160
|
-
"""Extract Retry-After seconds from error context/message when present."""
|
|
161
|
-
context = exc.context if isinstance(exc.context, dict) else {}
|
|
162
|
-
context_value = context.get("retry_after_seconds")
|
|
163
|
-
if context_value is not None:
|
|
164
|
-
try:
|
|
165
|
-
return int(context_value)
|
|
166
|
-
except (TypeError, ValueError):
|
|
167
|
-
pass
|
|
168
|
-
|
|
169
|
-
fragments = [
|
|
170
|
-
str(exc),
|
|
171
|
-
str(context.get("stderr") or ""),
|
|
172
|
-
str(context.get("stdout") or ""),
|
|
173
|
-
str(context.get("error") or ""),
|
|
174
|
-
]
|
|
175
|
-
blob = "\n".join(fragments).lower()
|
|
176
|
-
match = re.search(r"retry[- ]after\s*[:=]?\s*(\d+)", blob)
|
|
177
|
-
if not match:
|
|
178
|
-
return None
|
|
179
|
-
return int(match.group(1))
|
|
180
|
-
|
|
181
|
-
@staticmethod
|
|
182
|
-
def _is_rate_limited(exc: DataSourceError) -> bool:
|
|
183
|
-
"""Detect whether an error indicates rate limiting (429 / Retry-After)."""
|
|
184
|
-
context = exc.context if isinstance(exc.context, dict) else {}
|
|
185
|
-
if bool(context.get("rate_limited")):
|
|
186
|
-
return True
|
|
187
|
-
fragments = [
|
|
188
|
-
str(exc),
|
|
189
|
-
str(context.get("stderr") or ""),
|
|
190
|
-
str(context.get("stdout") or ""),
|
|
191
|
-
str(context.get("error") or ""),
|
|
192
|
-
]
|
|
193
|
-
blob = "\n".join(fragments).lower()
|
|
194
|
-
return "429" in blob or "retry-after" in blob or "rate limit" in blob
|
|
195
|
-
|
|
196
|
-
@staticmethod
|
|
197
|
-
def _page_id(page: dict[str, object] | None) -> str | None:
|
|
198
|
-
"""Extract page ID from heterogeneous payloads (id/page_id)."""
|
|
199
|
-
if not isinstance(page, dict):
|
|
200
|
-
return None
|
|
201
|
-
raw = page.get("id")
|
|
202
|
-
if raw is None:
|
|
203
|
-
raw = page.get("page_id")
|
|
204
|
-
if raw is None:
|
|
205
|
-
return None
|
|
206
|
-
normalized = str(raw).strip()
|
|
207
|
-
return normalized or None
|
|
208
|
-
|
|
209
|
-
@staticmethod
|
|
210
|
-
def _parent_id(page: dict[str, object] | None) -> str | None:
|
|
211
|
-
"""Extract parent ID from heterogeneous payloads."""
|
|
212
|
-
if not isinstance(page, dict):
|
|
213
|
-
return None
|
|
214
|
-
raw = page.get("parent_id")
|
|
215
|
-
if raw is None:
|
|
216
|
-
raw = page.get("parentId")
|
|
217
|
-
if raw is None:
|
|
218
|
-
raw = page.get("parent_page_id")
|
|
219
|
-
if raw is None:
|
|
220
|
-
return None
|
|
221
|
-
normalized = str(raw).strip()
|
|
222
|
-
return normalized or None
|
|
223
|
-
|
|
224
|
-
def _page_url(self, page: dict[str, object] | None, *, fallback_page_id: str | None = None) -> str:
|
|
225
|
-
"""Resolve page URL from links or schema fallbacks (page_url/url)."""
|
|
226
|
-
if not isinstance(page, dict):
|
|
227
|
-
if fallback_page_id:
|
|
228
|
-
return f"/pages/viewpage.action?pageId={fallback_page_id}"
|
|
229
|
-
return ""
|
|
230
|
-
|
|
231
|
-
links_raw = page.get("_links")
|
|
232
|
-
links = links_raw if isinstance(links_raw, dict) else {}
|
|
233
|
-
base_url = str((links or {}).get("base") or "").strip()
|
|
234
|
-
if not base_url:
|
|
235
|
-
base_url = self._resolve_base_url(str((links or {}).get("self") or ""))
|
|
236
|
-
web_ui = str((links or {}).get("webui") or "").strip()
|
|
237
|
-
if base_url and web_ui:
|
|
238
|
-
return f"{base_url}{web_ui}"
|
|
239
|
-
|
|
240
|
-
for key in ("page_url", "url"):
|
|
241
|
-
value = str(page.get(key) or "").strip()
|
|
242
|
-
if value:
|
|
243
|
-
return value
|
|
244
|
-
|
|
245
|
-
self_link = str((links or {}).get("self") or "").strip()
|
|
246
|
-
if self_link:
|
|
247
|
-
return self_link
|
|
248
|
-
if fallback_page_id:
|
|
249
|
-
return f"/pages/viewpage.action?pageId={fallback_page_id}"
|
|
250
|
-
return ""
|
|
251
|
-
|
|
252
|
-
@staticmethod
|
|
253
|
-
def _lineage_node(
|
|
254
|
-
*,
|
|
255
|
-
page_id: str,
|
|
256
|
-
title: str | None,
|
|
257
|
-
page_url: str | None,
|
|
258
|
-
) -> dict[str, str | None]:
|
|
259
|
-
return {
|
|
260
|
-
"page_id": str(page_id or "").strip(),
|
|
261
|
-
"title": str(title).strip() if isinstance(title, str) and str(title).strip() else None,
|
|
262
|
-
"page_url": str(page_url).strip() if isinstance(page_url, str) and str(page_url).strip() else None,
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
def _derive_department_from_lineage(
|
|
266
|
-
self,
|
|
267
|
-
*,
|
|
268
|
-
lineage: list[dict[str, str | None]],
|
|
269
|
-
registry_root_id: str,
|
|
270
|
-
registry_root_title: str | None,
|
|
271
|
-
projects_root_id: str | None,
|
|
272
|
-
candidate_root_id: str,
|
|
273
|
-
) -> tuple[str | None, str | None, str]:
|
|
274
|
-
if not lineage:
|
|
275
|
-
return None, None, "unassigned:lineage_empty"
|
|
276
|
-
if len(lineage) < 2:
|
|
277
|
-
return None, None, "unassigned:lineage_missing_ancestor"
|
|
278
|
-
|
|
279
|
-
ancestors = lineage[:-1]
|
|
280
|
-
|
|
281
|
-
def _node_after_anchor(anchor_id: str | None) -> tuple[dict[str, str | None] | None, str]:
|
|
282
|
-
normalized_anchor = str(anchor_id or "").strip()
|
|
283
|
-
if not normalized_anchor:
|
|
284
|
-
return None, "anchor_missing"
|
|
285
|
-
for index, node in enumerate(ancestors):
|
|
286
|
-
if str(node.get("page_id") or "").strip() != normalized_anchor:
|
|
287
|
-
continue
|
|
288
|
-
if index + 1 < len(ancestors):
|
|
289
|
-
return ancestors[index + 1], "ok"
|
|
290
|
-
return None, "anchor_has_no_descendant"
|
|
291
|
-
return None, "anchor_not_in_lineage"
|
|
292
|
-
|
|
293
|
-
normalized_projects_root = str(projects_root_id or "").strip()
|
|
294
|
-
normalized_registry_root = str(registry_root_id or "").strip()
|
|
295
|
-
if normalized_projects_root and normalized_projects_root != normalized_registry_root:
|
|
296
|
-
node_after_projects_root, projects_root_status = _node_after_anchor(projects_root_id)
|
|
297
|
-
if node_after_projects_root is not None:
|
|
298
|
-
page_id = str(node_after_projects_root.get("page_id") or "").strip()
|
|
299
|
-
title = str(node_after_projects_root.get("title") or "").strip()
|
|
300
|
-
if page_id:
|
|
301
|
-
return title or f"page-{page_id}", page_id, "derived_from_projects_root_lineage"
|
|
302
|
-
return None, None, "unassigned:department_node_missing_page_id"
|
|
303
|
-
|
|
304
|
-
container_ancestors: list[tuple[int, dict[str, str | None]]] = []
|
|
305
|
-
for index, node in enumerate(ancestors):
|
|
306
|
-
title = str(node.get("title") or "").strip()
|
|
307
|
-
if self.classify_root_child(title) == RootChildType.PROJECT_CONTAINER:
|
|
308
|
-
container_ancestors.append((index, node))
|
|
309
|
-
|
|
310
|
-
if container_ancestors:
|
|
311
|
-
candidate_projects_root = str(projects_root_id or "").strip()
|
|
312
|
-
selected_index, selected_node = container_ancestors[-1]
|
|
313
|
-
selected_page_id = str(selected_node.get("page_id") or "").strip()
|
|
314
|
-
selected_title = str(selected_node.get("title") or "").strip()
|
|
315
|
-
projects_root_index: int | None = None
|
|
316
|
-
if candidate_projects_root:
|
|
317
|
-
for container_index, container_node in container_ancestors:
|
|
318
|
-
if str(container_node.get("page_id") or "").strip() == candidate_projects_root:
|
|
319
|
-
projects_root_index = container_index
|
|
320
|
-
break
|
|
321
|
-
if selected_page_id:
|
|
322
|
-
if selected_page_id == str(registry_root_id or "").strip():
|
|
323
|
-
return (
|
|
324
|
-
selected_title or f"page-{selected_page_id}",
|
|
325
|
-
selected_page_id,
|
|
326
|
-
"derived_from_registry_root_anchor",
|
|
327
|
-
)
|
|
328
|
-
if selected_page_id == candidate_projects_root:
|
|
329
|
-
return (
|
|
330
|
-
selected_title or f"page-{selected_page_id}",
|
|
331
|
-
selected_page_id,
|
|
332
|
-
"derived_from_projects_root_anchor",
|
|
333
|
-
)
|
|
334
|
-
if selected_page_id == str(candidate_root_id or "").strip():
|
|
335
|
-
return (
|
|
336
|
-
selected_title or f"page-{selected_page_id}",
|
|
337
|
-
selected_page_id,
|
|
338
|
-
"derived_from_candidate_root_anchor",
|
|
339
|
-
)
|
|
340
|
-
if projects_root_index is not None and selected_index > projects_root_index:
|
|
341
|
-
return (
|
|
342
|
-
selected_title or f"page-{selected_page_id}",
|
|
343
|
-
selected_page_id,
|
|
344
|
-
"derived_from_projects_root_lineage",
|
|
345
|
-
)
|
|
346
|
-
if selected_page_id == str(candidate_root_id or "").strip():
|
|
347
|
-
return (
|
|
348
|
-
selected_title or f"page-{selected_page_id}",
|
|
349
|
-
selected_page_id,
|
|
350
|
-
"derived_from_candidate_root_anchor",
|
|
351
|
-
)
|
|
352
|
-
|
|
353
|
-
for anchor_id, anchor_reason in (
|
|
354
|
-
(projects_root_id, "derived_from_projects_root_lineage"),
|
|
355
|
-
(candidate_root_id, "derived_from_candidate_root_lineage"),
|
|
356
|
-
):
|
|
357
|
-
normalized_anchor = str(anchor_id or "").strip()
|
|
358
|
-
if not normalized_anchor:
|
|
359
|
-
continue
|
|
360
|
-
for container_index, container_node in container_ancestors:
|
|
361
|
-
if str(container_node.get("page_id") or "").strip() != normalized_anchor:
|
|
362
|
-
continue
|
|
363
|
-
if selected_index > container_index:
|
|
364
|
-
return selected_title or f"page-{selected_page_id}", selected_page_id, anchor_reason
|
|
365
|
-
|
|
366
|
-
return (
|
|
367
|
-
selected_title or f"page-{selected_page_id}",
|
|
368
|
-
selected_page_id,
|
|
369
|
-
"derived_from_lineage_nearest_container",
|
|
370
|
-
)
|
|
371
|
-
|
|
372
|
-
def _node_after_anchor(anchor_id: str | None) -> tuple[dict[str, str | None] | None, str]:
|
|
373
|
-
normalized_anchor = str(anchor_id or "").strip()
|
|
374
|
-
if not normalized_anchor:
|
|
375
|
-
return None, "anchor_missing"
|
|
376
|
-
for index, node in enumerate(ancestors):
|
|
377
|
-
if str(node.get("page_id") or "").strip() != normalized_anchor:
|
|
378
|
-
continue
|
|
379
|
-
if index + 1 < len(ancestors):
|
|
380
|
-
return ancestors[index + 1], "ok"
|
|
381
|
-
return None, "anchor_has_no_descendant"
|
|
382
|
-
return None, "anchor_not_in_lineage"
|
|
383
|
-
|
|
384
|
-
projects_root_status = "anchor_missing"
|
|
385
|
-
if projects_root_id:
|
|
386
|
-
node_after_projects_root, projects_root_status = _node_after_anchor(projects_root_id)
|
|
387
|
-
if node_after_projects_root is not None:
|
|
388
|
-
page_id = str(node_after_projects_root.get("page_id") or "").strip()
|
|
389
|
-
title = str(node_after_projects_root.get("title") or "").strip()
|
|
390
|
-
if page_id:
|
|
391
|
-
return title or f"page-{page_id}", page_id, "derived_from_projects_root_lineage"
|
|
392
|
-
return None, None, "unassigned:department_node_missing_page_id"
|
|
393
|
-
if projects_root_status == "anchor_has_no_descendant":
|
|
394
|
-
normalized_projects_root = str(projects_root_id or "").strip()
|
|
395
|
-
normalized_registry_root = str(registry_root_id or "").strip()
|
|
396
|
-
if normalized_projects_root and normalized_projects_root != normalized_registry_root:
|
|
397
|
-
for node in ancestors:
|
|
398
|
-
page_id = str(node.get("page_id") or "").strip()
|
|
399
|
-
if page_id != normalized_projects_root:
|
|
400
|
-
continue
|
|
401
|
-
title = str(node.get("title") or "").strip()
|
|
402
|
-
return title or f"page-{page_id}", page_id, "derived_from_projects_root_anchor"
|
|
403
|
-
if normalized_projects_root and normalized_projects_root == normalized_registry_root:
|
|
404
|
-
root_title = str(registry_root_title or "").strip()
|
|
405
|
-
if root_title and self.classify_root_child(root_title) == RootChildType.PROJECT_CONTAINER:
|
|
406
|
-
return root_title, normalized_registry_root, "derived_from_registry_root_anchor"
|
|
407
|
-
# Department-level entry: single ancestor IS the registry root.
|
|
408
|
-
# The entry page is both root and department (self-referencing).
|
|
409
|
-
if len(ancestors) == 1:
|
|
410
|
-
ancestor_id = str(ancestors[0].get("page_id") or "").strip()
|
|
411
|
-
ancestor_title = str(ancestors[0].get("title") or "").strip()
|
|
412
|
-
if ancestor_id == normalized_registry_root:
|
|
413
|
-
return (
|
|
414
|
-
ancestor_title or f"page-{ancestor_id}",
|
|
415
|
-
ancestor_id,
|
|
416
|
-
"derived_from_department_entry_self_reference",
|
|
417
|
-
)
|
|
418
|
-
return None, None, "unassigned:lineage_missing_department_after_projects_root"
|
|
419
|
-
|
|
420
|
-
node_after_candidate_root, candidate_status = _node_after_anchor(candidate_root_id)
|
|
421
|
-
if node_after_candidate_root is not None:
|
|
422
|
-
page_id = str(node_after_candidate_root.get("page_id") or "").strip()
|
|
423
|
-
title = str(node_after_candidate_root.get("title") or "").strip()
|
|
424
|
-
if page_id:
|
|
425
|
-
if page_id == str(registry_root_id).strip() or (
|
|
426
|
-
projects_root_id and page_id == str(projects_root_id).strip()
|
|
427
|
-
):
|
|
428
|
-
return None, None, "unassigned:lineage_resolved_container_not_department"
|
|
429
|
-
return title or f"page-{page_id}", page_id, "derived_from_candidate_root_lineage"
|
|
430
|
-
return None, None, "unassigned:department_node_missing_page_id"
|
|
431
|
-
|
|
432
|
-
if candidate_status == "anchor_has_no_descendant":
|
|
433
|
-
normalized_candidate_root = str(candidate_root_id or "").strip()
|
|
434
|
-
if normalized_candidate_root and normalized_candidate_root not in {
|
|
435
|
-
str(registry_root_id).strip(),
|
|
436
|
-
str(projects_root_id or "").strip(),
|
|
437
|
-
}:
|
|
438
|
-
for node in ancestors:
|
|
439
|
-
page_id = str(node.get("page_id") or "").strip()
|
|
440
|
-
if page_id != normalized_candidate_root:
|
|
441
|
-
continue
|
|
442
|
-
title = str(node.get("title") or "").strip()
|
|
443
|
-
return title or f"page-{page_id}", page_id, "derived_from_candidate_root_anchor"
|
|
444
|
-
# Department-level entry: single ancestor IS the candidate root.
|
|
445
|
-
if len(ancestors) == 1:
|
|
446
|
-
ancestor_id = str(ancestors[0].get("page_id") or "").strip()
|
|
447
|
-
ancestor_title = str(ancestors[0].get("title") or "").strip()
|
|
448
|
-
if ancestor_id and ancestor_id == str(candidate_root_id or "").strip():
|
|
449
|
-
return (
|
|
450
|
-
ancestor_title or f"page-{ancestor_id}",
|
|
451
|
-
ancestor_id,
|
|
452
|
-
"derived_from_department_entry_self_reference",
|
|
453
|
-
)
|
|
454
|
-
return None, None, "unassigned:lineage_missing_department_after_candidate_root"
|
|
455
|
-
if projects_root_id and projects_root_status == "anchor_not_in_lineage":
|
|
456
|
-
return None, None, "unassigned:projects_root_not_in_lineage"
|
|
457
|
-
if candidate_status == "anchor_not_in_lineage":
|
|
458
|
-
return None, None, "unassigned:candidate_root_not_in_lineage"
|
|
459
|
-
return None, None, "unassigned:department_not_resolved"
|
|
460
|
-
|
|
461
|
-
def _apply_lineage_metadata(
    self,
    *,
    material: ProjectMaterial,
    lineage: list[dict[str, str | None]],
    registry_root_id: str,
    registry_root_title: str | None,
    projects_root_id: str | None,
    candidate_root_id: str,
) -> None:
    """Populate root, ancestry and department fields of ``material`` from a lineage.

    The lineage is first normalized: non-dict entries and entries without a
    page id are dropped and the rest are rebuilt through ``_lineage_node``.
    The first surviving node is treated as the root. When that node is the
    registry root and has no title of its own, the title falls back to the
    second node's title (if it classifies as a project container) or else to
    ``registry_root_title``. An empty lineage marks the root as unassigned.

    Department fields are filled from ``_derive_department_from_lineage``.
    The method mutates ``material`` in place and returns nothing.
    """

    def _text(value: object) -> str:
        # Shared "stringify, treat falsy as empty, trim" normalization.
        return str(value or "").strip()

    cleaned_nodes: list[dict[str, str | None]] = []
    for raw_node in lineage:
        if not isinstance(raw_node, dict):
            continue
        node_id = _text(raw_node.get("page_id"))
        if not node_id:
            continue
        cleaned_nodes.append(
            self._lineage_node(
                page_id=node_id,
                title=_text(raw_node.get("title")) or None,
                page_url=_text(raw_node.get("page_url")) or None,
            )
        )

    registry_id = _text(registry_root_id)
    effective_registry_root_title = _text(registry_root_title) or None
    ancestry_nodes = list(cleaned_nodes)

    if not cleaned_nodes:
        material.root_page_id = None
        material.root_title = None
        material.root_storage_key = None
        material.root_assignment_reason = "unassigned:lineage_empty"
    else:
        head = cleaned_nodes[0]
        head_id = _text(head.get("page_id"))
        head_title = _text(head.get("title")) or None
        root_page_id = head_id or None
        root_title = head_title
        head_is_registry_root = bool(root_page_id) and root_page_id == registry_id

        if head_is_registry_root:
            fallback_root_title = effective_registry_root_title
            if len(cleaned_nodes) > 1:
                second_title = _text(cleaned_nodes[1].get("title")) or None
                if second_title and self.classify_root_child(second_title) == RootChildType.PROJECT_CONTAINER:
                    fallback_root_title = second_title
            if not root_title:
                root_title = fallback_root_title

        material.root_page_id = root_page_id
        material.root_title = root_title
        material.root_storage_key = normalize_storage_key(root_title or root_page_id or "unassigned")

        if head_is_registry_root:
            material.root_assignment_reason = "derived_from_registry_root_lineage_head"
            # A root whose "title" is just its own id is left out of the
            # recorded ancestry path.
            if len(cleaned_nodes) > 1 and root_title == root_page_id:
                ancestry_nodes = cleaned_nodes[1:]
        elif root_page_id and root_page_id == _text(candidate_root_id):
            material.root_assignment_reason = "derived_from_candidate_root_lineage_head"
        else:
            material.root_assignment_reason = "derived_from_lineage_head"

        # Prefer the title found in the lineage over the caller-supplied one.
        if head_id == registry_id and head_title:
            effective_registry_root_title = head_title

    material.confluence_ancestry_path = [
        ConfluenceAncestryNode(
            page_id=str(entry.get("page_id") or ""),
            title=entry.get("title"),
            page_url=entry.get("page_url"),
        )
        for entry in ancestry_nodes
    ]

    department, department_page_id, assignment_reason = self._derive_department_from_lineage(
        lineage=cleaned_nodes,
        registry_root_id=registry_root_id,
        registry_root_title=effective_registry_root_title,
        projects_root_id=projects_root_id,
        candidate_root_id=candidate_root_id,
    )
    material.department = department
    material.department_page_id = department_page_id
    material.department_storage_key = normalize_storage_key(department or "unassigned")
    material.department_assignment_reason = assignment_reason
|
|
540
|
-
|
|
541
|
-
async def _build_parent_chain_lineage(
    self,
    *,
    leaf_page: dict[str, object],
    max_depth: int | None = None,
) -> list[dict[str, str | None]]:
    """Return the normalized lineage of ``leaf_page`` ordered from root to leaf.

    When ``leaf_page`` carries an ``ancestors`` list, the lineage is built
    from that list (skipping non-dict entries and entries without a page id)
    followed by the leaf itself; ``max_depth`` is not consulted on that path.
    Otherwise, or when that yields nothing, parent links are followed upward
    one page at a time, consulting the prefetch index before the cached page
    lookup, for at most ``max_depth`` steps (``MAX_PARENT_CHAIN_DEPTH`` when
    unset, never fewer than one).

    A missing, ``None`` or blank title is recorded as ``None``.
    """

    def _node(page: dict[str, object], node_id: str):
        # ``or ""`` keeps an explicit ``None`` title from being stringified
        # into the literal text "None".
        return self._lineage_node(
            page_id=node_id,
            title=str(page.get("title") or "").strip() or None,
            page_url=self._page_url(page, fallback_page_id=node_id),
        )

    raw_ancestors = leaf_page.get("ancestors")
    if isinstance(raw_ancestors, list):
        lineage: list[dict[str, str | None]] = []
        for ancestor in raw_ancestors:
            if not isinstance(ancestor, dict):
                continue
            ancestor_id = self._page_id(ancestor)
            if ancestor_id:
                lineage.append(_node(ancestor, ancestor_id))
        leaf_page_id = self._page_id(leaf_page)
        if leaf_page_id:
            lineage.append(_node(leaf_page, leaf_page_id))
        if lineage:
            return lineage

    effective_max_depth = max(1, int(max_depth or self.MAX_PARENT_CHAIN_DEPTH))
    lineage_reversed: list[dict[str, str | None]] = []
    current_page = leaf_page
    seen_page_ids: set[str] = set()  # stops the walk if parent links form a cycle

    for _ in range(effective_max_depth):
        current_page_id = self._page_id(current_page)
        if not current_page_id or current_page_id in seen_page_ids:
            break
        seen_page_ids.add(current_page_id)
        lineage_reversed.append(_node(current_page, current_page_id))
        parent_id = self._parent_id(current_page)
        if not parent_id:
            break
        parent_page = self._get_prefetch_page(parent_id) or await self._get_page_with_cache(parent_id)
        if not isinstance(parent_page, dict):
            break
        current_page = parent_page

    # Nodes were collected leaf-first; callers expect root-first order.
    return list(reversed(lineage_reversed))
|
|
602
|
-
|
|
603
|
-
def _projects_root_id_from_lineage(self, lineage: list[dict[str, str | None]]) -> str | None:
    """Return the page id of the first project-container ancestor, if any.

    Every node except the last one is inspected in order. Nodes lacking a
    page id or a title are ignored. The first node whose title classifies as
    ``RootChildType.PROJECT_CONTAINER`` wins; ``None`` means no such node.
    """
    for ancestor in lineage[:-1]:
        ancestor_id = str(ancestor.get("page_id") or "").strip()
        ancestor_title = str(ancestor.get("title") or "").strip()
        if (
            ancestor_id
            and ancestor_title
            and self.classify_root_child(ancestor_title) == RootChildType.PROJECT_CONTAINER
        ):
            return ancestor_id
    return None
|
|
613
|
-
|
|
614
|
-
@staticmethod
|
|
615
|
-
def _title_looks_like_department_container(title: str | None) -> bool:
|
|
616
|
-
normalized = str(title or "").strip()
|
|
617
|
-
if not normalized:
|
|
618
|
-
return False
|
|
619
|
-
return bool(re.match(r"^\d+\.\d+\.\d+(?:\s|$)", normalized))
|
|
620
|
-
|
|
621
|
-
async def _resolve_effective_registry_root_context(
    self,
    *,
    page_id: str,
    page_payload: dict[str, object] | None,
) -> tuple[dict[str, object] | None, str, str | None]:
    """Resolve which page should act as the registry root for ``page_id``.

    Returns ``(payload, root_id, root_title)``. By default the root is the
    page itself. Only when the page title both classifies as a project
    container and starts with a three-part dotted number is its lineage
    inspected; the closest project-container ancestor then becomes the root.

    When no payload is supplied and ``recursive_depth`` is greater than one,
    the page is fetched on a best-effort basis: a failed fetch is logged and
    the method continues without a payload.
    """
    payload = page_payload if isinstance(page_payload, dict) else None
    normalized_page_id = str(page_id or "").strip()

    if payload is None and max(1, int(self.recursive_depth or 1)) > 1:
        try:
            fetched_payload = await self._retry(
                lambda: self.client.get_page(normalized_page_id), page_id=normalized_page_id
            )
        except Exception as exc:
            # Deliberately best-effort, but leave a trace rather than
            # discarding the failure silently.
            logger.warning(
                "registry_root_payload_fetch_failed",
                page_id=normalized_page_id,
                error=str(exc),
            )
            fetched_payload = None
        if isinstance(fetched_payload, dict):
            payload = fetched_payload

    title = str((payload or {}).get("title") or "").strip() or normalized_page_id

    if (
        self.classify_root_child(title) != RootChildType.PROJECT_CONTAINER
        or not self._title_looks_like_department_container(title)
        or not isinstance(payload, dict)
    ):
        return payload, normalized_page_id, title or None

    # Without a parent link or an ancestors list the lineage cannot be
    # built, so re-fetch the page with ancestors expanded.
    if self._parent_id(payload) is None and not isinstance(payload.get("ancestors"), list):
        enriched_payload = await self._retry(
            lambda: self.client.get_page(normalized_page_id, expand="ancestors"),
            page_id=normalized_page_id,
            expand="ancestors",
        )
        if isinstance(enriched_payload, dict):
            payload = enriched_payload

    lineage = await self._build_parent_chain_lineage(leaf_page=payload, max_depth=3)
    container_ancestors = [
        node
        for node in lineage[:-1]
        if self.classify_root_child(str(node.get("title") or "").strip()) == RootChildType.PROJECT_CONTAINER
    ]
    if container_ancestors:
        # The last match is the container closest to the page itself.
        selected = container_ancestors[-1]
        selected_id = str(selected.get("page_id") or "").strip() or normalized_page_id
        selected_title = str(selected.get("title") or "").strip() or title
        return payload, selected_id, selected_title or None

    return payload, normalized_page_id, title or None
|
|
669
|
-
|
|
670
|
-
async def _parse_registry_entry_as_project(
    self,
    *,
    page_id: str,
    page_payload: dict[str, object] | None,
) -> ProjectMaterial | None:
    """Parse a registry entry that is itself a project page.

    The page payload is taken from ``page_payload`` or loaded through the
    page cache, and re-fetched with ancestors expanded when it has neither a
    parent link nor an ancestors list. After ``parse_project_page`` produces
    the material, descendants up to two levels deep become ``child_pages``
    and feed the Bitbucket project derivation, and lineage metadata is
    applied. Returns ``None`` when no payload is available or parsing yields
    nothing.
    """
    if isinstance(page_payload, dict):
        payload = page_payload
    else:
        payload = await self._get_page_with_cache(page_id)
    if not isinstance(payload, dict):
        return None

    lacks_parent_info = self._parent_id(payload) is None and not isinstance(payload.get("ancestors"), list)
    if lacks_parent_info:
        enriched_payload = await self._retry(
            lambda: self.client.get_page(page_id, expand="ancestors"),
            page_id=page_id,
            expand="ancestors",
        )
        if isinstance(enriched_payload, dict):
            payload = enriched_payload

    title = str(payload.get("title") or "").strip() or page_id
    material = await self.parse_project_page(page_id, title, page_payload=payload)
    if material is None:
        return None

    descendant_pages = await self._collect_atomic_project_descendants(page_id=page_id, max_depth=2)
    material.child_pages = [
        ChildPage(
            page_id=entry["page_id"],
            title=entry["title"],
            page_url=entry["page_url"],
            depth=entry["depth"],
        )
        for entry in descendant_pages
    ]
    descendant_payloads = [
        entry["payload"] for entry in descendant_pages if isinstance(entry.get("payload"), dict)
    ]
    material.bitbucket_projects = self._derive_bitbucket_projects_from_child_pages(descendant_payloads)

    lineage = await self._build_parent_chain_lineage(leaf_page=payload)
    if not lineage:
        # Fall back to a lineage consisting of the page alone.
        lineage = [
            self._lineage_node(
                page_id=page_id,
                title=title,
                page_url=self._page_url(payload, fallback_page_id=page_id),
            )
        ]

    registry_root_node = lineage[0]
    if len(lineage) >= 2:
        # The node directly above the leaf is used as the candidate root.
        candidate_root_id = str(lineage[-2].get("page_id") or "").strip()
    else:
        candidate_root_id = page_id

    self._apply_lineage_metadata(
        material=material,
        lineage=lineage,
        registry_root_id=str(registry_root_node.get("page_id") or "").strip() or page_id,
        registry_root_title=str(registry_root_node.get("title") or "").strip() or title,
        projects_root_id=self._projects_root_id_from_lineage(lineage),
        candidate_root_id=candidate_root_id,
    )
    return material
|
|
729
|
-
|
|
730
|
-
@classmethod
def _repository_slug_from_title(cls, title: str) -> tuple[str, str] | None:
    """Split a repository page title into ``(project_key, repo_slug)``.

    The trimmed title must start, compared case-insensitively, with
    ``_REPOSITORY_TITLE_PREFIX``. The remainder is passed through
    ``normalize_storage_key`` and split on ``-``: the first segment,
    upper-cased, is the project key and the rest is the slug. With fewer
    than two segments the project key is the empty string. Returns ``None``
    when the title is empty, lacks the prefix, or normalizes to nothing.
    """
    cleaned_title = str(title or "").strip()
    if not cleaned_title:
        return None

    prefix = cls._REPOSITORY_TITLE_PREFIX
    if not cleaned_title.casefold().startswith(prefix):
        return None

    slug_key = normalize_storage_key(cleaned_title[len(prefix) :].strip())
    if not slug_key:
        return None

    segments = [segment for segment in slug_key.split("-") if segment]
    if len(segments) < 2:
        return "", slug_key
    project_segment, *repo_segments = segments
    return project_segment.upper(), "-".join(repo_segments)
|
|
746
|
-
|
|
747
|
-
@staticmethod
|
|
748
|
-
def _local_repo_path_for_slug(*, bitbucket_project_key: str, repo_slug: str) -> str | None:
|
|
749
|
-
project_key = str(bitbucket_project_key or "").strip().lower()
|
|
750
|
-
slug = str(repo_slug or "").strip()
|
|
751
|
-
if not project_key or not slug:
|
|
752
|
-
return None
|
|
753
|
-
candidate = Path.home() / ".vds" / "cache" / "repos" / f"{project_key}-project" / slug
|
|
754
|
-
if candidate.exists() and candidate.is_dir():
|
|
755
|
-
return str(candidate)
|
|
756
|
-
return None
|
|
757
|
-
|
|
758
|
-
def _derive_bitbucket_projects_from_child_pages(self, child_pages: list[dict[str, object]]) -> list[dict[str, Any]]:
    """Group repository child pages into Bitbucket project descriptors.

    Each child whose title parses through ``_repository_slug_from_title``
    into a non-empty project key and slug contributes one repo entry to the
    project with that key. The result is ordered by project key, and each
    project's repos are ordered by slug and then storage key.
    """
    projects_by_key: dict[str, dict[str, Any]] = {}

    for child_page in child_pages:
        child_title = str(child_page.get("title") or "").strip()
        child_id = self._page_id(child_page)
        identity = self._repository_slug_from_title(child_title)
        if identity is None:
            continue
        project_key, slug = identity
        if not (project_key and slug):
            continue

        if project_key not in projects_by_key:
            projects_by_key[project_key] = {
                "bitbucket_project_key": project_key,
                "display_name": project_key,
                "source": "registry_child_repository_pages",
                "repos": [],
            }
        projects_by_key[project_key]["repos"].append(
            {
                "repo_storage_key": f"{project_key.lower()}:{slug}",
                "repo_slug": slug,
                "repo_url": None,
                "default_branch": "main",
                "status": "discovered",
                "path": self._page_url(child_page, fallback_page_id=str(child_id or slug)),
                "source_page_id": str(child_id or ""),
                "source_title": child_title,
                "fetch_reason": "registry_child_repository_page",
                "local_code_path": self._local_repo_path_for_slug(
                    bitbucket_project_key=project_key,
                    repo_slug=slug,
                ),
            }
        )

    def _repo_order(repo: dict[str, Any]) -> tuple[str, str]:
        return str(repo.get("repo_slug") or ""), str(repo.get("repo_storage_key") or "")

    ordered_projects: list[dict[str, Any]] = []
    for project_key in sorted(projects_by_key):
        # Copy so the sorted repo list replaces the accumulated one without
        # touching the working dict.
        project = dict(projects_by_key[project_key])
        project["repos"] = sorted(project["repos"], key=_repo_order)
        ordered_projects.append(project)
    return ordered_projects
|
|
809
|
-
|
|
810
|
-
async def _collect_atomic_project_descendants(self, *, page_id: str, max_depth: int) -> list[dict[str, Any]]:
    """Collect descendants of ``page_id`` breadth-first down to ``max_depth``.

    Each returned dict holds ``page_id``, ``title`` (the page id when the
    title is blank), ``page_url``, ``depth`` (1 for direct children) and the
    raw child ``payload``. Children without an id, or already visited, are
    skipped. The result is sorted by depth, then title, then page id.
    """
    collected: list[dict[str, Any]] = []
    visited: set[str] = {str(page_id).strip()}
    frontier: list[str] = [page_id]
    level = 0

    # Walk one level at a time; parents within a level keep discovery order.
    while frontier and level < max_depth:
        next_frontier: list[str] = []
        for parent in frontier:
            for child in await self._get_children(parent):
                child_id = self._page_id(child)
                if not child_id or child_id in visited:
                    continue
                visited.add(child_id)
                child_key = str(child_id)
                collected.append(
                    {
                        "page_id": child_key,
                        "title": str(child.get("title") or "").strip() or child_key,
                        "page_url": self._page_url(child, fallback_page_id=child_key),
                        "depth": level + 1,
                        "payload": child,
                    }
                )
                next_frontier.append(child_key)
        frontier = next_frontier
        level += 1

    def _order(entry: dict[str, Any]) -> tuple[int, str, str]:
        return (
            int(entry.get("depth") or 0),
            str(entry.get("title") or ""),
            str(entry.get("page_id") or ""),
        )

    collected.sort(key=_order)
    return collected
|
|
844
|
-
|
|
845
|
-
@classmethod
def _looks_like_atomic_project_entry(
    cls, *, root_payload: dict[str, Any] | None, root_children: list[dict[str, object]]
) -> bool:
    """Tell whether a root page is a single project rather than a container.

    A root whose title classifies as a project container is never atomic.
    Otherwise the root is atomic as soon as one child title parses as a
    repository page title.
    """
    root_title = str((root_payload or {}).get("title") or "").strip()
    if cls.classify_root_child(root_title) == RootChildType.PROJECT_CONTAINER:
        return False

    for child in root_children:
        child_title = str(child.get("title") or "")
        if cls._repository_slug_from_title(child_title) is not None:
            return True
    return False
|
|
855
|
-
|
|
856
|
-
def _load_prefetch_artifacts(self) -> bool:
    """Load crawl-tree artifacts from the prefetch directory (Phase 24 TSK-243.4.5).

    Reads ``crawl_manifest.json`` from ``prefetch_dir`` and rebuilds the two
    in-memory indexes: pages by id and children by parent id. All prefetch
    state is reset first, so a failed load leaves the indexes empty and the
    manifest unset.

    Returns:
        True if the manifest was read and indexed, False when the directory
        or manifest is missing, unreadable, not valid JSON, or not a JSON
        object.
    """
    self._prefetch_manifest = None
    self._prefetch_pages = {}
    self._prefetch_children_by_parent = {}
    if not self.prefetch_dir or not self.prefetch_dir.exists():
        return False

    manifest_path = self.prefetch_dir / "crawl_manifest.json"
    if not manifest_path.exists():
        logger.debug("prefetch_manifest_not_found", path=str(manifest_path))
        return False

    try:
        with manifest_path.open("r", encoding="utf-8") as f:
            manifest = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for a file that is not valid UTF-8.
        logger.warning("prefetch_manifest_load_failed", path=str(manifest_path), error=str(e))
        return False

    if not isinstance(manifest, dict):
        # Valid JSON of the wrong shape (null, list, scalar) must not be
        # stored: later lookups call ``.get`` on the manifest.
        logger.warning(
            "prefetch_manifest_load_failed",
            path=str(manifest_path),
            error=f"expected a JSON object, got {type(manifest).__name__}",
        )
        return False
    self._prefetch_manifest = manifest

    # Index child pages by ID for fast lookup.
    raw_child_pages = manifest.get("child_pages")
    child_pages = raw_child_pages if isinstance(raw_child_pages, list) else []
    for page in child_pages:
        if not isinstance(page, dict):
            continue
        page_id = self._page_id(page)
        if page_id:
            self._prefetch_pages[page_id] = page
        parent_id = self._parent_id(page)
        if parent_id:
            self._prefetch_children_by_parent.setdefault(parent_id, []).append(page)

    # Also index the root page.
    root_page = manifest.get("page")
    if isinstance(root_page, dict):
        root_page_id = self._page_id(root_page)
        if root_page_id:
            self._prefetch_pages[root_page_id] = root_page
    manifest_root_id = str(manifest.get("root_page_id") or "").strip()

    logger.info(
        "prefetch_artifacts_loaded",
        manifest_path=str(manifest_path),
        root_page_id=manifest_root_id or manifest.get("root_page_id"),
        child_pages_count=len(child_pages),
        indexed_pages=len(self._prefetch_pages),
    )
    return True
|
|
912
|
-
|
|
913
|
-
def _get_prefetch_page(self, page_id: str) -> dict | None:
|
|
914
|
-
"""Get a page from prefetch cache by ID.
|
|
915
|
-
|
|
916
|
-
Args:
|
|
917
|
-
page_id: The Confluence page ID.
|
|
918
|
-
|
|
919
|
-
Returns:
|
|
920
|
-
Page data dict if found in prefetch cache, None otherwise.
|
|
921
|
-
"""
|
|
922
|
-
page = self._prefetch_pages.get(str(page_id))
|
|
923
|
-
if page is None:
|
|
924
|
-
self._diag_prefetch_misses += 1
|
|
925
|
-
return None
|
|
926
|
-
self._diag_prefetch_hits += 1
|
|
927
|
-
return page
|
|
928
|
-
|
|
929
|
-
def _get_prefetch_children(self, parent_id: str) -> list[dict] | None:
|
|
930
|
-
"""Get child pages from prefetch cache.
|
|
931
|
-
|
|
932
|
-
Args:
|
|
933
|
-
parent_id: The parent page ID.
|
|
934
|
-
|
|
935
|
-
Returns:
|
|
936
|
-
List of child page dicts if found in prefetch cache, None otherwise.
|
|
937
|
-
"""
|
|
938
|
-
if not self._prefetch_manifest:
|
|
939
|
-
return None
|
|
940
|
-
|
|
941
|
-
parent_id_str = str(parent_id)
|
|
942
|
-
|
|
943
|
-
# If parent is the root page, return all child_pages.
|
|
944
|
-
root_page_id = str(self._prefetch_manifest.get("root_page_id") or "").strip()
|
|
945
|
-
if root_page_id and parent_id_str == root_page_id:
|
|
946
|
-
self._diag_prefetch_hits += 1
|
|
947
|
-
return self._prefetch_manifest.get("child_pages", [])
|
|
948
|
-
|
|
949
|
-
# For non-root pages, use indexed parent->children map.
|
|
950
|
-
children = self._prefetch_children_by_parent.get(parent_id_str, [])
|
|
951
|
-
if children:
|
|
952
|
-
self._diag_prefetch_hits += 1
|
|
953
|
-
return children
|
|
954
|
-
|
|
955
|
-
# Known page with no children is still a prefetch hit (leaf).
|
|
956
|
-
if parent_id_str in self._prefetch_pages:
|
|
957
|
-
self._diag_prefetch_hits += 1
|
|
958
|
-
return []
|
|
959
|
-
|
|
960
|
-
self._diag_prefetch_misses += 1
|
|
961
|
-
return None
|
|
962
|
-
|
|
963
|
-
async def _get_children(self, parent_id: str, *, expand: str | None = None) -> list[dict]:
    """Return the child pages of ``parent_id``, preferring local data.

    Lookup order: prefetch artifacts, then the in-memory cache of earlier
    network results, then the client. The network cache is keyed by parent
    id and by whether ``body.storage`` was requested; a cached result that
    includes bodies is reused for requests that do not need them. A network
    result of ``None`` is stored and returned as an empty list.
    """
    prefetched = self._get_prefetch_children(parent_id)
    if prefetched is not None:
        logger.debug(
            "using_prefetch_children",
            parent_id=parent_id,
            count=len(prefetched),
        )
        return prefetched

    parent_key = str(parent_id)
    wants_body = expand == "body.storage"
    exact_key = (parent_key, wants_body)
    if exact_key in self._network_children_cache:
        cached = self._network_children_cache[exact_key]
        logger.debug(
            "network_children_cache_hit",
            parent_id=parent_id,
            count=len(cached),
            expand=expand,
        )
        return cached

    # A body-expanded result also satisfies a request without bodies.
    body_key = (parent_key, True)
    if not wants_body and body_key in self._network_children_cache:
        cached = self._network_children_cache[body_key]
        logger.debug(
            "network_children_cache_hit",
            parent_id=parent_id,
            count=len(cached),
            expand="body.storage",
            reused_for_expand=expand,
        )
        return cached

    self._diag_network_fetches += 1
    if self.prefetch_dir is not None:
        # Prefetch was configured but did not cover this parent.
        self._diag_fallback_fetches += 1

    fetched = await self._retry(
        lambda cid=parent_id, child_expand=expand: self.client.get_child_pages(cid, expand=child_expand),
        parent_id=parent_id,
        expand=expand,
    )
    result = fetched or []
    self._network_children_cache[exact_key] = result
    logger.debug(
        "network_children_fetched",
        parent_id=parent_id,
        count=len(result),
        expand=expand,
    )
    return result
|
|
1019
|
-
|
|
1020
|
-
@staticmethod
def classify_root_child(title: str) -> RootChildType:
    """Classify a root child page from its title alone (Phase 24, FR-24.1.2).

    The title is trimmed and lower-cased, then the rules are tried in order
    and the first match wins:

    - starts with "3.1" or contains "checklist" -> CHECKLIST
    - starts with "3.2", or contains "hệ thống" or "nền tảng"
      -> PROJECT_CONTAINER
    - contains "audit history", or starts with one of the publisher artifact
      prefixes ("project analysis -", "project audit -", "audit results -",
      "repository -", added in Phase 133) -> AUDIT_HISTORY
    - anything else, including an empty title -> UNKNOWN

    Args:
        title: The page title to classify.

    Returns:
        RootChildType indicating the classification.
    """
    lowered = (title or "").strip().lower()
    if not lowered:
        return RootChildType.UNKNOWN

    if lowered.startswith("3.1") or "checklist" in lowered:
        return RootChildType.CHECKLIST

    container_keywords = ("hệ thống", "nền tảng")
    if lowered.startswith("3.2") or any(keyword in lowered for keyword in container_keywords):
        return RootChildType.PROJECT_CONTAINER

    # Audit pages are excluded from project discovery.
    artifact_prefixes = (
        "project analysis -",
        "project audit -",
        "audit results -",
        "repository -",
    )
    if "audit history" in lowered or lowered.startswith(artifact_prefixes):
        return RootChildType.AUDIT_HISTORY

    return RootChildType.UNKNOWN
|
|
1071
|
-
|
|
1072
|
-
@staticmethod
|
|
1073
|
-
def _classify_checklist_profile_id(title: str) -> tuple[str, int]:
|
|
1074
|
-
"""Classify checklist profile id from title patterns (Phase 101)."""
|
|
1075
|
-
normalized = (title or "").strip().lower()
|
|
1076
|
-
if not normalized:
|
|
1077
|
-
return "default", 0
|
|
1078
|
-
matches: list[str] = []
|
|
1079
|
-
keyword_map: tuple[tuple[str, tuple[str, ...]], ...] = (
|
|
1080
|
-
("backend", ("backend", "back-end", "phía sau", "máy chủ")),
|
|
1081
|
-
("frontend", ("frontend", "front-end", "front end", "giao diện", "giao dien", "phía trước")),
|
|
1082
|
-
("mobile", ("mobile", "ios", "android", "mini app", "miniapp", "di động")),
|
|
1083
|
-
("infra", ("infra", "infrastructure", "devops", "platform", "hạ tầng")),
|
|
1084
|
-
)
|
|
1085
|
-
for profile_id, keywords in keyword_map:
|
|
1086
|
-
if any(keyword in normalized for keyword in keywords):
|
|
1087
|
-
matches.append(profile_id)
|
|
1088
|
-
if len(matches) == 1:
|
|
1089
|
-
return matches[0], 1
|
|
1090
|
-
if len(matches) > 1:
|
|
1091
|
-
return "default", len(matches)
|
|
1092
|
-
return "default", 0
|
|
1093
|
-
|
|
1094
|
-
@staticmethod
|
|
1095
|
-
def _extract_checklist_version(title: str) -> tuple[str, tuple[int, ...]]:
|
|
1096
|
-
"""Extract version suffix from a checklist title (Phase 112).
|
|
1097
|
-
|
|
1098
|
-
Returns (base_title, version_tuple) where base_title is the title with the
|
|
1099
|
-
version suffix stripped, and version_tuple is a comparable tuple of integers.
|
|
1100
|
-
Titles without a version suffix are treated as (0,) — the lowest version —
|
|
1101
|
-
so that a versioned successor (e.g. "v1.1") always supersedes the original.
|
|
1102
|
-
|
|
1103
|
-
Examples:
|
|
1104
|
-
"3.1 Checklist đánh giá v1.1" → ("3.1 Checklist đánh giá", (1, 1))
|
|
1105
|
-
"3.1 Checklist đánh giá" → ("3.1 Checklist đánh giá", (0,))
|
|
1106
|
-
"Checklist v2" → ("Checklist", (2,))
|
|
1107
|
-
"""
|
|
1108
|
-
# Match trailing version patterns: v1, v1.0, v1.1, V2.3.1, etc.
|
|
1109
|
-
version_re = re.compile(r"\s+v(\d+(?:\.\d+)*)\s*$", re.IGNORECASE)
|
|
1110
|
-
match = version_re.search((title or "").strip())
|
|
1111
|
-
if match:
|
|
1112
|
-
base = (title or "").strip()[: match.start()].strip()
|
|
1113
|
-
version = tuple(int(part) for part in match.group(1).split("."))
|
|
1114
|
-
return base, version
|
|
1115
|
-
return (title or "").strip(), (0,)
|
|
1116
|
-
|
|
1117
|
-
@staticmethod
|
|
1118
|
-
def _profile_display_name(profile_id: str, fallback_title: str | None = None) -> str:
|
|
1119
|
-
fallback = str(fallback_title or "").strip()
|
|
1120
|
-
if fallback:
|
|
1121
|
-
return fallback
|
|
1122
|
-
mapping = {
|
|
1123
|
-
"backend": "Backend Checklist",
|
|
1124
|
-
"frontend": "Frontend Checklist",
|
|
1125
|
-
"mobile": "Mobile Checklist",
|
|
1126
|
-
"infra": "Infrastructure Checklist",
|
|
1127
|
-
"default": "Default Checklist",
|
|
1128
|
-
}
|
|
1129
|
-
return mapping.get(profile_id, "Default Checklist")
|
|
1130
|
-
|
|
1131
|
-
@staticmethod
|
|
1132
|
-
def _build_checklist_profiles(
    checklist_candidates: list[dict[str, Any]],
) -> tuple[list[ChecklistProfile], dict[str, int]]:
    """Build deterministic checklist profile catalog from discovered candidates.

    Candidates are grouped into one bucket per profile id, pages that differ
    only by a trailing version suffix are collapsed to the highest version,
    and the buckets are emitted as ``ChecklistProfile`` objects in a stable
    order (priority, then first appearance, then profile id).

    Args:
        checklist_candidates: Candidate page dicts. The keys read here are
            ``"title"``, ``"id"``, ``"url"`` and the optional ``"profile_id"``.

    Returns:
        ``(profiles, counters)`` where ``counters`` holds ``candidate_count``,
        ``classified_count``, ``ambiguous_count`` and ``superseded_count``.
    """
    if not checklist_candidates:
        return [], {
            "candidate_count": 0,
            "classified_count": 0,
            "ambiguous_count": 0,
            "superseded_count": 0,
        }

    # Lower number sorts first; profile ids not listed here fall back to 1000.
    profile_priority: dict[str, int] = {
        "backend": 10,
        "frontend": 20,
        "mobile": 30,
        "infra": 40,
        "default": 100,
    }
    grouped: dict[str, dict[str, Any]] = {}
    classified_count = 0
    ambiguous_count = 0

    superseded_count = 0
    for index, candidate in enumerate(checklist_candidates):
        title = str(candidate.get("title") or "").strip()
        profile_id_raw, match_count = RegistryPageParser._classify_checklist_profile_id(title)
        # An explicit "profile_id" on the candidate wins over the title-based classification.
        profile_id = str(candidate.get("profile_id") or profile_id_raw).strip().lower() or "default"
        # Counters are updated before the page-id check below, so candidates
        # without an id are still counted here.
        if match_count == 1:
            classified_count += 1
        elif match_count > 1:
            ambiguous_count += 1

        page_id = str(candidate.get("id") or "").strip()
        page_url = str(candidate.get("url") or "").strip()
        if not page_id:
            continue

        # The bucket's display_name and first_index are fixed by the first
        # candidate that creates it.
        bucket = grouped.setdefault(
            profile_id,
            {
                "page_ids": [],
                "page_urls": [],
                "titles": [],
                "display_name": RegistryPageParser._profile_display_name(profile_id, title),
                "first_index": index,
            },
        )
        if page_id not in bucket["page_ids"]:
            bucket["page_ids"].append(page_id)
            bucket["page_urls"].append(page_url)
            bucket["titles"].append(title)
        bucket["first_index"] = min(int(bucket["first_index"]), index)

    # Phase 112: Version-aware deduplication within each profile bucket.
    # When multiple pages share the same base title (differing only by a version
    # suffix like "v1.1"), keep only the highest-versioned page.  The legacy
    # page (no version suffix, treated as v0) is superseded by any versioned
    # successor.  This prevents legacy page IDs (e.g. 88716673) from persisting
    # alongside their current replacements (e.g. 88722450).
    for profile_id, bucket in grouped.items():
        page_ids = bucket["page_ids"]
        page_urls = bucket["page_urls"]
        titles = bucket.get("titles", [])
        if len(page_ids) <= 1:
            continue
        # Group by base title → pick highest version per group
        # Entry layout: (position, page_id, page_url, title, version_tuple).
        base_groups: dict[str, list[tuple[int, str, str, str, tuple[int, ...]]]] = {}
        for i, (pid, purl) in enumerate(zip(page_ids, page_urls, strict=False)):
            t = titles[i] if i < len(titles) else ""
            base_title, version = RegistryPageParser._extract_checklist_version(t)
            base_key = base_title.lower().strip()
            base_groups.setdefault(base_key, []).append((i, pid, purl, t, version))
        keep_ids: list[str] = []
        keep_urls: list[str] = []
        keep_titles: list[str] = []
        for base_key, entries in base_groups.items():
            if len(entries) == 1:
                _, pid, purl, t, _ = entries[0]
                keep_ids.append(pid)
                keep_urls.append(purl)
                keep_titles.append(t)
            else:
                # Sort by version descending, keep only the highest
                # list.sort is stable, so among equal versions the entry
                # discovered first stays in front and wins.
                entries.sort(key=lambda e: e[4], reverse=True)
                winner = entries[0]
                _, pid, purl, t, ver = winner
                keep_ids.append(pid)
                keep_urls.append(purl)
                keep_titles.append(t)
                for loser in entries[1:]:
                    superseded_count += 1
                    logger.info(
                        "checklist_page_superseded_by_version",
                        profile_id=profile_id,
                        superseded_page_id=loser[1],
                        superseded_title=loser[3],
                        superseded_version=".".join(str(v) for v in loser[4]),
                        kept_page_id=pid,
                        kept_title=t,
                        kept_version=".".join(str(v) for v in ver),
                    )
        bucket["page_ids"] = keep_ids
        bucket["page_urls"] = keep_urls
        bucket["titles"] = keep_titles

    profiles: list[ChecklistProfile] = []
    # Deterministic output order: priority, then first appearance, then profile id.
    for profile_id, payload in sorted(
        grouped.items(),
        key=lambda item: (
            profile_priority.get(item[0], 1000),
            int(item[1].get("first_index", 0)),
            item[0],
        ),
    ):
        # Only the four concrete repo types get a repo-type filter; every other profile gets None.
        repo_type_filters = [profile_id] if profile_id in {"backend", "frontend", "mobile", "infra"} else None
        profiles.append(
            ChecklistProfile(
                profile_id=profile_id,
                display_name=str(payload.get("display_name") or "Default Checklist"),
                page_ids=[str(page_id) for page_id in payload.get("page_ids", []) if str(page_id).strip()],
                # Blank URLs are dropped here, so page_urls may be shorter than page_ids.
                page_urls=[str(page_url) for page_url in payload.get("page_urls", []) if str(page_url).strip()],
                priority=profile_priority.get(profile_id, 1000),
                repo_type_filters=repo_type_filters,
                loader_hints={"source": "registry_parser_root_discovery"},
            )
        )
    return profiles, {
        "candidate_count": len(checklist_candidates),
        "classified_count": classified_count,
        "ambiguous_count": ambiguous_count,
        "superseded_count": superseded_count,
    }
|
|
1265
|
-
|
|
1266
|
-
@staticmethod
|
|
1267
|
-
def _is_project_candidate_title(title: str) -> tuple[bool, SkippedNodeReason | None]:
    """Decide from its title whether a registry page can be a project (Phase 24 TSK-242).

    The title is stripped and lower-cased, then checked in order against:
    empty titles, exact audit-history names, checklist/template substrings,
    audit-history substrings, and publisher artifact prefixes (Phase 133).

    Args:
        title: Page title; ``None`` or blank counts as empty.

    Returns:
        ``(True, None)`` for a plausible project page, otherwise
        ``(False, reason)`` with the reason code used for skip telemetry.
    """
    lowered = (title or "").strip().lower()
    if not lowered:
        return False, SkippedNodeReason.EMPTY_TITLE

    # Titles that are exactly a known audit-history page name.
    if lowered in ("audit history", "lịch sử audit", "lịch sử đánh giá"):
        return False, SkippedNodeReason.AUDIT_HISTORY

    # Checklist/template pages are recognised by substring.
    for marker in ("checklist", "template", "mẫu đánh giá", "tiêu chí đánh giá"):
        if marker in lowered:
            return False, SkippedNodeReason.CHECKLIST_NODE

    # Audit history/result pages are recognised by substring as well.
    for marker in ("audit history", "lịch sử audit", "kết quả audit", "audit result"):
        if marker in lowered:
            return False, SkippedNodeReason.AUDIT_HISTORY

    # Phase 133: pages written by the publisher carry deterministic title
    # prefixes; they must not be picked up again as project candidates.
    artifact_prefixes = (
        "project analysis -",
        "project audit -",
        "audit results -",
        "repository -",
    )
    if lowered.startswith(artifact_prefixes):
        return False, SkippedNodeReason.AUDIT_HISTORY

    return True, None
|
|
1325
|
-
|
|
1326
|
-
@staticmethod
|
|
1327
|
-
def normalize_url(url: str) -> str:
    """Return a canonical lower-case form of ``url`` for duplicate detection (Phase 24 TSK-242).

    Rules, applied in order:
        - Blank input yields ``""``.
        - Trailing slashes are removed (a lone ``/`` or a bare ``scheme://`` is kept).
        - Paths starting with ``/pages/viewpage.action`` that carry a ``pageId``
          keep only that parameter.
        - Paths starting with ``/display/`` lose their query string.
        - Hosts containing ``bitbucket`` or ``git`` lose a trailing ``/browse``
          together with any query string.
        - Anything else is simply lower-cased.

    Args:
        url: The URL to normalize.

    Returns:
        Normalized URL string.
    """
    text = (url or "").strip()
    if not text:
        return ""

    # Drop trailing slashes without eating a root path or the "://" separator.
    while len(text) > 1 and text.endswith("/") and not text.endswith("://"):
        text = text[:-1]

    try:
        parts = urlparse(text)
    except Exception:
        return text.lower()

    origin = f"{parts.scheme}://{parts.netloc}"
    path = parts.path

    # Confluence pageId links: canonical form is just the pageId parameter.
    if path.startswith("/pages/viewpage.action"):
        ids = parse_qs(parts.query).get("pageId", [])
        if ids:
            return f"{origin}/pages/viewpage.action?pageId={ids[0]}".lower()

    # Confluence /display/ links: the query string is irrelevant.
    if path.startswith("/display/"):
        return f"{origin}{path}".lower()

    # Bitbucket/git hosts: ".../browse" and the bare repository URL are equivalent.
    host = parts.netloc.lower()
    if "bitbucket" in host or "git" in host:
        if path.endswith("/browse"):
            path = path[: -len("/browse")]
        return f"{origin}{path}".lower()

    return text.lower()
|
|
1378
|
-
|
|
1379
|
-
def deduplicate_links(self, links: list[DocumentLink]) -> list[DocumentLink]:
    """Drop links whose normalized URL was already seen (Phase 24 TSK-242).

    Links are compared via ``self.normalize_url(link.url)``; the first link
    for each normalized URL is kept and the input order is preserved.

    Args:
        links: List of document links to deduplicate.

    Returns:
        A new list containing only the first occurrence of each link.
    """
    unique_links: list[DocumentLink] = []
    known_keys: set[str] = set()

    for candidate in links:
        key = self.normalize_url(candidate.url)
        if key not in known_keys:
            known_keys.add(key)
            unique_links.append(candidate)
            continue
        logger.debug(
            "duplicate_link_skipped",
            url=candidate.url,
            normalized_url=key,
        )

    dropped = len(links) - len(unique_links)
    if dropped:
        # Only report when something was actually removed.
        logger.info(
            "links_deduplicated",
            original_count=len(links),
            deduplicated_count=len(unique_links),
            removed_count=dropped,
        )

    return unique_links
|
|
1414
|
-
|
|
1415
|
-
async def _retry(self, fn, *, timeout: float | None = None, **log_ctx):
    """Retry vds-cli backed calls (Confluence can be flaky/slow).

    ``fn`` is awaited up to ``self.retries + 1`` times. Only ``TimeoutError``
    and ``DataSourceError`` are handled; any other exception propagates to
    the caller. Between attempts the coroutine sleeps for
    ``self.backoff * 2**attempt`` seconds, capped at
    ``self.MAX_BACKOFF_SECONDS``.

    Args:
        fn: Zero-argument callable returning an awaitable; called once per attempt.
        timeout: Optional per-call timeout in seconds (overrides step_timeout).
        **log_ctx: Additional context for logging.

    Returns:
        Result of the function call, or None if all retries failed.
    """
    retries = self.retries
    base_delay = self.backoff
    effective_timeout = timeout if timeout is not None else self.step_timeout

    for attempt in range(retries + 1):
        try:
            if effective_timeout is not None:
                return await asyncio.wait_for(fn(), timeout=effective_timeout)
            return await fn()
        # NOTE(review): asyncio.wait_for raises asyncio.TimeoutError, which is
        # only an alias of the builtin TimeoutError from Python 3.11 on —
        # confirm the minimum supported runtime is 3.11+.
        except TimeoutError:
            logger.warning(
                "confluence_request_timeout",
                attempt=attempt + 1,
                timeout_seconds=effective_timeout,
                **log_ctx,
            )
            if attempt >= retries:
                # Out of attempts: give up quietly, callers treat None as failure.
                return None
            self._diag_retry_count += 1
            raw_delay = base_delay * (2**attempt)
            delay = min(raw_delay, self.MAX_BACKOFF_SECONDS)
            if delay != raw_delay:
                logger.debug(
                    "confluence_retry_delay_capped",
                    raw_delay_seconds=raw_delay,
                    capped_delay_seconds=delay,
                    max_backoff_seconds=self.MAX_BACKOFF_SECONDS,
                    **log_ctx,
                )
            await asyncio.sleep(delay)
        except DataSourceError as exc:
            rate_limited = self._is_rate_limited(exc)
            retry_after_seconds = self._extract_retry_after_seconds(exc)
            # Diagnostics are recorded even when this was the last attempt.
            if rate_limited:
                self._diag_rate_limit_events += 1
            if retry_after_seconds is not None:
                # Keep the largest retry-after value seen so far.
                self._diag_retry_after_seconds = max(self._diag_retry_after_seconds or 0, retry_after_seconds)
            if attempt >= retries:
                logger.warning("confluence_request_failed", attempt=attempt + 1, error=str(exc), **log_ctx)
                return None
            self._diag_retry_count += 1
            raw_delay = base_delay * (2**attempt)
            delay = raw_delay
            if retry_after_seconds is not None:
                # Wait at least as long as the extracted retry-after value ...
                delay = max(delay, float(retry_after_seconds))
            # ... but never longer than the global cap.
            delay = min(delay, self.MAX_BACKOFF_SECONDS)
            logger.warning(
                "confluence_request_retry",
                attempt=attempt + 1,
                retries=retries,
                delay_seconds=delay,
                max_backoff_seconds=self.MAX_BACKOFF_SECONDS,
                rate_limited=rate_limited,
                retry_after_seconds=retry_after_seconds,
                error=str(exc),
                **log_ctx,
            )
            await asyncio.sleep(delay)
    # Reached only when the loop body never ran (retries < 0).
    return None
|
|
1485
|
-
|
|
1486
|
-
async def parse_registry(self, root_id: str) -> ProjectRegistry:
    """Parse the registry structure starting from a root page.

    High-level flow:
        1. Load prefetch artifacts and resolve the effective registry root.
        2. Discover the projects root and checklist candidates, then build
           the checklist profile catalog.
        3. Run ``_find_projects`` on every candidate root at the base depth.
        4. Optionally re-run at increasing depth (adaptive deepening).
        5. Fall back to parsing the root page itself as a single project.
        6. Finalize diagnostics and derive the overall result status.

    Args:
        root_id: The ID of the registry root page (e.g. 88716667).

    Returns:
        ProjectRegistry object containing discovered projects.
        If some pages fail or timeout, returns partial results with
        diagnostics indicating the failures (Phase 24 TSK-243).
    """
    start_time = time.monotonic()
    diagnostics = ParseDiagnostics()
    self._reset_parse_diagnostics_tracking()

    # Phase 24 TSK-243.4.5: Try to load prefetch artifacts first.
    prefetch_load_started = time.monotonic()
    prefetch_loaded = self._load_prefetch_artifacts()
    self._record_step_timing("prefetch_load", prefetch_load_started)
    diagnostics.prefetch_pages_indexed = len(self._prefetch_pages)
    if prefetch_loaded:
        logger.info("using_prefetch_artifacts", prefetch_dir=str(self.prefetch_dir))

    root_page_payload = self._get_prefetch_page(root_id)
    root_children = await self._get_children(root_id)
    # The resolver may replace the payload and supplies the root id/title
    # that are later handed to _find_projects.
    (
        root_page_payload,
        effective_registry_root_id,
        effective_registry_root_title,
    ) = await self._resolve_effective_registry_root_context(
        page_id=root_id,
        page_payload=root_page_payload if isinstance(root_page_payload, dict) else None,
    )

    registry = ProjectRegistry(
        registry_url=f"/pages/viewpage.action?pageId={root_id}",
        registry_page_id=root_id,
        mapping_version=MAPPING_VERSION,
    )

    # If the root itself looks like a single project page, parse it directly.
    if self._looks_like_atomic_project_entry(
        root_payload=root_page_payload if isinstance(root_page_payload, dict) else None,
        root_children=root_children,
    ):
        direct_project = await self._parse_registry_entry_as_project(
            page_id=root_id,
            page_payload=root_page_payload if isinstance(root_page_payload, dict) else None,
        )
        if direct_project is not None:
            registry.projects = [direct_project]

    # Phase 36 hard cutover: checklist-first is the only supported runtime mode.
    discovery_started = time.monotonic()
    projects_root_id, checklist_candidates = await self._find_projects_root_and_checklist_deterministic(root_id)
    self._record_step_timing("discover_root_and_checklist", discovery_started)

    # Phase 101: Populate dynamic checklist profile catalog.
    checklist_profiles, checklist_discovery_diag = self._build_checklist_profiles(checklist_candidates)
    registry.checklist_profiles = checklist_profiles
    logger.info(
        "checklist_profile_discovery",
        candidate_count=checklist_discovery_diag["candidate_count"],
        classified_count=checklist_discovery_diag["classified_count"],
        ambiguous_count=checklist_discovery_diag["ambiguous_count"],
        superseded_count=checklist_discovery_diag.get("superseded_count", 0),
        profile_count=len(checklist_profiles),
        profile_ids=[profile.profile_id for profile in checklist_profiles],
    )

    candidate_roots = await self._derive_candidate_roots(root_id, projects_root_id)
    root_title = str((root_page_payload or {}).get("title") or "").strip()

    # Phase 24 TSK-242: Collect skipped nodes for telemetry.
    all_skipped_nodes: list[SkippedNode] = []
    # When a separate projects container exists among the root's children,
    # record it as skipped: it is traversed, not treated as a project.
    if projects_root_id and str(projects_root_id).strip() != str(root_id).strip():
        root_container_page = next(
            (
                page
                for page in root_children
                if str(self._page_id(page) or "").strip() == str(projects_root_id).strip()
            ),
            None,
        )
        if isinstance(root_container_page, dict):
            all_skipped_nodes.append(
                SkippedNode(
                    page_id=str(projects_root_id),
                    title=str(root_container_page.get("title", "")),
                    reason=SkippedNodeReason.PROJECT_CONTAINER,
                    details="Container page with child pages is parsed recursively, not as an atomic project leaf",
                )
            )

    # First pass: every candidate root at the configured base depth.
    aggregated_projects: list[ProjectMaterial] = []
    for candidate in candidate_roots:
        logger.debug(
            "registry_candidate_root_parse_start",
            candidate_root_id=candidate,
            recursive_depth=max(1, int(self.recursive_depth or 1)),
        )
        find_projects_started = time.monotonic()
        base_depth = max(1, int(self.recursive_depth or 1))
        projects, skipped_nodes, parse_stats = await self._find_projects(
            candidate,
            recursive_depth=base_depth,
            registry_root_id=effective_registry_root_id,
            registry_root_title=effective_registry_root_title or root_title or None,
            projects_root_id=projects_root_id,
        )
        self._record_step_timing("find_projects", find_projects_started)
        all_skipped_nodes.extend(skipped_nodes)

        # Phase 24 TSK-243: Aggregate diagnostics.
        diagnostics.total_pages_attempted += int(str(parse_stats.get("attempted", 0)))
        diagnostics.total_pages_succeeded += int(str(parse_stats.get("succeeded", 0)))
        diagnostics.adaptive_candidate_count += int(str(parse_stats.get("candidate_count", 0)))
        diagnostics.adaptive_no_links_count += int(str(parse_stats.get("no_links_count", 0)))
        timed_out = parse_stats.get("timed_out", [])
        if isinstance(timed_out, list):
            diagnostics.timed_out_pages.extend(timed_out)
        failed = parse_stats.get("failed", [])
        if isinstance(failed, list):
            diagnostics.failed_pages.extend(failed)
        diagnostics.max_projects_reached = diagnostics.max_projects_reached or bool(
            parse_stats.get("max_projects_reached", False)
        )

        if projects:
            aggregated_projects.extend(projects)
    aggregated_projects = self._deduplicate_and_sort_projects(aggregated_projects)
    registry.projects = aggregated_projects

    # Phase 35: Adaptive deepening (checklist-first only, explicit depth remains authoritative).
    base_depth = max(1, int(self.recursive_depth or 1))
    adaptive_target_depth = max(base_depth, int(self.adaptive_max_depth))
    # A configured depth above 1 counts as an explicit choice and disables deepening.
    explicit_depth_override = base_depth > 1
    adaptive_no_links_signal = diagnostics.adaptive_no_links_count > 0
    adaptive_zero_candidate_signal = (not registry.projects) and diagnostics.adaptive_candidate_count == 0
    adaptive_trigger = bool(
        self.adaptive_depth
        and not explicit_depth_override
        and (adaptive_no_links_signal or adaptive_zero_candidate_signal)
        and adaptive_target_depth > base_depth
    )
    if adaptive_trigger:
        diagnostics.adaptive_depth_applied = True
        diagnostics.adaptive_depth_from = base_depth
        diagnostics.adaptive_depth_to = base_depth
        if adaptive_zero_candidate_signal and not adaptive_no_links_signal:
            diagnostics.adaptive_trigger_reason = "zero_projects_with_zero_candidates"
        else:
            diagnostics.adaptive_trigger_reason = (
                "zero_projects_with_no_links_signal"
                if not registry.projects
                else "partial_projects_with_no_links_signal"
            )

        # Each deeper pass re-walks all candidate roots and merges into the
        # projects found so far; duplicates are removed after every pass.
        merged_projects = list(registry.projects)
        for depth in range(base_depth + 1, adaptive_target_depth + 1):
            adaptive_find_started = time.monotonic()
            pass_projects: list[ProjectMaterial] = []
            for candidate in candidate_roots:
                # NOTE(review): title precedence here is root_title first,
                # while the base pass above prefers
                # effective_registry_root_title — confirm this is intended.
                projects, skipped_nodes, parse_stats = await self._find_projects(
                    candidate,
                    recursive_depth=depth,
                    registry_root_id=effective_registry_root_id,
                    registry_root_title=root_title or effective_registry_root_title or None,
                    projects_root_id=projects_root_id,
                )
                all_skipped_nodes.extend(skipped_nodes)

                # Unlike the base pass, the adaptive_* counters are not updated here.
                diagnostics.total_pages_attempted += int(str(parse_stats.get("attempted", 0)))
                diagnostics.total_pages_succeeded += int(str(parse_stats.get("succeeded", 0)))
                timed_out = parse_stats.get("timed_out", [])
                if isinstance(timed_out, list):
                    diagnostics.timed_out_pages.extend(timed_out)
                failed = parse_stats.get("failed", [])
                if isinstance(failed, list):
                    diagnostics.failed_pages.extend(failed)
                diagnostics.max_projects_reached = diagnostics.max_projects_reached or bool(
                    parse_stats.get("max_projects_reached", False)
                )
                if projects:
                    pass_projects.extend(projects)

            merged_projects.extend(pass_projects)
            merged_projects = self._deduplicate_and_sort_projects(merged_projects)
            registry.projects = merged_projects
            diagnostics.adaptive_depth_to = depth
            self._record_step_timing("find_projects_adaptive", adaptive_find_started)

    # Last resort: nothing found and the root does not look like a container,
    # so try to parse the root page itself as the only project.
    root_entry_looks_container = bool(root_children) and not self._looks_like_atomic_project_entry(
        root_payload=root_page_payload if isinstance(root_page_payload, dict) else None,
        root_children=root_children,
    )
    # NOTE(review): within this method prefetch_insufficient_coverage is only
    # assigned further down, so this check sees whatever ParseDiagnostics()
    # initialised it to — confirm the ordering is intended.
    if not registry.projects and not diagnostics.prefetch_insufficient_coverage and not root_entry_looks_container:
        direct_project = await self._parse_registry_entry_as_project(
            page_id=root_id,
            page_payload=root_page_payload if isinstance(root_page_payload, dict) else None,
        )
        if direct_project is not None:
            registry.projects = [direct_project]

    # Phase 24 TSK-242: Persist skipped-node telemetry.
    registry.skipped_nodes = all_skipped_nodes
    if all_skipped_nodes:
        logger.info(
            "registry_skipped_nodes",
            total_skipped=len(all_skipped_nodes),
            # Per-reason counts, limited to reasons that actually occurred.
            by_reason={
                reason.value: sum(1 for n in all_skipped_nodes if n.reason == reason)
                for reason in SkippedNodeReason
                if any(n.reason == reason for n in all_skipped_nodes)
            },
        )

    if not registry.projects:
        prefetch_hint: str | None = None
        # Prefetch data was used but produced no projects: flag it and tell
        # the operator how to rebuild the artifacts.
        if prefetch_loaded and diagnostics.prefetch_pages_indexed > 0:
            diagnostics.prefetch_insufficient_coverage = True
            prefetch_hint = (
                "Prefetch artifacts may be too shallow for project discovery. "
                "Rebuild crawl artifacts with deeper traversal (for this registry layout, start with depth>=2). "
                "If using CLI bootstrap, parse-registry supports --staged-crawl-mode with --prefetch-depth."
            )
            diagnostics.prefetch_hint = prefetch_hint
        logger.warning(
            "projects_not_found",
            root_id=root_id,
            projects_root_id=projects_root_id,
            prefetch_loaded=prefetch_loaded,
            prefetch_pages_indexed=diagnostics.prefetch_pages_indexed,
            prefetch_hint=prefetch_hint,
        )

    # Phase 24 TSK-243: Finalize diagnostics.
    elapsed_ms = int((time.monotonic() - start_time) * 1000)
    diagnostics.parse_duration_ms = elapsed_ms
    diagnostics.partial_success = bool(
        diagnostics.timed_out_pages or diagnostics.failed_pages or diagnostics.max_projects_reached
    )
    # Copy the per-run counters collected on the parser instance.
    diagnostics.prefetch_hits = self._diag_prefetch_hits
    diagnostics.prefetch_misses = self._diag_prefetch_misses
    diagnostics.network_fetches = self._diag_network_fetches
    diagnostics.fallback_fetches = self._diag_fallback_fetches
    diagnostics.timed_out_pages = self._deduplicate_page_ids(diagnostics.timed_out_pages)
    diagnostics.failed_pages = self._deduplicate_page_ids(diagnostics.failed_pages)
    diagnostics.step_timings_ms = dict(self._diag_step_timings_ms)
    diagnostics.retry_count = self._diag_retry_count
    diagnostics.rate_limit_events = self._diag_rate_limit_events
    diagnostics.retry_after_seconds = self._diag_retry_after_seconds
    diagnostics.effective_retries = self.retries
    diagnostics.effective_backoff_seconds = self.backoff
    diagnostics.max_backoff_seconds = self.MAX_BACKOFF_SECONDS
    diagnostics.effective_step_timeout_seconds = self.step_timeout
    has_transient_failures = bool(diagnostics.timed_out_pages or diagnostics.failed_pages)
    diagnostics.can_retry = bool(has_transient_failures or diagnostics.rate_limit_events > 0)
    # Status precedence: fail_fast (failures and no projects), then
    # partial_success, then success.
    if has_transient_failures and not registry.projects:
        diagnostics.result_status = "fail_fast"
        diagnostics.result_message = (
            "No projects were parsed because all candidate pages failed or timed out. "
            "Retry with higher --timeout/--step-timeout, lower --parse-concurrency, or smaller --max-projects."
        )
    elif diagnostics.partial_success:
        diagnostics.result_status = "partial_success"
        diagnostics.result_message = (
            "Registry parsed partially: some pages failed or timed out, but partial project results are available."
        )
    else:
        diagnostics.result_status = "success"
        diagnostics.result_message = "Registry parsed successfully."
    registry.diagnostics = diagnostics

    # Log level mirrors the result status.
    if diagnostics.result_status == "fail_fast":
        logger.error(
            "registry_parse_fail_fast",
            duration_ms=elapsed_ms,
            projects_found=len(registry.projects),
            timed_out_count=len(diagnostics.timed_out_pages),
            failed_count=len(diagnostics.failed_pages),
        )
    elif diagnostics.partial_success:
        logger.warning(
            "registry_parse_partial_success",
            duration_ms=elapsed_ms,
            projects_found=len(registry.projects),
            timed_out_count=len(diagnostics.timed_out_pages),
            failed_count=len(diagnostics.failed_pages),
            max_projects_reached=diagnostics.max_projects_reached,
        )
    else:
        logger.info(
            "registry_parse_complete",
            duration_ms=elapsed_ms,
            projects_found=len(registry.projects),
        )

    return registry
|
|
1784
|
-
|
|
1785
|
-
@staticmethod
|
|
1786
|
-
def _deduplicate_page_ids(page_ids: list[str]) -> list[str]:
    """Return the unique, non-blank page IDs in deterministic order.

    Each ID is converted to ``str`` and stripped; blanks are discarded. The
    result is sorted with ``RegistryPageParser._page_id_sort_key``.
    """
    unique_ids: set[str] = set()
    for raw_id in page_ids:
        cleaned = str(raw_id).strip()
        if cleaned:
            unique_ids.add(cleaned)
    return sorted(unique_ids, key=RegistryPageParser._page_id_sort_key)
|
|
1790
|
-
|
|
1791
|
-
@staticmethod
|
|
1792
|
-
def _page_id_sort_key(page_id: str) -> tuple[int, int | str, str]:
|
|
1793
|
-
raw = str(page_id).strip()
|
|
1794
|
-
if raw.isdigit():
|
|
1795
|
-
return (0, int(raw), raw)
|
|
1796
|
-
return (1, raw, raw)
|
|
1797
|
-
|
|
1798
|
-
@classmethod
|
|
1799
|
-
def _deduplicate_and_sort_projects(cls, projects: list[ProjectMaterial]) -> list[ProjectMaterial]:
    """Keep the first project per page ID and return them in deterministic order.

    Projects whose stripped ``page_id`` is blank are dropped. The survivors
    are ordered by ``cls._page_id_sort_key`` applied to their page ID.
    """
    first_seen: dict[str, ProjectMaterial] = {}
    for material in projects:
        key = str(material.page_id).strip()
        if key and key not in first_seen:
            first_seen[key] = material
    ordered_keys = sorted(first_seen, key=cls._page_id_sort_key)
    return [first_seen[key] for key in ordered_keys]
|
|
1808
|
-
|
|
1809
|
-
async def _derive_candidate_roots(self, root_id: str, projects_root_id: str | None) -> list[str]:
    """Assemble the ordered, de-duplicated list of traversal start points.

    The discovered projects root (when given) comes first. The registry root
    itself is added when no projects root was found or when both IDs are the
    same. Every remaining child of the registry root follows, except children
    classified as checklist or audit history.
    """
    # A dict doubles as an insertion-ordered set of normalized IDs.
    ordered: dict[str, None] = {}

    def _register(raw_id: str | None) -> None:
        key = str(raw_id or "").strip()
        if key:
            ordered.setdefault(key, None)

    _register(projects_root_id)
    if not projects_root_id or str(projects_root_id).strip() == str(root_id).strip():
        _register(root_id)

    excluded = {RootChildType.CHECKLIST, RootChildType.AUDIT_HISTORY}
    for child in await self._get_children(root_id):
        child_id = self._page_id(child)
        if not child_id:
            continue
        title = str(child.get("title", "")).strip()
        if self.classify_root_child(title) in excluded:
            continue
        _register(child_id)

    candidates = list(ordered)
    logger.info(
        "registry_candidate_roots_derived",
        root_id=root_id,
        projects_root_id=projects_root_id,
        candidates=candidates,
    )
    return candidates
|
|
1847
|
-
|
|
1848
|
-
async def _find_projects_root_and_checklist_deterministic(
    self, root_id: str
) -> tuple[str | None, list[dict[str, str]]]:
    """Locate the projects root and checklist pages by classifying root children.

    An explicit ``checklist_page_override`` is always listed first. Each child
    of ``root_id`` is then classified by title with ``classify_root_child``:
    checklist children are appended as candidates and the first project
    container child becomes the projects root. When no child is a project
    container and the root's own title classifies as one, the root itself is
    used as the projects root.

    Args:
        root_id: The registry root page ID.

    Returns:
        ``(projects_root_id, checklist_candidates)``; candidates are dicts with
        ``id``, ``title``, ``url`` and ``profile_id`` in discovery order.
    """
    container_type = RootChildType.PROJECT_CONTAINER

    def _title_of(payload: object) -> str:
        # Non-dict payloads (including None) have no usable title.
        if not isinstance(payload, dict):
            return ""
        return str(payload.get("title") or "").strip()

    checklist_candidates: list[dict[str, str]] = []
    if self.checklist_page_override:
        override_entry = {
            "id": self.checklist_page_override,
            "title": "Explicit override",
            "url": f"/pages/viewpage.action?pageId={self.checklist_page_override}",
            "profile_id": "default",
        }
        checklist_candidates.append(override_entry)
        logger.info(
            "checklist_page_override_used",
            page_id=self.checklist_page_override,
        )

    children = await self._get_children(root_id)
    root_title = _title_of(self._get_prefetch_page(root_id))

    # Title of the first child that classifies as a project container, if any.
    container_child_title = ""
    for child in children or ():
        child_title = str(child.get("title", "")).strip()
        if self.classify_root_child(child_title) == container_type:
            container_child_title = child_title
            break

    if not root_title:
        deep_traversal = max(1, int(self.recursive_depth or 1)) > 1
        if deep_traversal or not container_child_title:
            # Best effort: a failed root fetch simply leaves the title empty.
            try:
                fetched_root = await self._retry(lambda: self.client.get_page(root_id), page_id=root_id)
            except Exception:
                fetched_root = None
            root_title = _title_of(fetched_root)

    projects_root_id: str | None = None
    if self.classify_root_child(root_title) == container_type and not container_child_title:
        projects_root_id = root_id

    for child in children:
        title = str(child.get("title", "")).strip()
        page_id = self._page_id(child)
        if not page_id:
            continue
        classification = self.classify_root_child(title)
        logger.debug(
            "root_child_classified",
            page_id=page_id,
            title=title,
            classification=classification.value,
        )

        if classification == RootChildType.CHECKLIST:
            page_url = self._page_url(child, fallback_page_id=page_id)
            profile_id, _ = self._classify_checklist_profile_id(title)
            checklist_candidates.append(
                {
                    "id": page_id,
                    "title": title,
                    "url": page_url,
                    "profile_id": profile_id,
                }
            )
            logger.info(
                "checklist_page_discovered_deterministic",
                page_id=page_id,
                title=title,
                profile_id=profile_id,
            )
            continue

        # Only the first project container wins; later ones are ignored here.
        if classification != container_type or projects_root_id:
            continue
        projects_root_id = page_id
        logger.info(
            "projects_root_discovered_deterministic",
            page_id=page_id,
            title=title,
        )
        logger.info(
            "projects_root_marked_recursive_container",
            page_id=page_id,
            title=title,
        )

    if not checklist_candidates:
        # Structured diagnostic so the operator can see which children were inspected.
        child_titles = [str(child.get("title", "")).strip() for child in children if self._page_id(child)]
        logger.warning(
            "checklist_discovery_failed",
            root_id=root_id,
            searched_patterns=[
                "title starting with '3.1'",
                "title containing 'checklist'",
            ],
            child_pages_found=child_titles,
            child_page_count=len(child_titles),
            guidance=(
                "No checklist page was discovered among root children. "
                "Pass --checklist-page <PAGE_ID> explicitly to bypass "
                "automatic discovery."
            ),
        )

    return projects_root_id, checklist_candidates
|
|
1968
|
-
|
|
1969
|
-
async def _find_projects(
    self,
    parent_id: str,
    *,
    recursive_depth: int | None = None,
    registry_root_id: str,
    registry_root_title: str | None,
    projects_root_id: str | None,
) -> tuple[list[ProjectMaterial], list[SkippedNode], dict[str, object]]:
    """Discover candidate project pages under ``parent_id`` and parse them.

    The method runs in three stages:

    1. Breadth-first traversal of child pages, limited to ``recursive_depth``
       levels (``self.recursive_depth`` when the argument is ``None``; a
       minimum of 1, meaning direct children only).
    2. Title-based filtering of the discovered pages, then the optional
       ``self.max_projects`` cap.
    3. Concurrent parsing of the remaining candidates, bounded by
       ``self.parse_concurrency`` and, when set, ``self.step_timeout`` per page.

    Args:
        parent_id: Page ID where traversal starts.
        recursive_depth: Optional override for the traversal depth.
        registry_root_id: ID of the registry root page.
        registry_root_title: Title of the registry root page, if known.
        projects_root_id: ID of the discovered projects root, if any.

    Returns:
        Tuple of ``(projects, skipped_nodes, parse_stats)`` where:
            - projects: parsed materials that contain at least one document link
            - skipped_nodes: records of pages filtered out or not parsed
            - parse_stats: dict with ``attempted``, ``succeeded``, ``timed_out``
              (page IDs), ``failed`` (page IDs), ``max_projects_reached``,
              ``candidate_count`` and ``no_links_count``
    """
    projects: list[ProjectMaterial] = []
    skipped_nodes: list[SkippedNode] = []
    parse_stats: dict[str, object] = {
        "attempted": 0,
        "succeeded": 0,
        "timed_out": [],
        "failed": [],
        "max_projects_reached": False,
        "candidate_count": 0,
        "no_links_count": 0,
    }
    container_details = "Container page with child pages is parsed recursively, not as an atomic project leaf"

    def _skip(node_id: object, node_title: object, reason: SkippedNodeReason, details: str) -> None:
        """Record one skipped page."""
        skipped_nodes.append(
            SkippedNode(
                page_id=str(node_id),
                title=str(node_title),
                reason=reason,
                details=details,
            )
        )

    # Depth-limited traversal: recursive_depth=1 means direct children only.
    # Avoid using CQL ancestor queries because Confluence does not provide true depth limits.
    effective_recursive_depth = self.recursive_depth if recursive_depth is None else recursive_depth
    max_depth = max(1, int(effective_recursive_depth or 1))
    registry_root_key = str(registry_root_id).strip()

    # Resolve the traversal root's title: prefetch first, then the registry
    # root's child listing, and only then a direct page fetch.
    root_payload = self._prefetch_pages.get(str(parent_id), {})
    if not isinstance(root_payload, dict):
        root_payload = {}
    root_title = str(root_payload.get("title") or "").strip()
    if not root_title and parent_id != registry_root_key:
        registry_children = await self._get_children(registry_root_key)
        for registry_child in registry_children:
            if str(self._page_id(registry_child) or "").strip() != str(parent_id).strip():
                continue
            root_title = str(registry_child.get("title") or "").strip()
            if root_title:
                break
    should_fetch_root_title = (
        not root_title
        and max_depth > 1
        and (parent_id != registry_root_key or str(projects_root_id or "").strip() == registry_root_key)
    )
    if should_fetch_root_title:
        fetched_root = await self._retry(lambda: self.client.get_page(parent_id), page_id=parent_id)
        if isinstance(fetched_root, dict):
            root_payload = fetched_root
            root_title = str(fetched_root.get("title") or "").strip()

    root_url = self._page_url(root_payload, fallback_page_id=parent_id)
    root_lineage = [self._lineage_node(page_id=parent_id, title=root_title or None, page_url=root_url)]
    normalized_registry_root_id = str(registry_root_id or "").strip()
    normalized_parent_id = str(parent_id or "").strip()
    if normalized_registry_root_id and normalized_registry_root_id != normalized_parent_id:
        # Prepend the registry root so lineage always starts at the registry.
        registry_root_payload = self._prefetch_pages.get(normalized_registry_root_id, {})
        if not isinstance(registry_root_payload, dict):
            registry_root_payload = {}
        registry_root_url = self._page_url(registry_root_payload, fallback_page_id=normalized_registry_root_id)
        root_lineage = [
            self._lineage_node(
                page_id=normalized_registry_root_id,
                title=registry_root_title or None,
                page_url=registry_root_url,
            ),
            *root_lineage,
        ]

    queue: deque[tuple[str, int, list[dict[str, str | None]]]] = deque([(parent_id, 0, root_lineage)])
    seen: set[str] = {parent_id}
    container_page_ids: set[str] = set()
    pages: list[dict[str, object]] = []
    discovery_started = time.monotonic()
    logger.debug(
        "registry_project_traversal_start",
        parent_id=parent_id,
        max_depth=max_depth,
    )

    while queue:
        current_id, depth, current_lineage = queue.popleft()
        if depth >= max_depth:
            continue

        # Checklist-first keeps root discovery lightweight, but when fetching
        # leaf candidate pages (depth + 1 == max_depth), include body.storage
        # to avoid per-page follow-up get_page calls.
        expand = "body.storage" if (depth + 1) >= max_depth else None
        children = await self._get_children(current_id, expand=expand)
        if children:
            container_page_ids.add(current_id)

        for child in children:
            page_id = self._page_id(child)
            if not page_id:
                continue
            child_title = child.get("title", "")
            if page_id in seen:
                _skip(page_id, child_title, SkippedNodeReason.DUPLICATE, "Page already processed in traversal")
                continue
            if depth == 0 and current_id == parent_id:
                classification = self.classify_root_child(str(child_title).strip())
                if str(current_id).strip() == registry_root_key:
                    if classification == RootChildType.CHECKLIST:
                        _skip(
                            page_id,
                            child_title,
                            SkippedNodeReason.CHECKLIST_NODE,
                            "Checklist root branch pruned from project traversal",
                        )
                        continue
                    if classification == RootChildType.AUDIT_HISTORY:
                        _skip(
                            page_id,
                            child_title,
                            SkippedNodeReason.AUDIT_HISTORY,
                            "Audit-history root branch pruned from project traversal",
                        )
                        continue
                    if classification == RootChildType.PROJECT_CONTAINER:
                        # Recorded as skipped but still traversed: no `continue` here.
                        _skip(page_id, child_title, SkippedNodeReason.PROJECT_CONTAINER, container_details)
            seen.add(page_id)
            child_node = self._lineage_node(
                page_id=page_id,
                title=str(child_title).strip() or None,
                page_url=self._page_url(child, fallback_page_id=page_id),
            )
            child_lineage = [*current_lineage, child_node]
            pages.append(
                {
                    "id": page_id,
                    "title": child_title,
                    "page": child,
                    "lineage": child_lineage,
                }
            )
            queue.append((page_id, depth + 1, child_lineage))

    self._record_step_timing("discover_candidates", discovery_started)

    # Parallelize parsing with bounded semaphore to reduce Confluence timeout pressure.
    semaphore = asyncio.Semaphore(self.parse_concurrency)

    async def _parse_with_sem(
        p_id: str,
        p_title: str,
        page_payload: dict[str, object] | None,
    ) -> tuple[ProjectMaterial | None, str | None]:
        """Parse one project page under the semaphore.

        Returns:
            ``(material, error_type)`` where ``error_type`` is ``None`` on
            success, ``"timeout"`` when the step timeout elapsed, or
            ``"failed"`` for any other error.
        """
        async with semaphore:
            try:
                if self.step_timeout is not None:
                    material = await asyncio.wait_for(
                        self.parse_project_page(p_id, p_title, page_payload=page_payload),
                        timeout=self.step_timeout,
                    )
                else:
                    material = await self.parse_project_page(p_id, p_title, page_payload=page_payload)
                return material, None
            # asyncio.TimeoutError is only the builtin TimeoutError from
            # Python 3.11; naming both keeps timeouts out of the generic
            # handler on older interpreters.
            except (TimeoutError, asyncio.TimeoutError):
                logger.warning("project_parse_timeout", page_id=p_id, title=p_title)
                return None, "timeout"
            except Exception as e:
                logger.warning("project_parse_failed", page_id=p_id, title=p_title, error=str(e))
                return None, "failed"

    # Filter pages and track skipped nodes.
    candidate_pages = []
    for page in pages:
        page_id = page.get("id")
        page_title = str(page.get("title", ""))
        if not page_id:
            continue
        page_lineage = page.get("lineage")
        page_lineage_nodes = page_lineage if isinstance(page_lineage, list) else []
        # NOTE(review): the last lineage node appears to be the page itself
        # (each lineage is built as [*parent_lineage, child_node]), so this may
        # never match parent_id — confirm whether [-2] was intended.
        immediate_parent_matches_root = bool(page_lineage_nodes) and str(
            page_lineage_nodes[-1].get("page_id") or ""
        ).strip() == str(parent_id)
        normalized_title = page_title.strip().casefold()
        if (
            immediate_parent_matches_root
            and self.classify_root_child(page_title) == RootChildType.PROJECT_CONTAINER
            and (str(page_id) in container_page_ids or normalized_title.startswith("3.2"))
        ):
            _skip(page_id, page_title, SkippedNodeReason.PROJECT_CONTAINER, container_details)
            logger.debug(
                "project_container_skipped_as_leaf",
                page_id=page_id,
                title=page_title,
            )
            continue

        is_candidate, skip_reason = self._is_project_candidate_title(page_title)
        if not is_candidate and skip_reason:
            # A checklist-titled page that has children of its own is kept as a candidate.
            keep_anyway = skip_reason == SkippedNodeReason.CHECKLIST_NODE and str(page_id) in container_page_ids
            if not keep_anyway:
                _skip(page_id, page_title, skip_reason, f"Filtered by title pattern: {page_title}")
                logger.debug(
                    "project_candidate_skipped",
                    page_id=page_id,
                    title=page_title,
                    reason=skip_reason.value,
                )
                continue
        candidate_pages.append(page)

    # Apply the max_projects limit if configured.
    max_projects_reached = False
    parse_stats["candidate_count"] = len(candidate_pages)
    if self.max_projects is not None and len(candidate_pages) > self.max_projects:
        logger.info(
            "max_projects_limit_applied",
            total_candidates=len(candidate_pages),
            max_projects=self.max_projects,
        )
        # Track skipped pages due to budget limit.
        for page in candidate_pages[self.max_projects :]:
            _skip(
                page["id"],
                page.get("title", ""),
                SkippedNodeReason.MAX_PROJECTS_REACHED,
                f"Skipped due to --max-projects {self.max_projects} limit",
            )
        candidate_pages = candidate_pages[: self.max_projects]
        max_projects_reached = True

    parse_stats["attempted"] = len(candidate_pages)
    parse_stats["max_projects_reached"] = max_projects_reached

    parse_started = time.monotonic()
    tasks = [
        _parse_with_sem(
            str(page["id"]),
            str(page.get("title", "")),
            page.get("page") if isinstance(page.get("page"), dict) else None,
        )
        for page in candidate_pages
    ]

    # gather() preserves task order, so results line up with candidate_pages.
    results = await asyncio.gather(*tasks)
    self._record_step_timing("parse_project_candidates", parse_started)

    timed_out_pages: list[str] = []
    failed_pages: list[str] = []
    succeeded_count = 0
    no_links_count = 0
    for page, (material, error_type) in zip(candidate_pages, results):
        page_id = str(page["id"])
        page_title = str(page.get("title", ""))

        if error_type == "timeout":
            timed_out_pages.append(page_id)
            _skip(page_id, page_title, SkippedNodeReason.TIMEOUT, "Page parsing timed out")
        elif error_type == "failed":
            failed_pages.append(page_id)
            _skip(page_id, page_title, SkippedNodeReason.MALFORMED_PAGE, "Failed to parse project page")
        elif material:
            raw_lineage = page.get("lineage")
            lineage = cast("list[dict[str, str | None]]", raw_lineage) if isinstance(raw_lineage, list) else []
            self._apply_lineage_metadata(
                material=material,
                lineage=lineage,
                registry_root_id=registry_root_id,
                registry_root_title=registry_root_title,
                projects_root_id=projects_root_id,
                candidate_root_id=parent_id,
            )

            # A page that parsed but yielded no links is skipped, not returned.
            if not material.documents:
                no_links_count += 1
                _skip(page_id, page_title, SkippedNodeReason.NO_LINKS, "Page parsed but no document links found")
                logger.debug(
                    "project_no_links",
                    page_id=page_id,
                    title=page_title,
                )
            else:
                projects.append(material)
                succeeded_count += 1

    parse_stats["succeeded"] = succeeded_count
    parse_stats["no_links_count"] = no_links_count
    parse_stats["timed_out"] = timed_out_pages
    parse_stats["failed"] = failed_pages

    return projects, skipped_nodes, parse_stats
|
|
2337
|
-
|
|
2338
|
-
async def _get_page_with_cache(self, page_id: str) -> dict | None:
    """Return a page fetched with ``body.storage``, memoised per page ID.

    The stripped page ID is the cache key. On a miss the page is fetched
    through ``self._retry`` and the result, whatever it is, is stored so the
    same ID is not requested again. Fetch counters are updated on every miss.
    """
    key = str(page_id).strip()
    try:
        cached_page = self._network_page_cache[key]
    except KeyError:
        pass
    else:
        logger.debug("network_page_cache_hit", page_id=key)
        return cached_page

    self._diag_network_fetches += 1
    # With a prefetch directory configured, a network fetch counts as a fallback.
    if self.prefetch_dir is not None:
        self._diag_fallback_fetches += 1

    fetched_page = await self._retry(
        lambda: self.client.get_page(key, expand="body.storage"),
        page_id=key,
        expand="body.storage",
    )
    self._network_page_cache[key] = fetched_page
    logger.debug("network_page_fetched", page_id=key, found=bool(fetched_page))
    return fetched_page
|
|
2356
|
-
|
|
2357
|
-
async def parse_project_page(
    self,
    page_id: str,
    title: str,
    *,
    page_payload: dict[str, object] | None = None,
) -> ProjectMaterial | None:
    """Parse one project page into a ``ProjectMaterial`` with its document links.

    The page body is taken from the first source that provides one: the
    supplied payload, the prefetch cache, a local HTML file named by the
    payload's ``html_path``, and finally a network fetch. Results, including
    "no page", are cached per stripped page ID; cached materials are returned
    as deep copies.

    Args:
        page_id: Confluence page ID.
        title: Fallback title, used when the page payload has no usable title.
        page_payload: Optional already-fetched page payload.

    Returns:
        The parsed material, or ``None`` when no page payload could be obtained.
    """
    cache_key = str(page_id).strip()
    cached_result = self._project_parse_cache.get(cache_key, _CACHE_MISS)
    if cached_result is not _CACHE_MISS:
        logger.debug("project_parse_cache_hit", page_id=cache_key, has_material=bool(cached_result))
        if not isinstance(cached_result, ProjectMaterial):
            return None
        return cast("ProjectMaterial", cached_result).model_copy(deep=True)

    def _storage_value(body_field: object) -> str:
        # Safely walk body -> storage -> value; anything missing yields "".
        if not isinstance(body_field, dict):
            return ""
        storage = body_field.get("storage")
        if not isinstance(storage, dict):
            return ""
        return str(storage.get("value") or "")

    page = page_payload
    body = _storage_value(page.get("body")) if isinstance(page, dict) else ""

    # If payload has no body, try prefetch cache before hitting network.
    if not body:
        prefetch_page = self._get_prefetch_page(page_id)
        if prefetch_page is not None:
            page = prefetch_page
            body = _storage_value(page.get("body"))

    # If storage body is unavailable but a local HTML capture is referenced, use it.
    if not body and isinstance(page, dict):
        html_path = page.get("html_path")
        if isinstance(html_path, str) and html_path.strip():
            html_file = Path(html_path)
            try:
                if html_file.exists():
                    body = html_file.read_text(encoding="utf-8")
            except OSError:
                # Keep behavior resilient: fall through to network fetch if local read fails.
                body = ""

    # In prefetch mode, pages known to the prefetch set that captured no
    # body/html are not re-fetched, so parsing can run offline.
    should_skip_network_fallback = bool(
        self.prefetch_dir is not None and str(page_id) in self._prefetch_pages and not body
    )
    if not body:
        if should_skip_network_fallback:
            logger.debug("prefetch_body_missing_skip_network", page_id=page_id)
        else:
            page = await self._get_page_with_cache(page_id)
    if not page:
        self._project_parse_cache[cache_key] = None
        return None

    if not body and isinstance(page, dict):
        body = _storage_value(page.get("body"))

    page_title = page.get("title")
    if isinstance(page_title, str) and page_title.strip():
        resolved_title = page_title
    else:
        resolved_title = str(title or "Unknown")
    resolved_page_url = self._page_url(page, fallback_page_id=page_id)
    base_url = self._resolve_base_url(resolved_page_url)

    material = ProjectMaterial(
        project_name=resolved_title,
        page_id=page_id,
        page_url=resolved_page_url,
    )

    # Extract, de-duplicate, and resolve page IDs for the table links.
    links = self.deduplicate_links(self._extract_links_from_html(body, base_url))
    await self._resolve_missing_confluence_page_ids(links)
    material.documents = links

    # Split links by source; duplicates are detected on the normalized URL.
    seen_confluence: set[str] = set()
    seen_bitbucket: set[str] = set()
    for doc in links:
        normalized_url = self.normalize_url(doc.url)
        if doc.source == LinkSource.CONFLUENCE:
            if normalized_url in seen_confluence:
                continue
            material.confluence_links.append(doc.url)
            seen_confluence.add(normalized_url)
        elif doc.source == LinkSource.BITBUCKET:
            if normalized_url in seen_bitbucket:
                continue
            material.bitbucket_links.append(doc.url)
            seen_bitbucket.add(normalized_url)

    self._project_parse_cache[cache_key] = material.model_copy(deep=True)
    return material
|
|
2467
|
-
|
|
2468
|
-
def _resolve_base_url(self, absolute_url: str) -> str:
|
|
2469
|
-
"""Derive base URL from an absolute Confluence link."""
|
|
2470
|
-
raw = (absolute_url or "").strip()
|
|
2471
|
-
if not raw:
|
|
2472
|
-
return ""
|
|
2473
|
-
try:
|
|
2474
|
-
parsed = urlsplit(raw)
|
|
2475
|
-
except Exception:
|
|
2476
|
-
return ""
|
|
2477
|
-
if not parsed.scheme or not parsed.netloc:
|
|
2478
|
-
return ""
|
|
2479
|
-
return f"{parsed.scheme}://{parsed.netloc}"
|
|
2480
|
-
|
|
2481
|
-
def _extract_links_from_html(self, html_content: str, base_url: str) -> list[DocumentLink]:
    """Extract document links from the tables of an HTML page body.

    For every non-header table row, links are read from the columns mapped as
    ``"link"`` by ``_map_table_headers`` (or from all cells when no link column
    is mapped). Both ``<a href>`` anchors and URLs written as plain text are
    collected; within one cell a URL is emitted once, compared on its
    normalized form.

    Args:
        html_content: HTML markup to scan.
        base_url: Prefix applied to hrefs that do not start with ``http``.

    Returns:
        One ``DocumentLink`` per discovered URL, in document order.
    """

    def _absolute(href: str) -> str:
        if href.startswith("http"):
            return href
        first_segment = href.split("/", 1)[0]
        is_plain_relative_path = (
            bool(base_url)
            and not base_url.endswith("/")
            and not href.startswith(("/", "#", "?"))
            and ":" not in first_segment
        )
        if is_plain_relative_path:
            # "page.html" on "https://host" must not become "https://hostpage.html".
            return f"{base_url}/{href}"
        return f"{base_url}{href}"

    soup = BeautifulSoup(html_content, "html.parser")
    links: list[DocumentLink] = []

    # Find tables - usually structure is Role | Doc Type | Link
    for table in soup.find_all("table"):
        rows = table.find_all("tr")
        if not rows:
            continue

        # Normalized rows resolve rowspans so column indexes are consistent.
        normalized_rows = self._normalize_table_rows(rows)
        header_map, header_row_index = self._map_table_headers(rows)
        link_cols = [idx for idx, key in header_map.items() if key == "link"]

        for row_index, cells in enumerate(normalized_rows[: len(rows)]):
            if row_index == header_row_index or not cells:
                continue

            role = self._extract_role(cells, header_map)
            doc_type_vn, doc_number = self._extract_doc_type(cells, header_map, "doc_type_vn")
            doc_type_en, _ = self._extract_doc_type(cells, header_map, "doc_type_en")
            # Prefer VN doc type when present; fall back to EN.
            doc_type = doc_type_vn or doc_type_en or "Unknown"

            anchor_cells = [cells[idx] for idx in link_cols if idx < len(cells)] if link_cols else cells
            for cell in anchor_cells:
                if cell is None:
                    continue

                # (url, title) pairs: anchors first, then plain-text URLs.
                targets: list[tuple[str, str]] = []
                for a in cell.find_all("a"):
                    href = a.get("href")
                    if href:
                        targets.append((_absolute(href), a.get_text(strip=True)))
                # Some Confluence pages keep URLs as plain text (e.g. <span class="nolink">...).
                for raw_url in self._extract_plain_urls_from_text(cell.get_text(" ", strip=True)):
                    targets.append((raw_url, raw_url))

                cell_seen_urls: set[str] = set()
                for url, link_title in targets:
                    normalized_url = self.normalize_url(url)
                    if normalized_url in cell_seen_urls:
                        continue
                    cell_seen_urls.add(normalized_url)
                    source = self._determine_source(url)
                    page_id = self._extract_confluence_page_id(url) if source == LinkSource.CONFLUENCE else None
                    links.append(
                        DocumentLink(
                            role=role,
                            doc_number=doc_number,
                            doc_type=doc_type,
                            doc_type_en=doc_type_en,
                            url=url,
                            page_id=page_id,
                            source=source,
                            title=link_title,
                        )
                    )
    return links
|
|
2566
|
-
|
|
2567
|
-
@staticmethod
def _extract_plain_urls_from_text(raw_text: str) -> list[str]:
    """Return the distinct URLs found in ``raw_text``, in first-seen order.

    Each ``_PLAIN_TEXT_URL_PATTERN`` match has trailing punctuation
    (closing brackets, quotes, sentence punctuation) stripped. Matches that
    are empty after stripping are dropped, and repeated URLs are reported
    only once.
    """
    if not raw_text:
        return []
    trimmed = (
        found.group(0).rstrip(").,;\"'!?]}")
        for found in _PLAIN_TEXT_URL_PATTERN.finditer(raw_text)
    )
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    return list(dict.fromkeys(url for url in trimmed if url))
|
|
2583
|
-
|
|
2584
|
-
def _normalize_table_rows(self, rows: list) -> list[list]:
|
|
2585
|
-
"""Normalize HTML table rows into a rectangular cell grid.
|
|
2586
|
-
|
|
2587
|
-
Confluence tables frequently use rowspan/colspan, which makes naive
|
|
2588
|
-
`row.find_all('td')` extraction misalign columns. This helper expands
|
|
2589
|
-
rowspans so cells can be referenced by consistent column index.
|
|
2590
|
-
"""
|
|
2591
|
-
# Determine column count from the widest row (accounting for colspan).
|
|
2592
|
-
max_cols = 0
|
|
2593
|
-
for row in rows:
|
|
2594
|
-
width = 0
|
|
2595
|
-
for cell in row.find_all(["th", "td"]):
|
|
2596
|
-
try:
|
|
2597
|
-
width += int(cell.get("colspan", 1))
|
|
2598
|
-
except Exception:
|
|
2599
|
-
width += 1
|
|
2600
|
-
max_cols = max(max_cols, width)
|
|
2601
|
-
|
|
2602
|
-
if max_cols == 0:
|
|
2603
|
-
return []
|
|
2604
|
-
|
|
2605
|
-
span_cells: list = [None] * max_cols
|
|
2606
|
-
span_remaining: list[int] = [0] * max_cols
|
|
2607
|
-
normalized: list[list] = []
|
|
2608
|
-
|
|
2609
|
-
for row in rows:
|
|
2610
|
-
row_cells: list = [None] * max_cols
|
|
2611
|
-
|
|
2612
|
-
# Fill from active rowspans.
|
|
2613
|
-
for idx in range(max_cols):
|
|
2614
|
-
if span_remaining[idx] > 0:
|
|
2615
|
-
row_cells[idx] = span_cells[idx]
|
|
2616
|
-
span_remaining[idx] -= 1
|
|
2617
|
-
else:
|
|
2618
|
-
span_cells[idx] = None
|
|
2619
|
-
|
|
2620
|
-
col_idx = 0
|
|
2621
|
-
for cell in row.find_all(["th", "td"]):
|
|
2622
|
-
# Find next empty slot.
|
|
2623
|
-
while col_idx < max_cols and row_cells[col_idx] is not None:
|
|
2624
|
-
col_idx += 1
|
|
2625
|
-
if col_idx >= max_cols:
|
|
2626
|
-
break
|
|
2627
|
-
|
|
2628
|
-
try:
|
|
2629
|
-
colspan = int(cell.get("colspan", 1))
|
|
2630
|
-
except Exception:
|
|
2631
|
-
colspan = 1
|
|
2632
|
-
try:
|
|
2633
|
-
rowspan = int(cell.get("rowspan", 1))
|
|
2634
|
-
except Exception:
|
|
2635
|
-
rowspan = 1
|
|
2636
|
-
|
|
2637
|
-
for offset in range(max(1, colspan)):
|
|
2638
|
-
target = col_idx + offset
|
|
2639
|
-
if target >= max_cols:
|
|
2640
|
-
break
|
|
2641
|
-
row_cells[target] = cell
|
|
2642
|
-
if rowspan > 1:
|
|
2643
|
-
span_cells[target] = cell
|
|
2644
|
-
span_remaining[target] = max(span_remaining[target], rowspan - 1)
|
|
2645
|
-
|
|
2646
|
-
col_idx += max(1, colspan)
|
|
2647
|
-
|
|
2648
|
-
normalized.append(row_cells)
|
|
2649
|
-
|
|
2650
|
-
return normalized
|
|
2651
|
-
|
|
2652
|
-
def _determine_source(self, url: str) -> LinkSource:
    """Classify ``url`` as a Confluence, Bitbucket/git, or other link.

    Confluence detection (``_looks_like_confluence_url``) takes precedence.
    A URL is treated as Bitbucket when it contains "bitbucket", or contains
    "git" at the start or end of a word (``gitlab``, ``git.example``,
    ``/git/``, ``repo.git``). Everything else is ``LinkSource.OTHER``.
    """
    if self._looks_like_confluence_url(url):
        return LinkSource.CONFLUENCE
    # A bare `"git" in url` test also fires on words that merely embed the
    # letters, e.g. every host under "digital.vn"; require a word boundary
    # on at least one side of "git".
    if "bitbucket" in url or re.search(r"(?<![A-Za-z])git|git(?![A-Za-z])", url):
        return LinkSource.BITBUCKET
    return LinkSource.OTHER
|
|
2658
|
-
|
|
2659
|
-
def _looks_like_confluence_url(self, url: str) -> bool:
|
|
2660
|
-
"""Best-effort detection for Confluence links.
|
|
2661
|
-
|
|
2662
|
-
Registry pages sometimes link to Confluence via IP-based URLs (e.g. 10.x) that
|
|
2663
|
-
don't contain the substring "confluence". We detect those via known Confluence
|
|
2664
|
-
URL shapes instead of relying solely on hostnames.
|
|
2665
|
-
"""
|
|
2666
|
-
raw = (url or "").strip()
|
|
2667
|
-
if not raw:
|
|
2668
|
-
return False
|
|
2669
|
-
|
|
2670
|
-
try:
|
|
2671
|
-
parsed = urlparse(raw)
|
|
2672
|
-
except Exception:
|
|
2673
|
-
return False
|
|
2674
|
-
|
|
2675
|
-
host = (parsed.hostname or "").lower()
|
|
2676
|
-
if host in {"confluence.digital.vn", "10.254.136.35", "10.254.136.32"}:
|
|
2677
|
-
return True
|
|
2678
|
-
|
|
2679
|
-
path = parsed.path or ""
|
|
2680
|
-
if path.startswith("/display/"):
|
|
2681
|
-
return True
|
|
2682
|
-
if path.startswith("/pages/viewpage.action"):
|
|
2683
|
-
qs = parse_qs(parsed.query)
|
|
2684
|
-
page_ids = qs.get("pageId", [])
|
|
2685
|
-
if page_ids and str(page_ids[0]).isdigit():
|
|
2686
|
-
return True
|
|
2687
|
-
# Confluence sometimes uses viewpage.action without pageId (rare) - treat as Confluence.
|
|
2688
|
-
return True
|
|
2689
|
-
|
|
2690
|
-
qs = parse_qs(parsed.query)
|
|
2691
|
-
page_ids = qs.get("pageId", [])
|
|
2692
|
-
return bool(page_ids and str(page_ids[0]).isdigit())
|
|
2693
|
-
|
|
2694
|
-
def _extract_confluence_page_id(self, url: str) -> str | None:
|
|
2695
|
-
"""Best-effort extraction of numeric Confluence page ID from URL."""
|
|
2696
|
-
raw = str(url or "").strip()
|
|
2697
|
-
if not raw:
|
|
2698
|
-
return None
|
|
2699
|
-
if raw.isdigit():
|
|
2700
|
-
return raw
|
|
2701
|
-
try:
|
|
2702
|
-
parsed = urlparse(raw)
|
|
2703
|
-
qs = parse_qs(parsed.query)
|
|
2704
|
-
for key in ("pageId", "pageid"):
|
|
2705
|
-
values = qs.get(key, [])
|
|
2706
|
-
for value in values:
|
|
2707
|
-
candidate = str(value or "").strip()
|
|
2708
|
-
if candidate.isdigit():
|
|
2709
|
-
return candidate
|
|
2710
|
-
except Exception:
|
|
2711
|
-
pass
|
|
2712
|
-
|
|
2713
|
-
match = re.search(r"(?:^|[?&])pageId=(\d+)", raw, flags=re.IGNORECASE)
|
|
2714
|
-
if match:
|
|
2715
|
-
return match.group(1)
|
|
2716
|
-
return None
|
|
2717
|
-
|
|
2718
|
-
@staticmethod
|
|
2719
|
-
def _extract_confluence_display_ref(url: str) -> tuple[str, str] | None:
|
|
2720
|
-
"""Extract (space_key, title) from /display/<SPACE>/<Title> URLs."""
|
|
2721
|
-
raw = str(url or "").strip()
|
|
2722
|
-
if not raw:
|
|
2723
|
-
return None
|
|
2724
|
-
try:
|
|
2725
|
-
parsed = urlparse(raw)
|
|
2726
|
-
except Exception:
|
|
2727
|
-
return None
|
|
2728
|
-
match = re.match(r"^/?display/([^/]+)/([^/]+)$", parsed.path)
|
|
2729
|
-
if not match:
|
|
2730
|
-
return None
|
|
2731
|
-
space_key = str(match.group(1) or "").strip()
|
|
2732
|
-
raw_title = str(match.group(2) or "").strip()
|
|
2733
|
-
if not space_key or not raw_title:
|
|
2734
|
-
return None
|
|
2735
|
-
title = re.sub(r"\s+", " ", unquote_plus(raw_title).replace("+", " ")).strip()
|
|
2736
|
-
if not title:
|
|
2737
|
-
return None
|
|
2738
|
-
return space_key, title
|
|
2739
|
-
|
|
2740
|
-
@staticmethod
|
|
2741
|
-
def _extract_confluence_tiny_id(url: str) -> str | None:
|
|
2742
|
-
"""Extract the tinyId from a Confluence short URL (/x/<tinyId>)."""
|
|
2743
|
-
raw = str(url or "").strip()
|
|
2744
|
-
if not raw:
|
|
2745
|
-
return None
|
|
2746
|
-
try:
|
|
2747
|
-
parsed = urlparse(raw)
|
|
2748
|
-
except Exception:
|
|
2749
|
-
return None
|
|
2750
|
-
match = re.match(r"^/?x/([A-Za-z0-9_-]+)$", parsed.path)
|
|
2751
|
-
if not match:
|
|
2752
|
-
return None
|
|
2753
|
-
return match.group(1)
|
|
2754
|
-
|
|
2755
|
-
@staticmethod
|
|
2756
|
-
def _escape_cql(value: str) -> str:
|
|
2757
|
-
return value.replace("\\", "\\\\").replace('"', '\\"')
|
|
2758
|
-
|
|
2759
|
-
@staticmethod
|
|
2760
|
-
def _infer_confluence_server_from_url(url: str) -> str | None:
|
|
2761
|
-
"""Infer configured Confluence server alias from URL host when possible."""
|
|
2762
|
-
raw = str(url or "").strip()
|
|
2763
|
-
if not raw:
|
|
2764
|
-
return None
|
|
2765
|
-
try:
|
|
2766
|
-
parsed = urlparse(raw)
|
|
2767
|
-
except Exception:
|
|
2768
|
-
return None
|
|
2769
|
-
host = str(parsed.hostname or "").strip().lower()
|
|
2770
|
-
if host in {"10.254.136.35", "10.254.136.32"}:
|
|
2771
|
-
return "external"
|
|
2772
|
-
if host == "confluence.digital.vn":
|
|
2773
|
-
return "internal"
|
|
2774
|
-
return None
|
|
2775
|
-
|
|
2776
|
-
def _confluence_client_for_url(self, url: str) -> ConfluenceCliClient:
    """Pick the Confluence client that matches the server hosting ``url``.

    ``self.client`` is returned when the URL's host maps to no known server
    alias, when the active client's ``_server`` attribute is not a non-empty
    string, or when it already targets that server. Otherwise a client is
    built with ``ConfluenceCliClient.for_registry_discovery`` and cached per
    server alias in ``self._server_scoped_clients`` for later calls.
    """
    target_server = self._infer_confluence_server_from_url(url)
    if not target_server:
        return self.client

    current_server = getattr(self.client, "_server", None)
    if not isinstance(current_server, str):
        return self.client
    current_server = current_server.strip().lower()
    if not current_server or current_server == target_server:
        return self.client

    scoped_client = self._server_scoped_clients.get(target_server)
    if scoped_client is None:
        scoped_client = ConfluenceCliClient.for_registry_discovery(
            server=target_server,
            timeout=self._server_scoped_timeout,
        )
        self._server_scoped_clients[target_server] = scoped_client
    return scoped_client
|
|
2798
|
-
|
|
2799
|
-
async def _resolve_page_id_from_confluence_url(self, raw_url: str) -> str | None:
    """Resolve the numeric Confluence page ID behind ``raw_url``.

    Resolution order: cached result, ID embedded in the URL, tiny link
    (``/x/<tinyId>``) resolved through the client, then a CQL lookup by space
    and title for ``/display/<SPACE>/<Title>`` URLs (also trying the title
    with underscores turned into spaces). Every outcome, including ``None``,
    is cached under the normalized URL in ``self._resolved_confluence_page_ids``.
    """
    url = str(raw_url or "").strip()
    if not url:
        return None

    cache_key = self.normalize_url(url)

    def remember(page_id):
        # Record the outcome (hit or miss) so the URL is not resolved again.
        self._resolved_confluence_page_ids[cache_key] = page_id
        return page_id

    previous = self._resolved_confluence_page_ids.get(cache_key, _CACHE_MISS)
    if previous is not _CACHE_MISS:
        return cast("str | None", previous)

    embedded_id = self._extract_confluence_page_id(url)
    if embedded_id:
        return remember(embedded_id)

    tiny_id = self._extract_confluence_tiny_id(url)
    if tiny_id:
        try:
            resolved_from_tiny = await self._confluence_client_for_url(url).resolve_tiny_url(tiny_id)
        except Exception as exc:
            logger.debug(
                "confluence_tiny_url_resolution_failed",
                page_url=url,
                tiny_id=tiny_id,
                error=str(exc),
            )
            resolved_from_tiny = None
        return remember(resolved_from_tiny)

    display_ref = self._extract_confluence_display_ref(url)
    if not display_ref:
        return remember(None)

    space_key, title = display_ref
    titles_to_try = [title]
    if "_" in title:
        spaced_title = re.sub(r"\s+", " ", title.replace("_", " ")).strip()
        if spaced_title and spaced_title not in titles_to_try:
            titles_to_try.append(spaced_title)

    found_page_id: str | None = None
    for candidate_title in titles_to_try:
        cql = f'space="{self._escape_cql(space_key)}" and title="{self._escape_cql(candidate_title)}" and type=page'
        # Fail fast on CQL lookup to avoid large retry storms during registry parsing.
        try:
            results = await self._confluence_client_for_url(url).search_cql(cql, limit=1)
        except DataSourceError as exc:
            logger.debug(
                "confluence_display_page_id_resolution_failed",
                page_url=url,
                space_key=space_key,
                title=candidate_title,
                error=str(exc),
            )
            continue
        first_hit = results[0] if results else None
        if not isinstance(first_hit, dict):
            continue
        candidate_id = str(first_hit.get("id") or "").strip()
        if candidate_id:
            found_page_id = candidate_id
            break

    return remember(found_page_id)
|
|
2868
|
-
|
|
2869
|
-
async def _resolve_missing_confluence_page_ids(self, links: list[DocumentLink]) -> None:
    """Fill in ``page_id`` on Confluence links that lack one, in place.

    Does nothing for an empty list or when ``self.prefetch_dir`` is set.
    Links whose ID cannot be resolved are left unchanged.
    """
    # Keep prefetch mode fully local/offline; do not introduce network lookups.
    if not links or self.prefetch_dir is not None:
        return
    for link in links:
        already_has_id = str(link.page_id or "").strip()
        if link.source != LinkSource.CONFLUENCE or already_has_id:
            continue
        page_id = await self._resolve_page_id_from_confluence_url(link.url)
        if page_id:
            link.page_id = page_id
|
|
2884
|
-
|
|
2885
|
-
def _map_table_headers(self, rows: list) -> tuple[dict[int, str], int | None]:
|
|
2886
|
-
header_row_index: int | None = None
|
|
2887
|
-
header_map: dict[int, str] = {}
|
|
2888
|
-
|
|
2889
|
-
for index, row in enumerate(rows):
|
|
2890
|
-
header_cells = row.find_all("th")
|
|
2891
|
-
if not header_cells:
|
|
2892
|
-
continue
|
|
2893
|
-
|
|
2894
|
-
header_row_index = index
|
|
2895
|
-
for col_index, cell in enumerate(header_cells):
|
|
2896
|
-
header_key = self._classify_header(cell.get_text(strip=True))
|
|
2897
|
-
if header_key:
|
|
2898
|
-
header_map[col_index] = header_key
|
|
2899
|
-
break
|
|
2900
|
-
|
|
2901
|
-
if header_row_index is None and rows:
|
|
2902
|
-
first_cells = rows[0].find_all("td")
|
|
2903
|
-
header_map_candidate: dict[int, str] = {}
|
|
2904
|
-
for col_index, cell in enumerate(first_cells):
|
|
2905
|
-
header_key = self._classify_header(cell.get_text(strip=True))
|
|
2906
|
-
if header_key:
|
|
2907
|
-
header_map_candidate[col_index] = header_key
|
|
2908
|
-
if header_map_candidate:
|
|
2909
|
-
header_map = header_map_candidate
|
|
2910
|
-
header_row_index = 0
|
|
2911
|
-
|
|
2912
|
-
return header_map, header_row_index
|
|
2913
|
-
|
|
2914
|
-
def _classify_header(self, text: str) -> str | None:
|
|
2915
|
-
normalized = self._normalize_text(text)
|
|
2916
|
-
if not normalized:
|
|
2917
|
-
return None
|
|
2918
|
-
|
|
2919
|
-
# VN/EN column mapping for role/doc_type (tolerant matching).
|
|
2920
|
-
if any(
|
|
2921
|
-
key in normalized
|
|
2922
|
-
for key in (
|
|
2923
|
-
"role",
|
|
2924
|
-
"vai tro",
|
|
2925
|
-
"vai trò",
|
|
2926
|
-
"phu trach",
|
|
2927
|
-
"phụ trách",
|
|
2928
|
-
"chuc nang",
|
|
2929
|
-
"chức năng",
|
|
2930
|
-
"owner",
|
|
2931
|
-
)
|
|
2932
|
-
):
|
|
2933
|
-
return "role"
|
|
2934
|
-
|
|
2935
|
-
# EN doc type must be detected before VN doc type because headers like
|
|
2936
|
-
# "Doc Type EN" contain the substring "doc type".
|
|
2937
|
-
if any(
|
|
2938
|
-
key in normalized
|
|
2939
|
-
for key in (
|
|
2940
|
-
"doc type en",
|
|
2941
|
-
"document type en",
|
|
2942
|
-
"english doc type",
|
|
2943
|
-
"english document type",
|
|
2944
|
-
"tai lieu en",
|
|
2945
|
-
"tài liệu en",
|
|
2946
|
-
"loai tai lieu en",
|
|
2947
|
-
"loại tài liệu en",
|
|
2948
|
-
"english",
|
|
2949
|
-
)
|
|
2950
|
-
):
|
|
2951
|
-
return "doc_type_en"
|
|
2952
|
-
|
|
2953
|
-
if any(
|
|
2954
|
-
key in normalized
|
|
2955
|
-
for key in (
|
|
2956
|
-
"doc type",
|
|
2957
|
-
"document type",
|
|
2958
|
-
"loai tai lieu",
|
|
2959
|
-
"loại tài liệu",
|
|
2960
|
-
"tai lieu",
|
|
2961
|
-
"tài liệu",
|
|
2962
|
-
"mo ta",
|
|
2963
|
-
"mô tả",
|
|
2964
|
-
"noi dung",
|
|
2965
|
-
"nội dung",
|
|
2966
|
-
"description",
|
|
2967
|
-
"muc",
|
|
2968
|
-
"hạng mục",
|
|
2969
|
-
)
|
|
2970
|
-
):
|
|
2971
|
-
return "doc_type_vn"
|
|
2972
|
-
|
|
2973
|
-
if any(
|
|
2974
|
-
key in normalized
|
|
2975
|
-
for key in (
|
|
2976
|
-
"link tai lieu",
|
|
2977
|
-
"link tài liệu",
|
|
2978
|
-
"tai lieu link",
|
|
2979
|
-
"tài liệu link",
|
|
2980
|
-
"link",
|
|
2981
|
-
"url",
|
|
2982
|
-
"href",
|
|
2983
|
-
)
|
|
2984
|
-
):
|
|
2985
|
-
return "link"
|
|
2986
|
-
|
|
2987
|
-
return None
|
|
2988
|
-
|
|
2989
|
-
def _extract_role(self, cells: list, header_map: dict[int, str]) -> DocumentRole:
    """Determine the document role from the row's "role" cell.

    The cell text is tried as a whole (normalized, and with spaces removed)
    and then piece by piece after splitting on ``, / ; | \\ -``, because
    registry cells often combine roles such as "PO/BA". The first candidate
    found in the role table wins, so combined cells resolve to the role
    written first. Returns ``DocumentRole.UNKNOWN`` when the cell is missing,
    empty, or names no known role.
    """
    role_text = self._extract_cell_text(cells, header_map, "role")
    if not role_text:
        return DocumentRole.UNKNOWN

    normalized = self._normalize_text(role_text)

    # An ordered list (not a set) keeps the lookup order stable: with a set,
    # which role "PO/BA" resolves to would depend on string-hash iteration
    # order and could differ from one run to the next.
    candidates = [normalized, normalized.replace(" ", "")]
    for raw in re.split(r"[,/;|\\-]+", role_text):
        norm = self._normalize_text(raw)
        if norm:
            candidates.extend((norm, norm.replace(" ", "")))

    role_map = {
        "po": DocumentRole.PO,
        "productowner": DocumentRole.PO,
        "product owner": DocumentRole.PO,
        "ba": DocumentRole.BA,
        "businessanalyst": DocumentRole.BA,
        "business analyst": DocumentRole.BA,
        "sa": DocumentRole.SA,
        "solutionarchitect": DocumentRole.SA,
        "solution architect": DocumentRole.SA,
        "dev": DocumentRole.DEV,
        "developer": DocumentRole.DEV,
        "development": DocumentRole.DEV,
        "test": DocumentRole.TEST,
        "tester": DocumentRole.TEST,
        "qa": DocumentRole.TEST,
        "qc": DocumentRole.TEST,
    }

    for candidate in candidates:
        if candidate in role_map:
            return role_map[candidate]

    return DocumentRole.UNKNOWN
|
|
3029
|
-
|
|
3030
|
-
def _extract_doc_type(
|
|
3031
|
-
self,
|
|
3032
|
-
cells: list,
|
|
3033
|
-
header_map: dict[int, str],
|
|
3034
|
-
header_key: str,
|
|
3035
|
-
) -> tuple[str | None, int | None]:
|
|
3036
|
-
doc_text = self._extract_cell_text(cells, header_map, header_key)
|
|
3037
|
-
if not doc_text:
|
|
3038
|
-
return None, None
|
|
3039
|
-
|
|
3040
|
-
doc_number, doc_label = self._split_doc_number(doc_text)
|
|
3041
|
-
return doc_label, doc_number
|
|
3042
|
-
|
|
3043
|
-
def _extract_cell_text(self, cells: list, header_map: dict[int, str], key: str) -> str | None:
|
|
3044
|
-
for index, header_key in header_map.items():
|
|
3045
|
-
if header_key != key:
|
|
3046
|
-
continue
|
|
3047
|
-
if index >= len(cells):
|
|
3048
|
-
return None
|
|
3049
|
-
cell = cells[index]
|
|
3050
|
-
if cell is None:
|
|
3051
|
-
return None
|
|
3052
|
-
return cell.get_text(strip=True)
|
|
3053
|
-
return None
|
|
3054
|
-
|
|
3055
|
-
def _split_doc_number(self, text: str) -> tuple[int | None, str]:
|
|
3056
|
-
match = re.match(r"^\s*(\d+)\s*[\.\)]\s*(.+)$", text)
|
|
3057
|
-
if match:
|
|
3058
|
-
return int(match.group(1)), match.group(2).strip()
|
|
3059
|
-
return None, text.strip()
|
|
3060
|
-
|
|
3061
|
-
def _normalize_text(self, text: str) -> str:
|
|
3062
|
-
cleaned = re.sub(r"[^\w\s]", " ", text.strip().lower())
|
|
3063
|
-
return " ".join(cleaned.split())
|