rasa-pro 3.13.1a18__py3-none-any.whl → 3.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/cli/scaffold.py +3 -22
- rasa/core/actions/action.py +3 -5
- rasa/core/channels/studio_chat.py +10 -34
- rasa/core/channels/voice_stream/asr/asr_engine.py +5 -1
- rasa/core/channels/voice_stream/asr/deepgram.py +5 -0
- rasa/core/channels/voice_stream/audiocodes.py +9 -4
- rasa/core/channels/voice_stream/jambonz.py +1 -1
- rasa/core/channels/voice_stream/voice_channel.py +7 -0
- rasa/core/policies/enterprise_search_policy.py +9 -8
- rasa/core/policies/flows/flow_executor.py +1 -8
- rasa/dialogue_understanding/commands/correct_slots_command.py +0 -10
- rasa/dialogue_understanding/generator/command_generator.py +5 -5
- rasa/dialogue_understanding/generator/flow_retrieval.py +9 -10
- rasa/dialogue_understanding/processor/command_processor.py +6 -1
- rasa/model_manager/model_api.py +3 -2
- rasa/model_manager/runner_service.py +1 -1
- rasa/model_manager/trainer_service.py +9 -12
- rasa/model_manager/utils.py +29 -1
- rasa/shared/core/domain.py +15 -62
- rasa/shared/core/flows/flow_step.py +1 -7
- rasa/shared/core/flows/yaml_flows_io.py +8 -16
- rasa/shared/core/slots.py +0 -4
- rasa/shared/importers/importer.py +0 -6
- rasa/shared/importers/utils.py +1 -77
- rasa/studio/upload.py +45 -10
- rasa/telemetry.py +1 -2
- rasa/utils/io.py +9 -27
- rasa/utils/json_utils.py +1 -6
- rasa/utils/log_utils.py +1 -5
- rasa/utils/plotting.py +1 -1
- rasa/validator.py +3 -7
- rasa/version.py +1 -1
- {rasa_pro-3.13.1a18.dist-info → rasa_pro-3.13.2.dist-info}/METADATA +8 -9
- {rasa_pro-3.13.1a18.dist-info → rasa_pro-3.13.2.dist-info}/RECORD +37 -259
- rasa/builder/README.md +0 -120
- rasa/builder/__init__.py +0 -0
- rasa/builder/config.py +0 -79
- rasa/builder/create_openai_vector_store.py +0 -228
- rasa/builder/exceptions.py +0 -55
- rasa/builder/inkeep-rag-response-schema.json +0 -64
- rasa/builder/inkeep_document_retrieval.py +0 -212
- rasa/builder/llm-helper-schema.json +0 -69
- rasa/builder/llm_context.py +0 -81
- rasa/builder/llm_helper_prompt.jinja2 +0 -245
- rasa/builder/llm_service.py +0 -317
- rasa/builder/logging_utils.py +0 -51
- rasa/builder/main.py +0 -147
- rasa/builder/models.py +0 -225
- rasa/builder/project_generator.py +0 -282
- rasa/builder/scrape_rasa_docs.py +0 -97
- rasa/builder/service.py +0 -742
- rasa/builder/skill_to_bot_prompt.jinja2 +0 -164
- rasa/builder/training_service.py +0 -132
- rasa/builder/validation_service.py +0 -93
- rasa/cli/project_templates/finance/actions/__init__.py +0 -0
- rasa/cli/project_templates/finance/actions/action_add_payee.py +0 -47
- rasa/cli/project_templates/finance/actions/action_ask_account.py +0 -50
- rasa/cli/project_templates/finance/actions/action_ask_account_from.py +0 -50
- rasa/cli/project_templates/finance/actions/action_ask_card.py +0 -47
- rasa/cli/project_templates/finance/actions/action_check_balance.py +0 -40
- rasa/cli/project_templates/finance/actions/action_check_card_existence.py +0 -35
- rasa/cli/project_templates/finance/actions/action_check_payee_existence.py +0 -40
- rasa/cli/project_templates/finance/actions/action_check_sufficient_funds.py +0 -41
- rasa/cli/project_templates/finance/actions/action_list_payees.py +0 -45
- rasa/cli/project_templates/finance/actions/action_process_immediate_payment.py +0 -18
- rasa/cli/project_templates/finance/actions/action_remove_payee.py +0 -49
- rasa/cli/project_templates/finance/actions/action_schedule_payment.py +0 -19
- rasa/cli/project_templates/finance/actions/action_session_start.py +0 -69
- rasa/cli/project_templates/finance/actions/action_update_card_status.py +0 -45
- rasa/cli/project_templates/finance/actions/action_validate_payment_date.py +0 -36
- rasa/cli/project_templates/finance/actions/database.py +0 -276
- rasa/cli/project_templates/finance/config.yml +0 -32
- rasa/cli/project_templates/finance/credentials.yml +0 -33
- rasa/cli/project_templates/finance/csvs/accounts.csv +0 -8
- rasa/cli/project_templates/finance/csvs/advisors.csv +0 -7
- rasa/cli/project_templates/finance/csvs/appointments.csv +0 -211
- rasa/cli/project_templates/finance/csvs/branches.csv +0 -10
- rasa/cli/project_templates/finance/csvs/cards.csv +0 -11
- rasa/cli/project_templates/finance/csvs/payees.csv +0 -10
- rasa/cli/project_templates/finance/csvs/transactions.csv +0 -71
- rasa/cli/project_templates/finance/csvs/users.csv +0 -4
- rasa/cli/project_templates/finance/data/flows/add_payee.yml +0 -29
- rasa/cli/project_templates/finance/data/flows/block_card.yml +0 -66
- rasa/cli/project_templates/finance/data/flows/check_balance.yml +0 -9
- rasa/cli/project_templates/finance/data/flows/list_payees.yml +0 -5
- rasa/cli/project_templates/finance/data/flows/remove_payee.yml +0 -21
- rasa/cli/project_templates/finance/data/flows/select_card.yml +0 -12
- rasa/cli/project_templates/finance/data/flows/transfer_money.yml +0 -67
- rasa/cli/project_templates/finance/data/flows/welcome.yml +0 -14
- rasa/cli/project_templates/finance/data/nlu.yml +0 -29
- rasa/cli/project_templates/finance/data/patterns/pattern_chitchat.yml +0 -7
- rasa/cli/project_templates/finance/data/patterns/pattern_completed.yml +0 -6
- rasa/cli/project_templates/finance/data/patterns/pattern_search.yml +0 -5
- rasa/cli/project_templates/finance/data/patterns/pattern_session_start.yml +0 -9
- rasa/cli/project_templates/finance/data/source/accounts.json +0 -51
- rasa/cli/project_templates/finance/data/source/advisors.json +0 -44
- rasa/cli/project_templates/finance/data/source/appointments.json +0 -1474
- rasa/cli/project_templates/finance/data/source/branches.json +0 -47
- rasa/cli/project_templates/finance/data/source/cards.json +0 -72
- rasa/cli/project_templates/finance/data/source/payees.json +0 -74
- rasa/cli/project_templates/finance/data/source/transactions.json +0 -492
- rasa/cli/project_templates/finance/data/source/users.json +0 -29
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/block_card/consequences_of_blocking_card.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/block_card/reasons_to_block_card.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/block_card/recovering_from_card_fraud.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/block_card/tips_for_card_security.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/block_card/what_to_do_if_card_is_lost.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/check_balance/account_balance_security.txt +0 -7
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/check_balance/common_balance_inquiries.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/check_balance/methods_to_check_balance.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/check_balance/understanding_balance_updates.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/check_balance/what_to_do_if_balance_is_incorrect.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/manage_payees/benefits_of_authorised_payees.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/manage_payees/common_issues_with_payees.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/manage_payees/general_payee_information.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/manage_payees/payee_management_tips.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/manage_payees/understanding_payee_types.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/transfer_money/common_transfer_errors.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/transfer_money/fees_for_transfers.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/transfer_money/general_transfer_information.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/transfer_money/security_tips_for_transfers.txt +0 -8
- rasa/cli/project_templates/finance/docs/bank_of_rasa_faq/transfer_money/transfer_processing_times.txt +0 -8
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part1.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part10.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part11.txt +0 -48
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part12.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part13.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part14.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part15.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part16.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part17.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part18.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part19.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part2.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part20.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part21.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part22.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part23.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part24.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part25.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part26.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part27.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part28.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part29.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part3.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part30.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part31.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part32.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part33.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part34.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part35.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part36.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part37.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part38.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part39.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part4.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part40.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part41.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part42.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part43.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part44.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part45.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part46.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part47.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part48.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part49.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part5.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part50.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part51.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part52.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part53.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part54.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part55.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part56.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part57.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part58.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part59.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part6.txt +0 -47
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part60.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part61.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part7.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part8.txt +0 -50
- rasa/cli/project_templates/finance/docs/huggingface_alpaca_dataset/questions_part9.txt +0 -47
- rasa/cli/project_templates/finance/domain/add_payee.yml +0 -47
- rasa/cli/project_templates/finance/domain/block_card.yml +0 -101
- rasa/cli/project_templates/finance/domain/check_balance.yml +0 -9
- rasa/cli/project_templates/finance/domain/default_actions.yml +0 -16
- rasa/cli/project_templates/finance/domain/default_flows.yml +0 -33
- rasa/cli/project_templates/finance/domain/list_payees.yml +0 -4
- rasa/cli/project_templates/finance/domain/remove_payee.yml +0 -16
- rasa/cli/project_templates/finance/domain/select_card.yml +0 -12
- rasa/cli/project_templates/finance/domain/transfer_money.yml +0 -79
- rasa/cli/project_templates/finance/endpoints.yml +0 -62
- rasa/cli/project_templates/finance/prompts/command-generator.jinja2 +0 -57
- rasa/cli/project_templates/finance/prompts/rephraser_demo_personality_prompt.jinja2 +0 -19
- rasa/cli/project_templates/finance/tests/conversation_repair/cancellations.yml +0 -12
- rasa/cli/project_templates/finance/tests/conversation_repair/cannot_handle.yml +0 -7
- rasa/cli/project_templates/finance/tests/conversation_repair/chitchat.yml +0 -7
- rasa/cli/project_templates/finance/tests/conversation_repair/clarification.yml +0 -9
- rasa/cli/project_templates/finance/tests/conversation_repair/completion.yml +0 -18
- rasa/cli/project_templates/finance/tests/conversation_repair/corrections.yml +0 -17
- rasa/cli/project_templates/finance/tests/conversation_repair/digressions.yml +0 -32
- rasa/cli/project_templates/finance/tests/conversation_repair/human_handoff.yml +0 -21
- rasa/cli/project_templates/finance/tests/conversation_repair/skipping_collect_steps.yml +0 -16
- rasa/cli/project_templates/finance/tests/demo_scripts/main.yml +0 -16
- rasa/cli/project_templates/finance/tests/happy_paths/balance_verification.yml +0 -15
- rasa/cli/project_templates/finance/tests/happy_paths/banking_questions.yml +0 -12
- rasa/cli/project_templates/finance/tests/happy_paths/card_blocking.yml +0 -52
- rasa/cli/project_templates/finance/tests/happy_paths/money_transfer.yml +0 -136
- rasa/cli/project_templates/finance/tests/happy_paths/payee_management.yml +0 -27
- rasa/cli/project_templates/finance/tests/happy_paths/user_greeted.yml +0 -5
- rasa/cli/project_templates/plain/actions/__init__.py +0 -0
- rasa/cli/project_templates/plain/config.yml +0 -17
- rasa/cli/project_templates/plain/credentials.yml +0 -33
- rasa/cli/project_templates/plain/data/patterns/pattern_session_start.yml +0 -7
- rasa/cli/project_templates/plain/domain.yml +0 -5
- rasa/cli/project_templates/plain/endpoints.yml +0 -58
- rasa/cli/project_templates/telco/actions/__init__.py +0 -0
- rasa/cli/project_templates/telco/actions/actions_billing.py +0 -197
- rasa/cli/project_templates/telco/actions/actions_get_data_from_db.py +0 -43
- rasa/cli/project_templates/telco/actions/actions_run_diagnostics.py +0 -23
- rasa/cli/project_templates/telco/actions/actions_session_start.py +0 -13
- rasa/cli/project_templates/telco/config.yml +0 -25
- rasa/cli/project_templates/telco/credentials.yml +0 -33
- rasa/cli/project_templates/telco/csvs/billing.csv +0 -10
- rasa/cli/project_templates/telco/csvs/customers.csv +0 -5
- rasa/cli/project_templates/telco/data/flows/flow_global.yml +0 -5
- rasa/cli/project_templates/telco/data/flows/flow_reboot_router.yml +0 -8
- rasa/cli/project_templates/telco/data/flows/flow_reset_router.yml +0 -7
- rasa/cli/project_templates/telco/data/flows/flow_solve_internet_issue.yml +0 -73
- rasa/cli/project_templates/telco/data/flows/flow_undertand_bill.yml +0 -45
- rasa/cli/project_templates/telco/data/patterns/pattern_completed.yml +0 -7
- rasa/cli/project_templates/telco/data/patterns/pattern_human_handoff.yml +0 -6
- rasa/cli/project_templates/telco/data/patterns/pattern_search.yml +0 -7
- rasa/cli/project_templates/telco/data/patterns/pattern_session_start.yml +0 -9
- rasa/cli/project_templates/telco/docs/reset_vs_rboot_router.txt +0 -1
- rasa/cli/project_templates/telco/docs/restart_router.txt +0 -6
- rasa/cli/project_templates/telco/docs/run_speed_test.txt +0 -6
- rasa/cli/project_templates/telco/domain/domain_global.yml +0 -29
- rasa/cli/project_templates/telco/domain/domain_patterns.yml +0 -17
- rasa/cli/project_templates/telco/domain/domain_reboot_router.yml +0 -20
- rasa/cli/project_templates/telco/domain/domain_reset_router.yml +0 -11
- rasa/cli/project_templates/telco/domain/domain_run_speed_test.yml +0 -24
- rasa/cli/project_templates/telco/domain/domain_solve_internet_issue.yml +0 -74
- rasa/cli/project_templates/telco/domain/domain_undertand_bill.yml +0 -102
- rasa/cli/project_templates/telco/endpoints.yml +0 -60
- rasa/cli/project_templates/telco/prompts/command-generator.jinja2 +0 -57
- rasa/cli/project_templates/telco/tests/e2e_results_failed.yml +0 -62
- rasa/cli/project_templates/telco/tests/e2e_results_passed.yml +0 -130
- rasa/cli/project_templates/telco/tests/e2e_test_cases/billing_test_cases.yml +0 -68
- rasa/cli/project_templates/telco/tests/e2e_test_cases/global_test_cases.yml +0 -13
- rasa/cli/project_templates/telco/tests/e2e_test_cases/internet_slow_test_case.yml +0 -47
- rasa/cli/project_templates/telco/tests/e2e_test_cases/out_of_scope_test_case.yml +0 -21
- rasa/cli/project_templates/telco/tests/e2e_test_cases/patterns_test_cases.yml +0 -15
- rasa/shared/importers/static.py +0 -63
- rasa/utils/openapi.py +0 -144
- {rasa_pro-3.13.1a18.dist-info → rasa_pro-3.13.2.dist-info}/NOTICE +0 -0
- {rasa_pro-3.13.1a18.dist-info → rasa_pro-3.13.2.dist-info}/WHEEL +0 -0
- {rasa_pro-3.13.1a18.dist-info → rasa_pro-3.13.2.dist-info}/entry_points.txt +0 -0
rasa/builder/models.py
DELETED
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
"""Pydantic models for request/response validation."""
|
|
2
|
-
|
|
3
|
-
from typing import Any, Dict, List, Literal, Optional, Union
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, Field, validator
|
|
6
|
-
|
|
7
|
-
from rasa.cli.scaffold import ProjectTemplateName
|
|
8
|
-
from rasa.shared.core.trackers import DialogueStateTracker
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class PromptRequest(BaseModel):
    """Request body for the prompt-to-bot endpoint.

    Carries the skill description and an optional client identifier.
    """

    # Skill description text, declared with a 1..10000 length constraint.
    prompt: str = Field(
        default=...,
        min_length=1,
        max_length=10000,
        description="The skill description prompt",
    )
    # Optional identifier, declared with a 255 character upper bound.
    client_id: Optional[str] = Field(
        default=None,
        max_length=255,
        description="Optional client identifier",
    )

    @validator("prompt")
    def validate_prompt(cls, v: str) -> str:
        """Return the prompt without surrounding whitespace.

        Raises:
            ValueError: If nothing is left after stripping whitespace.
        """
        stripped = v.strip()
        if stripped == "":
            raise ValueError("Prompt cannot be empty or whitespace only")
        return stripped
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class TemplateRequest(BaseModel):
    """Request body for the template-to-bot endpoint.

    Names the project template to start from and an optional client
    identifier.
    """

    # The description embeds the supported template names, evaluated once
    # when the class body runs.
    template_name: ProjectTemplateName = Field(
        default=...,
        description=(
            f"The template name to use ({ProjectTemplateName.supported_values()})"
        ),
    )
    # Optional identifier, declared with a 255 character upper bound.
    client_id: Optional[str] = Field(
        default=None,
        max_length=255,
        description="Optional client identifier",
    )

    @validator("template_name")
    def validate_template_name(cls, v: Any) -> Any:
        """Return ``v`` unchanged when it is a known template.

        Raises:
            ValueError: If ``v`` is not contained in ``ProjectTemplateName``.
        """
        if v in ProjectTemplateName:
            return v
        raise ValueError(
            f"Template name must be one of {ProjectTemplateName.supported_values()}"
        )
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class ChatMessage(BaseModel):
    """A single message exchanged in a chat."""

    # Sender role, declared with the pattern "^(user|assistant)$".
    type: str = Field(default=..., pattern="^(user|assistant)$")
    # Either plain text or a list of dictionaries.
    content: Union[str, List[Dict[str, Any]]] = Field(default=...)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class LLMBuilderRequest(BaseModel):
    """Request body for the LLM builder endpoint."""

    # Required chat messages, declared with a 1..50 item constraint.
    messages: List[ChatMessage] = Field(default=..., min_items=1, max_items=50)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class LLMBuilderContext(BaseModel):
    """Context bundle for the LLM builder endpoint.

    Every field has a default, so an empty context can be constructed.
    """

    # Optional conversation tracker.
    tracker: Optional[DialogueStateTracker] = Field(default=None)
    # Bot log output as a single string.
    bot_logs: str = Field(default="")
    # String-to-string mapping of the bot's files.
    # NOTE(review): presumably file name -> file content; confirm with callers.
    chat_bot_files: Dict[str, str] = Field(default={})
    # Earlier chat messages.
    chat_history: List[ChatMessage] = Field(default=[])

    class Config:
        """Pydantic configuration for LLMBuilderContext."""

        # NOTE(review): presumably required because DialogueStateTracker is
        # not a pydantic model; confirm.
        arbitrary_types_allowed = True
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class BotDataUpdateRequest(BaseModel):
    """Request body for updating bot data.

    Each field is optional and aliased to a dotted file name.
    """

    # Aliased to "domain.yml".
    domain_yml: Optional[str] = Field(default=None, alias="domain.yml")
    # Aliased to "flows.yml".
    flows_yml: Optional[str] = Field(default=None, alias="flows.yml")
    # Aliased to "config.yml".
    config_yml: Optional[str] = Field(default=None, alias="config.yml")

    class Config:
        """Pydantic configuration for BotDataUpdateRequest."""

        # Accept the Python attribute names in addition to the aliases.
        allow_population_by_field_name = True
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
class ContentBlock(BaseModel):
    """Common parent of all content block models."""

    # Required block kind; subclasses narrow it to a single literal value.
    type: str = Field(default=...)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class TextBlock(ContentBlock):
    """Content block whose ``type`` is ``"text"``."""

    # Fixed discriminator for this block kind.
    type: Literal["text"] = "text"
    # Required text payload.
    text: str = Field(default=...)
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
class CodeBlock(ContentBlock):
    """Content block whose ``type`` is ``"code"``."""

    # Fixed discriminator for this block kind.
    type: Literal["code"] = "code"
    # Required code payload.
    text: str = Field(default=...)
    # Optional language of the code.
    language: Optional[str] = Field(default=None)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
class FileBlock(ContentBlock):
    """Content block whose ``type`` is ``"file"``."""

    # Fixed discriminator for this block kind.
    type: Literal["file"] = "file"
    # Required file identifier.
    # NOTE(review): presumably a file name or path; confirm with callers.
    file: str = Field(default=...)
    # Required file content.
    content: str = Field(default=...)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
class LinkBlock(ContentBlock):
    """Content block whose ``type`` is ``"link"``."""

    # Fixed discriminator for this block kind.
    type: Literal["link"] = "link"
    # Required link text, declared with an http/https prefix pattern.
    text: str = Field(default=..., pattern=r"^https?://")
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
class LLMHelperResponse(BaseModel):
    """Response produced by the LLM helper."""

    # Required list of blocks; each entry is a text, code, file or link block.
    content_blocks: List[Union[TextBlock, CodeBlock, FileBlock, LinkBlock]] = Field(
        default=...
    )
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
class ApiErrorResponse(BaseModel):
    """Error payload returned by the API."""

    # Always the literal "error".
    status: Literal["error"] = "error"
    # Required error text.
    error: str = Field(default=...)
    # Optional structured details.
    details: Optional[Dict[str, Any]] = Field(default=None)
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
class ServerSentEvent(BaseModel):
    """One server-sent event: an event name plus a data dictionary."""

    # Required event name.
    event: str = Field(default=...)
    # Required payload; serialized with ``json.dumps`` by ``format``.
    data: Dict[str, Any] = Field(default=...)

    def format(self) -> str:
        """Render the event as an SSE text frame.

        Returns:
            An ``event:`` line and a ``data:`` line carrying the
            JSON-encoded payload, followed by a blank line.
        """
        import json

        payload = json.dumps(self.data)
        # The two trailing empty entries produce the closing "\n\n".
        frame_lines = (f"event: {self.event}", f"data: {payload}", "", "")
        return "\n".join(frame_lines)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
class ValidationResult(BaseModel):
    """Outcome of a validation operation."""

    # Required overall verdict.
    is_valid: bool = Field(default=...)
    # Optional error messages.
    errors: Optional[List[str]] = Field(default=None)
    # Optional warning messages.
    warnings: Optional[List[str]] = Field(default=None)
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
class TrainingResult(BaseModel):
    """Outcome of a training operation."""

    # Required success flag.
    success: bool = Field(default=...)
    # Optional path of the trained model.
    model_path: Optional[str] = Field(default=None)
    # Optional error text.
    error: Optional[str] = Field(default=None)
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
BotFiles = Dict[str, Optional[str]]
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
class Document(BaseModel):
    """Model for document retrieval results."""

    # Required document text.
    content: str = Field(...)
    # Optional URL of the document.
    url: Optional[str] = Field(None)
    # Optional title of the document.
    title: Optional[str] = Field(None)
    # Optional free-form metadata.
    metadata: Optional[Dict[str, Any]] = Field(None)

    @classmethod
    def from_inkeep_rag_response(cls, rag_item: Dict[str, Any]) -> "Document":
        """Create a Document object from a single InKeep RAG response item.

        Args:
            rag_item: Single item from InKeep RAG response

        Returns:
            Document object with extracted content and metadata
        """
        # `.get("source", {})` still returns None when the key is present
        # but null, so normalise anything that is not a dict to an empty one.
        source = rag_item.get("source")
        if not isinstance(source, dict):
            source = {}

        text_content = cls._extract_text_from_source(source)

        return cls(
            content=text_content.strip() if text_content else "",
            url=rag_item.get("url"),
            title=rag_item.get("title"),
            metadata={
                "type": rag_item.get("type"),
                "record_type": rag_item.get("record_type"),
                "context": rag_item.get("context"),
                "media_type": source.get("media_type"),
            },
        )

    @staticmethod
    def _extract_text_from_source(source: Dict[str, Any]) -> str:
        """Extract text content from InKeep source object.

        Args:
            source: Source object from InKeep RAG response

        Returns:
            Extracted text content, or an empty string when the source holds
            no usable text.
        """
        # Try to extract from content array first. A null or non-list
        # "content" is treated as empty instead of raising.
        content_items = source.get("content")
        if not isinstance(content_items, (list, tuple)):
            content_items = []

        text_parts = [
            item["text"]
            for item in content_items
            if isinstance(item, dict)
            and item.get("type") == "text"
            and isinstance(item.get("text"), str)
            and item["text"]
        ]
        if text_parts:
            return "\n".join(text_parts)

        # Fallback to source data. Only a string is returned, so the
        # declared return type holds and the caller can strip it.
        data = source.get("data")
        return data if isinstance(data, str) else ""
|
|
@@ -1,282 +0,0 @@
|
|
|
1
|
-
"""Service for generating Rasa projects from prompts."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
import shutil
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from textwrap import dedent
|
|
8
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
-
|
|
10
|
-
import structlog
|
|
11
|
-
|
|
12
|
-
from rasa.builder import config
|
|
13
|
-
from rasa.builder.exceptions import ProjectGenerationError, ValidationError
|
|
14
|
-
from rasa.builder.llm_service import get_skill_generation_messages, llm_service
|
|
15
|
-
from rasa.builder.models import BotFiles
|
|
16
|
-
from rasa.builder.validation_service import validate_project
|
|
17
|
-
from rasa.cli.scaffold import ProjectTemplateName, create_initial_project
|
|
18
|
-
from rasa.shared.core.flows import yaml_flows_io
|
|
19
|
-
from rasa.shared.importers.importer import TrainingDataImporter
|
|
20
|
-
from rasa.shared.utils.yaml import dump_obj_as_yaml_to_string
|
|
21
|
-
from rasa.utils.io import subpath
|
|
22
|
-
|
|
23
|
-
structlogger = structlog.get_logger()
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class ProjectGenerator:
    """Service for generating Rasa projects from skill descriptions.

    All project files are persisted on disk below ``project_folder``. The
    generator asks the LLM service for a project, writes the result to disk,
    validates it and retries (feeding validation errors back to the LLM) until
    a valid project is produced or the retry budget is exhausted.
    """

    def __init__(self, project_folder: str) -> None:
        """Initialize the project generator with a folder for file persistence.

        Args:
            project_folder: Path to the folder where project files will be
                stored. The folder is created if it does not exist yet.
        """
        self.project_folder = Path(project_folder)
        self.project_folder.mkdir(parents=True, exist_ok=True)

    def init_from_template(self, template: ProjectTemplateName) -> None:
        """Empty the project folder and create the initial project files."""
        self.cleanup()
        create_initial_project(self.project_folder.as_posix(), template)

    async def generate_project_with_retries(
        self,
        skill_description: str,
        template: ProjectTemplateName,
        max_retries: Optional[int] = None,
    ) -> Dict[str, Optional[str]]:
        """Generate a Rasa project with retry logic for validation failures.

        Args:
            skill_description: Natural language description of the skill
            template: Project template to use for the initial project
            max_retries: Maximum number of retry attempts. Falls back to
                ``config.MAX_RETRIES`` when ``None``.

        Returns:
            Dictionary of generated file contents (filename -> content)

        Raises:
            ProjectGenerationError: If generation fails after all retries
        """
        if max_retries is None:
            max_retries = config.MAX_RETRIES

        self.init_from_template(template)

        project_data = self._get_bot_data_for_llm()

        initial_messages = get_skill_generation_messages(
            skill_description, project_data
        )

        async def _generate_with_retry(
            messages: List[Dict[str, Any]], attempts_left: int
        ) -> Dict[str, Optional[str]]:
            # Bound before the `try` so the validation handler can always
            # reference it, even if the error is raised before the LLM
            # response has been assigned.
            generated_data: Optional[Dict[str, Any]] = None
            try:
                # Generate project data using LLM
                generated_data = await llm_service.generate_rasa_project(messages)

                # Update stored bot data
                self._update_bot_files_from_llm_response(generated_data)

                bot_files = self.get_bot_files()
                structlogger.info(
                    "project_generator.generated_project",
                    attempts_left=attempts_left,
                    files=list(bot_files.keys()),
                )

                # Validate the generated project
                await self._validate_generated_project()

                structlogger.info(
                    "project_generator.validation_success", attempts_left=attempts_left
                )

                return bot_files

            except ValidationError as e:
                structlogger.error(
                    "project_generator.validation_error",
                    error=str(e),
                    attempts_left=attempts_left,
                )

                if attempts_left <= 0:
                    raise ProjectGenerationError(
                        f"Failed to generate valid Rasa project: {e}", max_retries
                    ) from e

                # Create error feedback for next attempt: replay the
                # conversation, the LLM's previous answer (if there was one)
                # and a user message describing what went wrong.
                error_feedback_messages = list(messages)
                if generated_data is not None:
                    error_feedback_messages.append(
                        {
                            "role": "assistant",
                            "content": json.dumps(generated_data),
                        }
                    )
                error_feedback_messages.append(
                    {
                        "role": "user",
                        "content": self._build_validation_feedback(str(e)),
                    }
                )

                return await _generate_with_retry(
                    error_feedback_messages, attempts_left - 1
                )

            except Exception as e:
                structlogger.error(
                    "project_generator.generation_error",
                    error=str(e),
                    attempts_left=attempts_left,
                )

                if attempts_left <= 0:
                    raise ProjectGenerationError(
                        f"Failed to generate Rasa project: {e}", max_retries
                    ) from e

                # For non-validation errors, retry with original messages
                return await _generate_with_retry(initial_messages, attempts_left - 1)

        return await _generate_with_retry(initial_messages, max_retries)

    @staticmethod
    def _build_validation_feedback(error: str) -> str:
        """Build the user message that reports a validation error to the LLM.

        The message is assembled line by line instead of dedenting an
        interpolated template, so a multi-line ``error`` cannot break the
        formatting of the surrounding instructions.

        Args:
            error: Text of the validation error from the previous attempt.

        Returns:
            The feedback prompt for the next generation attempt.
        """
        return "\n".join(
            [
                f"Previous attempt failed validation with error: {error}",
                "",
                "Please fix the issues and generate a valid Rasa project.",
                "Pay special attention to:",
                "- Proper YAML syntax",
                "- Required fields in domain and flows",
                "- Consistent naming between flows and domain",
                "- Valid slot types and mappings",
            ]
        )

    async def _validate_generated_project(self) -> None:
        """Validate the generated project using the validation service.

        Raises:
            ValidationError: If the importer cannot be created or the
                validation service reports an error.
        """
        importer = self._create_importer()
        validation_error = await validate_project(importer)

        if validation_error:
            raise ValidationError(validation_error)

    def _create_importer(self) -> TrainingDataImporter:
        """Create a training data importer from the current bot files.

        Raises:
            ValidationError: If the importer cannot be created.
        """
        try:
            # Prefer a single domain file; otherwise point at a `domain` path
            # below the project folder.
            if (self.project_folder / "domain.yml").exists():
                domain_path = self.project_folder / "domain.yml"
            else:
                domain_path = self.project_folder / "domain"

            return TrainingDataImporter.load_from_config(
                config_path=self.project_folder / "config.yml",
                domain_path=domain_path,
                training_data_paths=[
                    self.project_folder / "data",
                ],
                args={},
            )

        except Exception as e:
            raise ValidationError(f"Failed to create importer: {e}") from e

    def get_bot_files(self) -> BotFiles:
        """Get the current bot files by reading from disk.

        Returns:
            Mapping of POSIX-style paths (relative to the project folder) to
            file contents. Files that cannot be read as UTF-8 text map to
            ``None``.
        """
        bot_files: BotFiles = {}

        for file in self.project_folder.glob("**/*"):
            # Skip directories
            if not file.is_file():
                continue

            relative_path = file.relative_to(self.project_folder)

            # Skip hidden files and directories (any path component starting with '.')
            # as well as `__pycache__` folders
            if any(part.startswith(".") for part in relative_path.parts):
                continue

            if "__pycache__" in relative_path.parts:
                continue

            # exclude the project_folder / models folder
            if relative_path.parts[0] == "models":
                continue

            # Read file content and store with relative path as key
            try:
                bot_files[relative_path.as_posix()] = file.read_text(encoding="utf-8")
            except Exception as e:
                structlogger.debug(
                    "project_generator.get_bot_files.error",
                    error=str(e),
                    file_path=file.as_posix(),
                )
                bot_files[relative_path.as_posix()] = None

        return bot_files

    def _get_bot_data_for_llm(self) -> Dict[str, Any]:
        """Get the current bot data (domain and flows) for the LLM."""
        file_importer = self._create_importer()

        # only include data created by the user (or the builder llm)
        # avoid including too many defaults that are not customized
        domain = file_importer.get_user_domain()
        flows = file_importer.get_user_flows()

        return {
            "domain": domain.as_dict(should_clean_json=True),
            "flows": yaml_flows_io.get_flows_as_json(flows, should_clean_json=True),
        }

    def _path_for_flow(self, flow_id: str) -> str:
        """Get the project-relative file path for a flow.

        Flows whose id starts with ``pattern_`` go to ``data/patterns``, all
        others to ``data/flows``.
        """
        if flow_id.startswith("pattern_"):
            return f"data/patterns/{flow_id}.yml"
        else:
            return f"data/flows/{flow_id}.yml"

    def _update_bot_files_from_llm_response(self, project_data: Dict[str, Any]) -> None:
        """Update the bot files with generated data by writing to disk.

        Args:
            project_data: LLM response; its ``"domain"`` entry is written to
                ``domain.yml`` and every flow below ``"flows" -> "flows"`` is
                written to its own file.
        """
        files = {"domain.yml": dump_obj_as_yaml_to_string(project_data["domain"])}
        # split up flows into one file per flow in the /flows folder
        for flow_id, flow_data in project_data["flows"].get("flows", {}).items():
            flow_file_path = self._path_for_flow(flow_id)
            single_flow_file_data = {"flows": {flow_id: flow_data}}
            files[flow_file_path] = dump_obj_as_yaml_to_string(single_flow_file_data)

        # removes any other flows that the LLM didn't generate
        self._cleanup_flows()
        self.update_bot_files(files)

    def _cleanup_flows(self) -> None:
        """Remove everything in ``data/flows`` and recreate it as an empty folder."""
        flows_folder = self.project_folder / "data" / "flows"
        if flows_folder.exists():
            shutil.rmtree(flows_folder)
        flows_folder.mkdir(parents=True, exist_ok=True)

    def update_bot_files(self, files: Dict[str, Optional[str]]) -> None:
        """Update bot files with new content by writing to disk.

        Args:
            files: Mapping of project-relative file name to new content.
                Entries whose content is ``None`` are skipped, because there
                is no text to write for them.
        """
        for filename, content in files.items():
            # Resolve (and let `subpath` check) the path before anything else,
            # so an invalid file name is reported even for skipped entries.
            file_path = Path(subpath(self.project_folder, filename))
            if content is None:
                # `write_text(None)` would raise a TypeError.
                structlogger.debug(
                    "project_generator.update_bot_files.skipped",
                    file_path=file_path.as_posix(),
                )
                continue
            file_path.parent.mkdir(parents=True, exist_ok=True)
            file_path.write_text(content, encoding="utf-8")

    def cleanup(self) -> None:
        """Cleanup the project folder.

        Removes all files and folders inside the project folder, leaving an
        empty folder behind. Failures to delete individual entries are logged
        and do not abort the cleanup.
        """
        for filename in os.listdir(self.project_folder):
            file_path = os.path.join(self.project_folder, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                structlogger.error(
                    "project_generator.cleanup_error",
                    error=str(e),
                    file_path=file_path,
                )
|
rasa/builder/scrape_rasa_docs.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from urllib.parse import urljoin, urlparse
|
|
5
|
-
|
|
6
|
-
import requests
|
|
7
|
-
from bs4 import BeautifulSoup
|
|
8
|
-
|
|
9
|
-
# --- Crawl configuration ---------------------------------------------------
BASE_URL = "https://rasa.com"
DOCS_ROOT = f"{BASE_URL}/docs"  # only URLs below this prefix are crawled
OUTPUT_DIR = "rasa_docs_md"  # folder the scraped pages are written to
MAX_PAGES = 100  # Optional limit for safety

# --- Crawl state -------------------------------------------------------------
to_visit = [DOCS_ROOT]  # queue of URLs still to fetch, seeded with the root
visited = set()  # URLs that have already been processed

# Make sure the output folder exists before any page is saved.
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def is_valid_doc_url(url: str) -> bool:
    """Return whether *url* is a docs page that should be crawled.

    A URL qualifies when it starts with ``DOCS_ROOT`` and is neither a PDF,
    a fragment link (contains ``#``) nor a ``mailto:`` link.
    """
    if not url.startswith(DOCS_ROOT):
        return False
    if url.endswith(".pdf"):
        return False
    if "#" in url:
        return False
    return "mailto:" not in url
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def slugify_url(url: str) -> str:
    """Turn the path of *url* into a flat, file-name friendly slug.

    Leading and trailing slashes are dropped and the remaining slashes become
    underscores. A URL without a path yields ``"index"``.
    """
    trimmed_path = urlparse(url).path.strip("/")
    if not trimmed_path:
        return "index"
    return "_".join(trimmed_path.split("/"))
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def clean_text(html: str) -> str:
    """Extract the readable text of a page from its HTML.

    Navigation, footer, script, style, form and button elements are removed.
    The text is taken from the ``<main>`` element, falling back to the body,
    with every ``<code>`` element wrapped in backticks. Returns an empty
    string when the document has neither element.
    """
    document = BeautifulSoup(html, "html.parser")

    # Remove navs, footers, and code tabs (customize if needed)
    unwanted_tags = ["nav", "footer", "script", "style", "form", "button"]
    for element in document.find_all(unwanted_tags):
        element.decompose()

    content_root = document.find("main") or document.body
    if not content_root:
        return ""

    # Replace <code> with backticks
    for snippet in content_root.find_all("code"):
        inline_code = snippet.get_text(strip=True)
        snippet.string = f"`{inline_code}`"

    return content_root.get_text(separator="\n", strip=True)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def save_as_markdown(text: str, url: str) -> str:
    """Write *text* to a markdown file named after *url*.

    The file is created inside ``OUTPUT_DIR`` and named ``<slug>.md``, where
    the slug is derived from the URL.

    Returns:
        The name of the written file (without the output directory).
    """
    file_name = f"{slugify_url(url)}.md"
    md_path = Path(OUTPUT_DIR) / file_name
    md_path.write_text(text, encoding="utf-8")

    print(f"✅ Saved: {md_path}")
    return file_name
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
pages_scraped = 0
markdown_to_url = {}  # saved markdown file name -> URL it was scraped from

# Crawl the docs in queue order, starting from the root, until the queue is
# empty or MAX_PAGES pages have been saved.
while to_visit and pages_scraped < MAX_PAGES:
    url = to_visit.pop(0)
    if url in visited:
        continue

    # Mark the URL as visited up front, so pages that fail or are skipped as
    # too short are not fetched again when other pages link to them.
    visited.add(url)

    try:
        print(f"Scraping: {url}")
        # A timeout keeps one unresponsive page from stalling the whole crawl.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        html = response.text
        text = clean_text(html)
        if len(text) < 200:  # skip very short pages
            print("⏭️ Skipped (too short)")
            continue

        # The URL is needed to derive the file name of the markdown file.
        file_name = save_as_markdown(text, url)
        markdown_to_url[file_name] = url
        pages_scraped += 1

        # Queue all not-yet-visited docs links found on this page.
        soup = BeautifulSoup(html, "html.parser")
        for link_tag in soup.find_all("a", href=True):
            link = urljoin(url, link_tag["href"])
            if is_valid_doc_url(link) and link not in visited:
                to_visit.append(link)

    except Exception as e:
        # Best effort: report the failure and continue with the next URL.
        print(f"⚠️ Failed to scrape {url}: {e}")


# Persist the mapping from markdown file to source URL.
with open("markdown_to_url.json", "w", encoding="utf-8") as f:
    json.dump(markdown_to_url, f, indent=2)
|