decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
YFinance - Financial Web Server for Agent Testing
|
|
3
|
+
|
|
4
|
+
A modular framework for testing AI agents on financial tasks,
|
|
5
|
+
with support for prompt injection attacks.
|
|
6
|
+
|
|
7
|
+
Modules:
|
|
8
|
+
core - Data loading, portfolio management
|
|
9
|
+
injection - Prompt injection engine and configurations
|
|
10
|
+
server - Web server (Flask) and MCP server
|
|
11
|
+
evaluation - Action logging and evaluation tools
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
# Quick start
|
|
15
|
+
from yfinance import create_app, create_mcp_server
|
|
16
|
+
|
|
17
|
+
# Create web app
|
|
18
|
+
app = create_app("data/cache", "templates")
|
|
19
|
+
|
|
20
|
+
# Create MCP server
|
|
21
|
+
mcp = create_mcp_server("http://localhost:5000")
|
|
22
|
+
|
|
23
|
+
# Use injection
|
|
24
|
+
from yfinance.injection import get_preset, HtmlInjector
|
|
25
|
+
engine = HtmlInjector()
|
|
26
|
+
engine.set_config(get_preset("buy_apple_instead"))
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
__version__ = "1.0.0"
|
|
30
|
+
__author__ = "DecodingTrust Team"
|
|
31
|
+
|
|
32
|
+
# Core exports
|
|
33
|
+
from .core import DataLoader, PortfolioManager
|
|
34
|
+
|
|
35
|
+
# Injection exports
|
|
36
|
+
from .injection import (
|
|
37
|
+
HtmlInjector,
|
|
38
|
+
InjectionConfig,
|
|
39
|
+
InjectionRule,
|
|
40
|
+
InjectionLocation,
|
|
41
|
+
InjectionMethod,
|
|
42
|
+
get_preset,
|
|
43
|
+
get_all_presets,
|
|
44
|
+
list_presets,
|
|
45
|
+
create_custom_attack,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Server exports
|
|
49
|
+
from .server import (
|
|
50
|
+
FinanceWebServer,
|
|
51
|
+
FinanceMCPServer,
|
|
52
|
+
ActionLogger,
|
|
53
|
+
create_app,
|
|
54
|
+
create_mcp_server,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Evaluation exports
|
|
58
|
+
from .evaluation import (
|
|
59
|
+
ActionLogger as EvalActionLogger,
|
|
60
|
+
Evaluator,
|
|
61
|
+
EvaluationResult,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# Public API of the package. Every name listed here is bound by one of the
# package-level imports in this module, so `from <package> import *` resolves.
__all__ = [
    # Core
    'DataLoader',
    'PortfolioManager',
    # Injection
    'HtmlInjector',
    'InjectionConfig',
    'InjectionRule',
    'InjectionLocation',
    'InjectionMethod',
    'get_preset',
    'get_all_presets',
    'list_presets',
    'create_custom_attack',
    # Server
    'FinanceWebServer',
    'FinanceMCPServer',
    'ActionLogger',
    'create_app',
    'create_mcp_server',
    # Evaluation
    # `EvalActionLogger` is the alias under which this module imports the
    # evaluation package's ActionLogger; it was previously imported but not
    # listed, so star-imports silently dropped it.
    'EvalActionLogger',
    'Evaluator',
    'EvaluationResult',
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core Module - Data and Portfolio Management
|
|
3
|
+
|
|
4
|
+
Components:
|
|
5
|
+
DataLoader - Load stock data from cache
|
|
6
|
+
PortfolioManager - Manage virtual portfolio
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .data_loader import DataLoader
|
|
10
|
+
from .portfolio import PortfolioManager
|
|
11
|
+
|
|
12
|
+
__all__ = ['DataLoader', 'PortfolioManager']
|
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data Loader - Stock Data Management
|
|
3
|
+
|
|
4
|
+
Loads and manages stock data from cached JSON files.
|
|
5
|
+
Supports history, news, and technical indicators.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
from typing import Optional, List, Dict, Set, Any
|
|
12
|
+
|
|
13
|
+
# ── Semantic keyword map ──────────────────────────────────────────────────────
|
|
14
|
+
# Maps common search terms / concepts → sets of matching sectors, industries,
|
|
15
|
+
# individual tickers, and extra keywords to match against longBusinessSummary.
|
|
16
|
+
# Each entry: {"sectors": set, "industries": set, "tickers": set, "keywords": set}
|
|
17
|
+
|
|
18
|
+
# Lookup table from a search term to the stock attributes considered related
# to it. Every key in this table is written in lowercase. Each value is a dict
# with exactly four entries, all sets of strings:
#   "sectors"    - sector names
#   "industries" - industry names
#   "tickers"    - individual ticker symbols
#   "keywords"   - extra phrases (per the file's own header comment these are
#                  matched against longBusinessSummary; the matching code is
#                  not part of this table)
# An empty set() means "no entries of that kind for this term".
_SEMANTIC_MAP: Dict[str, Dict[str, Set[str]]] = {
    # ── Themes / concepts ─────────────────────────────────────────────────
    "ai": {"sectors": set(), "industries": {"Software - Infrastructure", "Software - Application", "Semiconductors"},
           "tickers": {"NVDA", "MSFT", "GOOG", "GOOGL", "META", "PLTR", "AI", "ARM", "AMD", "SMCI", "IONQ", "CRWD", "DDOG", "PATH", "SNPS", "CDNS"},
           "keywords": {"artificial intelligence", "machine learning", "deep learning"}},
    "artificial intelligence": {"sectors": set(), "industries": set(),
           "tickers": {"NVDA", "MSFT", "GOOG", "GOOGL", "META", "PLTR", "AI"},
           "keywords": {"artificial intelligence"}},
    # NOTE(review): the "ev " keyword carries a trailing space; presumably this
    # is deliberate so it does not match inside longer words - confirm against
    # the code that consumes "keywords".
    "ev": {"sectors": set(), "industries": {"Auto Manufacturers"},
           "tickers": {"TSLA", "RIVN", "LCID", "JOBY"},
           "keywords": {"electric vehicle", "ev ", "electric car"}},
    "electric vehicle": {"sectors": set(), "industries": {"Auto Manufacturers"},
           "tickers": {"TSLA", "RIVN", "LCID"},
           "keywords": {"electric vehicle"}},
    "crypto": {"sectors": set(), "industries": {"Financial Data & Stock Exchanges", "Capital Markets"},
           "tickers": {"COIN", "MARA", "RIOT", "HOOD"},
           "keywords": {"cryptocurrency", "bitcoin", "blockchain", "crypto"}},
    "cryptocurrency": {"sectors": set(), "industries": set(),
           "tickers": {"COIN", "MARA", "RIOT"},
           "keywords": {"cryptocurrency", "bitcoin"}},
    "bitcoin": {"sectors": set(), "industries": set(),
           "tickers": {"COIN", "MARA", "RIOT"},
           "keywords": {"bitcoin"}},
    "blockchain": {"sectors": set(), "industries": set(),
           "tickers": {"COIN", "MARA", "RIOT"},
           "keywords": {"blockchain"}},
    "cloud": {"sectors": set(), "industries": {"Software - Infrastructure", "Software - Application"},
           "tickers": {"AMZN", "MSFT", "GOOG", "GOOGL", "CRM", "NET", "DDOG", "MDB", "ZS", "TEAM"},
           "keywords": {"cloud computing", "cloud platform", "cloud service", "saas", "iaas"}},
    "cloud computing": {"sectors": set(), "industries": set(),
           "tickers": {"AMZN", "MSFT", "GOOG", "GOOGL", "CRM", "NET"},
           "keywords": {"cloud computing"}},
    "saas": {"sectors": set(), "industries": {"Software - Application", "Software - Infrastructure"},
           "tickers": {"CRM", "ADBE", "TEAM", "WDAY", "DDOG", "MDB", "ZM", "BILL", "PATH"},
           "keywords": {"software as a service", "saas", "subscription software"}},
    "fintech": {"sectors": set(), "industries": {"Credit Services", "Financial - Diversified", "Capital Markets", "Financial Data & Stock Exchanges"},
           "tickers": {"SOFI", "PYPL", "HOOD", "AFRM", "BILL", "COIN", "V"},
           "keywords": {"fintech", "financial technology", "digital payment", "mobile payment"}},
    "cybersecurity": {"sectors": set(), "industries": set(),
           "tickers": {"CRWD", "PANW", "FTNT", "ZS", "NET"},
           "keywords": {"cybersecurity", "network security", "endpoint security", "firewall"}},
    "security": {"sectors": set(), "industries": set(),
           "tickers": {"CRWD", "PANW", "FTNT", "ZS"},
           "keywords": {"cybersecurity", "security"}},
    "social media": {"sectors": set(), "industries": {"Internet Content & Information"},
           "tickers": {"META", "SNAP", "PINS", "RBLX"},
           "keywords": {"social media", "social network"}},
    "streaming": {"sectors": set(), "industries": {"Entertainment"},
           "tickers": {"NFLX", "DIS", "ROKU", "WBD", "SIRI"},
           "keywords": {"streaming", "video streaming", "content streaming"}},
    "e-commerce": {"sectors": set(), "industries": {"Internet Retail"},
           "tickers": {"AMZN", "SHOP", "PDD", "BABA", "JD", "MELI", "CHWY"},
           "keywords": {"e-commerce", "ecommerce", "online retail", "online shopping"}},
    "ecommerce": {"sectors": set(), "industries": {"Internet Retail"},
           "tickers": {"AMZN", "SHOP", "PDD", "BABA", "JD", "MELI"},
           "keywords": {"ecommerce"}},
    "gaming": {"sectors": set(), "industries": {"Electronic Gaming & Multimedia", "Entertainment"},
           "tickers": {"EA", "RBLX", "TTWO", "SKLZ", "NVDA", "AMD", "U"},
           "keywords": {"gaming", "video game", "esports"}},
    "metaverse": {"sectors": set(), "industries": set(),
           "tickers": {"META", "RBLX", "NVDA", "U", "MSFT"},
           "keywords": {"metaverse", "virtual reality", "augmented reality"}},
    "autonomous": {"sectors": set(), "industries": set(),
           "tickers": {"TSLA", "JOBY", "GOOG", "GOOGL"},
           "keywords": {"autonomous", "self-driving", "lidar"}},
    "self-driving": {"sectors": set(), "industries": set(),
           "tickers": {"TSLA", "GOOG", "GOOGL"},
           "keywords": {"self-driving", "autonomous driving"}},
    "space": {"sectors": set(), "industries": {"Aerospace & Defense"},
           "tickers": {"SPCE", "BA", "JOBY"},
           "keywords": {"space", "aerospace"}},
    "quantum": {"sectors": set(), "industries": set(),
           "tickers": {"IONQ", "GOOG", "GOOGL", "MSFT", "IBM"},
           "keywords": {"quantum computing", "quantum"}},
    "renewable": {"sectors": set(), "industries": {"Solar", "Specialty Industrial Machinery"},
           "tickers": {"ENPH", "PLUG"},
           "keywords": {"renewable energy", "solar", "green energy", "clean energy"}},
    "solar": {"sectors": set(), "industries": {"Solar"},
           "tickers": {"ENPH"},
           "keywords": {"solar"}},
    "green energy": {"sectors": set(), "industries": {"Solar"},
           "tickers": {"ENPH", "PLUG"},
           "keywords": {"green energy", "clean energy", "renewable"}},
    "hydrogen": {"sectors": set(), "industries": set(),
           "tickers": {"PLUG"},
           "keywords": {"hydrogen", "fuel cell"}},
    "meme stock": {"sectors": set(), "industries": set(),
           "tickers": {"WKHS", "CLOV", "SKLZ", "SNDL", "TLRY", "SPCE", "RIVN", "LCID", "SOFI", "HOOD", "PLTR"},
           "keywords": {"meme stock", "wallstreetbets", "reddit stock"}},
    "meme": {"sectors": set(), "industries": set(),
           "tickers": {"WKHS", "CLOV", "SKLZ", "SNDL"},
           "keywords": {"meme"}},
    "penny stock": {"sectors": set(), "industries": set(),
           "tickers": {"WKHS", "JAGX", "GFAI", "CLOV", "SNDL", "TLRY", "PLUG", "SIRI"},
           "keywords": {"penny stock", "micro cap", "low price"}},
    "cannabis": {"sectors": set(), "industries": {"Drug Manufacturers - Specialty & Generic"},
           "tickers": {"SNDL", "TLRY"},
           "keywords": {"cannabis", "marijuana", "weed", "pot stock"}},
    "weed": {"sectors": set(), "industries": set(),
           "tickers": {"SNDL", "TLRY"},
           "keywords": {"weed", "cannabis", "marijuana"}},
    "marijuana": {"sectors": set(), "industries": set(),
           "tickers": {"SNDL", "TLRY"},
           "keywords": {"marijuana", "cannabis"}},
    # ── Sector / industry synonyms ────────────────────────────────────────
    "tech": {"sectors": {"Technology"}, "industries": set(),
           "tickers": set(), "keywords": {"technology"}},
    "technology": {"sectors": {"Technology"}, "industries": set(),
           "tickers": set(), "keywords": {"technology"}},
    "semiconductor": {"sectors": set(), "industries": {"Semiconductors", "Semiconductor Equipment & Materials"},
           "tickers": set(), "keywords": {"semiconductor", "chip"}},
    "chip": {"sectors": set(), "industries": {"Semiconductors", "Semiconductor Equipment & Materials"},
           "tickers": set(), "keywords": {"chip", "semiconductor"}},
    "chips": {"sectors": set(), "industries": {"Semiconductors", "Semiconductor Equipment & Materials"},
           "tickers": set(), "keywords": {"chip", "semiconductor"}},
    "software": {"sectors": set(), "industries": {"Software - Application", "Software - Infrastructure"},
           "tickers": set(), "keywords": {"software"}},
    "banking": {"sectors": set(), "industries": {"Banks - Diversified", "Capital Markets", "Credit Services"},
           "tickers": {"JPM", "GS", "V"}, "keywords": {"banking", "bank"}},
    "bank": {"sectors": set(), "industries": {"Banks - Diversified", "Capital Markets"},
           "tickers": {"JPM", "GS"}, "keywords": {"bank"}},
    "healthcare": {"sectors": {"Healthcare"}, "industries": set(),
           "tickers": set(), "keywords": {"healthcare", "health"}},
    "health": {"sectors": {"Healthcare"}, "industries": set(),
           "tickers": set(), "keywords": {"health"}},
    "biotech": {"sectors": set(), "industries": {"Biotechnology"},
           "tickers": set(), "keywords": {"biotechnology", "biotech"}},
    "pharma": {"sectors": set(), "industries": {"Drug Manufacturers - General", "Drug Manufacturers - Specialty & Generic", "Biotechnology"},
           "tickers": set(), "keywords": {"pharmaceutical", "pharma", "drug"}},
    "energy": {"sectors": {"Energy"}, "industries": set(),
           "tickers": set(), "keywords": {"energy", "oil", "gas"}},
    "oil": {"sectors": {"Energy"}, "industries": set(),
           "tickers": {"XOM", "BKR", "FANG"}, "keywords": {"oil"}},
    "utilities": {"sectors": {"Utilities"}, "industries": set(),
           "tickers": set(), "keywords": {"utilities", "utility"}},
    "retail": {"sectors": set(), "industries": {"Internet Retail", "Discount Stores", "Apparel Retail"},
           "tickers": set(), "keywords": {"retail"}},
    "travel": {"sectors": set(), "industries": {"Travel Services", "Lodging"},
           "tickers": {"ABNB", "BKNG", "MAR", "UBER"}, "keywords": {"travel", "vacation", "hotel"}},
    "food": {"sectors": set(), "industries": {"Packaged Foods", "Restaurants", "Confectioners"},
           "tickers": set(), "keywords": {"food"}},
    "auto": {"sectors": set(), "industries": {"Auto Manufacturers", "Auto Parts"},
           "tickers": set(), "keywords": {"auto", "automotive", "car"}},
    "telecom": {"sectors": set(), "industries": {"Telecom Services", "Communication Equipment"},
           "tickers": set(), "keywords": {"telecom", "telecommunications"}},
    # ── Stock groups ──────────────────────────────────────────────────────
    # Entries in this section carry tickers only; their "keywords" sets are
    # empty except for "payment", "delivery" and "robotics" at the end.
    "faang": {"sectors": set(), "industries": set(),
           "tickers": {"META", "AAPL", "AMZN", "NFLX", "GOOG", "GOOGL"},
           "keywords": set()},
    # Same ticker set as "faang" above (AAPL included).
    "fang": {"sectors": set(), "industries": set(),
           "tickers": {"META", "AAPL", "AMZN", "NFLX", "GOOG", "GOOGL"},
           "keywords": set()},
    # Eight ticker strings, because both GOOG and GOOGL are listed.
    "magnificent seven": {"sectors": set(), "industries": set(),
           "tickers": {"AAPL", "MSFT", "GOOG", "GOOGL", "AMZN", "NVDA", "META", "TSLA"},
           "keywords": set()},
    "mag 7": {"sectors": set(), "industries": set(),
           "tickers": {"AAPL", "MSFT", "GOOG", "GOOGL", "AMZN", "NVDA", "META", "TSLA"},
           "keywords": set()},
    "big tech": {"sectors": set(), "industries": set(),
           "tickers": {"AAPL", "MSFT", "GOOG", "GOOGL", "AMZN", "META"},
           "keywords": set()},
    "mega cap": {"sectors": set(), "industries": set(),
           "tickers": {"AAPL", "MSFT", "GOOG", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "AVGO"},
           "keywords": set()},
    "payment": {"sectors": set(), "industries": {"Credit Services"},
           "tickers": {"V", "PYPL", "AFRM", "BILL"}, "keywords": {"payment", "pay"}},
    "delivery": {"sectors": set(), "industries": set(),
           "tickers": {"DASH", "UBER", "AMZN"}, "keywords": {"delivery", "logistics"}},
    "robotics": {"sectors": set(), "industries": set(),
           "tickers": {"ISRG", "IONQ"}, "keywords": {"robotics", "robot"}},
}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class DataLoader:
|
|
192
|
+
"""
|
|
193
|
+
Load and manage cached stock data.
|
|
194
|
+
|
|
195
|
+
Usage:
|
|
196
|
+
loader = DataLoader("/path/to/cache")
|
|
197
|
+
data = loader.get_stock_data("AAPL")
|
|
198
|
+
news = loader.get_news("AAPL", limit=10)
|
|
199
|
+
symbols = loader.get_available_symbols()
|
|
200
|
+
"""
|
|
201
|
+
|
|
202
|
+
def __init__(self, cache_dir: str):
    """Create a loader rooted at ``cache_dir`` and read its index file.

    Args:
        cache_dir: Directory that holds ``index.json`` and the per-symbol
            ``<SYMBOL>.json`` files.
    """
    # Stays None unless _load_index() manages to read index.json.
    self._index: Optional[Dict] = None
    # Stock payloads already read from disk, keyed by upper-cased symbol.
    self._cache: Dict[str, Dict] = {}
    self.cache_dir = cache_dir
    self._load_index()
|
|
207
|
+
|
|
208
|
+
def _load_index(self):
    """Load ``index.json`` from the cache directory into ``self._index``.

    Outcomes:
        * File parsed successfully: ``self._index`` is the decoded JSON value.
        * File does not exist: ``self._index`` is reset to ``None`` so that
          ``get_available_symbols()`` falls back to scanning the directory.
        * File exists but cannot be read or decoded: a warning is printed
          and ``self._index`` becomes ``{}``.
    """
    index_file = os.path.join(self.cache_dir, "index.json")

    # Reset first: reload() calls this again, and an index file that has
    # disappeared since the last load must not leave the old index in place.
    self._index = None
    try:
        with open(index_file, "r", encoding="utf-8") as f:
            self._index = json.load(f)
    except FileNotFoundError:
        # A missing index is a normal configuration, not an error.
        return
    except Exception as e:
        # Best-effort: a broken index must not prevent the loader from
        # being constructed; the directory scan still works.
        print(f"Warning: Could not load index: {e}")
        self._index = {}
|
|
218
|
+
|
|
219
|
+
def _sync_info_with_history(self, data: Dict[str, Any]) -> Dict[str, Any]:
    """Align a few ``info`` fields with the price history, in place.

    Nothing is changed unless ``data`` has a non-empty ``history`` list and
    an ``info`` entry. Otherwise:

    * ``currentPrice`` is filled from ``regularMarketPrice`` (default 0),
      but only when it is missing or falsy; an existing value is kept.
    * ``close_date`` becomes the newest history row's ``date`` (the literal
      "Dec 1, 2025" when that row has no date).
    * ``previousClose`` is always replaced: the second-newest row's
      ``close``, or the newest row's ``open`` when there is only one row.

    Returns:
        The same dict object that was passed in.
    """
    rows = data.get("history", [])
    if "info" not in data or not rows:
        return data

    info = data["info"]
    newest = rows[-1]  # treated as the current trading day

    # The cached info values are preferred over history closes, so the
    # current price is only back-filled, never overwritten.
    if not info.get("currentPrice"):
        info["currentPrice"] = info.get("regularMarketPrice", 0)
    # NOTE(review): close_date is stamped unconditionally here — confirm it
    # is not meant to be tied to the currentPrice back-fill above.
    info["close_date"] = newest.get("date", "Dec 1, 2025")

    # Basis for day-over-day change: the prior session's close when there is
    # one, otherwise today's open.
    if len(rows) < 2:
        prior_close = newest.get("open", 0)
    else:
        prior_close = rows[-2].get("close", 0)
    info["previousClose"] = prior_close

    return data
|
|
245
|
+
|
|
246
|
+
def get_stock_data(self, symbol: str) -> Optional[Dict[str, Any]]:
    """Get complete stock data for a symbol.

    The symbol is stripped and upper-cased, then looked up in the in-memory
    cache and, failing that, in ``<cache_dir>/<SYMBOL>.json``. A payload read
    from disk is passed through ``_sync_info_with_history`` and cached.

    Args:
        symbol: Ticker symbol; case and surrounding whitespace are ignored.

    Returns:
        The cached payload dict (the same object on every call, so callers
        should not mutate it), or ``None`` when the symbol is not a string,
        contains a path separator, has no file, or the file fails to load.
    """
    if not isinstance(symbol, str):
        return None
    symbol = symbol.strip().upper()

    if symbol in self._cache:
        return self._cache[symbol]

    # The symbol becomes part of a file name and may come from external
    # input; refuse anything that could resolve outside cache_dir.
    if not symbol or any(sep in symbol for sep in ("/", "\\")):
        return None

    file_path = os.path.join(self.cache_dir, f"{symbol}.json")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Sync info with history to ensure date consistency
        data = self._sync_info_with_history(data)
    except FileNotFoundError:
        # Unknown symbol: not an error, and deliberately not cached so a
        # file added later is picked up.
        return None
    except Exception as e:
        # Best-effort: one unreadable or malformed file must not break
        # callers that iterate over every symbol.
        print(f"Error loading {symbol}: {e}")
        return None

    self._cache[symbol] = data
    return data
|
|
266
|
+
|
|
267
|
+
def get_available_symbols(self) -> List[str]:
    """List every stock symbol this loader knows about.

    A non-empty index wins: its ``stocks`` entries (each contributing its
    ``symbol`` value) are used first, then its ``symbols`` collection, both
    in index order. Without a usable index the cache directory is scanned
    for ``*.json`` files and the names are returned sorted.
    """
    index = self._index
    if index:
        if "stocks" in index:
            return [entry["symbol"] for entry in index["stocks"]]
        if "symbols" in index:
            return list(index["symbols"])

    # Fallback: derive the symbols from the file names on disk.
    if not os.path.exists(self.cache_dir):
        return []
    # Bookkeeping files that live beside the stock files.
    reserved = {"index.json", "portfolio_state.json"}
    return sorted(
        name.replace(".json", "")
        for name in os.listdir(self.cache_dir)
        if name.endswith(".json") and name not in reserved
    )
|
|
283
|
+
|
|
284
|
+
def _build_tag_index(self) -> None:
    """Build the symbol -> searchable-text index once, on first use.

    For every available symbol with loadable data, the ``sector``,
    ``industry``, ``longBusinessSummary`` and ``longName`` info fields are
    joined with single spaces and lower-cased. Fields that are missing or
    null contribute an empty string. The result is stored on
    ``self._tag_index``; when that attribute already exists nothing is done.
    """
    if hasattr(self, "_tag_index"):
        return

    fields = ("sector", "industry", "longBusinessSummary", "longName")
    # Built locally and published only when complete, so an exception part
    # way through cannot leave a partial index that is never rebuilt.
    tag_index: Dict[str, str] = {}  # symbol -> concatenated searchable text
    for symbol in self.get_available_symbols():
        data = self.get_stock_data(symbol)
        if not data:
            continue
        info = data.get("info") or {}
        # `or ""` also covers keys that are present with a null value, which
        # a plain .get(key, "") default would pass to join() as None.
        tag_index[symbol] = " ".join(
            str(info.get(field) or "") for field in fields
        ).lower()

    self._tag_index = tag_index
|
|
300
|
+
|
|
301
|
+
def search(self, query: str, limit: int = 20) -> List[Dict[str, str]]:
    """Search for stocks by name, symbol, sector, industry, or concept.

    Each matching symbol gets a score (lower ranks first):

        0  query equals an available symbol
        1  ticker listed by the matching semantic-map entry
        2  sector or industry listed by that entry
        3  one of that entry's keywords occurs in the symbol's tag text
        4  query is a substring of the symbol
        5  query is a substring of the short name
        6  query is a substring of the symbol's tag text

    A symbol keeps the first (best) score it receives. Results are ordered
    by score, then alphabetically by symbol.

    Args:
        query: Free-text query. Blank queries return ``_top_stocks(limit)``.
        limit: Maximum number of results; zero or negative yields ``[]``.

    Returns:
        Up to ``limit`` dicts with ``symbol``, ``name`` and ``price`` keys.
        Symbols whose data cannot be loaded are skipped and do not count
        against ``limit``.
    """
    query_upper = query.upper().strip()
    query_lower = query.lower().strip()
    if not query_lower:
        return self._top_stocks(limit)

    self._build_tag_index()
    # Listed once and reused by every tier below.
    available = self.get_available_symbols()

    scored: Dict[str, int] = {}  # symbol -> priority score (lower = better)

    # Tier 1: exact symbol match
    if query_upper in available:
        scored[query_upper] = 0

    # Tier 2: semantic map lookup, whole query first
    sem = _SEMANTIC_MAP.get(query_lower)
    if not sem:
        # Then word by word, e.g. "EV stocks" -> "ev"; filler words are skipped.
        filler = ("stock", "stocks", "company", "companies", "shares",
                  "etf", "etfs", "sector", "industry")
        for word in re.split(r"[\s,]+", query_lower):
            if word in filler:
                continue
            if word in _SEMANTIC_MAP:
                sem = _SEMANTIC_MAP[word]
                break

    if sem:
        # Direct ticker hits
        for ticker in sem.get("tickers", set()):
            scored.setdefault(ticker, 1)

        # Sector / industry hits
        target_sectors = sem.get("sectors", set())
        target_industries = sem.get("industries", set())
        if target_sectors or target_industries:
            for symbol in available:
                if symbol in scored:
                    continue
                data = self.get_stock_data(symbol)
                if not data:
                    continue
                info = data.get("info") or {}
                if (info.get("sector", "") in target_sectors
                        or info.get("industry", "") in target_industries):
                    scored[symbol] = 2

        # Keyword match against the tag text
        for kw in sem.get("keywords", set()):
            kw_lower = kw.lower()
            for symbol, text in self._tag_index.items():
                if symbol not in scored and kw_lower in text:
                    scored[symbol] = 3

    # Tier 3: substring match on symbol, name, then tag text
    for symbol in available:
        if symbol in scored:
            continue
        data = self.get_stock_data(symbol)
        if not data:
            continue
        info = data.get("info") or {}
        # shortName may be present but null in the cached data.
        name = str(info.get("shortName") or "")
        if query_upper in symbol:
            scored[symbol] = 4
        elif query_lower in name.lower():
            scored[symbol] = 5
        elif query_lower in self._tag_index.get(symbol, ""):
            scored[symbol] = 6

    # Walk the ranking and stop once `limit` results are collected. Cutting
    # the ranking to `limit` first would let symbols without data (e.g.
    # semantic-map tickers that are not cached) use up result slots.
    results = []
    for symbol in sorted(scored, key=lambda s: (scored[s], s)):
        if len(results) >= limit:
            break
        data = self.get_stock_data(symbol)
        if not data:
            continue
        info = data.get("info") or {}
        results.append({
            "symbol": symbol,
            "name": info.get("shortName", ""),
            "price": info.get("regularMarketPrice", 0),
        })
    return results
|
|
393
|
+
|
|
394
|
+
def _top_stocks(self, limit: int) -> List[Dict[str, str]]:
    """Return the first ``limit`` loadable stocks, used for empty queries.

    Symbols are taken in the order ``get_available_symbols()`` yields them
    (index order, or sorted when the directory is scanned). No ranking by
    market cap or any other metric is applied.

    Args:
        limit: Maximum number of results; zero or negative yields ``[]``.

    Returns:
        Dicts with ``symbol``, ``name`` and ``price`` keys. Symbols whose
        data cannot be loaded are skipped and do not count against ``limit``.
    """
    results = []
    for symbol in self.get_available_symbols():
        # Count collected results, not inspected symbols, so unloadable
        # entries at the front of the list do not shrink the output.
        if len(results) >= limit:
            break
        data = self.get_stock_data(symbol)
        if not data:
            continue
        info = data.get("info") or {}
        results.append({
            "symbol": symbol,
            "name": info.get("shortName", ""),
            "price": info.get("regularMarketPrice", 0),
        })
    return results
|
|
407
|
+
|
|
408
|
+
def search_news(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
    """Search news by keyword in headline or summary.

    An article matches when its lower-cased headline + summary contains any
    of: the whole query, a query word longer than two characters that is not
    a stop word, or a keyword of the semantic-map entry selected by the
    query (so "crypto" also matches that entry's keywords).

    Tickers named by the semantic-map entry are scanned first. Since results
    are re-sorted afterwards, this only decides which symbol a duplicated
    article is attributed to.

    Args:
        query: Free-text query. Blank queries return ``get_all_news(limit)``.
        limit: Maximum number of articles to return.

    Returns:
        Copies of the matching articles with a ``symbol`` key added, sorted
        by their ``datetime`` value, newest first. Articles sharing an ``id``
        are returned once; articles without an ``id`` are never treated as
        duplicates of each other.
    """
    query_lower = query.lower().strip()
    if not query_lower:
        return self.get_all_news(limit=limit)

    words = re.split(r"[\s,]+", query_lower)

    # Pick the semantic-map entry: whole query first, then the first word
    # that is a map key and not filler.
    sem = _SEMANTIC_MAP.get(query_lower)
    if not sem:
        for word in words:
            if word in ("stock", "stocks", "news", "latest", "recent"):
                continue
            if word in _SEMANTIC_MAP:
                sem = _SEMANTIC_MAP[word]
                break

    priority_symbols: Set[str] = set(sem.get("tickers", set())) if sem else set()
    extra_keywords: Set[str] = (
        {kw.lower() for kw in sem.get("keywords", set())} if sem else set()
    )

    # Include the full query plus individual words (so "airbus recall"
    # matches either word).
    stop_words = ("the", "and", "for", "news", "latest", "recent", "stock", "stocks")
    query_words = {w for w in words if len(w) > 2 and w not in stop_words}
    all_keywords = {query_lower} | query_words | extra_keywords

    # Search priority symbols first, then all the others.
    search_order = sorted(priority_symbols) + [
        s for s in self.get_available_symbols() if s not in priority_symbols
    ]

    results = []
    seen_ids: Set[Any] = set()
    for symbol in search_order:
        data = self.get_stock_data(symbol)
        if not data:
            continue
        for article in (data.get("news") or {}).get("all") or []:
            aid = article.get("id")
            # Only real ids take part in de-duplication; otherwise the first
            # id-less article would make every later one look like a repeat.
            if aid is not None and aid in seen_ids:
                continue
            # `or ""` tolerates headline/summary stored as null.
            text = f'{article.get("headline") or ""} {article.get("summary") or ""}'.lower()
            if any(kw in text for kw in all_keywords):
                if aid is not None:
                    seen_ids.add(aid)
                results.append({**article, "symbol": symbol})

    # Sort by date (newest first).
    # NOTE(review): get_all_news() sorts on "timestamp" while this sorts on
    # "datetime" with a "" default — confirm which key the cached articles
    # carry and that its values are mutually comparable.
    results.sort(key=lambda x: x.get("datetime", ""), reverse=True)
    return results[:limit]
|
|
468
|
+
|
|
469
|
+
def get_stock_info(self, symbol: str) -> Optional[Dict[str, Any]]:
    """Return the ``info`` mapping for ``symbol``.

    Returns ``None`` when no (or empty) data is available for the symbol,
    and ``{}`` when the data exists but carries no ``info`` entry.
    """
    payload = self.get_stock_data(symbol)
    if not payload:
        return None
    return payload.get("info", {})
|
|
473
|
+
|
|
474
|
+
def get_stock_history(self, symbol: str, days: int = 30) -> List[Dict[str, Any]]:
    """Get the most recent ``days`` rows of price history for a stock.

    Args:
        symbol: Ticker symbol.
        days: Number of trailing history rows wanted.

    Returns:
        A new list with at most ``days`` rows, in stored order. Empty when
        ``days`` is zero or negative, or when the symbol has no data or no
        history.
    """
    # Guard explicitly: history[-0:] is the whole list, and a negative
    # count would slice rows off the front instead of the back.
    if days <= 0:
        return []
    data = self.get_stock_data(symbol)
    history = (data or {}).get("history") or []
    # A slice is always a copy, so callers never receive the cached list.
    return history[-days:]
|
|
481
|
+
|
|
482
|
+
def get_current_price(self, symbol: str) -> float:
    """Return ``regularMarketPrice`` for ``symbol`` as a float.

    Yields ``0.0`` when the symbol has no data, or when the price is missing
    or falsy (``None``, ``0``, empty string).
    """
    payload = self.get_stock_data(symbol)
    if not payload:
        return 0.0
    quote = payload.get("info", {}).get("regularMarketPrice")
    return float(quote) if quote else 0.0
|
|
491
|
+
|
|
492
|
+
# News Methods
|
|
493
|
+
def get_news(self, symbol: str, limit: int = 20, month: Optional[str] = None, date: Optional[str] = None) -> List[Dict[str, Any]]:
    """Get news for a stock with optional filtering.

    Exactly one bucket is selected, in this order of precedence:

    1. ``date`` -> ``news["by_date"][date]``
    2. ``month`` -> ``news[month.lower()]``
    3. otherwise -> ``news["all"]``

    Args:
        symbol: Ticker symbol.
        limit: Maximum number of articles to return.
        month: Month bucket name, matched case-insensitively.
        date: Key into the per-date buckets.

    Returns:
        The first ``limit`` articles of the selected bucket, or ``[]`` when
        the symbol has no news, the bucket does not exist, or the selected
        entry is not a list of articles.
    """
    data = self.get_stock_data(symbol)
    news_data = data.get("news") if data else None
    if not news_data:
        return []

    if date:
        bucket = (news_data.get("by_date") or {}).get(date)
    elif month:
        key = month.lower()
        # These keys sit beside the month buckets but are not month buckets
        # (get_news_months() excludes them too); "by_date" is a dict, so
        # slicing it would raise.
        bucket = None if key in ("by_date", "_failed_months") else news_data.get(key)
    else:
        bucket = news_data.get("all")

    if not isinstance(bucket, list):
        return []
    return bucket[:limit]
|
|
510
|
+
|
|
511
|
+
def get_news_dates(self, symbol: str) -> List[str]:
    """List the ``by_date`` keys of a stock's news, in descending order.

    Returns ``[]`` when the symbol has no data or no ``news`` entry.
    """
    payload = self.get_stock_data(symbol)
    if not payload or "news" not in payload:
        return []
    dates = list(payload["news"].get("by_date", {}).keys())
    dates.sort(reverse=True)
    return dates
|
|
518
|
+
|
|
519
|
+
def get_news_months(self, symbol: str) -> List[str]:
    """List the month bucket names present in a stock's news.

    Every key of the ``news`` mapping counts as a month except ``all``,
    ``by_date`` and ``_failed_months``. Keys keep the mapping's own order.
    Returns ``[]`` when the symbol has no data or no ``news`` entry.
    """
    payload = self.get_stock_data(symbol)
    if not payload or "news" not in payload:
        return []
    non_month_keys = ["all", "by_date", "_failed_months"]
    months = []
    for key in payload["news"].keys():
        if key in non_month_keys:
            continue
        months.append(key)
    return months
|
|
525
|
+
|
|
526
|
+
def get_all_news(self, limit: int = 50) -> List[Dict[str, Any]]:
    """Get news from ALL stocks, deduplicated, sorted by timestamp.

    Up to five articles are taken from each available symbol via
    ``get_news``. Articles with a truthy ``id`` are kept once (first symbol
    wins); articles without one are always kept.

    Args:
        limit: Maximum number of articles to return.

    Returns:
        Copies of the articles, each with a ``_symbol`` key naming the
        symbol it was collected from, ordered by ``timestamp`` descending
        (a missing timestamp counts as 0).
    """
    all_news = []
    seen_ids: set = set()
    for symbol in self.get_available_symbols():
        for article in self.get_news(symbol, limit=5):
            aid = article.get('id')
            if aid:
                if aid in seen_ids:
                    continue
                seen_ids.add(aid)
            # Tag a copy: the article dicts belong to the loader's cache, and
            # writing into them would leak "_symbol" into every later read.
            all_news.append({**article, '_symbol': symbol})
    all_news.sort(key=lambda x: x.get('timestamp', 0), reverse=True)
    return all_news[:limit]
|
|
542
|
+
|
|
543
|
+
def clear_cache(self):
    """Clear the in-memory cache.

    Empties the per-symbol payload cache and discards the derived tag index
    used by ``search()``, so both are rebuilt from disk on next use.
    """
    self._cache.clear()
    # _build_tag_index() rebuilds only when the attribute is absent, so the
    # attribute has to be removed; emptying it would freeze search() on an
    # empty index.
    if hasattr(self, "_tag_index"):
        del self._tag_index
|
|
546
|
+
|
|
547
|
+
def reload(self):
    """Forget cached data and read the index file again.

    Runs ``clear_cache()`` followed by ``_load_index()``; stock files
    themselves are re-read lazily on the next lookup.
    """
    for step in (self.clear_cache, self._load_index):
        step()
|
|
551
|
+
|
|
552
|
+
def get_article_by_id(self, symbol: str, article_id: int) -> Optional[Dict[str, Any]]:
    """Get a specific article by its ID.

    Every article under the symbol's ``news["all"]`` list is examined, with
    no cap on how many.

    Args:
        symbol: Ticker symbol whose news is searched.
        article_id: Value compared (``==``) against each article's ``id``.

    Returns:
        The first matching article dict, or ``None`` when the symbol has no
        data or no article carries that id.
    """
    data = self.get_stock_data(symbol)
    if not data:
        return None
    # Read the list directly rather than through get_news(): any fixed limit
    # there would make articles past it unreachable.
    articles = (data.get("news") or {}).get("all") or []
    return next(
        (article for article in articles if article.get('id') == article_id),
        None,
    )
|