decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Legal Data Loader - Load statutes and document templates.
|
|
3
|
+
|
|
4
|
+
Case law, judges, dockets, and disclosures are now served by CourtListenerStore.
|
|
5
|
+
This loader only handles statutes (not in CourtListener) and document templates.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, List, Optional, Any
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Statute:
    """A single statute or regulation record.

    The first seven fields have no default and must be supplied when the
    record is built; everything after them is optional metadata.
    """

    id: str
    citation: str
    title: str
    jurisdiction: str
    code: str
    section: str
    text: str
    effective_date: str = ""
    last_amended: str = ""
    related_sections: List[str] = field(default_factory=list)
    annotations: List[str] = field(default_factory=list)
    topics: List[str] = field(default_factory=list)
    is_valid: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the exported view of this statute as a plain dictionary.

        Only the fields named below are exported; ``last_amended``,
        ``annotations``, ``topics`` and ``is_valid`` are not part of the
        result. Values are handed out by reference (the ``related_sections``
        list is not copied).
        """
        exported_fields = (
            "id",
            "citation",
            "title",
            "jurisdiction",
            "code",
            "section",
            "text",
            "effective_date",
            "related_sections",
        )
        return {name: getattr(self, name) for name in exported_fields}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class DocumentTemplate:
    """A reusable legal document template."""

    template_id: str
    name: str
    doc_type: str  # motion, contract, letter, brief, etc.
    jurisdiction: str
    description: str
    sections: List[Dict[str, Any]] = field(default_factory=list)
    variables: List[str] = field(default_factory=list)
    content: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the template's metadata as a plain dictionary.

        ``doc_type`` is exported under the key ``"type"``; the ``content``
        field is not included in the result. ``sections`` and ``variables``
        are returned by reference, not copied.
        """
        return dict(
            template_id=self.template_id,
            name=self.name,
            type=self.doc_type,
            jurisdiction=self.jurisdiction,
            description=self.description,
            sections=self.sections,
            variables=self.variables,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class LegalDataLoader:
    """Load and manage legal data (statutes, templates).

    Case law, judges, dockets, and disclosures are now handled by
    CourtListenerStore. This class only loads statutes and templates.

    Every statute is indexed twice in ``_statutes`` (under its lower-cased
    citation and under its id), so code that iterates the mapping has to
    de-duplicate. ``add_statute`` / ``add_template`` store the raw
    dictionaries they receive, so both mappings can hold dataclass instances
    and plain dicts side by side; the read paths in this class accept both.
    """

    # Words ignored by the token-overlap fallback of ``search_statutes``.
    _STOP_WORDS = frozenset({
        'the', 'and', 'or', 'of', 'in', 'for', 'to', 'a', 'an', 'is', 'on',
        'at', 'by', 'with', 'from', 'that', 'this', 'its',
    })

    # Statute fields that have no dataclass default; filled with "" when a
    # raw dict added through ``add_statute`` has to be viewed as a Statute.
    _STATUTE_REQUIRED = (
        "id", "citation", "title", "jurisdiction", "code", "section", "text",
    )

    def __init__(self, data_dir: Optional[str] = None):
        """Create the loader and immediately read all data files.

        Args:
            data_dir: Directory containing ``statutes/`` and ``templates/``.
                Defaults to the ``data`` directory two levels above this
                module's file.
        """
        if data_dir is None:
            data_dir = Path(__file__).parent.parent / "data"
        self.data_dir = Path(data_dir)

        self._statutes: Dict[str, Statute] = {}
        self._templates: Dict[str, DocumentTemplate] = {}

        self._load_data()

    def _load_data(self):
        """Load all data from JSON files."""
        self._load_statutes()
        self._load_templates()

    def _load_statutes(self):
        """Load statute data from every ``*.json`` file under ``statutes/``.

        Loading is best-effort: an unreadable file or a malformed entry is
        reported with ``print`` and skipped, and the remaining entries are
        still loaded.
        """
        statutes_dir = self.data_dir / "statutes"
        if not statutes_dir.exists():
            return

        # Sorted so that load order (and therefore which duplicate wins and
        # how ties rank in search) does not depend on the filesystem.
        for json_file in sorted(statutes_dir.glob("*.json")):
            try:
                # Citations contain non-ASCII characters such as the section
                # sign, so do not rely on the platform default encoding.
                with open(json_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                entries = list(data.get("statutes", []))
            except Exception as e:
                print(f"Error loading {json_file}: {e}")
                continue

            for i, statute_data in enumerate(entries):
                try:
                    # Auto-fill missing fields
                    citation = statute_data.get("citation", "")
                    if "id" not in statute_data:
                        statute_data["id"] = f"statute_{json_file.stem}_{i:03d}"
                    # Split once at the first section sign, e.g.
                    # "42 U.S.C. § 1983" -> code "42 U.S.C.", section "1983".
                    # A doubled sign ("§§ 1981-1983") still yields the
                    # section text instead of an empty string.
                    code_part, _, section_part = citation.partition("§")
                    if "code" not in statute_data:
                        statute_data["code"] = code_part.strip()
                    if "section" not in statute_data:
                        statute_data["section"] = section_part.lstrip("§").strip()
                    statute = Statute(**{
                        k: v for k, v in statute_data.items()
                        if k in Statute.__dataclass_fields__
                    })
                    self._statutes[statute.citation.lower()] = statute
                    self._statutes[statute.id] = statute
                except Exception as e:
                    # Isolate the failure to this entry only.
                    print(f"Error loading {json_file}: statute #{i}: {e}")

    def _load_templates(self):
        """Load document templates from ``templates/document_templates.json``.

        Best-effort like ``_load_statutes``: problems are printed and the
        offending file or entry is skipped.
        """
        templates_file = self.data_dir / "templates" / "document_templates.json"
        if not templates_file.exists():
            return

        try:
            with open(templates_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            entries = list(data.get("templates", []))
        except Exception as e:
            print(f"Error loading templates: {e}")
            return

        for i, template_data in enumerate(entries):
            try:
                template = DocumentTemplate(**{
                    k: v for k, v in template_data.items()
                    if k in DocumentTemplate.__dataclass_fields__
                })
                self._templates[template.template_id] = template
            except Exception as e:
                print(f"Error loading templates: template #{i}: {e}")

    @staticmethod
    def _normalize_citation(text: str) -> str:
        """Normalize citation text for flexible matching.

        Lower-cases the text, removes dots, spaces and section signs, and
        turns an en dash into a hyphen.
        Handles: C.F.R. vs CFR, U.S.C. vs USC, etc.
        """
        return text.lower().replace(".", "").replace(" ", "").replace("§", "").replace("–", "-")

    @staticmethod
    def _as_statute(entry: Any) -> Optional[Statute]:
        """Return ``entry`` viewed as a ``Statute``, or ``None`` if impossible.

        ``Statute`` instances are returned unchanged. A dict (as stored by
        ``add_statute``) is converted using only the keys that are Statute
        fields, with missing required fields defaulting to "".
        """
        if isinstance(entry, Statute):
            return entry
        if not isinstance(entry, dict):
            return None
        known = {k: v for k, v in entry.items() if k in Statute.__dataclass_fields__}
        for name in LegalDataLoader._STATUTE_REQUIRED:
            known.setdefault(name, "")
        return Statute(**known)

    def _score_statute(
        self,
        statute: Statute,
        query_lower: str,
        query_norm: str,
        cite_norms: List[str],
        tokens: List[str],
    ) -> float:
        """Score one statute against a prepared query; 0.0 means no match.

        The query-derived arguments are computed once per search by
        ``search_statutes`` so this method only does per-statute work.
        """
        # ``or ""`` guards against None values coming from raw dict entries.
        title_lower = (statute.title or "").lower()
        text_lower = (statute.text or "").lower()
        citation_lower = (statute.citation or "").lower()
        citation_norm = self._normalize_citation(statute.citation or "")
        topics_lower = " ".join(str(t).lower() for t in (statute.topics or []))

        # Priority 1: exact substring match (full query in a field)
        if (query_lower in title_lower or query_lower in text_lower or
                query_lower in citation_lower or query_lower in topics_lower):
            return 100.0

        # The normalized comparisons are substring tests in both directions.
        # An empty string is a substring of everything, so a statute without
        # a citation must not take part or it would match every query.
        if citation_norm:
            # Priority 2: normalized citation match (section sign / dots ignored)
            if query_norm and (query_norm in citation_norm or citation_norm in query_norm):
                return 90.0
            # Priority 3: citation-like patterns extracted from the query
            for cp_norm in cite_norms:
                if cp_norm in citation_norm or citation_norm in cp_norm:
                    return 85.0

        # Priority 4: token overlap scoring
        if tokens:
            all_text = f"{citation_lower} {title_lower} {text_lower} {topics_lower}"
            matched = sum(1 for t in tokens if t in all_text)
            ratio = matched / len(tokens)
            # Only count if at least 40% of tokens match (avoid noise)
            if ratio >= 0.4:
                return ratio * 50.0
        return 0.0

    def search_statutes(
        self,
        query: str,
        jurisdiction: str = "all",
        limit: int = 20
    ) -> List[Statute]:
        """Search for statutes matching the query.

        Matching strategy (in priority order):
        1. Exact substring: full query found in citation/title/text/topics (100)
        2. Normalized citation: match after stripping dots/spaces/section signs (90)
        3. Embedded citation: a U.S.C./C.F.R.-style citation found inside the
           query matches the statute's normalized citation (85)
        4. Token overlap: at least 40% of the meaningful query words appear in
           the statute's fields (up to 50)

        Args:
            query: Free text or citation; surrounding quotes are stripped.
            jurisdiction: ``"all"`` (or ``None``) for no filter, otherwise
                compared case-insensitively with each statute's jurisdiction.
            limit: Maximum number of results.

        Returns:
            Matching statutes, best score first; ties keep storage order.
            Entries added as raw dicts are returned as ``Statute`` objects.
        """
        # Function-scope import: the module's import block does not bring in re.
        import re

        # Clean query: strip surrounding quotes agents often add
        query_clean = (query or "").strip().strip('"').strip("'")
        query_lower = query_clean.lower()
        query_norm = self._normalize_citation(query_clean)
        wanted = "all" if jurisdiction is None else jurisdiction.lower()

        # Query-only work, done once instead of once per statute.
        # e.g., query "asylum 8 U.S.C. 1158 particular social group" -> "8 U.S.C. 1158"
        cite_norms = [
            self._normalize_citation(cp)
            for cp in re.findall(
                r'\d+\s*(?:U\.?S\.?C\.?|C\.?F\.?R\.?)\s*(?:§\s*|Part\s*)?[\d()\w.-]+',
                query_clean, re.IGNORECASE
            )
        ]
        # Meaningful tokens only (skip short noise/stop words)
        tokens = [
            t for t in query_lower.split()
            if len(t) > 2 and t not in self._STOP_WORDS
        ]

        scored: Dict[str, float] = {}
        seen_statutes: Dict[str, Statute] = {}

        for entry in self._statutes.values():
            statute = self._as_statute(entry)
            if statute is None:
                continue
            if wanted != "all" and (statute.jurisdiction or "").lower() != wanted:
                continue

            # Each statute is stored under two keys; score it only once.
            sid = statute.id
            if sid in seen_statutes:
                continue
            seen_statutes[sid] = statute

            score = self._score_statute(statute, query_lower, query_norm, cite_norms, tokens)
            if score > 0:
                scored[sid] = score

        # Sort by score descending (stable), return top results
        ranked = sorted(scored.items(), key=lambda item: -item[1])
        return [seen_statutes[sid] for sid, _ in ranked[:limit]]

    def get_statute(self, statute_id: str) -> Optional[Statute]:
        """Get a statute by ID or citation.

        The key is tried as given first, then lower-cased (citations are
        indexed in lower case). Returns ``None`` when nothing matches.
        """
        exact = self._statutes.get(statute_id)
        if exact or not isinstance(statute_id, str):
            return exact
        return self._statutes.get(statute_id.lower())

    def get_templates(self, doc_type: str = None) -> List[DocumentTemplate]:
        """Get document templates, optionally filtered by type.

        Entries added through ``add_template`` are raw dicts; for those the
        type is read from the ``"doc_type"`` key, falling back to ``"type"``.
        """
        stored = list(self._templates.values())
        if not doc_type:
            return stored

        def type_of(template: Any) -> Any:
            if isinstance(template, dict):
                return template.get("doc_type", template.get("type"))
            return getattr(template, "doc_type", None)

        return [t for t in stored if type_of(t) == doc_type]

    def get_template(self, template_id: str) -> Optional[DocumentTemplate]:
        """Get a specific template by ID."""
        return self._templates.get(template_id)

    def add_statute(self, statute_data: Dict[str, Any]):
        """Add a statute from dictionary data.

        The dictionary itself is stored (not converted), under both its
        lower-cased ``"citation"`` and its ``"id"``; missing keys count as "".
        """
        statute_id = statute_data.get("id", "")
        citation = statute_data.get("citation", "")

        self._statutes[citation.lower()] = statute_data
        self._statutes[statute_id] = statute_data

    def add_template(self, template_data: Dict[str, Any]):
        """Add a template from dictionary data.

        The dictionary itself is stored, keyed by its ``"id"``; when that is
        missing or empty the ``"template_id"`` key (the name used by
        ``DocumentTemplate``) is used instead.
        """
        template_id = template_data.get("id") or template_data.get("template_id", "")
        self._templates[template_id] = template_data

    @property
    def templates(self) -> List[Dict[str, Any]]:
        """Get all templates as a list of dictionaries, one per template id.

        Raw dict entries are returned as stored; ``DocumentTemplate`` entries
        are converted with ``to_dict()``.
        """
        seen = set()
        result = []
        for key, template in self._templates.items():
            is_raw = isinstance(template, dict)
            template_id = template.get("id", key) if is_raw else getattr(template, 'template_id', key)
            if template_id in seen:
                continue
            seen.add(template_id)
            result.append(template if is_raw else template.to_dict())
        return result
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Document Store - Global in-memory storage for documents, drafts, and injected content.
|
|
3
|
+
Docker containers provide per-task isolation, so no session management is needed.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import uuid
|
|
7
|
+
from typing import Dict, List, Optional, Any
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class ConversationMessage:
    """One entry of the conversation history."""

    id: str
    role: str  # user, assistant
    content: str
    timestamp: str
    matter_id: Optional[str] = None
    citations: List[str] = field(default_factory=list)
    tool_calls: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return every field of the message as a plain dictionary.

        Keys equal the field names, in declaration order. The ``citations``
        and ``tool_calls`` lists are returned by reference, not copied.
        """
        field_names = (
            "id",
            "role",
            "content",
            "timestamp",
            "matter_id",
            "citations",
            "tool_calls",
        )
        return {name: getattr(self, name) for name in field_names}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class UploadedDocument:
    """A document that has been uploaded to the store."""

    document_id: str
    filename: str
    doc_type: str
    upload_time: str
    matter_id: Optional[str] = None
    content: str = ""
    summary: str = ""
    extracted_data: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a metadata view of the document as a plain dictionary.

        ``doc_type`` is exported under the key ``"type"``. The document body
        is not included; only its length is reported as ``content_length``
        (0 when the content is empty). ``has_summary`` tells whether the
        summary is non-empty, and ``summary`` is "" when there is none.
        ``extracted_data`` is not exported.
        """
        summary_text = self.summary if self.summary else ""
        body_length = len(self.content) if self.content else 0
        return dict(
            document_id=self.document_id,
            filename=self.filename,
            type=self.doc_type,
            upload_time=self.upload_time,
            matter_id=self.matter_id,
            has_summary=bool(self.summary),
            summary=summary_text,
            content_length=body_length,
        )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class Draft:
    """A draft of a document."""

    draft_id: str
    template_id: str
    matter_id: str
    doc_type: str
    title: str
    content: str
    created_at: str
    updated_at: str
    status: str = "draft"  # draft, reviewed, finalized

    def to_dict(self) -> Dict[str, Any]:
        """Return every field of the draft as a plain dictionary.

        Keys follow the field declaration order and equal the field names,
        except that ``doc_type`` is exported under the key ``"type"``.
        """
        payload: Dict[str, Any] = {}
        for attr in (
            "draft_id",
            "template_id",
            "matter_id",
            "doc_type",
            "title",
            "content",
            "created_at",
            "updated_at",
            "status",
        ):
            key = "type" if attr == "doc_type" else attr
            payload[key] = getattr(self, attr)
        return payload
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class DocumentStore:
    """Global in-memory store for documents, drafts, conversation, and injected content.

    Docker containers provide per-task isolation — each task runs in its own
    container, so all state is naturally isolated without session management.

    Nothing here is persisted; ``clear()`` empties every collection in place.
    """

    def __init__(self):
        self.documents: Dict[str, UploadedDocument] = {}
        self.drafts: Dict[str, Draft] = {}
        self.conversation: List[ConversationMessage] = []
        self.research_history: List[Dict[str, Any]] = []
        self.injected_cases: Dict[str, Any] = {}
        self.injected_suggestions: List[Dict[str, Any]] = []
        self.current_matter_id: Optional[str] = None

    @staticmethod
    def _new_id(taken=()) -> str:
        """Return a random 8-hex-character ID that is not contained in *taken*.

        IDs carry only 32 random bits, so a repeat is possible.  Redrawing
        until the ID is unused keeps a new record from silently replacing an
        existing one that is keyed by the same ID.
        """
        while True:
            candidate = uuid.uuid4().hex[:8]
            if candidate not in taken:
                return candidate

    # Conversation management

    def add_message(self, role: str, content: str,
                    matter_id: Optional[str] = None,
                    citations: Optional[List[str]] = None,
                    tool_calls: Optional[List[Dict[str, Any]]] = None) -> "ConversationMessage":
        """Append a message to the conversation and return it.

        The message receives a fresh short ID and the current local time as
        an ISO-8601 timestamp.  Missing (or empty) ``citations`` and
        ``tool_calls`` are stored as new empty lists.
        """
        msg = ConversationMessage(
            id=self._new_id(),
            role=role,
            content=content,
            timestamp=datetime.now().isoformat(),
            matter_id=matter_id,
            citations=citations or [],
            tool_calls=tool_calls or [],
        )
        self.conversation.append(msg)
        return msg

    def get_conversation(self, limit: Optional[int] = 50) -> List[Dict[str, Any]]:
        """Return the most recent messages as dicts, oldest first.

        Args:
            limit: Maximum number of messages to return.  ``None``, ``0`` or
                a negative value returns the whole conversation.
        """
        # A negative limit must not reach the slice below: ``[-limit:]`` would
        # become ``[k:]`` and drop the *oldest* k messages instead.
        if limit is None or limit <= 0:
            messages = self.conversation
        else:
            messages = self.conversation[-limit:]
        return [m.to_dict() for m in messages]

    # Document management

    def add_document(self, filename: str, doc_type: str, content: str,
                     matter_id: Optional[str] = None) -> "UploadedDocument":
        """Store a new document under a fresh, unused ID and return it."""
        doc = UploadedDocument(
            document_id=self._new_id(self.documents),
            filename=filename,
            doc_type=doc_type,
            upload_time=datetime.now().isoformat(),
            matter_id=matter_id,
            content=content,
        )
        self.documents[doc.document_id] = doc
        return doc

    def get_document(self, document_id: str) -> Optional["UploadedDocument"]:
        """Return the document with the given ID, or ``None`` if unknown."""
        return self.documents.get(document_id)

    def list_documents(self, matter_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return ``to_dict()`` views of stored documents.

        When ``matter_id`` is truthy only documents attached to that matter
        are returned; ``None`` or ``""`` returns all documents.
        """
        docs = list(self.documents.values())
        if matter_id:
            docs = [d for d in docs if d.matter_id == matter_id]
        return [d.to_dict() for d in docs]

    # Draft management

    def add_draft(self, template_id: str, matter_id: str, doc_type: str,
                  title: str, content: str) -> "Draft":
        """Create a draft under a fresh, unused ID and return it.

        ``created_at`` and ``updated_at`` are set to the same timestamp.
        """
        now = datetime.now().isoformat()
        draft = Draft(
            draft_id=self._new_id(self.drafts),
            template_id=template_id,
            matter_id=matter_id,
            doc_type=doc_type,
            title=title,
            content=content,
            created_at=now,
            updated_at=now,
        )
        self.drafts[draft.draft_id] = draft
        return draft

    def get_draft(self, draft_id: str) -> Optional["Draft"]:
        """Return the draft with the given ID, or ``None`` if unknown."""
        return self.drafts.get(draft_id)

    def update_draft(self, draft_id: str, content: Optional[str] = None,
                     status: Optional[str] = None) -> Optional["Draft"]:
        """Update a draft's content and/or status in place.

        Arguments left as ``None`` are not changed.  ``updated_at`` is
        refreshed whenever the draft exists, even if no field was changed.

        Returns:
            The updated draft, or ``None`` when ``draft_id`` is unknown.
        """
        draft = self.drafts.get(draft_id)
        if not draft:
            return draft
        if content is not None:
            draft.content = content
        if status is not None:
            draft.status = status
        draft.updated_at = datetime.now().isoformat()
        return draft

    def list_drafts(self, matter_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return ``to_dict()`` views of stored drafts.

        When ``matter_id`` is truthy only drafts for that matter are
        returned; ``None`` or ``""`` returns all drafts.
        """
        drafts = list(self.drafts.values())
        if matter_id:
            drafts = [d for d in drafts if d.matter_id == matter_id]
        return [d.to_dict() for d in drafts]

    # Injection management

    def inject_case(self, case_id: str, case_data: Dict[str, Any]):
        """Register ``case_data`` under ``case_id``, replacing any previous entry."""
        self.injected_cases[case_id] = case_data

    def get_injected_cases(self) -> Dict[str, Any]:
        """Return the live mapping of injected cases (not a copy)."""
        return self.injected_cases

    # Reset

    def clear(self):
        """Clear all state."""
        self.documents.clear()
        self.drafts.clear()
        self.conversation.clear()
        self.research_history.clear()
        self.injected_cases.clear()
        self.injected_suggestions.clear()
        self.current_matter_id = None
|