decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,612 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Snowflake MCP Server (local, no OAuth/account)
4
+ - Implements the Snowflake-documented tool names exactly:
5
+ - product-search (CORTEX_SEARCH_SERVICE_QUERY)
6
+ - revenue-semantic-view (CORTEX_ANALYST_MESSAGE)
7
+ - sql_exec_tool (SYSTEM_EXECUTE_SQL)
8
+ - agent_1 (CORTEX_AGENT_RUN)
9
+
10
+ Data storage: local PostgreSQL (via sync psycopg to localhost; can be docker-compose).
11
+ product-search supports two backends, selectable via env SEARCH_MODE:
12
+ - "faiss": vector search using FAISS with simple hashing embeddings
13
+ - "simple": n-gram/fuzzy style text similarity without external model
14
+ """
15
+ import os
16
+ import json
17
+ import asyncio
18
+ import time
19
+ from typing import Any, Dict, List, Optional, Tuple
20
+
21
+ import numpy as np
22
+
23
+ try:
24
+ import faiss # type: ignore
25
+ except Exception:
26
+ faiss = None # Will be checked when SEARCH_MODE=faiss
27
+
28
+ import psycopg
29
+ try:
30
+ from openai import OpenAI # Optional OpenAI integration
31
+ except Exception:
32
+ OpenAI = None
33
+
34
+ from fastmcp import FastMCP
35
+
36
# FastMCP application object; tool functions are registered on it with
# the @mcp.tool decorator.
mcp = FastMCP("Snowflake MCP (Local)")

# -----------------------------------------------------------------------------
# Environment configuration
# -----------------------------------------------------------------------------
# NOTE(review): PORT/HOST look like the listen address for the server; the
# code that consumes them is not in this part of the file -- confirm.
PORT = int(os.getenv("PORT", "8842"))
HOST = os.getenv("HOST", "0.0.0.0")

# Local PostgreSQL DSN (e.g. postgresql://snow:snow@127.0.0.1:5452/snowdb)
POSTGRES_DSN = os.getenv(
    "POSTGRES_DSN",
    "postgresql://snow:snow@127.0.0.1:5452/snowdb",
)

# Search behavior
SEARCH_MODE = os.getenv("SEARCH_MODE", "simple").lower()  # "faiss" or "simple"
# Table the search helpers read from; interpolated directly into SQL text.
SEARCH_TABLE = os.getenv("SEARCH_TABLE", "products")
# Comma-separated list of text columns to search over
# (entries are stripped and empty entries are dropped).
SEARCH_COLUMNS = [c.strip() for c in os.getenv("SEARCH_COLUMNS", "name,description").split(",") if c.strip()]

# Optional row limit for indexing (used as the LIMIT when building the FAISS index)
INDEX_ROW_LIMIT = int(os.getenv("INDEX_ROW_LIMIT", "10000"))

# -----------------------------------------------------------------------------
# Globals for connections and search index
# -----------------------------------------------------------------------------
# Lazily created, process-wide Postgres connection (see get_pg).
_pg_conn: Optional["psycopg.Connection"] = None

# FAISS index; stays None until the first FAISS search builds it.
_faiss_index: Optional["faiss.Index"] = None
# Position i holds the row dict whose vector was added to the index at slot i.
_faiss_id_to_row: List[Dict[str, Any]] = []
# Dimensionality of the hashing embedding and of the FAISS index.
_faiss_dim: int = int(os.getenv("FAISS_DIM", "384"))
# When True, vectors are L2-normalized before being added to / searched in the index.
_faiss_norm: bool = True

# Optional: OpenAI configuration
# An empty OPENAI_API_KEY makes get_openai() return None.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "").strip()
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()
# Lazily created client, cached by get_openai().
_openai_client: Optional["OpenAI"] = None
74
+
75
def get_openai() -> Optional["OpenAI"]:
    """Return the shared OpenAI client, creating it on first use.

    Returns:
        The cached client, or None when OPENAI_API_KEY is empty or the
        ``openai`` package could not be imported.
    """
    global _openai_client
    if OpenAI is None or not OPENAI_API_KEY:
        return None
    if _openai_client is not None:
        return _openai_client

    # Only pass base_url when one was configured, so the library default
    # endpoint is used otherwise.
    client_kwargs: Dict[str, Any] = {"api_key": OPENAI_API_KEY}
    if OPENAI_BASE_URL:
        client_kwargs["base_url"] = OPENAI_BASE_URL
    _openai_client = OpenAI(**client_kwargs)
    return _openai_client
85
+
86
def _connect_pg_with_retry(
    dsn: str,
    attempts: int = 3,
    delay: float = 0.75,
) -> "psycopg.Connection":
    """
    Connect to Postgres with a simple fixed-delay retry.

    In per-task eval runs the snowflake Postgres container may still be
    starting up when the MCP server begins handling requests, so the first
    few connection attempts can see "connection refused". Other envs have
    explicit HTTP health waits in env_up.py; for Postgres we handle this at
    the MCP layer with a short retry loop.

    Args:
        dsn: Connection string passed to ``psycopg.connect``.
        attempts: Maximum number of connection attempts; values below 1 are
            treated as 1.
        delay: Seconds to sleep between two consecutive attempts.

    Returns:
        An open connection in autocommit mode.

    Raises:
        Exception: Whatever the final attempt raised, re-raised unchanged.
    """
    total_attempts = max(1, attempts)
    for attempt in range(1, total_attempts + 1):
        try:
            return psycopg.connect(dsn, autocommit=True)
        except Exception:  # connection refused, DNS issues, etc.
            if attempt == total_attempts:
                # Out of retries: surface the error immediately rather than
                # sleeping once more before failing.
                raise
            time.sleep(delay)
    # The loop always returns or raises; this only guards against future edits.
    raise RuntimeError("retry loop exited without a connection")
110
+
111
+
112
def get_pg() -> "psycopg.Connection":
    """Return the process-wide Postgres connection, (re)connecting if needed.

    A new connection is opened when none exists yet or the cached one is
    closed. The schema/seed initialization runs on every new connection, and
    the connection is cached only after that initialization succeeded, so a
    failed initialization is retried on the next call instead of leaving a
    half-prepared connection behind.

    Returns:
        An open, initialized connection.

    Raises:
        Exception: Any error from connecting or from initialization.
    """
    global _pg_conn
    if _pg_conn is not None and not _pg_conn.closed:
        return _pg_conn

    conn = _connect_pg_with_retry(POSTGRES_DSN)
    try:
        _ensure_postgres_initialized(conn)
    except Exception:
        # Do not leak the socket and do not cache an uninitialized connection.
        conn.close()
        raise
    _pg_conn = conn
    return conn
118
+
119
+
120
+ # -----------------------------------------------------------------------------
121
+ # Utility: simple hashing-based embedding for text
122
+ # -----------------------------------------------------------------------------
123
def _hashing_embed(text: str, dim: int) -> np.ndarray:
    """Embed text as an L2-normalized bag of hashed character trigrams.

    The text is lower-cased, every 3-character window is hashed into one of
    ``dim`` buckets, and the bucket counts are normalized to unit length.

    CRC32 is used instead of the built-in ``hash()`` because ``hash()`` of a
    ``str`` is salted per interpreter process, which made the embeddings
    (and therefore search scores) differ from one run to the next.

    Args:
        text: Input text; None or empty is treated as the empty string.
        dim: Length of the output vector.

    Returns:
        A float32 vector of length ``dim``. It is all zeros when the text has
        fewer than three characters.
    """
    import zlib  # function-scope import: only this helper needs it

    text = (text or "").lower()
    vec = np.zeros(dim, dtype=np.float32)
    n = 3
    if len(text) < n:
        return vec
    for start in range(len(text) - n + 1):
        gram = text[start : start + n]
        bucket = zlib.crc32(gram.encode("utf-8")) % dim
        vec[bucket] += 1.0
    # Normalize
    norm = np.linalg.norm(vec)
    if norm > 0:
        vec /= norm
    return vec
139
+
140
+
141
def _concat_columns(row: Dict[str, Any], columns: List[str]) -> str:
    """Join the values of ``columns`` found in ``row`` into one string.

    Columns that are missing from ``row`` or whose value is None are skipped;
    every other value is converted with ``str`` and the pieces are separated
    by single spaces, in the order given by ``columns``.
    """
    looked_up = (row.get(name) for name in columns)
    return " ".join(str(value) for value in looked_up if value is not None)
149
+
150
+
151
def _simple_similarity(a: str, b: str) -> float:
    """Return the Jaccard similarity of the two strings' token sets.

    Both strings are lower-cased and split on whitespace. The result is
    ``|A & B| / |A | B|``, or 0.0 when either string has no tokens.
    """
    left = set(a.lower().split())
    right = set(b.lower().split())
    if not (left and right):
        return 0.0
    shared = left.intersection(right)
    combined = left.union(right)
    # `combined` cannot be empty here because both token sets are non-empty.
    return len(shared) / len(combined)
160
+
161
+
162
+ # -----------------------------------------------------------------------------
163
+ # PostgreSQL initialization and helpers
164
+ # -----------------------------------------------------------------------------
165
def _ensure_postgres_initialized(conn: "psycopg.Connection") -> None:
    """Create the demo tables if missing and seed each one when it is empty.

    Runs, in order: CREATE TABLE IF NOT EXISTS for ``products`` and
    ``revenue``, then for each table a row count followed by a fixed INSERT
    of sample rows when the count is zero.

    Args:
        conn: Open connection on which the statements are executed.
    """
    schema_statements = (
        """
        CREATE TABLE IF NOT EXISTS products (
            id SERIAL PRIMARY KEY,
            name TEXT,
            description TEXT,
            category TEXT
        );
        """,
        """
        CREATE TABLE IF NOT EXISTS revenue (
            id SERIAL PRIMARY KEY,
            date DATE,
            product_id INT,
            product_name TEXT,
            revenue NUMERIC
        );
        """,
    )
    # (count query, seed insert) pairs, processed in this order.
    seed_steps = (
        (
            "SELECT COUNT(*) FROM products;",
            """
            INSERT INTO products (name, description, category) VALUES
            ('Laptop Pro 14', 'High-performance laptop with retina display', 'Electronics'),
            ('Noise Cancelling Headphones', 'Over-ear ANC headphones with long battery life', 'Audio'),
            ('Smartwatch X', 'Fitness tracking and notifications', 'Wearables');
            """,
        ),
        (
            "SELECT COUNT(*) FROM revenue;",
            """
            INSERT INTO revenue (date, product_id, product_name, revenue) VALUES
            ('2025-10-01', 1, 'Laptop Pro 14', 250000),
            ('2025-10-01', 2, 'Noise Cancelling Headphones', 85000),
            ('2025-10-01', 3, 'Smartwatch X', 120000),
            ('2025-10-02', 1, 'Laptop Pro 14', 265000),
            ('2025-10-02', 2, 'Noise Cancelling Headphones', 82000),
            ('2025-10-02', 3, 'Smartwatch X', 110000);
            """,
        ),
    )

    with conn.cursor() as cur:
        for ddl in schema_statements:
            cur.execute(ddl)
        # Seed minimal data if empty
        for count_sql, insert_sql in seed_steps:
            cur.execute(count_sql)
            existing_rows = cur.fetchone()[0]
            if existing_rows == 0:
                cur.execute(insert_sql)
214
+
215
def _get_table_columns(conn: "psycopg.Connection", table: str) -> List[str]:
    """List the column names of ``table`` in the ``public`` schema.

    Args:
        conn: Open connection used for the catalog query.
        table: Table name, passed as a bound parameter.

    Returns:
        Column names ordered by their ordinal position; an empty list when
        the query returns no rows.
    """
    column_query = """
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema = 'public' AND table_name = %s
        ORDER BY ordinal_position;
        """
    with conn.cursor() as cur:
        cur.execute(column_query, (table,))
        records = cur.fetchall()
    return [record[0] for record in records]
228
+
229
+
230
+ # -----------------------------------------------------------------------------
231
+ # FAISS Index building/query
232
+ # -----------------------------------------------------------------------------
233
async def _ensure_faiss_index() -> None:
    """Build the in-memory FAISS index from the search table, once.

    Does nothing when the index already exists. Otherwise it reads up to
    INDEX_ROW_LIMIT rows (``id`` plus SEARCH_COLUMNS) from SEARCH_TABLE,
    embeds the concatenated text columns of each row with the hashing
    embedding, and stores the vectors in an inner-product flat index.

    The row map and the index are built in locals and published to the
    module globals together at the end, so a failure part-way through never
    leaves a populated row map next to a missing index.

    Raises:
        RuntimeError: If the ``faiss`` package is not installed.
        Exception: Any database error from connecting or querying.
    """
    global _faiss_index, _faiss_id_to_row
    if _faiss_index is not None:
        return
    if faiss is None:
        raise RuntimeError("FAISS not installed but SEARCH_MODE=faiss was requested.")

    conn = get_pg()
    # Table/column names come from environment configuration and are
    # interpolated as-is. Building the list with "id" first keeps the
    # statement valid even when SEARCH_COLUMNS is empty.
    select_list = ", ".join(["id", *(c for c in SEARCH_COLUMNS if c)])
    query = f"SELECT {select_list} FROM {SEARCH_TABLE} LIMIT %s;"
    with conn.cursor() as cur:
        cur.execute(query, (INDEX_ROW_LIMIT,))
        rows = cur.fetchall()
        colnames = [desc[0] for desc in cur.description]

    id_to_row: List[Dict[str, Any]] = [dict(zip(colnames, r)) for r in rows]

    index = faiss.IndexFlatIP(_faiss_dim)
    if id_to_row:
        xb = np.stack(
            [
                _hashing_embed(_concat_columns(row, SEARCH_COLUMNS), _faiss_dim)
                for row in id_to_row
            ],
            axis=0,
        )
        if _faiss_norm:
            faiss.normalize_L2(xb)
        index.add(xb)
    # else: leave the index empty

    _faiss_id_to_row = id_to_row
    _faiss_index = index
267
+
268
+
269
async def _faiss_search(query: str, k: int) -> List[Tuple[Dict[str, Any], float]]:
    """Return up to ``k`` indexed rows most similar to ``query``.

    Args:
        query: Free-text query; embedded with the same hashing embedding that
            was used to build the index.
        k: Maximum number of results. Non-positive values yield an empty
            list without touching the index, because the FAISS search call
            requires a positive k.

    Returns:
        ``(row, score)`` pairs in the order FAISS returned them, where
        ``score`` is the inner-product score reported by the index.

    Raises:
        RuntimeError: If the ``faiss`` package is not installed.
    """
    if k <= 0:
        return []
    await _ensure_faiss_index()
    assert _faiss_index is not None
    if not _faiss_id_to_row:
        return []
    qv = _hashing_embed(query, _faiss_dim).reshape(1, -1)
    if _faiss_norm:
        faiss.normalize_L2(qv)
    # Never ask for more neighbours than there are indexed rows.
    scores, idxs = _faiss_index.search(qv, min(k, len(_faiss_id_to_row)))
    out: List[Tuple[Dict[str, Any], float]] = []
    for i, s in zip(idxs[0].tolist(), scores[0].tolist()):
        # Skip ids that do not map to a stored row (e.g. negative padding ids).
        if i < 0 or i >= len(_faiss_id_to_row):
            continue
        out.append((_faiss_id_to_row[i], float(s)))
    return out
284
+
285
+
286
+ # -----------------------------------------------------------------------------
287
+ # Mock product data (when database is not available)
288
+ # -----------------------------------------------------------------------------
289
# Static catalogue scored by _mock_search when the database cannot be used.
# Each entry has id, name, description and price; note that these records
# carry a price and no category, unlike the seeded `products` table.
MOCK_PRODUCTS = [
    {"id": 1, "name": "Laptop Pro 14", "description": "High-performance laptop with 14-inch display", "price": 1299.99},
    {"id": 2, "name": "Noise Cancelling Headphones", "description": "Premium wireless headphones with ANC", "price": 349.99},
    {"id": 3, "name": "Smartwatch X", "description": "Advanced smartwatch with health tracking", "price": 299.99},
    {"id": 4, "name": "Laptop Air 13", "description": "Ultralight laptop for everyday use", "price": 999.99},
    {"id": 5, "name": "Wireless Mouse Pro", "description": "Ergonomic wireless mouse for productivity", "price": 79.99},
    {"id": 6, "name": "Mechanical Keyboard", "description": "RGB mechanical keyboard for gaming", "price": 149.99},
    {"id": 7, "name": "4K Monitor 27", "description": "Ultra HD monitor with HDR support", "price": 449.99},
    {"id": 8, "name": "USB-C Hub", "description": "Multi-port USB-C hub with HDMI", "price": 59.99},
]
299
+
300
+
301
def _mock_search(query: str, limit: int) -> List[Tuple[Dict[str, Any], float]]:
    """Rank the built-in mock products against ``query``.

    Each product's name and description are joined with a space and scored
    with the token-set similarity; products are returned best-first.

    Args:
        query: Free-text query.
        limit: Slice bound applied to the ranked list.

    Returns:
        ``(product, score)`` pairs, highest score first.
    """
    ranked = sorted(
        (
            (
                item,
                _simple_similarity(
                    query,
                    f"{item.get('name', '')} {item.get('description', '')}",
                ),
            )
            for item in MOCK_PRODUCTS
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:limit]
310
+
311
+
312
+ # -----------------------------------------------------------------------------
313
+ # Simple fuzzy search over DB (no FAISS)
314
+ # -----------------------------------------------------------------------------
315
async def _simple_search(
    query: str,
    limit: int,
    filter_obj: Optional[Dict[str, Any]],
) -> List[Tuple[Dict[str, Any], float]]:
    """Score rows of ``SEARCH_TABLE`` against ``query`` without FAISS.

    Args:
        query: Free-text query passed to ``_simple_similarity``.
        limit: Maximum number of (row, score) pairs to return.
        filter_obj: Optional ``{column: value}`` equality filters. Keys that
            are not columns of ``SEARCH_TABLE`` are ignored; values are bound
            as query parameters.

    Returns:
        Up to ``limit`` (row-dict, score) pairs, highest score first. The
        candidate pool is the first ``max(limit * 5, 50)`` rows the database
        returns (the query has no ORDER BY), not the whole table.

    On any failure the function logs a warning and falls back to
    ``_mock_search``; note that the mock path does not apply ``filter_obj``.
    """
    conn = None
    try:
        conn = get_pg()
        table_cols = _get_table_columns(conn, SEARCH_TABLE)

        # Only keys that are real table columns are interpolated into the SQL
        # text; the values themselves always go through parameter binding.
        clauses: List[str] = []
        params: List[Any] = []
        for key, value in (filter_obj or {}).items():
            if key in table_cols:
                clauses.append(f"{key} = %s")
                params.append(value)
        where = f" WHERE {' AND '.join(clauses)}" if clauses else ""

        # Build the select list from "id" plus the configured columns. Doing
        # it as a list keeps the statement valid when SEARCH_COLUMNS is empty
        # (the old f-string produced "SELECT id,  FROM ...") and avoids
        # selecting the same column twice.
        select_cols: List[str] = ["id"]
        select_cols.extend(c for c in SEARCH_COLUMNS if c and c not in select_cols)

        sql = f"SELECT {', '.join(select_cols)} FROM {SEARCH_TABLE}{where} LIMIT %s;"
        params.append(max(limit * 5, 50))
        with conn.cursor() as cur:
            cur.execute(sql, params)
            rows = cur.fetchall()
            colnames = [desc[0] for desc in cur.description]

        scored: List[Tuple[Dict[str, Any], float]] = []
        for r in rows:
            row = dict(zip(colnames, r))
            text = _concat_columns(row, SEARCH_COLUMNS)
            scored.append((row, _simple_similarity(query, text)))

        scored.sort(key=lambda x: x[1], reverse=True)
        return scored[:limit]
    except Exception as exc:
        import logging

        # Best-effort fallback is deliberate, but it should not be invisible:
        # callers would otherwise receive mock products with no hint why.
        logging.getLogger(__name__).warning(
            "simple search failed, serving mock data instead: %s", exc
        )
        if conn is not None:
            # A failed statement can leave the connection's transaction
            # aborted; roll back so later queries on it are not rejected.
            try:
                conn.rollback()
            except Exception:
                pass
        return _mock_search(query, limit)
352
+
353
+
354
+ # -----------------------------------------------------------------------------
355
+ # Tools
356
+ # -----------------------------------------------------------------------------
357
@mcp.tool(name="product-search")
async def product_search(
    query: str,
    columns: Optional[List[str]] = None,
    filter: Optional[Dict[str, Any]] = None,
    limit: int = 10,
    access_token: Optional[str] = None,
) -> str:
    """
    Search tool (CORTEX_SEARCH_SERVICE_QUERY equivalent).
    Args match Snowflake docs: query, columns, filter, limit.
    """
    # NOTE(review): the docstring above is left verbatim because it is
    # presumably surfaced to clients as the tool description - confirm.
    #
    # Returns a JSON string: {"results": [...], "request_id": ..., "mode": ...}
    # on success, or {"error": "<message>"} when anything raises.
    # `filter` is only forwarded to the simple search; `access_token` is unused.
    try:
        wanted = columns or []
        capped = max(1, min(200, int(limit)))  # clamp to the range 1..200

        if SEARCH_MODE == "faiss":
            hits = await _faiss_search(query, capped)
        else:
            hits = await _simple_search(query, capped, filter)

        payload = []
        for row, score in hits:
            if wanted:
                # Project onto the requested columns, skipping unknown names.
                entry = {name: row[name] for name in wanted if name in row}
            else:
                # No projection requested: return every field of the row.
                entry = dict(row)
            entry["_score"] = score
            payload.append(entry)

        response = {
            "results": payload,
            "request_id": f"req-{abs(hash(query)) % (10**9)}",
            "mode": SEARCH_MODE,
        }
        # default=str stringifies values json cannot encode natively.
        return json.dumps(response, ensure_ascii=False, indent=2, default=str)
    except Exception as e:
        return json.dumps({"error": str(e)})
403
+
404
+
405
@mcp.tool(name="revenue-semantic-view")
async def revenue_semantic_view(message: str, access_token: Optional[str] = None) -> str:
    """
    Analyst tool (CORTEX_ANALYST_MESSAGE equivalent).
    We return a text response that includes a suggested SQL for the local Postgres.
    If OpenAI is configured via OPENAI_API_KEY, we generate AI suggestions; otherwise fallback.
    """
    # NOTE(review): the docstring above is left verbatim because it is
    # presumably surfaced to clients as the tool description - confirm.
    #
    # Returns a JSON string {"content": [{"type": "text", "text": ...}]}, or
    # {"error": "<message>"} if something unexpected raises. The SQL is only
    # suggested, never executed here. `access_token` is unused.
    try:
        client = get_openai()
        if client is not None:
            # Schema context is a nice-to-have for the prompt, not a
            # requirement: the prompt already prints "unknown" for missing
            # column lists. A database failure here must therefore not abort
            # the tool, since neither the LLM path nor the deterministic
            # fallback below needs the database.
            revenue_cols: Any = []
            search_cols: Any = []
            try:
                conn = get_pg()
                revenue_cols = _get_table_columns(conn, "revenue")
                search_cols = _get_table_columns(conn, SEARCH_TABLE)
            except Exception:
                # Keep whatever was collected so far; the rest stays empty.
                pass

            sys_prompt = (
                "You are an analyst assistant for a local Postgres database.\n"
                "Your job is to propose a helpful SQL for the user's request.\n"
                f"Tables available include revenue({', '.join(revenue_cols) or 'unknown'}) "
                f"and {SEARCH_TABLE}({', '.join(search_cols) or 'unknown'}).\n"
                "Constraints:\n"
                "- Only produce a single SELECT/WITH query that runs on Postgres.\n"
                "- Limit output rows to ~50.\n"
                "- If user asks for top revenue, aggregate and order by total revenue desc.\n"
            )
            user_prompt = (
                f"User message:\n{message or ''}\n\n"
                "Return a concise explanation (2-3 lines) and a SQL fenced in triple backticks."
            )
            try:
                resp = client.chat.completions.create(
                    model=OPENAI_MODEL,
                    messages=[
                        {"role": "system", "content": sys_prompt},
                        {"role": "user", "content": user_prompt},
                    ],
                    temperature=0.2,
                    max_tokens=700,
                )
                ai_text = resp.choices[0].message.content or ""
                return json.dumps({"content": [{"type": "text", "text": ai_text}]}, indent=2)
            except Exception:
                # Fall through to deterministic suggestion
                pass

        # Fallback deterministic suggestion, chosen by keyword match.
        msg = (message or "").lower()
        if "top" in msg and "revenue" in msg:
            sql = (
                'SELECT product_id, product_name, SUM(revenue) AS total_revenue\n'
                'FROM "revenue"\n'
                "GROUP BY product_id, product_name\n"
                "ORDER BY total_revenue DESC\n"
                "LIMIT 10;"
            )
        else:
            sql = (
                'SELECT date, product_id, product_name, revenue\n'
                'FROM "revenue"\n'
                "ORDER BY date DESC\n"
                "LIMIT 20;"
            )
        text = (
            "CORTEX Analyst (local simulation)\n"
            "Proposed SQL based on your message:\n\n"
            f"{sql}\n\n"
            "Note: This is a textual response and does not execute the SQL."
        )
        return json.dumps({"content": [{"type": "text", "text": text}]}, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})
475
+
476
+
477
@mcp.tool(name="sql_exec_tool")
async def sql_exec_tool(sql: str, access_token: Optional[str] = None) -> str:
    """
    SQL execution tool (SYSTEM_EXECUTE_SQL equivalent).
    Read-only guard: only allow SELECTs.
    """
    # NOTE(review): the docstring above is left verbatim because it is
    # presumably surfaced to clients as the tool description - confirm.
    #
    # Returns a JSON string {"rows": [...], "row_count": N} on success, or
    # {"error": "<message>"} for rejected input and for any failure.
    # `access_token` is unused.
    conn = None
    try:
        statement = (sql or "").strip()
        if not statement:
            return json.dumps({"error": "sql is required"})

        first = statement.split(None, 1)[0].upper()
        if first not in {"SELECT", "WITH"}:
            return json.dumps({"error": "Only read-only SELECT queries are allowed."})

        # Checking the first keyword alone lets stacked statements through
        # ("SELECT 1; DROP TABLE products"). Reject any ';' that is not a
        # trailing terminator. This is deliberately conservative: a ';' inside
        # a string literal is rejected as well.
        if ";" in statement.rstrip("; \t\r\n"):
            return json.dumps({"error": "Only a single SELECT statement is allowed."})

        # NOTE(review): this remains a textual guard. A data-modifying CTE
        # ("WITH d AS (DELETE ... RETURNING *) SELECT ...") still starts with
        # WITH; connecting with a read-only database role would be the robust
        # enforcement.
        conn = get_pg()
        with conn.cursor() as cur:
            cur.execute(sql)
            rows = cur.fetchall()
            colnames = [desc[0] for desc in cur.description]
        out = [dict(zip(colnames, r)) for r in rows]
        return json.dumps({"rows": out, "row_count": len(out)}, indent=2, default=str)
    except Exception as e:
        if conn is not None:
            # A failed statement can leave the connection's transaction
            # aborted; roll back so the next call is not rejected because of
            # this one. Errors from the rollback itself are not interesting.
            try:
                conn.rollback()
            except Exception:
                pass
        return json.dumps({"error": str(e)})
499
+
500
+
501
@mcp.tool(name="agent_1")
async def agent_1(message: str, access_token: Optional[str] = None) -> str:
    """
    Agent tool (CORTEX_AGENT_RUN equivalent).
    If OpenAI is configured, use it to suggest the next tool with brief reasoning.
    Otherwise return a simple heuristic suggestion.
    """
    # NOTE(review): the docstring above is left verbatim because it is
    # presumably surfaced to clients as the tool description - confirm.
    #
    # Returns a JSON string {"content": [{"type": "text", "text": ...}]}, or
    # {"error": "<message>"} if something unexpected raises.
    # `access_token` is unused.
    try:
        llm = get_openai()
        if llm is not None:
            routing_instructions = (
                "You are a tool-routing assistant in an MCP server.\n"
                "Available tools:\n"
                "- product-search: text search over products (FAISS/simple)\n"
                "- sql_exec_tool: run read-only SELECT/WITH SQL\n"
                "- revenue-semantic-view: produce helpful SQL suggestion text\n"
                "Choose a single best next tool and give 1-2 lines of reasoning. "
                "Respond with a short JSON first, then a brief explanation."
            )
            request_text = (
                f"User message:\n{message or ''}\n\n"
                'Return JSON like {"suggested_tool":"product-search"} followed by a brief explanation.'
            )
            conversation = [
                {"role": "system", "content": routing_instructions},
                {"role": "user", "content": request_text},
            ]
            try:
                completion = llm.chat.completions.create(
                    model=OPENAI_MODEL,
                    messages=conversation,
                    temperature=0.2,
                    max_tokens=300,
                )
                reply = completion.choices[0].message.content or ""
                return json.dumps({"content": [{"type": "text", "text": reply}]}, indent=2)
            except Exception:
                # Any LLM failure drops through to the keyword heuristic.
                pass

        # Heuristic routing: mention of "sql" -> SQL tool, else product search.
        mentions_sql = "sql" in (message or "").lower()
        next_tool = "sql_exec_tool" if mentions_sql else "product-search"
        summary = (
            "Agent V2 (local simulation)\n"
            f"Received: {message}\n"
            f"Suggested next tool: {next_tool}"
        )
        return json.dumps({"content": [{"type": "text", "text": summary}]}, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})
548
+
549
+
550
+ # -----------------------------------------------------------------------------
551
+ # Internal reset helper (used by env reset flows, not exposed as an MCP tool)
552
+ # -----------------------------------------------------------------------------
553
async def admin_reset(access_token: Optional[str] = None) -> str:
    """Reset the local Snowflake-simulation database to its default seed data.

    Empties the ``revenue`` and ``products`` tables, re-inserts the default
    rows, commits, and drops the cached FAISS index so it is rebuilt from the
    fresh data.

    Args:
        access_token: Accepted for signature parity with the tools; unused.

    Returns:
        JSON string ``{"ok": true, "message": ...}`` on success, or
        ``{"error": "<message>"}`` on failure (the transaction is rolled
        back and the FAISS cache is left untouched).
    """
    global _faiss_index, _faiss_id_to_row

    conn = None
    try:
        conn = get_pg()
        with conn.cursor() as cur:
            # Clear existing data. TRUNCATE ... RESTART IDENTITY is used
            # instead of DELETE so that auto-generated ids start again at 1:
            # the products insert below relies on generated ids while the
            # revenue seed hard-codes product_id 1..3, so without a restart
            # the two tables drift apart after the first reset.
            # NOTE(review): assumes products.id is sequence-backed - confirm
            # against the schema.
            cur.execute("TRUNCATE TABLE revenue, products RESTART IDENTITY;")

            # Re-seed with default data
            cur.execute(
                """
                INSERT INTO products (name, description, category) VALUES
                ('Laptop Pro 14', 'High-performance laptop with retina display', 'Electronics'),
                ('Noise Cancelling Headphones', 'Over-ear ANC headphones with long battery life', 'Audio'),
                ('Smartwatch X', 'Fitness tracking and notifications', 'Wearables');
                """
            )
            cur.execute(
                """
                INSERT INTO revenue (date, product_id, product_name, revenue) VALUES
                ('2025-10-01', 1, 'Laptop Pro 14', 250000),
                ('2025-10-01', 2, 'Noise Cancelling Headphones', 85000),
                ('2025-10-01', 3, 'Smartwatch X', 120000),
                ('2025-10-02', 1, 'Laptop Pro 14', 265000),
                ('2025-10-02', 2, 'Noise Cancelling Headphones', 82000),
                ('2025-10-02', 3, 'Smartwatch X', 110000);
                """
            )
        # Persist the reset explicitly rather than relying on the connection
        # being in autocommit mode.
        conn.commit()

        # Reset FAISS index
        _faiss_index = None
        _faiss_id_to_row = []

        return json.dumps({
            "ok": True,
            "message": "Snowflake environment reset"
        })
    except Exception as e:
        if conn is not None:
            # Undo a half-applied reset and leave the connection usable.
            try:
                conn.rollback()
            except Exception:
                pass
        return json.dumps({"error": str(e)})
599
+
600
+
601
def main() -> None:
    """Print the effective configuration and start the MCP server over HTTP.

    The Postgres DSN is printed with its password masked so credentials do
    not end up in console output or captured logs.
    """
    import re

    # Mask the password in both DSN spellings:
    #   URI form:       postgresql://user:secret@host/db
    #   key/value form: host=... password=secret (also "?password=" in a URI)
    safe_dsn = re.sub(r"(://[^:/?#@\s]*:)[^@\s]*@", r"\1***@", str(POSTGRES_DSN))
    safe_dsn = re.sub(
        r"(\bpassword\s*=\s*)('(?:[^'\\]|\\.)*'|[^\s&]+)",
        r"\1***",
        safe_dsn,
        flags=re.IGNORECASE,
    )

    print(f"Starting Snowflake MCP (Local) on http://{HOST}:{PORT}/mcp")
    print(f"- POSTGRES_DSN: {safe_dsn}")
    print(f"- SEARCH_MODE : {SEARCH_MODE} (faiss available: {faiss is not None})")
    print(f"- SEARCH_TABLE: {SEARCH_TABLE}, columns={SEARCH_COLUMNS}")
    mcp.run(transport="http", host=HOST, port=PORT)


if __name__ == "__main__":
    main()
611
+
612
+