agentprobe-framework 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (292) hide show
  1. agentprobe_framework-1.0.0/.env.example +9 -0
  2. agentprobe_framework-1.0.0/.github/ISSUE_TEMPLATE/bug_report.md +26 -0
  3. agentprobe_framework-1.0.0/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
  4. agentprobe_framework-1.0.0/.github/PULL_REQUEST_TEMPLATE.md +22 -0
  5. agentprobe_framework-1.0.0/.github/dependabot.yml +21 -0
  6. agentprobe_framework-1.0.0/.github/workflows/ci.yml +72 -0
  7. agentprobe_framework-1.0.0/.github/workflows/docs.yml +23 -0
  8. agentprobe_framework-1.0.0/.github/workflows/release.yml +42 -0
  9. agentprobe_framework-1.0.0/.gitignore +42 -0
  10. agentprobe_framework-1.0.0/.pre-commit-config.yaml +22 -0
  11. agentprobe_framework-1.0.0/CONTRIBUTING.md +181 -0
  12. agentprobe_framework-1.0.0/Dockerfile +33 -0
  13. agentprobe_framework-1.0.0/LICENSE +191 -0
  14. agentprobe_framework-1.0.0/Makefile +43 -0
  15. agentprobe_framework-1.0.0/PKG-INFO +447 -0
  16. agentprobe_framework-1.0.0/README.md +397 -0
  17. agentprobe_framework-1.0.0/agentprobe.yaml.example +94 -0
  18. agentprobe_framework-1.0.0/docker-compose.yml +30 -0
  19. agentprobe_framework-1.0.0/docs/changelog.md +106 -0
  20. agentprobe_framework-1.0.0/docs/getting-started/configuration.md +207 -0
  21. agentprobe_framework-1.0.0/docs/getting-started/installation.md +106 -0
  22. agentprobe_framework-1.0.0/docs/getting-started/quickstart.md +123 -0
  23. agentprobe_framework-1.0.0/docs/guides/ci-cd-integration.md +121 -0
  24. agentprobe_framework-1.0.0/docs/guides/cost-management.md +104 -0
  25. agentprobe_framework-1.0.0/docs/guides/evaluators.md +158 -0
  26. agentprobe_framework-1.0.0/docs/guides/multi-agent-testing.md +95 -0
  27. agentprobe_framework-1.0.0/docs/guides/regression-testing.md +93 -0
  28. agentprobe_framework-1.0.0/docs/guides/safety-testing.md +122 -0
  29. agentprobe_framework-1.0.0/docs/guides/writing-tests.md +214 -0
  30. agentprobe_framework-1.0.0/docs/index.md +53 -0
  31. agentprobe_framework-1.0.0/docs/plugins/creating-plugins.md +158 -0
  32. agentprobe_framework-1.0.0/docs/plugins/overview.md +91 -0
  33. agentprobe_framework-1.0.0/docs/plugins/plugin-types.md +196 -0
  34. agentprobe_framework-1.0.0/docs/reference/adapters.md +116 -0
  35. agentprobe_framework-1.0.0/docs/reference/api/adapters.md +23 -0
  36. agentprobe_framework-1.0.0/docs/reference/api/core.md +47 -0
  37. agentprobe_framework-1.0.0/docs/reference/api/cost.md +11 -0
  38. agentprobe_framework-1.0.0/docs/reference/api/eval.md +27 -0
  39. agentprobe_framework-1.0.0/docs/reference/api/metrics.md +19 -0
  40. agentprobe_framework-1.0.0/docs/reference/api/plugins.md +20 -0
  41. agentprobe_framework-1.0.0/docs/reference/api/regression.md +11 -0
  42. agentprobe_framework-1.0.0/docs/reference/api/reporting.md +27 -0
  43. agentprobe_framework-1.0.0/docs/reference/api/safety.md +7 -0
  44. agentprobe_framework-1.0.0/docs/reference/api/security.md +7 -0
  45. agentprobe_framework-1.0.0/docs/reference/api/storage.md +15 -0
  46. agentprobe_framework-1.0.0/docs/reference/api/testing.md +127 -0
  47. agentprobe_framework-1.0.0/docs/reference/api/trace.md +15 -0
  48. agentprobe_framework-1.0.0/docs/reference/cli.md +274 -0
  49. agentprobe_framework-1.0.0/docs/reference/config.md +153 -0
  50. agentprobe_framework-1.0.0/examples/.gitkeep +0 -0
  51. agentprobe_framework-1.0.0/examples/basic_test.py +101 -0
  52. agentprobe_framework-1.0.0/examples/conversation_testing.py +86 -0
  53. agentprobe_framework-1.0.0/examples/cost_management.py +89 -0
  54. agentprobe_framework-1.0.0/examples/custom_adapter.py +126 -0
  55. agentprobe_framework-1.0.0/examples/dashboard_client.py +73 -0
  56. agentprobe_framework-1.0.0/examples/metrics_trending.py +108 -0
  57. agentprobe_framework-1.0.0/examples/plugin_creation.py +82 -0
  58. agentprobe_framework-1.0.0/examples/regression_testing.py +127 -0
  59. agentprobe_framework-1.0.0/examples/safety_testing.py +102 -0
  60. agentprobe_framework-1.0.0/examples/test_real_agent.py +196 -0
  61. agentprobe_framework-1.0.0/mkdocs.yml +100 -0
  62. agentprobe_framework-1.0.0/pyproject.toml +198 -0
  63. agentprobe_framework-1.0.0/src/agentprobe/__init__.py +49 -0
  64. agentprobe_framework-1.0.0/src/agentprobe/adapters/__init__.py +6 -0
  65. agentprobe_framework-1.0.0/src/agentprobe/adapters/autogen.py +187 -0
  66. agentprobe_framework-1.0.0/src/agentprobe/adapters/base.py +166 -0
  67. agentprobe_framework-1.0.0/src/agentprobe/adapters/crewai.py +134 -0
  68. agentprobe_framework-1.0.0/src/agentprobe/adapters/langchain.py +257 -0
  69. agentprobe_framework-1.0.0/src/agentprobe/adapters/mcp.py +189 -0
  70. agentprobe_framework-1.0.0/src/agentprobe/cli/__init__.py +5 -0
  71. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/__init__.py +1 -0
  72. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/baseline.py +78 -0
  73. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/cost.py +80 -0
  74. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/dashboard.py +30 -0
  75. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/metrics.py +60 -0
  76. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/safety.py +70 -0
  77. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/snapshot.py +87 -0
  78. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/test.py +60 -0
  79. agentprobe_framework-1.0.0/src/agentprobe/cli/commands/trace.py +92 -0
  80. agentprobe_framework-1.0.0/src/agentprobe/cli/main.py +134 -0
  81. agentprobe_framework-1.0.0/src/agentprobe/core/__init__.py +47 -0
  82. agentprobe_framework-1.0.0/src/agentprobe/core/assertions.py +307 -0
  83. agentprobe_framework-1.0.0/src/agentprobe/core/chaos.py +143 -0
  84. agentprobe_framework-1.0.0/src/agentprobe/core/config.py +334 -0
  85. agentprobe_framework-1.0.0/src/agentprobe/core/conversation.py +158 -0
  86. agentprobe_framework-1.0.0/src/agentprobe/core/discovery.py +110 -0
  87. agentprobe_framework-1.0.0/src/agentprobe/core/exceptions.py +135 -0
  88. agentprobe_framework-1.0.0/src/agentprobe/core/models.py +813 -0
  89. agentprobe_framework-1.0.0/src/agentprobe/core/protocols.py +219 -0
  90. agentprobe_framework-1.0.0/src/agentprobe/core/runner.py +214 -0
  91. agentprobe_framework-1.0.0/src/agentprobe/core/scenario.py +95 -0
  92. agentprobe_framework-1.0.0/src/agentprobe/core/snapshot.py +239 -0
  93. agentprobe_framework-1.0.0/src/agentprobe/cost/__init__.py +5 -0
  94. agentprobe_framework-1.0.0/src/agentprobe/cost/budget.py +103 -0
  95. agentprobe_framework-1.0.0/src/agentprobe/cost/calculator.py +190 -0
  96. agentprobe_framework-1.0.0/src/agentprobe/cost/pricing_data/.gitkeep +0 -0
  97. agentprobe_framework-1.0.0/src/agentprobe/cost/pricing_data/anthropic.yaml +15 -0
  98. agentprobe_framework-1.0.0/src/agentprobe/cost/pricing_data/cohere.yaml +12 -0
  99. agentprobe_framework-1.0.0/src/agentprobe/cost/pricing_data/google.yaml +12 -0
  100. agentprobe_framework-1.0.0/src/agentprobe/cost/pricing_data/mistral.yaml +12 -0
  101. agentprobe_framework-1.0.0/src/agentprobe/cost/pricing_data/openai.yaml +12 -0
  102. agentprobe_framework-1.0.0/src/agentprobe/dashboard/__init__.py +9 -0
  103. agentprobe_framework-1.0.0/src/agentprobe/dashboard/app.py +51 -0
  104. agentprobe_framework-1.0.0/src/agentprobe/dashboard/dependencies.py +17 -0
  105. agentprobe_framework-1.0.0/src/agentprobe/dashboard/routes/__init__.py +1 -0
  106. agentprobe_framework-1.0.0/src/agentprobe/dashboard/routes/health.py +16 -0
  107. agentprobe_framework-1.0.0/src/agentprobe/dashboard/routes/metrics.py +51 -0
  108. agentprobe_framework-1.0.0/src/agentprobe/dashboard/routes/results.py +52 -0
  109. agentprobe_framework-1.0.0/src/agentprobe/dashboard/routes/traces.py +52 -0
  110. agentprobe_framework-1.0.0/src/agentprobe/dashboard/schemas.py +41 -0
  111. agentprobe_framework-1.0.0/src/agentprobe/eval/__init__.py +14 -0
  112. agentprobe_framework-1.0.0/src/agentprobe/eval/base.py +100 -0
  113. agentprobe_framework-1.0.0/src/agentprobe/eval/embedding.py +180 -0
  114. agentprobe_framework-1.0.0/src/agentprobe/eval/llm_judge.py +262 -0
  115. agentprobe_framework-1.0.0/src/agentprobe/eval/rubrics/.gitkeep +0 -0
  116. agentprobe_framework-1.0.0/src/agentprobe/eval/rubrics/coding_agent.yaml +34 -0
  117. agentprobe_framework-1.0.0/src/agentprobe/eval/rubrics/customer_support.yaml +34 -0
  118. agentprobe_framework-1.0.0/src/agentprobe/eval/rubrics/default.yaml +34 -0
  119. agentprobe_framework-1.0.0/src/agentprobe/eval/rubrics/research_agent.yaml +34 -0
  120. agentprobe_framework-1.0.0/src/agentprobe/eval/rules.py +178 -0
  121. agentprobe_framework-1.0.0/src/agentprobe/eval/statistical.py +195 -0
  122. agentprobe_framework-1.0.0/src/agentprobe/eval/trace_compare.py +173 -0
  123. agentprobe_framework-1.0.0/src/agentprobe/metrics/__init__.py +19 -0
  124. agentprobe_framework-1.0.0/src/agentprobe/metrics/aggregator.py +128 -0
  125. agentprobe_framework-1.0.0/src/agentprobe/metrics/collector.py +148 -0
  126. agentprobe_framework-1.0.0/src/agentprobe/metrics/definitions.py +75 -0
  127. agentprobe_framework-1.0.0/src/agentprobe/metrics/trend.py +111 -0
  128. agentprobe_framework-1.0.0/src/agentprobe/plugins/__init__.py +23 -0
  129. agentprobe_framework-1.0.0/src/agentprobe/plugins/base.py +179 -0
  130. agentprobe_framework-1.0.0/src/agentprobe/plugins/loader.py +182 -0
  131. agentprobe_framework-1.0.0/src/agentprobe/plugins/manager.py +219 -0
  132. agentprobe_framework-1.0.0/src/agentprobe/plugins/registry.py +94 -0
  133. agentprobe_framework-1.0.0/src/agentprobe/pytest_plugin.py +250 -0
  134. agentprobe_framework-1.0.0/src/agentprobe/regression/__init__.py +1 -0
  135. agentprobe_framework-1.0.0/src/agentprobe/regression/baseline.py +106 -0
  136. agentprobe_framework-1.0.0/src/agentprobe/regression/detector.py +112 -0
  137. agentprobe_framework-1.0.0/src/agentprobe/reporting/__init__.py +6 -0
  138. agentprobe_framework-1.0.0/src/agentprobe/reporting/csv_reporter.py +94 -0
  139. agentprobe_framework-1.0.0/src/agentprobe/reporting/html.py +146 -0
  140. agentprobe_framework-1.0.0/src/agentprobe/reporting/json_reporter.py +52 -0
  141. agentprobe_framework-1.0.0/src/agentprobe/reporting/junit.py +113 -0
  142. agentprobe_framework-1.0.0/src/agentprobe/reporting/markdown.py +114 -0
  143. agentprobe_framework-1.0.0/src/agentprobe/reporting/terminal.py +119 -0
  144. agentprobe_framework-1.0.0/src/agentprobe/safety/__init__.py +5 -0
  145. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/.gitkeep +0 -0
  146. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/data_leakage.yaml +55 -0
  147. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/hallucination.yaml +71 -0
  148. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/jailbreak.yaml +64 -0
  149. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/prompt_injection.yaml +130 -0
  150. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/role_confusion.yaml +39 -0
  151. agentprobe_framework-1.0.0/src/agentprobe/safety/payloads/tool_abuse.yaml +48 -0
  152. agentprobe_framework-1.0.0/src/agentprobe/safety/scanner.py +182 -0
  153. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/.gitkeep +0 -0
  154. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/__init__.py +1 -0
  155. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/data_leakage.py +85 -0
  156. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/hallucination.py +96 -0
  157. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/jailbreak.py +88 -0
  158. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/prompt_injection.py +126 -0
  159. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/role_confusion.py +88 -0
  160. agentprobe_framework-1.0.0/src/agentprobe/safety/suites/tool_abuse.py +108 -0
  161. agentprobe_framework-1.0.0/src/agentprobe/security/__init__.py +7 -0
  162. agentprobe_framework-1.0.0/src/agentprobe/security/audit.py +88 -0
  163. agentprobe_framework-1.0.0/src/agentprobe/security/encryption.py +43 -0
  164. agentprobe_framework-1.0.0/src/agentprobe/security/pii.py +140 -0
  165. agentprobe_framework-1.0.0/src/agentprobe/storage/__init__.py +5 -0
  166. agentprobe_framework-1.0.0/src/agentprobe/storage/migrations.py +129 -0
  167. agentprobe_framework-1.0.0/src/agentprobe/storage/postgres.py +330 -0
  168. agentprobe_framework-1.0.0/src/agentprobe/storage/sqlite.py +378 -0
  169. agentprobe_framework-1.0.0/src/agentprobe/testing.py +285 -0
  170. agentprobe_framework-1.0.0/src/agentprobe/trace/__init__.py +6 -0
  171. agentprobe_framework-1.0.0/src/agentprobe/trace/diff.py +135 -0
  172. agentprobe_framework-1.0.0/src/agentprobe/trace/recorder.py +243 -0
  173. agentprobe_framework-1.0.0/src/agentprobe/trace/replay.py +155 -0
  174. agentprobe_framework-1.0.0/src/agentprobe/trace/time_travel.py +128 -0
  175. agentprobe_framework-1.0.0/tests/__init__.py +1 -0
  176. agentprobe_framework-1.0.0/tests/conftest.py +31 -0
  177. agentprobe_framework-1.0.0/tests/e2e/__init__.py +0 -0
  178. agentprobe_framework-1.0.0/tests/e2e/conftest.py +74 -0
  179. agentprobe_framework-1.0.0/tests/e2e/test_autogen_e2e.py +113 -0
  180. agentprobe_framework-1.0.0/tests/e2e/test_cost_live.py +109 -0
  181. agentprobe_framework-1.0.0/tests/e2e/test_crewai_e2e.py +109 -0
  182. agentprobe_framework-1.0.0/tests/e2e/test_langchain_e2e.py +120 -0
  183. agentprobe_framework-1.0.0/tests/e2e/test_mcp_e2e.py +160 -0
  184. agentprobe_framework-1.0.0/tests/e2e/test_system_e2e.py +201 -0
  185. agentprobe_framework-1.0.0/tests/fixtures/__init__.py +1 -0
  186. agentprobe_framework-1.0.0/tests/fixtures/agents.py +85 -0
  187. agentprobe_framework-1.0.0/tests/fixtures/regression.py +50 -0
  188. agentprobe_framework-1.0.0/tests/fixtures/results.py +176 -0
  189. agentprobe_framework-1.0.0/tests/fixtures/safety.py +43 -0
  190. agentprobe_framework-1.0.0/tests/fixtures/sample_data/.gitkeep +0 -0
  191. agentprobe_framework-1.0.0/tests/fixtures/traces.py +335 -0
  192. agentprobe_framework-1.0.0/tests/integration/__init__.py +1 -0
  193. agentprobe_framework-1.0.0/tests/integration/conftest.py +38 -0
  194. agentprobe_framework-1.0.0/tests/integration/test_cli_e2e.py +104 -0
  195. agentprobe_framework-1.0.0/tests/integration/test_cost_budget_flow.py +159 -0
  196. agentprobe_framework-1.0.0/tests/integration/test_metrics_flow.py +159 -0
  197. agentprobe_framework-1.0.0/tests/integration/test_plugin_lifecycle.py +173 -0
  198. agentprobe_framework-1.0.0/tests/integration/test_regression_flow.py +136 -0
  199. agentprobe_framework-1.0.0/tests/integration/test_runner_pipeline.py +121 -0
  200. agentprobe_framework-1.0.0/tests/integration/test_safety_flow.py +133 -0
  201. agentprobe_framework-1.0.0/tests/integration/test_trace_pipeline.py +162 -0
  202. agentprobe_framework-1.0.0/tests/unit/__init__.py +1 -0
  203. agentprobe_framework-1.0.0/tests/unit/adapters/__init__.py +1 -0
  204. agentprobe_framework-1.0.0/tests/unit/adapters/test_autogen.py +166 -0
  205. agentprobe_framework-1.0.0/tests/unit/adapters/test_base.py +123 -0
  206. agentprobe_framework-1.0.0/tests/unit/adapters/test_crewai.py +136 -0
  207. agentprobe_framework-1.0.0/tests/unit/adapters/test_langchain.py +143 -0
  208. agentprobe_framework-1.0.0/tests/unit/adapters/test_mcp.py +186 -0
  209. agentprobe_framework-1.0.0/tests/unit/cli/__init__.py +1 -0
  210. agentprobe_framework-1.0.0/tests/unit/cli/test_baseline_cmd.py +60 -0
  211. agentprobe_framework-1.0.0/tests/unit/cli/test_cost_cmd.py +50 -0
  212. agentprobe_framework-1.0.0/tests/unit/cli/test_dashboard_cmd.py +70 -0
  213. agentprobe_framework-1.0.0/tests/unit/cli/test_main.py +98 -0
  214. agentprobe_framework-1.0.0/tests/unit/cli/test_metrics_cmd.py +51 -0
  215. agentprobe_framework-1.0.0/tests/unit/cli/test_safety_cmd.py +38 -0
  216. agentprobe_framework-1.0.0/tests/unit/cli/test_snapshot_cmd.py +76 -0
  217. agentprobe_framework-1.0.0/tests/unit/cli/test_test_cmd.py +136 -0
  218. agentprobe_framework-1.0.0/tests/unit/cli/test_trace_cmd.py +192 -0
  219. agentprobe_framework-1.0.0/tests/unit/core/__init__.py +1 -0
  220. agentprobe_framework-1.0.0/tests/unit/core/test_assertions.py +197 -0
  221. agentprobe_framework-1.0.0/tests/unit/core/test_chaos.py +136 -0
  222. agentprobe_framework-1.0.0/tests/unit/core/test_config.py +253 -0
  223. agentprobe_framework-1.0.0/tests/unit/core/test_conversation.py +175 -0
  224. agentprobe_framework-1.0.0/tests/unit/core/test_discovery.py +212 -0
  225. agentprobe_framework-1.0.0/tests/unit/core/test_exceptions.py +150 -0
  226. agentprobe_framework-1.0.0/tests/unit/core/test_models.py +889 -0
  227. agentprobe_framework-1.0.0/tests/unit/core/test_protocols.py +137 -0
  228. agentprobe_framework-1.0.0/tests/unit/core/test_runner.py +154 -0
  229. agentprobe_framework-1.0.0/tests/unit/core/test_scenario.py +124 -0
  230. agentprobe_framework-1.0.0/tests/unit/core/test_snapshot.py +144 -0
  231. agentprobe_framework-1.0.0/tests/unit/cost/__init__.py +1 -0
  232. agentprobe_framework-1.0.0/tests/unit/cost/test_budget.py +94 -0
  233. agentprobe_framework-1.0.0/tests/unit/cost/test_calculator.py +191 -0
  234. agentprobe_framework-1.0.0/tests/unit/dashboard/__init__.py +0 -0
  235. agentprobe_framework-1.0.0/tests/unit/dashboard/conftest.py +26 -0
  236. agentprobe_framework-1.0.0/tests/unit/dashboard/test_app.py +57 -0
  237. agentprobe_framework-1.0.0/tests/unit/dashboard/test_metrics.py +88 -0
  238. agentprobe_framework-1.0.0/tests/unit/dashboard/test_results.py +85 -0
  239. agentprobe_framework-1.0.0/tests/unit/dashboard/test_schemas.py +64 -0
  240. agentprobe_framework-1.0.0/tests/unit/dashboard/test_traces.py +86 -0
  241. agentprobe_framework-1.0.0/tests/unit/eval/__init__.py +1 -0
  242. agentprobe_framework-1.0.0/tests/unit/eval/test_base.py +80 -0
  243. agentprobe_framework-1.0.0/tests/unit/eval/test_embedding.py +97 -0
  244. agentprobe_framework-1.0.0/tests/unit/eval/test_llm_judge.py +98 -0
  245. agentprobe_framework-1.0.0/tests/unit/eval/test_rules.py +228 -0
  246. agentprobe_framework-1.0.0/tests/unit/eval/test_statistical.py +196 -0
  247. agentprobe_framework-1.0.0/tests/unit/eval/test_trace_compare.py +361 -0
  248. agentprobe_framework-1.0.0/tests/unit/fixtures/__init__.py +0 -0
  249. agentprobe_framework-1.0.0/tests/unit/fixtures/test_factories.py +232 -0
  250. agentprobe_framework-1.0.0/tests/unit/metrics/__init__.py +1 -0
  251. agentprobe_framework-1.0.0/tests/unit/metrics/test_aggregator.py +132 -0
  252. agentprobe_framework-1.0.0/tests/unit/metrics/test_collector.py +190 -0
  253. agentprobe_framework-1.0.0/tests/unit/metrics/test_definitions.py +79 -0
  254. agentprobe_framework-1.0.0/tests/unit/metrics/test_trend.py +98 -0
  255. agentprobe_framework-1.0.0/tests/unit/plugins/__init__.py +1 -0
  256. agentprobe_framework-1.0.0/tests/unit/plugins/test_base.py +179 -0
  257. agentprobe_framework-1.0.0/tests/unit/plugins/test_loader.py +174 -0
  258. agentprobe_framework-1.0.0/tests/unit/plugins/test_manager.py +255 -0
  259. agentprobe_framework-1.0.0/tests/unit/plugins/test_registry.py +92 -0
  260. agentprobe_framework-1.0.0/tests/unit/regression/__init__.py +0 -0
  261. agentprobe_framework-1.0.0/tests/unit/regression/test_baseline.py +82 -0
  262. agentprobe_framework-1.0.0/tests/unit/regression/test_detector.py +113 -0
  263. agentprobe_framework-1.0.0/tests/unit/reporting/__init__.py +1 -0
  264. agentprobe_framework-1.0.0/tests/unit/reporting/test_csv.py +120 -0
  265. agentprobe_framework-1.0.0/tests/unit/reporting/test_html.py +108 -0
  266. agentprobe_framework-1.0.0/tests/unit/reporting/test_json_reporter.py +53 -0
  267. agentprobe_framework-1.0.0/tests/unit/reporting/test_junit.py +145 -0
  268. agentprobe_framework-1.0.0/tests/unit/reporting/test_markdown.py +92 -0
  269. agentprobe_framework-1.0.0/tests/unit/reporting/test_terminal.py +67 -0
  270. agentprobe_framework-1.0.0/tests/unit/safety/__init__.py +1 -0
  271. agentprobe_framework-1.0.0/tests/unit/safety/test_data_leakage.py +48 -0
  272. agentprobe_framework-1.0.0/tests/unit/safety/test_hallucination.py +53 -0
  273. agentprobe_framework-1.0.0/tests/unit/safety/test_jailbreak.py +48 -0
  274. agentprobe_framework-1.0.0/tests/unit/safety/test_prompt_injection.py +66 -0
  275. agentprobe_framework-1.0.0/tests/unit/safety/test_role_confusion.py +50 -0
  276. agentprobe_framework-1.0.0/tests/unit/safety/test_scanner.py +199 -0
  277. agentprobe_framework-1.0.0/tests/unit/safety/test_tool_abuse.py +65 -0
  278. agentprobe_framework-1.0.0/tests/unit/security/__init__.py +1 -0
  279. agentprobe_framework-1.0.0/tests/unit/security/test_audit.py +73 -0
  280. agentprobe_framework-1.0.0/tests/unit/security/test_encryption.py +58 -0
  281. agentprobe_framework-1.0.0/tests/unit/security/test_pii.py +148 -0
  282. agentprobe_framework-1.0.0/tests/unit/storage/__init__.py +1 -0
  283. agentprobe_framework-1.0.0/tests/unit/storage/test_migrations.py +105 -0
  284. agentprobe_framework-1.0.0/tests/unit/storage/test_postgres.py +300 -0
  285. agentprobe_framework-1.0.0/tests/unit/storage/test_sqlite.py +374 -0
  286. agentprobe_framework-1.0.0/tests/unit/test_pytest_plugin.py +188 -0
  287. agentprobe_framework-1.0.0/tests/unit/test_testing.py +235 -0
  288. agentprobe_framework-1.0.0/tests/unit/trace/__init__.py +1 -0
  289. agentprobe_framework-1.0.0/tests/unit/trace/test_diff.py +123 -0
  290. agentprobe_framework-1.0.0/tests/unit/trace/test_recorder.py +175 -0
  291. agentprobe_framework-1.0.0/tests/unit/trace/test_replay.py +130 -0
  292. agentprobe_framework-1.0.0/tests/unit/trace/test_time_travel.py +165 -0
@@ -0,0 +1,9 @@
1
+ # AgentProbe Environment Variables
2
+ # Copy this file to .env and fill in your values.
3
+
4
+ # Provider API keys (for agents that call these providers)
5
+ # ANTHROPIC_API_KEY=your-key-here
6
+ # OPENAI_API_KEY=your-key-here
7
+
8
+ # PostgreSQL connection (optional, for team storage backend)
9
+ # AGENTPROBE_PG_DSN=postgresql://user:pass@localhost:5432/agentprobe
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Report a bug in AgentProbe
4
+ title: "[BUG] "
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Description
10
+ <!-- A clear description of the bug. -->
11
+
12
+ ## Steps to Reproduce
13
+ 1.
14
+ 2.
15
+ 3.
16
+
17
+ ## Expected Behavior
18
+ <!-- What you expected to happen. -->
19
+
20
+ ## Actual Behavior
21
+ <!-- What actually happened. -->
22
+
23
+ ## Environment
24
+ - AgentProbe version:
25
+ - Python version:
26
+ - OS:
@@ -0,0 +1,19 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature for AgentProbe
4
+ title: "[FEATURE] "
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Problem
10
+ <!-- What problem does this feature solve? -->
11
+
12
+ ## Proposed Solution
13
+ <!-- How should it work? -->
14
+
15
+ ## Alternatives Considered
16
+ <!-- Any alternative approaches you've considered. -->
17
+
18
+ ## Additional Context
19
+ <!-- Any other context, screenshots, or examples. -->
@@ -0,0 +1,22 @@
1
+ ## Summary
2
+ <!-- What does this PR do? 1-3 sentences. -->
3
+
4
+ ## Related Issues
5
+ <!-- Closes #123 -->
6
+
7
+ ## Changes
8
+ <!-- List of significant changes -->
9
+
10
+ ## Testing
11
+ <!-- How was this tested? -->
12
+ - [ ] Unit tests added/updated
13
+ - [ ] Integration tests added/updated (if applicable)
14
+ - [ ] Manual testing performed
15
+
16
+ ## Checklist
17
+ - [ ] Code is type-annotated (mypy strict passes)
18
+ - [ ] Docstrings added for all public APIs
19
+ - [ ] Tests pass with >= 90% coverage for new code
20
+ - [ ] No linting errors (ruff)
21
+ - [ ] Changelog entry added (if user-facing)
22
+ - [ ] Documentation updated (if applicable)
@@ -0,0 +1,21 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ day: "monday"
8
+ target-branch: "develop"
9
+ open-pull-requests-limit: 10
10
+ labels:
11
+ - "dependencies"
12
+
13
+ - package-ecosystem: "github-actions"
14
+ directory: "/"
15
+ schedule:
16
+ interval: "weekly"
17
+ day: "monday"
18
+ target-branch: "develop"
19
+ open-pull-requests-limit: 5
20
+ labels:
21
+ - "ci"
@@ -0,0 +1,72 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, develop]
6
+ pull_request:
7
+ branches: [main, develop]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.11"
17
+ - name: Install dependencies
18
+ run: pip install -e ".[dev]"
19
+ - name: Run ruff check
20
+ run: ruff check src/ tests/
21
+ - name: Run ruff format check
22
+ run: ruff format --check src/ tests/
23
+
24
+ type-check:
25
+ runs-on: ubuntu-latest
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: "3.11"
31
+ - name: Install dependencies
32
+ run: pip install -e ".[dev]"
33
+ - name: Run mypy
34
+ run: mypy src/agentprobe/
35
+
36
+ test:
37
+ runs-on: ubuntu-latest
38
+ strategy:
39
+ matrix:
40
+ python-version: ["3.11", "3.12", "3.13"]
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+ - uses: actions/setup-python@v5
44
+ with:
45
+ python-version: ${{ matrix.python-version }}
46
+ - name: Install dependencies
47
+ run: pip install -e ".[dev,test]"
48
+ - name: Run tests
49
+ run: |
50
+ pytest tests/ \
51
+ --cov=agentprobe \
52
+ --cov-report=xml:coverage.xml \
53
+ --cov-report=term-missing \
54
+ --junitxml=test-results.xml \
55
+ -v
56
+ - name: Upload test results
57
+ if: always()
58
+ uses: actions/upload-artifact@v4
59
+ with:
60
+ name: test-results-${{ matrix.python-version }}
61
+ path: test-results.xml
62
+ - name: Upload coverage report
63
+ if: always()
64
+ uses: actions/upload-artifact@v4
65
+ with:
66
+ name: coverage-${{ matrix.python-version }}
67
+ path: coverage.xml
68
+ - name: Upload coverage to Codecov
69
+ if: matrix.python-version == '3.11'
70
+ uses: codecov/codecov-action@v4
71
+ with:
72
+ file: coverage.xml
@@ -0,0 +1,23 @@
1
+ name: Deploy Documentation
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: write
9
+
10
+ jobs:
11
+ deploy:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ with:
16
+ fetch-depth: 0
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.11"
20
+ - name: Install dependencies
21
+ run: pip install -e ".[docs]"
22
+ - name: Deploy to GitHub Pages
23
+ run: mkdocs gh-deploy --force
@@ -0,0 +1,42 @@
1
+ name: Release to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ build:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.11"
19
+ - name: Install build dependencies
20
+ run: pip install build
21
+ - name: Build package
22
+ run: python -m build
23
+ - name: Upload dist artifacts
24
+ uses: actions/upload-artifact@v4
25
+ with:
26
+ name: dist
27
+ path: dist/
28
+
29
+ publish:
30
+ needs: build
31
+ runs-on: ubuntu-latest
32
+ environment: pypi
33
+ permissions:
34
+ id-token: write
35
+ steps:
36
+ - name: Download dist artifacts
37
+ uses: actions/download-artifact@v4
38
+ with:
39
+ name: dist
40
+ path: dist/
41
+ - name: Publish to PyPI
42
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,42 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .eggs/
9
+ *.egg
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ ENV/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+ *~
22
+
23
+ # Testing / Coverage
24
+ .pytest_cache/
25
+ .mypy_cache/
26
+ .ruff_cache/
27
+ htmlcov/
28
+ .coverage
29
+ coverage.xml
30
+
31
+ # Environment
32
+ .env
33
+ .env.*
34
+ !.env.example
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # AgentProbe runtime
41
+ .agentprobe/
42
+ agentprobe-report/
@@ -0,0 +1,22 @@
1
+ repos:
2
+ - repo: local
3
+ hooks:
4
+ - id: ruff-check
5
+ name: ruff lint
6
+ entry: ruff check --fix
7
+ language: system
8
+ types: [python]
9
+
10
+ - id: ruff-format
11
+ name: ruff format
12
+ entry: ruff format
13
+ language: system
14
+ types: [python]
15
+
16
+ - id: mypy
17
+ name: mypy
18
+ entry: mypy
19
+ language: system
20
+ types: [python]
21
+ args: [--config-file=pyproject.toml]
22
+
@@ -0,0 +1,181 @@
1
+ # Contributing to AgentProbe
2
+
3
+ Thank you for your interest in contributing to AgentProbe! This document covers how to set up your development environment, our coding standards, and the contribution workflow.
4
+
5
+ ## Development Setup
6
+
7
+ ### Prerequisites
8
+
9
+ - Python 3.11 or later
10
+ - Git
11
+
12
+ ### Getting Started
13
+
14
+ ```bash
15
+ # Clone the repository
16
+ git clone https://github.com/dyrach1o/agentprobe-framework.git
17
+ cd agentprobe-framework
18
+
19
+ # Create a virtual environment
20
+ python -m venv .venv
21
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
22
+
23
+ # Install with dev dependencies
24
+ make dev
25
+ # or manually: pip install -e ".[dev,test,docs]"
26
+
27
+ # Verify setup
28
+ make check
29
+ ```
30
+
31
+ ## Coding Standards
32
+
33
+ ### Python
34
+
35
+ - **Python 3.11+** required
36
+ - **ruff** for linting and formatting --- run `make lint` and `make format`
37
+ - **mypy strict mode** for type checking --- run `make type-check`
38
+ - **100% type annotations** on all public and private functions
39
+ - **Google-style docstrings** on all public classes, methods, and functions
40
+ - Line length: 100 characters
41
+
42
+ ### Naming Conventions
43
+
44
+ | Scope | Convention | Example |
45
+ |-------|-----------|---------|
46
+ | Modules | `snake_case` | `trace_compare.py` |
47
+ | Classes | `PascalCase` | `TraceRecorder` |
48
+ | Functions | `snake_case` | `calculate_cost()` |
49
+ | Constants | `SCREAMING_SNAKE_CASE` | `MAX_TIMEOUT_SECONDS` |
50
+ | Variables | `snake_case` | `total_cost` |
51
+
52
+ ### Import Order
53
+
54
+ Enforced by ruff (isort):
55
+
56
+ 1. Standard library
57
+ 2. Third-party packages
58
+ 3. First-party (`agentprobe`)
59
+
60
+ ## Testing
61
+
62
+ ### Running Tests
63
+
64
+ ```bash
65
+ # All tests
66
+ make test
67
+
68
+ # Unit tests only
69
+ make test-unit
70
+
71
+ # Fast tests (skip slow and API tests)
72
+ make test-fast
73
+
74
+ # Specific test file
75
+ pytest tests/unit/core/test_runner.py -v
76
+ ```
77
+
78
+ ### Writing Tests
79
+
80
+ - Test files go in `tests/unit/` mirroring the `src/` structure
81
+ - Use pytest fixtures from `tests/fixtures/` for test data
82
+ - Use factory functions (`make_trace`, `make_eval_result`, etc.) for model instances
83
+ - Name test functions using the pattern `test_{method}_{scenario}_{expected_outcome}`
84
+ - Async tests use `@pytest.mark.asyncio`
85
+ - Minimum **90% coverage** for new code
86
+
87
+ ### Test Structure
88
+
89
+ ```python
90
+ class TestMyClass:
91
+ """Tests for MyClass."""
92
+
93
+ def test_method_with_valid_input_returns_expected(self) -> None:
94
+ # Arrange
95
+ obj = MyClass()
96
+
97
+ # Act
98
+ result = obj.method(valid_input)
99
+
100
+ # Assert
101
+ assert result == expected
102
+
103
+ def test_method_with_invalid_input_raises_error(self) -> None:
104
+ obj = MyClass()
105
+
106
+ with pytest.raises(ValueError, match="specific message"):
107
+ obj.method(invalid_input)
108
+ ```
109
+
110
+ ## Git Workflow
111
+
112
+ ### Branches
113
+
114
+ - `main` --- production-ready, protected
115
+ - `develop` --- integration branch
116
+ - `feature/TEAM-X.YY.ZZ-description` --- feature branches from `develop`
117
+ - `bugfix/TEAM-X.YY.ZZ-description` --- bug fixes
118
+ - `hotfix/description` --- urgent fixes from `main`
119
+
120
+ ### Commit Messages
121
+
122
+ We use [Conventional Commits](https://www.conventionalcommits.org/):
123
+
124
+ ```
125
+ <type>(<scope>): <description>
126
+ ```
127
+
128
+ **Types:** `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`
129
+
130
+ **Scopes:** `core`, `eval`, `safety`, `adapters`, `cli`, `trace`, `cost`, `storage`, `reporting`, `plugins`, `docs`, `infra`
131
+
132
+ **Examples:**
133
+
134
+ ```
135
+ feat(core): implement test runner with parallel execution
136
+ fix(eval): correct cosine similarity for zero vectors
137
+ docs(cli): add usage examples for trace commands
138
+ test(safety): add prompt injection detection tests
139
+ ```
140
+
141
+ ### Pull Request Process
142
+
143
+ 1. Create a feature branch from `develop`
144
+ 2. Make your changes following the coding standards
145
+ 3. Ensure all checks pass: `make check`
146
+ 4. Push and open a PR against `develop`
147
+ 5. Fill out the PR template
148
+ 6. Address review feedback
149
+
150
+ ### PR Checklist
151
+
152
+ - [ ] Code is fully type-annotated (mypy strict passes)
153
+ - [ ] Docstrings added for all public APIs
154
+ - [ ] Tests pass with >= 90% coverage for new code
155
+ - [ ] No linting errors (`ruff check`)
156
+ - [ ] Changelog entry added (if user-facing)
157
+ - [ ] Documentation updated (if applicable)
158
+
159
+ ## Project Layout
160
+
161
+ ```
162
+ src/agentprobe/
163
+ ├── core/ # Test runner, discovery, assertions, config
164
+ ├── eval/ # Evaluators
165
+ ├── trace/ # Recording and replay
166
+ ├── cost/ # Cost tracking
167
+ ├── safety/ # Safety scanning
168
+ ├── regression/ # Regression detection
169
+ ├── adapters/ # Framework adapters
170
+ ├── metrics/ # Metric collection
171
+ ├── storage/ # Storage backends
172
+ ├── reporting/ # Output formatters
173
+ ├── plugins/ # Plugin system
174
+ └── cli/ # CLI commands
175
+ ```
176
+
177
+ ## Getting Help
178
+
179
+ - Open a [GitHub Issue](https://github.com/dyrach1o/agentprobe-framework/issues) for bugs or feature requests
180
+ - Check existing issues before creating new ones
181
+ - Include reproduction steps for bug reports
@@ -0,0 +1,33 @@
1
+ # ── Stage 1: Builder ──
2
+ FROM python:3.11-slim AS builder
3
+
4
+ WORKDIR /build
5
+
6
+ COPY pyproject.toml README.md LICENSE ./
7
+ COPY src/ src/
8
+
9
+ RUN pip install --no-cache-dir build \
10
+ && python -m build --wheel --outdir /build/dist
11
+
12
+ # ── Stage 2: Runtime ──
13
+ FROM python:3.11-slim AS runtime
14
+
15
+ # Create non-root user
16
+ RUN groupadd --gid 1000 agentprobe \
17
+ && useradd --uid 1000 --gid 1000 --create-home agentprobe
18
+
19
+ WORKDIR /app
20
+
21
+ # Install the wheel from builder stage
22
+ COPY --from=builder /build/dist/*.whl /tmp/
23
+ RUN pip install --no-cache-dir /tmp/*.whl \
24
+ && rm -rf /tmp/*.whl
25
+
26
+ # Switch to non-root user
27
+ USER agentprobe
28
+
29
+ # Default working directory for user projects
30
+ WORKDIR /project
31
+
32
+ ENTRYPOINT ["agentprobe"]
33
+ CMD ["--help"]