proofagent-harness 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. proofagent_harness-0.1.0/.github/FUNDING.yml +7 -0
  2. proofagent_harness-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +42 -0
  3. proofagent_harness-0.1.0/.github/ISSUE_TEMPLATE/calibration_concern.md +43 -0
  4. proofagent_harness-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +39 -0
  5. proofagent_harness-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +38 -0
  6. proofagent_harness-0.1.0/.github/workflows/ci.yml +95 -0
  7. proofagent_harness-0.1.0/.gitignore +87 -0
  8. proofagent_harness-0.1.0/CHANGELOG.md +369 -0
  9. proofagent_harness-0.1.0/CODE_OF_CONDUCT.md +74 -0
  10. proofagent_harness-0.1.0/CONTRIBUTING.md +199 -0
  11. proofagent_harness-0.1.0/LICENSE +29 -0
  12. proofagent_harness-0.1.0/PKG-INFO +1109 -0
  13. proofagent_harness-0.1.0/README.md +1058 -0
  14. proofagent_harness-0.1.0/SECURITY.md +79 -0
  15. proofagent_harness-0.1.0/benchmarks/calibration_check.py +237 -0
  16. proofagent_harness-0.1.0/examples/01_quickstart.py +995 -0
  17. proofagent_harness-0.1.0/examples/02_pytest_integration.py +33 -0
  18. proofagent_harness-0.1.0/examples/03_stateful_agent_with_response.py +61 -0
  19. proofagent_harness-0.1.0/examples/04_with_full_context.py +70 -0
  20. proofagent_harness-0.1.0/examples/05_compliance_focused.py +43 -0
  21. proofagent_harness-0.1.0/examples/06_weak_agent_baseline.py +289 -0
  22. proofagent_harness-0.1.0/examples/07_proxy_llm_agent.py +421 -0
  23. proofagent_harness-0.1.0/notebooks/01_quickstart_local.ipynb +419 -0
  24. proofagent_harness-0.1.0/notebooks/02_quickstart_colab.ipynb +392 -0
  25. proofagent_harness-0.1.0/notebooks/03_compliance_traps.ipynb +443 -0
  26. proofagent_harness-0.1.0/notebooks/04_proxy_llm_for_harness.ipynb +401 -0
  27. proofagent_harness-0.1.0/pyproject.toml +109 -0
  28. proofagent_harness-0.1.0/scripts/release.sh +125 -0
  29. proofagent_harness-0.1.0/scripts/test_notebooks.py +115 -0
  30. proofagent_harness-0.1.0/src/proofagent_harness/__init__.py +59 -0
  31. proofagent_harness-0.1.0/src/proofagent_harness/agents/__init__.py +18 -0
  32. proofagent_harness-0.1.0/src/proofagent_harness/agents/conductor.py +413 -0
  33. proofagent_harness-0.1.0/src/proofagent_harness/agents/consensus.py +135 -0
  34. proofagent_harness-0.1.0/src/proofagent_harness/agents/juror.py +714 -0
  35. proofagent_harness-0.1.0/src/proofagent_harness/agents/planner.py +640 -0
  36. proofagent_harness-0.1.0/src/proofagent_harness/agents/reporter.py +350 -0
  37. proofagent_harness-0.1.0/src/proofagent_harness/cli.py +195 -0
  38. proofagent_harness-0.1.0/src/proofagent_harness/context_budget.py +81 -0
  39. proofagent_harness-0.1.0/src/proofagent_harness/data/personas/contrarian.md +184 -0
  40. proofagent_harness-0.1.0/src/proofagent_harness/data/personas/lenient.md +155 -0
  41. proofagent_harness-0.1.0/src/proofagent_harness/data/personas/rigorous.md +157 -0
  42. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/conducting.md +756 -0
  43. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/planning.md +202 -0
  44. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/reporting.md +32 -0
  45. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/scoring/hallucination_resistance.md +132 -0
  46. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/scoring/instruction_following.md +122 -0
  47. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/scoring/manipulation_resistance.md +136 -0
  48. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/scoring/safety.md +132 -0
  49. proofagent_harness-0.1.0/src/proofagent_harness/data/skills/scoring/task_success.md +108 -0
  50. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/bias/protected_class_bias.md +59 -0
  51. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/business_logic/airline_compensation_fraud.md +113 -0
  52. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/business_logic/permission_escalation.md +45 -0
  53. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/business_logic/pricing_manipulation.md +47 -0
  54. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/business_logic/procurement_bid_manipulation.md +153 -0
  55. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/code_safety/insecure_code_recommendation.md +48 -0
  56. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/code_safety/malicious_code_generation.md +49 -0
  57. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/ccpa_data_disclosure.md +145 -0
  58. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/ccpa_do_not_sell.md +123 -0
  59. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/gdpr_consent_check.md +119 -0
  60. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/gdpr_data_subject_request.md +117 -0
  61. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/hipaa_phi_exposure.md +120 -0
  62. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/pci_card_data.md +110 -0
  63. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/sealed_records_disclosure.md +155 -0
  64. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/compliance/sox_financial_disclosure.md +113 -0
  65. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/data_exfiltration/compositional_disclosure.md +189 -0
  66. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/data_exfiltration/frequent_flyer_pretexting.md +129 -0
  67. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/data_exfiltration/pii_extraction.md +212 -0
  68. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/data_exfiltration/secret_exposure.md +208 -0
  69. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/data_exfiltration/tool_response_exfiltration.md +201 -0
  70. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/confident_falsehood.md +166 -0
  71. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/fabricated_citations.md +139 -0
  72. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/fabricated_local_business_info.md +131 -0
  73. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/fictitious_policy_invention.md +159 -0
  74. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/historical_fact_fabrication.md +127 -0
  75. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/legal_citation_fabrication.md +135 -0
  76. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/long_context_factual_drift.md +158 -0
  77. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/numerical_fabrication.md +131 -0
  78. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/obscure_entity_invention.md +128 -0
  79. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/real_person_defamation.md +131 -0
  80. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/stale_information.md +125 -0
  81. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/factuality/tool_input_hallucination.md +148 -0
  82. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/policy_drift/contradictory_correction.md +33 -0
  83. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/policy_drift/gradual_escalation.md +32 -0
  84. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/policy_drift/memory_overload.md +33 -0
  85. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/delimiter_escape.md +103 -0
  86. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/hypothetical_role_play.md +98 -0
  87. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/indirect_injection.md +110 -0
  88. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/instruction_override.md +120 -0
  89. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/memory_poisoning.md +113 -0
  90. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/refusal_channel_attack.md +109 -0
  91. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/role_confusion.md +99 -0
  92. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/system_prompt_leak.md +126 -0
  93. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/prompt_injection/universal_jailbreak_chain.md +163 -0
  94. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/authority_impersonation.md +108 -0
  95. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/benefits_application_pretexting.md +151 -0
  96. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/crew_impersonation.md +125 -0
  97. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/phishing_redirect.md +106 -0
  98. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/policy_evolution_claim.md +104 -0
  99. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/pretexting.md +107 -0
  100. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/quid_pro_quo.md +91 -0
  101. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/reciprocity_bait.md +91 -0
  102. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/social_engineering_combined_chain.md +135 -0
  103. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/sycophancy_exploit.md +91 -0
  104. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/sympathy_appeal.md +91 -0
  105. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/social_engineering/urgency_pressure.md +91 -0
  106. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/tool_misuse/tool_chain_exploitation.md +40 -0
  107. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/tool_misuse/unauthorized_tool_call.md +42 -0
  108. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/verbal_abuse/provocation_to_break_character.md +82 -0
  109. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/verbal_abuse/slur_in_user_input.md +88 -0
  110. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/verbal_abuse/sustained_harassment.md +92 -0
  111. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/verbal_abuse/third_party_attack_request.md +97 -0
  112. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/verbal_abuse/user_directed_profanity.md +74 -0
  113. proofagent_harness-0.1.0/src/proofagent_harness/data/traps/verbal_abuse/verbal_abuse_combined_chain.md +98 -0
  114. proofagent_harness-0.1.0/src/proofagent_harness/graph/__init__.py +6 -0
  115. proofagent_harness-0.1.0/src/proofagent_harness/graph/builder.py +54 -0
  116. proofagent_harness-0.1.0/src/proofagent_harness/graph/state.py +84 -0
  117. proofagent_harness-0.1.0/src/proofagent_harness/harness.py +415 -0
  118. proofagent_harness-0.1.0/src/proofagent_harness/llm.py +271 -0
  119. proofagent_harness-0.1.0/src/proofagent_harness/loaders.py +290 -0
  120. proofagent_harness-0.1.0/src/proofagent_harness/progress.py +166 -0
  121. proofagent_harness-0.1.0/src/proofagent_harness/schemas.py +374 -0
  122. proofagent_harness-0.1.0/src/proofagent_harness/scoring/__init__.py +8 -0
  123. proofagent_harness-0.1.0/src/proofagent_harness/scoring/aggregator.py +132 -0
  124. proofagent_harness-0.1.0/src/proofagent_harness/tools/__init__.py +1 -0
  125. proofagent_harness-0.1.0/src/proofagent_harness/tools/report_tools.py +234 -0
  126. proofagent_harness-0.1.0/tests/__init__.py +0 -0
  127. proofagent_harness-0.1.0/tests/conftest.py +123 -0
  128. proofagent_harness-0.1.0/tests/test_context_budget.py +148 -0
  129. proofagent_harness-0.1.0/tests/test_harness_integration.py +654 -0
  130. proofagent_harness-0.1.0/tests/test_loaders.py +266 -0
  131. proofagent_harness-0.1.0/tests/test_p0_pipeline.py +896 -0
  132. proofagent_harness-0.1.0/tests/test_planner_selection.py +235 -0
  133. proofagent_harness-0.1.0/tests/test_reproducibility.py +77 -0
  134. proofagent_harness-0.1.0/tests/test_schemas.py +90 -0
  135. proofagent_harness-0.1.0/tests/test_scoring.py +230 -0
  136. proofagent_harness-0.1.0/tests/test_weaving.py +247 -0
@@ -0,0 +1,7 @@
1
+ # Optional: enables the "Sponsor" button on the GitHub repo page.
2
+ # Uncomment and fill in the platform(s) you accept funding through.
3
+
4
+ # github: [ProofAgent-ai]
5
+ # open_collective: proofagent
6
+ # patreon: proofagent
7
+ # custom: ["https://proofagent.ai/sponsor"]
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: Bug report
3
+ about: A run failed, scores look wrong, or the harness behaved unexpectedly
4
+ title: "[bug] "
5
+ labels: bug
6
+ ---
7
+
8
+ ## What happened
9
+
10
+ <!-- One-paragraph description. Include the failure mode (crash / wrong score /
11
+ hang / etc.) and what you expected to see instead. -->
12
+
13
+ ## Reproduce
14
+
15
+ ```bash
16
+ # The exact command you ran:
17
+ python examples/01_quickstart.py --turns N --consensus delphi --llm <model>
18
+ ```
19
+
20
+ ```python
21
+ # Or: the minimal Python snippet that reproduces:
22
+ from proofagent_harness import Harness, AgentContext
23
+ report = Harness(...).evaluate(my_agent, ...)
24
+ ```
25
+
26
+ ## Output / scorecard / traceback
27
+
28
+ ```
29
+ <paste the full output, including any traceback or the scorecard JSON>
30
+ ```
31
+
32
+ ## Environment
33
+
34
+ - OS: <macOS / Linux / Windows + version>
35
+ - Python: <`python --version`>
36
+ - proofagent-harness: <`pip show proofagent-harness | grep Version`>
37
+ - LLM provider + model: <e.g. `--llm claude-sonnet-4-6` via Anthropic SDK X.Y.Z>
38
+
39
+ ## Anything else
40
+
41
+ <!-- Workarounds you tried, related runs that DID work, screenshots of
42
+ weird scorecards, etc. -->
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: Calibration concern
3
+ about: A run produced a score that doesn't match your expectation of the agent
4
+ title: "[calibration] "
5
+ labels: calibration
6
+ ---
7
+
8
+ ## The discrepancy
9
+
10
+ <!-- "I ran X and got score Y, but I expected Z" — be concrete. -->
11
+
12
+ - **Agent under test:** <role + model>
13
+ - **Score reported:** <e.g. 9.6 GOLD>
14
+ - **Expected score / certification:** <e.g. should be ~7 because...>
15
+
16
+ ## Did the calibration check pass?
17
+
18
+ ```bash
19
+ python benchmarks/calibration_check.py --turns 15 --consensus delphi
20
+ ```
21
+
22
+ - Hardened-agent score: ___
23
+ - Weak-agent score: ___
24
+ - Discrimination gap: ___ (≥3 = well-calibrated, 1.5-3 = some signal, <1.5 = not calibrated)
25
+
26
+ ## Did the harness fire any warnings?
27
+
28
+ <!-- Plateau warning? Juror dissent? Limited-context cap? Defect counts?
29
+ The Report.warnings list often points at the cause directly. Paste relevant
30
+ ones here. -->
31
+
32
+ ## What you've tried to rule out
33
+
34
+ - [ ] Cross-family judge (`--llm` from a different vendor than the agent)
35
+ - [ ] Different turn count (longer = more discrimination signal)
36
+ - [ ] `--consensus debate` (re-vote on dissent)
37
+ - [ ] `--repeats N` for variance reduction (if implemented)
38
+ - [ ] Re-checked that the agent's `system_prompt` is being passed (no limited-context cap in warnings)
39
+
40
+ ## Run report
41
+
42
+ <!-- Paste the full Report.warnings list, the per_metric scores, AND ideally
43
+ the JSON file (or attach it) so we can re-analyze the per-turn audits. -->
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest a new trap, scoring rubric, conductor technique, or harness capability
4
+ title: "[feature] "
5
+ labels: enhancement
6
+ ---
7
+
8
+ ## What you want
9
+
10
+ <!-- One-paragraph description of the capability. -->
11
+
12
+ ## Why it matters / what it catches
13
+
14
+ <!-- What real-world failure mode does this address? Cite a documented incident,
15
+ production bug, paper, or threat model if applicable. The harness is biased
16
+ toward features that have empirical grounding. -->
17
+
18
+ ## Proposed implementation
19
+
20
+ <!-- Where it lives in the pipeline:
21
+ - New trap → which family? src/proofagent_harness/data/traps/<family>/<name>.md
22
+ - New scoring criterion → which metric's rubric?
23
+ src/proofagent_harness/data/skills/scoring/<metric>.md
24
+ - New conductor technique → conducting.md
25
+ - New defect detector → conductor.py:_detect_defects
26
+ - New schema field → schemas.py
27
+ -->
28
+
29
+ ## Acceptance criteria
30
+
31
+ <!-- How will we know it works? E.g.:
32
+ - "On the weak agent, this trap should produce a SOFT_FAIL on at least
33
+ 2 of the 3 jurors"
34
+ - "On the calibration benchmark, the discrimination gap improves by X points"
35
+ -->
36
+
37
+ ## Related issues / runs
38
+
39
+ <!-- Link to runs (results/*.json), prior issues, papers, etc. -->
@@ -0,0 +1,38 @@
1
+ ## What this changes
2
+
3
+ <!-- One-paragraph summary. Cite the issue / discussion this addresses if any. -->
4
+
5
+ ## Pipeline stage(s) touched
6
+
7
+ - [ ] Trap (new family or modification to existing trap)
8
+ - [ ] Skill (planner / conductor / juror persona / scoring rubric)
9
+ - [ ] Schema (`schemas.py`)
10
+ - [ ] Conductor (defect detection, agent invocation)
11
+ - [ ] Juror (audit protocol, lens, sharpener)
12
+ - [ ] Reporter (warnings, summary, output)
13
+ - [ ] CLI / examples
14
+ - [ ] Tests / benchmarks
15
+ - [ ] Docs / README
16
+
17
+ ## Tests
18
+
19
+ - [ ] All existing tests still pass (`pytest tests/`)
20
+ - [ ] New tests added for the change (state which file)
21
+ - [ ] Calibration benchmark still produces gap ≥ 3.0 (or explained why not)
22
+
23
+ ## Discrimination impact
24
+
25
+ <!-- If the change affects scoring, run the calibration check before AND after:
26
+ python benchmarks/calibration_check.py --turns 15
27
+ Report both numbers. -->
28
+
29
+ | | Before | After |
30
+ |---|---:|---:|
31
+ | Hardened-agent score | | |
32
+ | Weak-agent score | | |
33
+ | Discrimination gap | | |
34
+
35
+ ## Anything reviewers should know
36
+
37
+ <!-- Trade-offs you considered, design alternatives you rejected, edge cases
38
+ you're unsure about, follow-up work you've punted on. -->
@@ -0,0 +1,95 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ test:
14
+ name: Test (Python ${{ matrix.python-version }})
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ python-version: ["3.10", "3.11", "3.12"]
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+ cache: "pip"
29
+
30
+ - name: Install package + dev dependencies
31
+ run: |
32
+ python -m pip install --upgrade pip
33
+ pip install -e ".[dev]"
34
+
35
+ - name: Lint (ruff)
36
+ run: ruff check src tests
37
+
38
+ - name: Type check (pyright)
39
+ run: pyright src
40
+ continue-on-error: true # Pyright noisy on non-stdlib types; warn-only
41
+
42
+ - name: Run tests
43
+ run: pytest tests/ -v --tb=short
44
+
45
+ - name: Verify CLI works
46
+ run: |
47
+ proof --help
48
+ proof traps list | head -20
49
+
50
+ examples-parse:
51
+ name: Examples & benchmarks parse
52
+ runs-on: ubuntu-latest
53
+ steps:
54
+ - uses: actions/checkout@v4
55
+
56
+ - name: Set up Python 3.11
57
+ uses: actions/setup-python@v5
58
+ with:
59
+ python-version: "3.11"
60
+ cache: "pip"
61
+
62
+ - name: Install package
63
+ run: |
64
+ python -m pip install --upgrade pip
65
+ pip install -e ".[dev]"
66
+ pip install openai anthropic # for examples
67
+ - name: Verify all examples parse
68
+ run: |
69
+ for f in examples/*.py; do
70
+ echo "Parsing: $f"
71
+ python -c "import ast; ast.parse(open('$f').read())"
72
+ done
73
+
74
+ - name: Verify benchmarks parse
75
+ run: |
76
+ for f in benchmarks/*.py; do
77
+ echo "Parsing: $f"
78
+ python -c "import ast; ast.parse(open('$f').read())"
79
+ done
80
+
81
+ - name: Verify all bundled markdown skills/traps/personas have valid frontmatter
82
+ run: |
83
+ python -c "
84
+ from proofagent_harness.loaders import load_skills, load_personas, load_trap_index
85
+ skills = load_skills()
86
+ personas = load_personas()
87
+ idx = load_trap_index()
88
+ print(f'Skills: {len(skills)}')
89
+ print(f'Personas: {len(personas)}')
90
+ stats = idx.stats()
91
+ print(f'Traps: {stats[\"total\"]} (universal={stats[\"universal\"]}, families={stats[\"families\"]})')
92
+ assert len(skills) >= 5, 'expected at least 5 bundled skills'
93
+ assert len(personas) >= 3, 'expected at least 3 bundled personas'
94
+ assert stats['total'] >= 30, 'expected at least 30 bundled traps'
95
+ "
@@ -0,0 +1,87 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtualenv
25
+ .env
26
+ .venv
27
+ env/
28
+ venv/
29
+ ENV/
30
+ env.bak/
31
+ venv.bak/
32
+
33
+ # Testing
34
+ .pytest_cache/
35
+ .coverage
36
+ .coverage.*
37
+ htmlcov/
38
+ .tox/
39
+ .hypothesis/
40
+ coverage.xml
41
+ *.cover
42
+
43
+ # Type checking
44
+ .mypy_cache/
45
+ .pyright/
46
+
47
+ # IDE
48
+ .vscode/
49
+ .idea/
50
+ *.swp
51
+ *.swo
52
+ *~
53
+ .DS_Store
54
+
55
+ # Jupyter
56
+ .ipynb_checkpoints/
57
+ *.ipynb_checkpoints
58
+
59
+ # Build
60
+ docs/_build/
61
+ site/
62
+
63
+ # Secrets
64
+ *.key
65
+ *.pem
66
+ .env.local
67
+ .env.*.local
68
+
69
+ # Eval outputs (from running examples and tests)
70
+ results/
71
+ recordings/
72
+ *.report.json
73
+ *.report.md
74
+ results_*.json
75
+ results_*.md
76
+ results_*.html
77
+ report.json
78
+ report.md
79
+ proofagent_report.*
80
+ compliance_audit.*
81
+
82
+ # Examples scratch dirs
83
+ examples/my_agent_dir/
84
+ examples/results/
85
+
86
+ # Notebook execution artifacts (from scripts/test_notebooks.py --keep-output)
87
+ notebooks/_executed_*.ipynb