codejury 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codejury-0.1.0 → codejury-0.2.0}/PKG-INFO +30 -4
- {codejury-0.1.0 → codejury-0.2.0}/README.md +29 -3
- {codejury-0.1.0 → codejury-0.2.0}/codejury/assembly.py +8 -4
- {codejury-0.1.0 → codejury-0.2.0}/codejury/cli.py +21 -7
- codejury-0.2.0/codejury/data/golden/authn_jwt_noverify_vuln.yaml +6 -0
- codejury-0.2.0/codejury/data/golden/authn_jwt_verified_safe.yaml +6 -0
- codejury-0.2.0/codejury/data/golden/authn_sha256_checksum_safe.yaml +6 -0
- codejury-0.2.0/codejury/data/golden/authz_idor_vuln.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/authz_owner_safe.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/cmdi_ossystem_vuln.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/cmdi_subprocess_safe.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/crypto_aesgcm_safe.yaml +6 -0
- codejury-0.2.0/codejury/data/golden/crypto_ecb_vuln.yaml +6 -0
- codejury-0.2.0/codejury/data/golden/path_contained_safe.yaml +8 -0
- codejury-0.2.0/codejury/data/golden/path_traversal_vuln.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/secrets_env_safe.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/secrets_hardcoded_vuln.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/sqli_format_vuln.yaml +5 -0
- codejury-0.2.0/codejury/data/golden/xss_innerhtml_constant_safe.yaml +7 -0
- codejury-0.2.0/codejury/data/golden/xss_innerhtml_vuln.yaml +6 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/anthropic.py +10 -2
- {codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/base.py +17 -2
- {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/PKG-INFO +30 -4
- {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/SOURCES.txt +16 -0
- {codejury-0.1.0 → codejury-0.2.0}/pyproject.toml +1 -1
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_assembly.py +18 -1
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_evaluation.py +27 -13
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_tasks.py +25 -0
- {codejury-0.1.0 → codejury-0.2.0}/LICENSE +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/base.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/debate.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/mock.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/parsing.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/verifier.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/authentication.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/authorization.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/business_logic.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/crypto.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/data_protection.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/dependency_config.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/error_logging.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/input_validation.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/secrets.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/session.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/artifact.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/capability.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/context.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/observation.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/result.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/evaluation.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/infrastructure/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/infrastructure/json_parse.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/base.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/debate.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/pipeline.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/reflexion.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/single.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/base.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/litellm.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/mock.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/openai.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/openai_format.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/retry.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/reporting.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/resources.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/base.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/chunker.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/diff.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/function.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/mock.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/repo.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/__init__.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/registry.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/dependency_links.txt +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/entry_points.txt +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/requires.txt +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/top_level.txt +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/setup.cfg +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_anthropic_provider.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_audit_pipeline.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_capability.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_cli_audit.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_context.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_debate_agents.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_debate_orchestrator.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_diff_source.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_function_source.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_json_parse.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_litellm_provider.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_openai_provider.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_orchestrator.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_pipeline_orchestrator.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_reflexion_orchestrator.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_repo_source.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_reporting.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_retry_provider.py +0 -0
- {codejury-0.1.0 → codejury-0.2.0}/tests/test_verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: 4234288
|
|
6
6
|
License-Expression: MIT
|
|
@@ -84,6 +84,12 @@ pip install 'codejury[anthropic]' # add the provider you'll use (anthropic /
|
|
|
84
84
|
|
|
85
85
|
## Usage
|
|
86
86
|
|
|
87
|
+
A real audit calls a model, so set the provider's key first (see `.env.example`):
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY for --provider openai
|
|
91
|
+
```
|
|
92
|
+
|
|
87
93
|
```bash
|
|
88
94
|
# Audit a unified diff against the capability library
|
|
89
95
|
git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
|
|
@@ -91,15 +97,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
|
|
|
91
97
|
# Run a named task preset (tasks/*.yaml)
|
|
92
98
|
git diff | codejury run audit_diff_debate -
|
|
93
99
|
|
|
94
|
-
# Score detection quality against the golden cases
|
|
100
|
+
# Score detection quality against the golden cases
|
|
95
101
|
codejury eval --provider anthropic
|
|
96
102
|
|
|
103
|
+
# Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
|
|
104
|
+
# CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
|
|
105
|
+
# codejury audit --provider litellm -
|
|
106
|
+
git diff | codejury audit --provider litellm \
|
|
107
|
+
--api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
|
|
108
|
+
|
|
97
109
|
# No API key needed: prove the pipeline composes with mock layers
|
|
98
110
|
codejury dry-run
|
|
99
111
|
```
|
|
100
112
|
|
|
101
|
-
`audit` and `run` read a diff from a file argument or stdin (`-`).
|
|
102
|
-
|
|
113
|
+
`audit` and `run` read a diff from a file argument or stdin (`-`). The provider
|
|
114
|
+
key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
|
|
115
|
+
`OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
|
|
116
|
+
raise an authentication error; `codejury dry-run` needs no key.
|
|
117
|
+
|
|
118
|
+
A task YAML can pin the provider, model, and base URL (the key stays in the
|
|
119
|
+
environment), so `codejury run` works through a proxy too:
|
|
120
|
+
|
|
121
|
+
```yaml
|
|
122
|
+
# mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
|
|
123
|
+
name: proxy_scan
|
|
124
|
+
orchestrator: debate
|
|
125
|
+
provider: litellm
|
|
126
|
+
model: your-alias
|
|
127
|
+
api_base: https://litellm.example.com # key from CODEJURY_API_KEY
|
|
128
|
+
```
|
|
103
129
|
|
|
104
130
|
## Development
|
|
105
131
|
|
|
@@ -55,6 +55,12 @@ pip install 'codejury[anthropic]' # add the provider you'll use (anthropic /
|
|
|
55
55
|
|
|
56
56
|
## Usage
|
|
57
57
|
|
|
58
|
+
A real audit calls a model, so set the provider's key first (see `.env.example`):
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY for --provider openai
|
|
62
|
+
```
|
|
63
|
+
|
|
58
64
|
```bash
|
|
59
65
|
# Audit a unified diff against the capability library
|
|
60
66
|
git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
|
|
@@ -62,15 +68,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
|
|
|
62
68
|
# Run a named task preset (tasks/*.yaml)
|
|
63
69
|
git diff | codejury run audit_diff_debate -
|
|
64
70
|
|
|
65
|
-
# Score detection quality against the golden cases
|
|
71
|
+
# Score detection quality against the golden cases
|
|
66
72
|
codejury eval --provider anthropic
|
|
67
73
|
|
|
74
|
+
# Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
|
|
75
|
+
# CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
|
|
76
|
+
# codejury audit --provider litellm -
|
|
77
|
+
git diff | codejury audit --provider litellm \
|
|
78
|
+
--api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
|
|
79
|
+
|
|
68
80
|
# No API key needed: prove the pipeline composes with mock layers
|
|
69
81
|
codejury dry-run
|
|
70
82
|
```
|
|
71
83
|
|
|
72
|
-
`audit` and `run` read a diff from a file argument or stdin (`-`).
|
|
73
|
-
|
|
84
|
+
`audit` and `run` read a diff from a file argument or stdin (`-`). The provider
|
|
85
|
+
key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
|
|
86
|
+
`OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
|
|
87
|
+
raise an authentication error; `codejury dry-run` needs no key.
|
|
88
|
+
|
|
89
|
+
A task YAML can pin the provider, model, and base URL (the key stays in the
|
|
90
|
+
environment), so `codejury run` works through a proxy too:
|
|
91
|
+
|
|
92
|
+
```yaml
|
|
93
|
+
# mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
|
|
94
|
+
name: proxy_scan
|
|
95
|
+
orchestrator: debate
|
|
96
|
+
provider: litellm
|
|
97
|
+
model: your-alias
|
|
98
|
+
api_base: https://litellm.example.com # key from CODEJURY_API_KEY
|
|
99
|
+
```
|
|
74
100
|
|
|
75
101
|
## Development
|
|
76
102
|
|
|
@@ -29,15 +29,19 @@ from codejury.sources.base import Source
|
|
|
29
29
|
STRATEGIES = ("single", "pipeline", "debate", "reflexion")
|
|
30
30
|
PROVIDERS = ("anthropic", "openai", "litellm")
|
|
31
31
|
DEFAULT_MODEL = os.environ.get("CODEJURY_MODEL", "claude-sonnet-4-6")
|
|
32
|
+
DEFAULT_API_BASE = os.environ.get("CODEJURY_API_BASE")
|
|
33
|
+
DEFAULT_API_KEY = os.environ.get("CODEJURY_API_KEY")
|
|
32
34
|
|
|
33
35
|
|
|
34
|
-
def make_provider(
|
|
36
|
+
def make_provider(
|
|
37
|
+
name: str, *, api_key: str | None = None, api_base: str | None = None, retries: int = 0
|
|
38
|
+
) -> Provider:
|
|
35
39
|
if name == "openai":
|
|
36
|
-
provider: Provider = OpenAIProvider()
|
|
40
|
+
provider: Provider = OpenAIProvider(api_key=api_key, base_url=api_base)
|
|
37
41
|
elif name == "litellm":
|
|
38
|
-
provider = LiteLLMProvider()
|
|
42
|
+
provider = LiteLLMProvider(api_key=api_key, api_base=api_base)
|
|
39
43
|
else:
|
|
40
|
-
provider = AnthropicProvider()
|
|
44
|
+
provider = AnthropicProvider(api_key=api_key, base_url=api_base)
|
|
41
45
|
if retries > 0:
|
|
42
46
|
provider = RetryProvider(provider, max_attempts=retries + 1)
|
|
43
47
|
return provider
|
|
@@ -13,6 +13,8 @@ import sys
|
|
|
13
13
|
|
|
14
14
|
from codejury.agents.mock import MockAgent
|
|
15
15
|
from codejury.assembly import (
|
|
16
|
+
DEFAULT_API_BASE,
|
|
17
|
+
DEFAULT_API_KEY,
|
|
16
18
|
DEFAULT_MODEL,
|
|
17
19
|
PROVIDERS,
|
|
18
20
|
STRATEGIES,
|
|
@@ -135,6 +137,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
135
137
|
audit_p.add_argument("--model", default=DEFAULT_MODEL)
|
|
136
138
|
audit_p.add_argument("--max-tokens", type=int, default=2048)
|
|
137
139
|
audit_p.add_argument("--retries", type=int, default=0, help="provider retry attempts on failure")
|
|
140
|
+
audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
141
|
+
audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
138
142
|
|
|
139
143
|
run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
|
|
140
144
|
run_p.add_argument("task", help="task name")
|
|
@@ -148,6 +152,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
148
152
|
eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
|
|
149
153
|
eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
|
|
150
154
|
eval_p.add_argument("--model", default=DEFAULT_MODEL)
|
|
155
|
+
eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
156
|
+
eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
151
157
|
|
|
152
158
|
args = parser.parse_args(argv)
|
|
153
159
|
|
|
@@ -155,7 +161,9 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
155
161
|
results = audit(
|
|
156
162
|
_read_diff(args.diff),
|
|
157
163
|
load_capabilities(args.capabilities),
|
|
158
|
-
provider=make_provider(
|
|
164
|
+
provider=make_provider(
|
|
165
|
+
args.provider, api_key=args.api_key, api_base=args.api_base, retries=args.retries
|
|
166
|
+
),
|
|
159
167
|
model=args.model,
|
|
160
168
|
max_tokens=args.max_tokens,
|
|
161
169
|
strategy=args.orchestrator,
|
|
@@ -175,12 +183,18 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
175
183
|
return 0
|
|
176
184
|
|
|
177
185
|
if args.command == "eval":
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
186
|
+
try:
|
|
187
|
+
metrics = evaluate(
|
|
188
|
+
load_cases(args.golden),
|
|
189
|
+
load_capabilities(args.capabilities),
|
|
190
|
+
provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
|
|
191
|
+
model=args.model,
|
|
192
|
+
)
|
|
193
|
+
except Exception as exc:
|
|
194
|
+
# e.g. a missing API key surfaces as a provider auth error -- report it
|
|
195
|
+
# as one line, not a traceback (audit gets this via the orchestrator).
|
|
196
|
+
print(f"eval failed: {exc}")
|
|
197
|
+
return 1
|
|
184
198
|
print(_render_metrics(metrics))
|
|
185
199
|
return 0
|
|
186
200
|
|
|
@@ -17,8 +17,11 @@ from codejury.providers.base import CompletionResult, Message, Provider
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class AnthropicProvider(Provider):
|
|
20
|
-
def __init__(
|
|
20
|
+
def __init__(
|
|
21
|
+
self, *, api_key: str | None = None, base_url: str | None = None, client: Any | None = None
|
|
22
|
+
) -> None:
|
|
21
23
|
self._api_key = api_key
|
|
24
|
+
self._base_url = base_url
|
|
22
25
|
self._client = client
|
|
23
26
|
|
|
24
27
|
def _get_client(self) -> Any:
|
|
@@ -29,7 +32,12 @@ class AnthropicProvider(Provider):
|
|
|
29
32
|
raise RuntimeError(
|
|
30
33
|
"anthropic SDK not installed; run: pip install 'codejury[anthropic]'"
|
|
31
34
|
) from exc
|
|
32
|
-
|
|
35
|
+
kwargs: dict[str, Any] = {}
|
|
36
|
+
if self._api_key:
|
|
37
|
+
kwargs["api_key"] = self._api_key
|
|
38
|
+
if self._base_url:
|
|
39
|
+
kwargs["base_url"] = self._base_url
|
|
40
|
+
self._client = anthropic.Anthropic(**kwargs)
|
|
33
41
|
return self._client
|
|
34
42
|
|
|
35
43
|
def complete(
|
|
@@ -9,7 +9,14 @@ from __future__ import annotations
|
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
-
from codejury.assembly import
|
|
12
|
+
from codejury.assembly import (
|
|
13
|
+
DEFAULT_API_BASE,
|
|
14
|
+
DEFAULT_API_KEY,
|
|
15
|
+
DEFAULT_MODEL,
|
|
16
|
+
build_orchestration,
|
|
17
|
+
make_provider,
|
|
18
|
+
run_over_source,
|
|
19
|
+
)
|
|
13
20
|
from codejury.domain.capability import Capability
|
|
14
21
|
from codejury.domain.result import AnalysisResult
|
|
15
22
|
from codejury.sources.base import Source
|
|
@@ -24,6 +31,7 @@ class Task:
|
|
|
24
31
|
capabilities: tuple[str, ...] | None = None # capability ids to check; None = all
|
|
25
32
|
max_tokens: int = 2048
|
|
26
33
|
retries: int = 0 # provider retry attempts on transient failure
|
|
34
|
+
api_base: str | None = None # provider base URL (e.g. a LiteLLM proxy); the key stays in the env
|
|
27
35
|
|
|
28
36
|
@classmethod
|
|
29
37
|
def from_dict(cls, data: dict[str, Any]) -> Task:
|
|
@@ -36,6 +44,7 @@ class Task:
|
|
|
36
44
|
capabilities=tuple(caps) if caps is not None else None,
|
|
37
45
|
max_tokens=int(data.get("max_tokens", 2048)),
|
|
38
46
|
retries=int(data.get("retries", 0)),
|
|
47
|
+
api_base=data.get("api_base"),
|
|
39
48
|
)
|
|
40
49
|
|
|
41
50
|
def select(self, capabilities: list[Capability]) -> list[Capability]:
|
|
@@ -48,7 +57,13 @@ class Task:
|
|
|
48
57
|
def run_task(
|
|
49
58
|
task: Task, source: Source, capabilities: list[Capability]
|
|
50
59
|
) -> list[tuple[str, AnalysisResult]]:
|
|
51
|
-
|
|
60
|
+
# api_base may come from the task (non-secret URL); the key only from the env.
|
|
61
|
+
provider = make_provider(
|
|
62
|
+
task.provider,
|
|
63
|
+
api_key=DEFAULT_API_KEY,
|
|
64
|
+
api_base=task.api_base or DEFAULT_API_BASE,
|
|
65
|
+
retries=task.retries,
|
|
66
|
+
)
|
|
52
67
|
agents, orchestrator = build_orchestration(
|
|
53
68
|
task.orchestrator, provider=provider, model=task.model, max_tokens=task.max_tokens
|
|
54
69
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: 4234288
|
|
6
6
|
License-Expression: MIT
|
|
@@ -84,6 +84,12 @@ pip install 'codejury[anthropic]' # add the provider you'll use (anthropic /
|
|
|
84
84
|
|
|
85
85
|
## Usage
|
|
86
86
|
|
|
87
|
+
A real audit calls a model, so set the provider's key first (see `.env.example`):
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY for --provider openai
|
|
91
|
+
```
|
|
92
|
+
|
|
87
93
|
```bash
|
|
88
94
|
# Audit a unified diff against the capability library
|
|
89
95
|
git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
|
|
@@ -91,15 +97,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
|
|
|
91
97
|
# Run a named task preset (tasks/*.yaml)
|
|
92
98
|
git diff | codejury run audit_diff_debate -
|
|
93
99
|
|
|
94
|
-
# Score detection quality against the golden cases
|
|
100
|
+
# Score detection quality against the golden cases
|
|
95
101
|
codejury eval --provider anthropic
|
|
96
102
|
|
|
103
|
+
# Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
|
|
104
|
+
# CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
|
|
105
|
+
# codejury audit --provider litellm -
|
|
106
|
+
git diff | codejury audit --provider litellm \
|
|
107
|
+
--api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
|
|
108
|
+
|
|
97
109
|
# No API key needed: prove the pipeline composes with mock layers
|
|
98
110
|
codejury dry-run
|
|
99
111
|
```
|
|
100
112
|
|
|
101
|
-
`audit` and `run` read a diff from a file argument or stdin (`-`).
|
|
102
|
-
|
|
113
|
+
`audit` and `run` read a diff from a file argument or stdin (`-`). The provider
|
|
114
|
+
key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
|
|
115
|
+
`OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
|
|
116
|
+
raise an authentication error; `codejury dry-run` needs no key.
|
|
117
|
+
|
|
118
|
+
A task YAML can pin the provider, model, and base URL (the key stays in the
|
|
119
|
+
environment), so `codejury run` works through a proxy too:
|
|
120
|
+
|
|
121
|
+
```yaml
|
|
122
|
+
# mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
|
|
123
|
+
name: proxy_scan
|
|
124
|
+
orchestrator: debate
|
|
125
|
+
provider: litellm
|
|
126
|
+
model: your-alias
|
|
127
|
+
api_base: https://litellm.example.com # key from CODEJURY_API_KEY
|
|
128
|
+
```
|
|
103
129
|
|
|
104
130
|
## Development
|
|
105
131
|
|
|
@@ -31,9 +31,25 @@ codejury/data/capabilities/output_encoding.yaml
|
|
|
31
31
|
codejury/data/capabilities/secrets.yaml
|
|
32
32
|
codejury/data/capabilities/session.yaml
|
|
33
33
|
codejury/data/golden/authn_bcrypt_password.yaml
|
|
34
|
+
codejury/data/golden/authn_jwt_noverify_vuln.yaml
|
|
35
|
+
codejury/data/golden/authn_jwt_verified_safe.yaml
|
|
36
|
+
codejury/data/golden/authn_sha256_checksum_safe.yaml
|
|
34
37
|
codejury/data/golden/authn_sha256_password.yaml
|
|
38
|
+
codejury/data/golden/authz_idor_vuln.yaml
|
|
39
|
+
codejury/data/golden/authz_owner_safe.yaml
|
|
40
|
+
codejury/data/golden/cmdi_ossystem_vuln.yaml
|
|
41
|
+
codejury/data/golden/cmdi_subprocess_safe.yaml
|
|
42
|
+
codejury/data/golden/crypto_aesgcm_safe.yaml
|
|
43
|
+
codejury/data/golden/crypto_ecb_vuln.yaml
|
|
44
|
+
codejury/data/golden/path_contained_safe.yaml
|
|
45
|
+
codejury/data/golden/path_traversal_vuln.yaml
|
|
46
|
+
codejury/data/golden/secrets_env_safe.yaml
|
|
47
|
+
codejury/data/golden/secrets_hardcoded_vuln.yaml
|
|
48
|
+
codejury/data/golden/sqli_format_vuln.yaml
|
|
35
49
|
codejury/data/golden/sqli_fstring_query.yaml
|
|
36
50
|
codejury/data/golden/sqli_parameterized_query.yaml
|
|
51
|
+
codejury/data/golden/xss_innerhtml_constant_safe.yaml
|
|
52
|
+
codejury/data/golden/xss_innerhtml_vuln.yaml
|
|
37
53
|
codejury/data/tasks/audit_diff_debate.yaml
|
|
38
54
|
codejury/data/tasks/quick_scan_single.yaml
|
|
39
55
|
codejury/domain/__init__.py
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
+
from types import SimpleNamespace
|
|
2
|
+
|
|
1
3
|
import pytest
|
|
2
4
|
|
|
3
|
-
from codejury.assembly import build_orchestration, run_over_source
|
|
5
|
+
from codejury.assembly import build_orchestration, make_provider, run_over_source
|
|
4
6
|
from codejury.domain.capability import Capability
|
|
5
7
|
from codejury.orchestrators.debate import DebateOrchestrator
|
|
6
8
|
from codejury.orchestrators.pipeline import PipelineOrchestrator
|
|
7
9
|
from codejury.orchestrators.reflexion import ReflexionOrchestrator
|
|
8
10
|
from codejury.orchestrators.single import SingleOrchestrator
|
|
11
|
+
from codejury.providers.base import Message
|
|
12
|
+
from codejury.providers.litellm import LiteLLMProvider
|
|
9
13
|
from codejury.providers.mock import MockProvider
|
|
10
14
|
from codejury.sources.mock import MockSource
|
|
11
15
|
|
|
@@ -25,6 +29,19 @@ def test_build_orchestration_maps_strategy(strategy, orch_cls, roles):
|
|
|
25
29
|
assert set(agents) == roles
|
|
26
30
|
|
|
27
31
|
|
|
32
|
+
def test_make_provider_forwards_api_base_and_key():
|
|
33
|
+
provider = make_provider("litellm", api_base="https://proxy.example", api_key="sk-test")
|
|
34
|
+
assert isinstance(provider, LiteLLMProvider)
|
|
35
|
+
|
|
36
|
+
captured = {}
|
|
37
|
+
provider._completion = lambda **kw: captured.update(kw) or SimpleNamespace(
|
|
38
|
+
choices=[SimpleNamespace(message=SimpleNamespace(content="ok"))]
|
|
39
|
+
)
|
|
40
|
+
provider.complete(system="s", messages=[Message(role="user", content="x")], model="m", max_tokens=8)
|
|
41
|
+
assert captured["api_base"] == "https://proxy.example"
|
|
42
|
+
assert captured["api_key"] == "sk-test"
|
|
43
|
+
|
|
44
|
+
|
|
28
45
|
def test_run_over_source_runs_each_artifact():
|
|
29
46
|
provider = MockProvider(default='{"verdicts": [{"sub_capability": "x", "status": "SECURE"}]}')
|
|
30
47
|
agents, orchestrator = build_orchestration("single", provider=provider, model="m", max_tokens=8)
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
2
|
|
|
3
|
-
from codejury
|
|
3
|
+
from codejury import cli
|
|
4
|
+
from codejury.domain.capability import load_capabilities
|
|
4
5
|
from codejury.evaluation import Metrics, evaluate, load_cases
|
|
6
|
+
from codejury.providers.base import Provider
|
|
5
7
|
from codejury.providers.mock import MockProvider
|
|
6
8
|
|
|
7
9
|
from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR
|
|
@@ -36,20 +38,32 @@ def test_golden_cases_load():
|
|
|
36
38
|
assert vuln.capability == "authn" and vuln.vulnerable is True
|
|
37
39
|
|
|
38
40
|
|
|
39
|
-
def _caps():
|
|
40
|
-
return [load_capability(CAPABILITIES_DIR / "authentication.yaml"),
|
|
41
|
-
load_capability(CAPABILITIES_DIR / "input_validation.yaml")]
|
|
42
|
-
|
|
43
|
-
|
|
44
41
|
def test_evaluate_always_vulnerable_provider():
|
|
45
|
-
#
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
42
|
+
# A provider that always flags VULNERABLE: every vulnerable case is a true
|
|
43
|
+
# positive (recall 1.0), every safe case a false positive.
|
|
44
|
+
cases = load_cases(GOLDEN_DIR)
|
|
45
|
+
n_vuln = sum(c.vulnerable for c in cases)
|
|
46
|
+
n_safe = len(cases) - n_vuln
|
|
47
|
+
m = evaluate(cases, load_capabilities(CAPABILITIES_DIR), provider=MockProvider(default=_VULN), model="m")
|
|
48
|
+
assert m.tp == n_vuln and m.fp == n_safe and m.fn == 0 and m.tn == 0
|
|
49
|
+
assert m.recall == 1.0
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
def test_evaluate_always_secure_provider():
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
cases = load_cases(GOLDEN_DIR)
|
|
54
|
+
n_vuln = sum(c.vulnerable for c in cases)
|
|
55
|
+
n_safe = len(cases) - n_vuln
|
|
56
|
+
m = evaluate(cases, load_capabilities(CAPABILITIES_DIR), provider=MockProvider(default=_SECURE), model="m")
|
|
57
|
+
assert m.tp == 0 and m.fp == 0 and m.fn == n_vuln and m.tn == n_safe
|
|
55
58
|
assert m.recall == 0.0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_eval_cli_reports_provider_error_without_traceback(monkeypatch, capsys):
|
|
62
|
+
class _Boom(Provider):
|
|
63
|
+
def complete(self, **kwargs):
|
|
64
|
+
raise RuntimeError("Could not resolve authentication method")
|
|
65
|
+
|
|
66
|
+
# The eval command calls make_provider(name, api_key=..., api_base=...), so the stub
# must accept those keywords. Otherwise the call raises TypeError, the CLI's broad
# `except Exception` reports that instead, and _Boom.complete is never exercised.
monkeypatch.setattr("codejury.cli.make_provider", lambda name, **kwargs: _Boom())
|
|
67
|
+
rc = cli.main(["eval"])
|
|
68
|
+
assert rc == 1
|
|
69
|
+
assert "eval failed" in capsys.readouterr().out
|
|
@@ -16,6 +16,31 @@ def test_from_dict_parses_and_defaults():
|
|
|
16
16
|
assert task.capabilities == ("authn", "crypto")
|
|
17
17
|
assert task.provider == "anthropic" # default
|
|
18
18
|
assert task.max_tokens == 2048 # default
|
|
19
|
+
assert task.api_base is None # default
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_from_dict_reads_api_base():
|
|
23
|
+
task = Task.from_dict({"name": "t", "provider": "litellm", "api_base": "https://proxy.example"})
|
|
24
|
+
assert task.api_base == "https://proxy.example"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_run_task_forwards_proxy_config_with_key_from_env(monkeypatch):
|
|
28
|
+
captured = {}
|
|
29
|
+
|
|
30
|
+
def fake_make_provider(name, **kwargs):
|
|
31
|
+
captured["name"] = name
|
|
32
|
+
captured.update(kwargs)
|
|
33
|
+
return MockProvider(default='{"verdicts": []}')
|
|
34
|
+
|
|
35
|
+
monkeypatch.setattr("codejury.tasks.base.make_provider", fake_make_provider)
|
|
36
|
+
monkeypatch.setattr("codejury.tasks.base.DEFAULT_API_KEY", "sk-from-env")
|
|
37
|
+
|
|
38
|
+
task = Task(name="t", provider="litellm", api_base="https://proxy.example")
|
|
39
|
+
run_task(task, MockSource(), [Capability(id="authn", name="A")])
|
|
40
|
+
|
|
41
|
+
assert captured["name"] == "litellm"
|
|
42
|
+
assert captured["api_base"] == "https://proxy.example" # from the task
|
|
43
|
+
assert captured["api_key"] == "sk-from-env" # from the environment, not the task
|
|
19
44
|
|
|
20
45
|
|
|
21
46
|
def test_select_filters_by_id_and_none_means_all():
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|