redcodegen 0.0.3__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of redcodegen might be problematic. Click here for more details.
- {redcodegen-0.0.3 → redcodegen-0.0.4}/PKG-INFO +1 -1
- {redcodegen-0.0.3 → redcodegen-0.0.4}/pyproject.toml +1 -1
- redcodegen-0.0.4/redcodegen/#main.py# +263 -0
- redcodegen-0.0.4/redcodegen/#seeds.py# +17 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/scenarios.py +1 -1
- {redcodegen-0.0.3 → redcodegen-0.0.4}/README.md +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/__init__.py +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/constants.py +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/data/__init__.py +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/data/scenario_dow.jsonl +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/generator.py +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/main.py +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/seeds.py +0 -0
- {redcodegen-0.0.3 → redcodegen-0.0.4}/redcodegen/validator.py +0 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"""
|
|
2
|
+
main.py
|
|
3
|
+
Main script for generating and evaluating vulnerable code samples
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
import jsonlines
|
|
8
|
+
import logging
|
|
9
|
+
import dspy
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Set, Dict, Any
|
|
13
|
+
from cwe2.database import Database
|
|
14
|
+
|
|
15
|
+
from redcodegen.constants import CWE_TOP_25, create_lm
|
|
16
|
+
|
|
17
|
+
from rich.logging import RichHandler
|
|
18
|
+
|
|
19
|
+
# Configure the root logger once at import time: INFO and above, message text
# only, emitted through a RichHandler with rich tracebacks switched on.
logging.basicConfig(
    handlers=[RichHandler(rich_tracebacks=True)],
    format="%(message)s",
    level=logging.INFO,
)
# Module-scoped logger used by every function in this file.
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def load_completed_cwes(output_path: Path) -> Set[int]:
    """Collect the CWE IDs recorded by an earlier run.

    Args:
        output_path: Location of the JSONL results file.

    Returns:
        The ``cwe_id`` value of every record that carries one. The set is
        empty when the file does not exist. If reading fails, a warning is
        logged and whatever was gathered before the failure is returned.
    """
    seen = set()

    if output_path.exists():
        try:
            with jsonlines.open(output_path) as rows:
                for row in rows:
                    # Records without a CWE id cannot be matched; ignore them.
                    if 'cwe_id' not in row:
                        continue
                    seen.add(row['cwe_id'])
            logger.info(f"Found {len(seen)} already-completed CWEs in {output_path}")
        except Exception as e:
            # Best effort: an unreadable file must not abort the run.
            logger.warning(f"Could not read existing output file: {e}")

    return seen
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_model_config() -> Dict[str, Any]:
    """Describe the language model currently configured in DSPy.

    Returns:
        A dict with a single ``"model"`` key holding the ``model`` attribute
        of ``dspy.settings.lm``, or ``'unknown'`` when that attribute is
        absent.
    """
    active_lm = dspy.settings.lm
    model_name = getattr(active_lm, 'model', 'unknown')
    return {"model": model_name}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def build_record(
    cwe_id: int,
    cwe_name: str,
    cwe_description: str,
    scenarios: List[str],
    codes: List[str],
    evaluations: List[Any],
    errors: List[str],
    min_scenarios: int
) -> Dict[str, Any]:
    """Build a record for JSONL output.

    Args:
        cwe_id: CWE identifier
        cwe_name: CWE name
        cwe_description: CWE description
        scenarios: List of scenario descriptions
        codes: List of generated code samples
        evaluations: List of evaluation results (can contain None for failures)
        errors: List of error messages (None for successful evaluations)
        min_scenarios: Minimum scenarios parameter used

    Returns:
        Dict representing the complete record for this CWE. Each entry of
        ``samples`` pairs one scenario with its code, evaluation and error
        message. The four input lists are combined with ``zip``, so the
        number of samples equals the length of the shortest list.
    """
    # Local import: the module header only brings in ``datetime`` itself.
    from datetime import timezone

    # Keep the error next to its sample; previously it was zipped in and then
    # dropped, so a failed evaluation could not be told apart from a missing one.
    samples = [
        {
            "scenario": scenario,
            "code": code,
            "evaluation": evaluation,
            "error": error,
        }
        for scenario, code, evaluation, error in zip(scenarios, codes, evaluations, errors)
    ]

    # ``datetime.utcnow()`` is deprecated; take an aware UTC time and strip the
    # tzinfo so the serialized form keeps the existing "<naive-iso>Z" layout.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "cwe_id": cwe_id,
        "cwe_name": cwe_name,
        "cwe_description": cwe_description,
        "timestamp": now_utc.isoformat() + 'Z',
        "model_config": get_model_config(),
        "min_scenarios": min_scenarios,
        "samples": samples
    }
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def append_to_jsonl(record: Dict[str, Any], output_path: Path):
    """Write one record as a new line at the end of the JSONL file.

    Args:
        record: Record to append; its ``cwe_id`` is used in the log message
        output_path: Path to output file, opened in append mode
    """
    with jsonlines.open(output_path, mode='a') as sink:
        sink.write(record)

    saved_id = record['cwe_id']
    logger.info(f"Saved CWE-{saved_id} to {output_path}")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@click.command()
@click.option(
    '--cwes', '-c',
    multiple=True,
    type=int,
    help='CWE IDs to process (can specify multiple times, e.g., -c 89 -c 79)'
)
@click.option(
    '--use-top-25',
    is_flag=True,
    help='Process all CWE Top 25'
)
@click.option(
    '--min-samples', '-n',
    default=3,
    type=int,
    help='Minimum samples per CWE (default: 3)'
)
@click.option(
    '--output', '-o',
    default='results.jsonl',
    type=click.Path(),
    help='Output JSONL file (default: results.jsonl)'
)
@click.option(
    '--model', '-m',
    default='openai/gpt-4o-mini',
    help='Model identifier (default: openai/gpt-4o-mini)'
)
@click.option(
    '--api-key',
    default=None,
    help='API key (defaults to OPENAI_API_KEY env var)'
)
def main(cwes, use_top_25, min_samples, output, model, api_key):
    """Generate and evaluate vulnerable code samples for specified CWEs.

    Examples:
        python -m redcodegen -c 89 -c 79 # manually specify cwe
        python -m redcodegen -n 5 # specify number of rollouts
        python -m redcodegen --use-top-25 # run CWE top 25
        python -m redcodegen --use-top-25 -o results.jsonl # resume existing run
        python -m redcodegen --use-top-25 --model openai/gpt-4o # switch model
    """
    # Parameters (filled in by the click options above):
    #   cwes        - tuple of CWE IDs given with -c (may be empty)
    #   use_top_25  - when set, CWE_TOP_25 is used and `cwes` is ignored
    #   min_samples - forwarded as `min_scenarios` to generation
    #   output      - JSONL file that is appended to and used for resuming
    #   model       - model identifier handed to create_lm
    #   api_key     - optional API key handed to create_lm
    # Raises click.UsageError when neither --cwes nor --use-top-25 is given.

    # Configure DSPy with specified model
    lm = create_lm(model_name=model, api_key=api_key)
    dspy.configure(lm=lm)
    logger.info(f"Configured model: {model}")

    # Import generator, validator and scenarios after configuring dspy
    from redcodegen.generator import run_cwe
    from redcodegen.validator import evaluate
    from redcodegen.scenarios import generate

    output_path = Path(output)

    # Determine which CWEs to process
    if use_top_25:
        cwes_to_process = CWE_TOP_25
        logger.info(f"Processing CWE Top 25 ({len(cwes_to_process)} CWEs)")
    elif cwes:
        cwes_to_process = list(cwes)
        logger.info(f"Processing {len(cwes_to_process)} specified CWEs")
    else:
        logger.error("Must specify either --cwes or --use-top-25")
        raise click.UsageError("Must specify either --cwes or --use-top-25")

    # Load already-completed CWEs for idempotency
    requested_count = len(cwes_to_process)
    completed_cwes = load_completed_cwes(output_path)
    cwes_to_process = [cwe for cwe in cwes_to_process if cwe not in completed_cwes]

    if not cwes_to_process:
        logger.info("All CWEs already completed!")
        return

    # Report how many of the *requested* CWEs were skipped; the output file may
    # also hold CWEs that were never part of this request.
    skipped_count = requested_count - len(cwes_to_process)
    logger.info(f"Processing {len(cwes_to_process)} CWEs (skipped {skipped_count} already completed)")

    # Initialize CWE database
    db = Database()

    # Process each CWE
    for idx, cwe_id in enumerate(cwes_to_process, 1):
        logger.info(f"[{idx}/{len(cwes_to_process)}] Processing CWE-{cwe_id}...")

        try:
            # Get CWE metadata
            entry = db.get(cwe_id)
            cwe_name = entry.name
            cwe_description = entry.extended_description or entry.description

            # Generate code samples
            logger.info(f" Generating {min_samples} code samples...")
            codes = run_cwe(cwe_id, min_scenarios=min_samples)
            logger.info(f" Generated {len(codes)} code samples")

            # Get scenarios (need to call generate again to get scenarios)
            # NOTE(review): this is a second generation call; whether its
            # scenarios line up with `codes` depends on generate() returning the
            # same result as the call made inside run_cwe - confirm.
            scenario_data = generate(cwe_id, min_scenarios=min_samples)
            scenarios = scenario_data["scenarios"][:len(codes)]  # Match code count

            # Evaluate each code sample; the two lists stay index-aligned with
            # `codes` (None marks "no result" / "no error" respectively).
            evaluations = []
            errors = []

            for i, code in enumerate(codes, 1):
                logger.info(f" Evaluating sample {i}/{len(codes)}...")
                try:
                    evaluation = evaluate(code)
                    evaluations.append(evaluation)
                    errors.append(None)
                    logger.info(f" Found {len(evaluation)} vulnerabilities")
                except Exception as e:
                    logger.warning(f" Evaluation failed: {e}")
                    evaluations.append(None)
                    errors.append(str(e))

            # Build and save record
            record = build_record(
                cwe_id=cwe_id,
                cwe_name=cwe_name,
                cwe_description=cwe_description,
                scenarios=scenarios,
                codes=codes,
                evaluations=evaluations,
                errors=errors,
                min_scenarios=min_samples
            )

            append_to_jsonl(record, output_path)
            logger.info(f"✓ Completed CWE-{cwe_id}")

        except Exception as e:
            # One failing CWE must not stop the rest of the batch.
            logger.error(f"✗ Failed to process CWE-{cwe_id}: {e}")
            continue

    logger.info(f"Completed! Results saved to {output_path}")
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# Run the CLI only when this file is executed directly, not when imported.
if '__main__' == __name__:
    main()
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import dspy
|
|
2
|
+
import jsonlines
|
|
3
|
+
from cwe2.database import Database
|
|
4
|
+
|
|
5
|
+
from redcodegen.constants import LM
|
|
6
|
+
|
|
7
|
+
# Scratch script: point DSPy at the package-level LM, then look up CWE-502 in
# the CWE database and print its name and extended description.
dspy.configure(lm=LM)

db = Database()
entry = db.get(502)

for field_name in ("name", "extended_description"):
    print(getattr(entry, field_name))
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
@@ -48,7 +48,7 @@ def generate(cwe_id, min_scenarios=3):
|
|
|
48
48
|
output_scenarios = []
|
|
49
49
|
while len(output_scenarios) < min_scenarios:
|
|
50
50
|
scenarios = extract_scenarios(name=entry.name, description=entry.extended_description,
|
|
51
|
-
config={"
|
|
51
|
+
config={"rollout_id": len(output_scenarios)}).scenarios
|
|
52
52
|
output_scenarios.extend(scenarios)
|
|
53
53
|
scenarios = [strip_vulnerability(scenario=i).coding_task for i in output_scenarios]
|
|
54
54
|
suggestions = [suggest_libraries(task=i, suggested_libraries=CODEQL_LIBRARIES) for i in scenarios]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|