redcodegen 0.0.4__tar.gz → 0.0.5__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of redcodegen might be problematic. Click here for more details.
- {redcodegen-0.0.4 → redcodegen-0.0.5}/PKG-INFO +1 -1
- {redcodegen-0.0.4 → redcodegen-0.0.5}/pyproject.toml +1 -1
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/constants.py +19 -7
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/main.py +7 -2
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/scenarios.py +3 -3
- redcodegen-0.0.4/redcodegen/#main.py# +0 -263
- redcodegen-0.0.4/redcodegen/#seeds.py# +0 -17
- {redcodegen-0.0.4 → redcodegen-0.0.5}/README.md +0 -0
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/__init__.py +0 -0
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/data/__init__.py +0 -0
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/data/scenario_dow.jsonl +0 -0
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/generator.py +0 -0
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/seeds.py +0 -0
- {redcodegen-0.0.4 → redcodegen-0.0.5}/redcodegen/validator.py +0 -0
|
@@ -11,7 +11,7 @@ CWE_TOP_25 = [
|
|
|
11
11
|
798, 190, 400, 306
|
|
12
12
|
]
|
|
13
13
|
|
|
14
|
-
def create_lm(model_name="openai/gpt-4o-mini", temperature=0.8, api_key=None):
|
|
14
|
+
def create_lm(model_name="openai/gpt-4o-mini", temperature=0.8, api_key=None, api_base=None):
|
|
15
15
|
"""Create a DSPy language model instance.
|
|
16
16
|
|
|
17
17
|
Args:
|
|
@@ -23,12 +23,24 @@ def create_lm(model_name="openai/gpt-4o-mini", temperature=0.8, api_key=None):
|
|
|
23
23
|
"""
|
|
24
24
|
if api_key is None:
|
|
25
25
|
api_key = os.environ.get("OPENAI_API_KEY")
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
26
|
+
if api_base is None:
|
|
27
|
+
api_base = os.environ.get("OPENAI_API_BASE")
|
|
28
|
+
|
|
29
|
+
if api_base is None:
|
|
30
|
+
return dspy.LM(
|
|
31
|
+
model_name,
|
|
32
|
+
api_key=api_key,
|
|
33
|
+
temperature=temperature,
|
|
34
|
+
max_tokens=16000
|
|
35
|
+
)
|
|
36
|
+
else:
|
|
37
|
+
return dspy.LM(
|
|
38
|
+
model_name,
|
|
39
|
+
api_key=api_key,
|
|
40
|
+
api_base=api_base,
|
|
41
|
+
temperature=temperature,
|
|
42
|
+
max_tokens=16000
|
|
43
|
+
)
|
|
32
44
|
|
|
33
45
|
SCENARIO_EXAMPLES = [
|
|
34
46
|
dspy.Example(
|
|
@@ -155,13 +155,18 @@ def append_to_jsonl(record: Dict[str, Any], output_path: Path):
|
|
|
155
155
|
default=None,
|
|
156
156
|
help='API key (defaults to OPENAI_API_KEY env var)'
|
|
157
157
|
)
|
|
158
|
+
@click.option(
|
|
159
|
+
'--api-base',
|
|
160
|
+
default=None,
|
|
161
|
+
help='API base URL (defaults to OPENAI_API_BASE env var)'
|
|
162
|
+
)
|
|
158
163
|
@click.option(
|
|
159
164
|
'--temperature',
|
|
160
165
|
default=0.8,
|
|
161
166
|
type=float,
|
|
162
167
|
help='Temperature for code generation (default: 0.8)'
|
|
163
168
|
)
|
|
164
|
-
def main(cwes, use_top_25, min_samples, output, model, api_key, temperature):
|
|
169
|
+
def main(cwes, use_top_25, min_samples, output, model, api_key, api_base, temperature):
|
|
165
170
|
"""Generate and evaluate vulnerable code samples for specified CWEs.
|
|
166
171
|
|
|
167
172
|
Examples:
|
|
@@ -172,7 +177,7 @@ def main(cwes, use_top_25, min_samples, output, model, api_key, temperature):
|
|
|
172
177
|
python -m redcodegen --use-top-25 --model openai/gpt-4o # switch model
|
|
173
178
|
"""
|
|
174
179
|
# Configure DSPy with specified model
|
|
175
|
-
lm = create_lm(model_name=model, temperature=temperature, api_key=api_key)
|
|
180
|
+
lm = create_lm(model_name=model, temperature=temperature, api_key=api_key, api_base=api_base)
|
|
176
181
|
dspy.configure(lm=lm)
|
|
177
182
|
logger.info(f"Configured model: {model}")
|
|
178
183
|
|
|
@@ -29,8 +29,8 @@ class SuggestLibraries(dspy.Signature):
|
|
|
29
29
|
task: str = dspy.InputField()
|
|
30
30
|
suggested_libraries: List[str] = dspy.InputField()
|
|
31
31
|
|
|
32
|
-
chosen_library:
|
|
33
|
-
rephrased_task:
|
|
32
|
+
chosen_library: str = dspy.OutputField(desc="choose a library that would best help solve the task, or say None")
|
|
33
|
+
rephrased_task: str = dspy.OutputField(desc="rephrase the task in terms of the chosen library, or say None")
|
|
34
34
|
suggest_libraries = dspy.Predict(SuggestLibraries)
|
|
35
35
|
|
|
36
36
|
def generate(cwe_id, min_scenarios=3):
|
|
@@ -53,7 +53,7 @@ def generate(cwe_id, min_scenarios=3):
|
|
|
53
53
|
scenarios = [strip_vulnerability(scenario=i).coding_task for i in output_scenarios]
|
|
54
54
|
suggestions = [suggest_libraries(task=i, suggested_libraries=CODEQL_LIBRARIES) for i in scenarios]
|
|
55
55
|
results = [
|
|
56
|
-
i.rephrased_task if i.rephrased_task is not None else j
|
|
56
|
+
i.rephrased_task if ((i.rephrased_task is not None) and (i.rephrased_task.lower().strip() != "none")) else j
|
|
57
57
|
for i,j in zip(suggestions, scenarios)
|
|
58
58
|
]
|
|
59
59
|
|
|
@@ -1,263 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
main.py
|
|
3
|
-
Main script for generating and evaluating vulnerable code samples
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import rich_click as click
|
|
7
|
-
import jsonlines
|
|
8
|
-
import logging
|
|
9
|
-
import dspy
|
|
10
|
-
from datetime import datetime
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import List, Set, Dict, Any
|
|
13
|
-
from cwe2.database import Database
|
|
14
|
-
|
|
15
|
-
from redcodegen.constants import CWE_TOP_25, create_lm
|
|
16
|
-
|
|
17
|
-
from rich.logging import RichHandler
|
|
18
|
-
|
|
19
|
-
# Setup logging
|
|
20
|
-
logging.basicConfig(
|
|
21
|
-
level=logging.INFO,
|
|
22
|
-
format="%(message)s",
|
|
23
|
-
handlers=[RichHandler(rich_tracebacks=True)]
|
|
24
|
-
)
|
|
25
|
-
logger = logging.getLogger(__name__)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def load_completed_cwes(output_path: Path) -> Set[int]:
|
|
29
|
-
"""Load CWE IDs that have already been processed.
|
|
30
|
-
|
|
31
|
-
Args:
|
|
32
|
-
output_path: Path to the output JSONL file
|
|
33
|
-
|
|
34
|
-
Returns:
|
|
35
|
-
Set of CWE IDs that are already in the output file
|
|
36
|
-
"""
|
|
37
|
-
completed = set()
|
|
38
|
-
|
|
39
|
-
if not output_path.exists():
|
|
40
|
-
return completed
|
|
41
|
-
|
|
42
|
-
try:
|
|
43
|
-
with jsonlines.open(output_path) as reader:
|
|
44
|
-
for record in reader:
|
|
45
|
-
if 'cwe_id' in record:
|
|
46
|
-
completed.add(record['cwe_id'])
|
|
47
|
-
logger.info(f"Found {len(completed)} already-completed CWEs in {output_path}")
|
|
48
|
-
except Exception as e:
|
|
49
|
-
logger.warning(f"Could not read existing output file: {e}")
|
|
50
|
-
|
|
51
|
-
return completed
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def get_model_config() -> Dict[str, Any]:
|
|
55
|
-
"""Extract model configuration from current DSPy settings.
|
|
56
|
-
|
|
57
|
-
Returns:
|
|
58
|
-
Dict with model configuration info
|
|
59
|
-
"""
|
|
60
|
-
lm = dspy.settings.lm
|
|
61
|
-
config = {
|
|
62
|
-
"model": getattr(lm, 'model', 'unknown'),
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
return config
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def build_record(
|
|
69
|
-
cwe_id: int,
|
|
70
|
-
cwe_name: str,
|
|
71
|
-
cwe_description: str,
|
|
72
|
-
scenarios: List[str],
|
|
73
|
-
codes: List[str],
|
|
74
|
-
evaluations: List[Any],
|
|
75
|
-
errors: List[str],
|
|
76
|
-
min_scenarios: int
|
|
77
|
-
) -> Dict[str, Any]:
|
|
78
|
-
"""Build a record for JSONL output.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
cwe_id: CWE identifier
|
|
82
|
-
cwe_name: CWE name
|
|
83
|
-
cwe_description: CWE description
|
|
84
|
-
scenarios: List of scenario descriptions
|
|
85
|
-
codes: List of generated code samples
|
|
86
|
-
evaluations: List of evaluation results (can contain None for failures)
|
|
87
|
-
errors: List of error messages (None for successful evaluations)
|
|
88
|
-
min_scenarios: Minimum scenarios parameter used
|
|
89
|
-
|
|
90
|
-
Returns:
|
|
91
|
-
Dict representing the complete record for this CWE
|
|
92
|
-
"""
|
|
93
|
-
samples = []
|
|
94
|
-
for scenario, code, evaluation, error in zip(scenarios, codes, evaluations, errors):
|
|
95
|
-
samples.append({
|
|
96
|
-
"scenario": scenario,
|
|
97
|
-
"code": code,
|
|
98
|
-
"evaluation": evaluation
|
|
99
|
-
})
|
|
100
|
-
|
|
101
|
-
return {
|
|
102
|
-
"cwe_id": cwe_id,
|
|
103
|
-
"cwe_name": cwe_name,
|
|
104
|
-
"cwe_description": cwe_description,
|
|
105
|
-
"timestamp": datetime.utcnow().isoformat() + 'Z',
|
|
106
|
-
"model_config": get_model_config(),
|
|
107
|
-
"min_scenarios": min_scenarios,
|
|
108
|
-
"samples": samples
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def append_to_jsonl(record: Dict[str, Any], output_path: Path):
|
|
113
|
-
"""Append a record to the JSONL file.
|
|
114
|
-
|
|
115
|
-
Args:
|
|
116
|
-
record: Record to append
|
|
117
|
-
output_path: Path to output file
|
|
118
|
-
"""
|
|
119
|
-
with jsonlines.open(output_path, mode='a') as writer:
|
|
120
|
-
writer.write(record)
|
|
121
|
-
logger.info(f"Saved CWE-{record['cwe_id']} to {output_path}")
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
@click.command()
|
|
125
|
-
@click.option(
|
|
126
|
-
'--cwes', '-c',
|
|
127
|
-
multiple=True,
|
|
128
|
-
type=int,
|
|
129
|
-
help='CWE IDs to process (can specify multiple times, e.g., -c 89 -c 79)'
|
|
130
|
-
)
|
|
131
|
-
@click.option(
|
|
132
|
-
'--use-top-25',
|
|
133
|
-
is_flag=True,
|
|
134
|
-
help='Process all CWE Top 25'
|
|
135
|
-
)
|
|
136
|
-
@click.option(
|
|
137
|
-
'--min-samples', '-n',
|
|
138
|
-
default=3,
|
|
139
|
-
type=int,
|
|
140
|
-
help='Minimum samples per CWE (default: 3)'
|
|
141
|
-
)
|
|
142
|
-
@click.option(
|
|
143
|
-
'--output', '-o',
|
|
144
|
-
default='results.jsonl',
|
|
145
|
-
type=click.Path(),
|
|
146
|
-
help='Output JSONL file (default: results.jsonl)'
|
|
147
|
-
)
|
|
148
|
-
@click.option(
|
|
149
|
-
'--model', '-m',
|
|
150
|
-
default='openai/gpt-4o-mini',
|
|
151
|
-
help='Model identifier (default: openai/gpt-4o-mini)'
|
|
152
|
-
)
|
|
153
|
-
@click.option(
|
|
154
|
-
'--api-key',
|
|
155
|
-
default=None,
|
|
156
|
-
help='API key (defaults to OPENAI_API_KEY env var)'
|
|
157
|
-
)
|
|
158
|
-
@
|
|
159
|
-
def main(cwes, use_top_25, min_samples, output, model, api_key):
|
|
160
|
-
"""Generate and evaluate vulnerable code samples for specified CWEs.
|
|
161
|
-
|
|
162
|
-
Examples:
|
|
163
|
-
python -m redcodegen -c 89 -c 79 # manually specify cwe
|
|
164
|
-
python -m redcodegen -n 5 # specify number of rollouts
|
|
165
|
-
python -m redcodegen --use-top-25 # run CWE top 25
|
|
166
|
-
python -m redcodegen --use-top-25 -o results.jsonl # resume existing run
|
|
167
|
-
python -m redcodegen --use-top-25 --model openai/gpt-4o # switch model
|
|
168
|
-
"""
|
|
169
|
-
# Configure DSPy with specified model
|
|
170
|
-
lm = create_lm(model_name=model, api_key=api_key)
|
|
171
|
-
dspy.configure(lm=lm)
|
|
172
|
-
logger.info(f"Configured model: {model}")
|
|
173
|
-
|
|
174
|
-
# Import generator and validator after configuring dspy
|
|
175
|
-
from redcodegen.generator import run_cwe
|
|
176
|
-
from redcodegen.validator import evaluate
|
|
177
|
-
|
|
178
|
-
output_path = Path(output)
|
|
179
|
-
|
|
180
|
-
# Determine which CWEs to process
|
|
181
|
-
if use_top_25:
|
|
182
|
-
cwes_to_process = CWE_TOP_25
|
|
183
|
-
logger.info(f"Processing CWE Top 25 ({len(cwes_to_process)} CWEs)")
|
|
184
|
-
elif cwes:
|
|
185
|
-
cwes_to_process = list(cwes)
|
|
186
|
-
logger.info(f"Processing {len(cwes_to_process)} specified CWEs")
|
|
187
|
-
else:
|
|
188
|
-
logger.error("Must specify either --cwes or --use-top-25")
|
|
189
|
-
raise click.UsageError("Must specify either --cwes or --use-top-25")
|
|
190
|
-
|
|
191
|
-
# Load already-completed CWEs for idempotency
|
|
192
|
-
completed_cwes = load_completed_cwes(output_path)
|
|
193
|
-
cwes_to_process = [cwe for cwe in cwes_to_process if cwe not in completed_cwes]
|
|
194
|
-
|
|
195
|
-
if not cwes_to_process:
|
|
196
|
-
logger.info("All CWEs already completed!")
|
|
197
|
-
return
|
|
198
|
-
|
|
199
|
-
logger.info(f"Processing {len(cwes_to_process)} CWEs (skipped {len(completed_cwes)} already completed)")
|
|
200
|
-
|
|
201
|
-
# Initialize CWE database
|
|
202
|
-
db = Database()
|
|
203
|
-
|
|
204
|
-
# Process each CWE
|
|
205
|
-
for idx, cwe_id in enumerate(cwes_to_process, 1):
|
|
206
|
-
logger.info(f"[{idx}/{len(cwes_to_process)}] Processing CWE-{cwe_id}...")
|
|
207
|
-
|
|
208
|
-
try:
|
|
209
|
-
# Get CWE metadata
|
|
210
|
-
entry = db.get(cwe_id)
|
|
211
|
-
cwe_name = entry.name
|
|
212
|
-
cwe_description = entry.extended_description or entry.description
|
|
213
|
-
|
|
214
|
-
# Generate code samples
|
|
215
|
-
logger.info(f" Generating {min_samples} code samples...")
|
|
216
|
-
codes = run_cwe(cwe_id, min_scenarios=min_samples)
|
|
217
|
-
logger.info(f" Generated {len(codes)} code samples")
|
|
218
|
-
|
|
219
|
-
# Get scenarios (need to call generate again to get scenarios)
|
|
220
|
-
from redcodegen.scenarios import generate
|
|
221
|
-
scenario_data = generate(cwe_id, min_scenarios=min_samples)
|
|
222
|
-
scenarios = scenario_data["scenarios"][:len(codes)] # Match code count
|
|
223
|
-
|
|
224
|
-
# Evaluate each code sample
|
|
225
|
-
evaluations = []
|
|
226
|
-
errors = []
|
|
227
|
-
|
|
228
|
-
for i, code in enumerate(codes, 1):
|
|
229
|
-
logger.info(f" Evaluating sample {i}/{len(codes)}...")
|
|
230
|
-
try:
|
|
231
|
-
evaluation = evaluate(code)
|
|
232
|
-
evaluations.append(evaluation)
|
|
233
|
-
errors.append(None)
|
|
234
|
-
logger.info(f" Found {len(evaluation)} vulnerabilities")
|
|
235
|
-
except Exception as e:
|
|
236
|
-
logger.warning(f" Evaluation failed: {e}")
|
|
237
|
-
evaluations.append(None)
|
|
238
|
-
errors.append(str(e))
|
|
239
|
-
|
|
240
|
-
# Build and save record
|
|
241
|
-
record = build_record(
|
|
242
|
-
cwe_id=cwe_id,
|
|
243
|
-
cwe_name=cwe_name,
|
|
244
|
-
cwe_description=cwe_description,
|
|
245
|
-
scenarios=scenarios,
|
|
246
|
-
codes=codes,
|
|
247
|
-
evaluations=evaluations,
|
|
248
|
-
errors=errors,
|
|
249
|
-
min_scenarios=min_samples
|
|
250
|
-
)
|
|
251
|
-
|
|
252
|
-
append_to_jsonl(record, output_path)
|
|
253
|
-
logger.info(f"✓ Completed CWE-{cwe_id}")
|
|
254
|
-
|
|
255
|
-
except Exception as e:
|
|
256
|
-
logger.error(f"✗ Failed to process CWE-{cwe_id}: {e}")
|
|
257
|
-
continue
|
|
258
|
-
|
|
259
|
-
logger.info(f"Completed! Results saved to {output_path}")
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
if __name__ == '__main__':
|
|
263
|
-
main()
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import dspy
|
|
2
|
-
import jsonlines
|
|
3
|
-
from cwe2.database import Database
|
|
4
|
-
|
|
5
|
-
from redcodegen.constants import LM
|
|
6
|
-
|
|
7
|
-
dspy.configure(lm=LM)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
db = Database()
|
|
11
|
-
entry = db.get(502)
|
|
12
|
-
|
|
13
|
-
print(entry.name)
|
|
14
|
-
print(entry.extended_description)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|